# TRIAL NOTEBOOK

## Import Libraries

In [None]:
import os
import time
import pandas as pd
import nltk
import numpy as np

## Download NLTK Resources

In [None]:
# !mkdir -p ~/nltk_data
# !git clone https://github.com/nltk/nltk_data.git
# !mv nltk_data/packages/* ~/nltk_data/
# !rm -rf nltk_data

# # Unzip punkt
# !WORK_DIR=$(pwd)
# !cd ~/nltk_data/tokenizers/
# !unzip punkt.zip
# !rm punkt.zip

# !cd $WORK_DIR

## Import helper functions

In [None]:
from utils import load_sessions, read_session, read_file
from operations import build_text

## Read File and Load Events

In [None]:
# Load the event log of one recorded session.  Later cells iterate over
# `events` and read event["eventName"], so each entry is expected to be a
# dict-like record.  Swap in the commented line to analyse another session.
events = read_file("7c1e066d0c9c4901921cab117a0b7e73.jsonl")
# events = read_file("0a8e182a20df447db4846b1223c619b5.jsonl")

## Build Text Buffer

In [None]:
# Collects one build_text() result per event, in event order.
# Re-run this cell before re-running the loop below, otherwise entries pile up.
text_buffer = []

In [None]:
# Feed every event to build_text() together with the results gathered so far
# and store whatever it returns as the next entry.
for current_event in events:
    text_buffer.append(build_text(text_buffer, current_event))

## Initialize DataFrame

In [None]:
# Start from an empty frame; the columns are attached in the cells below.
df = pd.DataFrame()

In [None]:
# One row per event: the build_text() result next to the raw event record.
df = df.assign(text_buffer=text_buffer, events=events)

In [None]:
# Preview the first rows (text_buffer and events columns).
df.head()

## Extract Sentences

In [None]:
# Split every text snapshot into sentences and record how many were found.

sentence_buffer = [
    nltk.tokenize.sent_tokenize(snapshot) for snapshot in df["text_buffer"]
]
num_sentences = [len(found) for found in sentence_buffer]

In [None]:
# Attach the per-row sentence lists and their lengths.
df = df.assign(sentences=sentence_buffer, num_sentences=num_sentences)

In [None]:
# Preview the first rows with the sentences / num_sentences columns added.
df.head()

## Extract Event Names

In [None]:
# Pull the "eventName" field out of every raw event record.
df["event_name"] = df["events"].map(lambda record: record["eventName"])

In [None]:
# Preview the first rows with the event_name column added.
df.head()

## Correct Number of Sentences in accordance with Event Name

In [None]:
# Order the rows by sentence count while keeping the original row order
# inside each count.  A stable sort produces the same row order as the former
# groupby(...).apply(lambda x: x) + sort_index() round trip, but
#   * does not depend on DataFrameGroupBy.apply handing the grouping column
#     to the applied function (deprecated since pandas 2.2), and
#   * does not add a "num_sentences" index level that shadows the column of
#     the same name.
# NOTE(review): if a session's sentence count ever decreases, this ordering is
# no longer chronological, while the correction loop further down walks the
# rows as if it were - confirm that this is intended.
df = df.sort_values("num_sentences", kind="stable")

In [None]:
# Preview the first rows after ordering by num_sentences.
df.head()

In [None]:
# Work on detached NumPy copies so the counts can be edited in place without
# touching df until they are written back.
num_sentences = df["num_sentences"].to_numpy(copy=True)
event_names = df["event_name"].to_numpy(copy=True)

In [None]:
# Re-attribute suggestion events to the sentence they end up producing.
#
# A "suggestion-get" marks where a request starts.  Once a suggestion has been
# selected, the next "text-insert" closes the span, and every row from the
# request up to that insert gets its sentence count bumped by one:
#   * same count at both ends  -> the insert row is bumped as well;
#   * count grew at the insert -> the insert row already carries the new
#                                 count and is left alone.
start_idx = 0
select_flag = False

for idx, event in enumerate(event_names):
    if event == "suggestion-get":
        start_idx = idx
    if event == "suggestion-select":
        select_flag = True
    if select_flag and event == "text-insert":
        if num_sentences[start_idx] == num_sentences[idx]:
            end_idx = idx + 1
        elif num_sentences[start_idx] < num_sentences[idx]:
            end_idx = idx
        else:
            # The count is lower at the insert than at the request.  The
            # previous code fell through here with end_idx either unbound
            # (NameError on first use) or left over from an earlier span;
            # make the "nothing to correct" outcome explicit instead.
            end_idx = start_idx
        # Empty slice when end_idx <= start_idx, so nothing is touched then.
        num_sentences[start_idx:end_idx] += 1
        select_flag = False

In [None]:
# for idx, _ in enumerate(event_names):
#     print(num_sentences[idx], event_names[idx])

In [None]:
# Write the corrected counts back into the frame; num_sentences is a NumPy
# array here, so the values are assigned row by row in order.
df["num_sentences"] = num_sentences

## Compute Sentence and Event Sequence

In [None]:
# np.unique(df["num_sentences"])

In [None]:
def compute_seq(events):
    """Condense the raw event names of one group into a labelled sequence.

    The steps run in this order, each on the output of the previous one:

    1. drop "suggestion-open" and "suggestion-hover";
    2. drop the first "text-insert" that follows a "suggestion-select";
    3. turn the first "cursor-backward", "cursor-select" or "text-delete"
       after a "suggestion-select" (with no "text-insert" in between) into
       "gpt3-modify";
    4. drop the remaining cursor events, "text-delete" and "suggestion-close";
    5. for every "suggestion-select", drop everything from the most recent
       "suggestion-get" up to (not including) the select;
    6. collapse runs of consecutive "text-insert" into one;
    7. rename what is left to its display label.

    Args:
        events: Event names in the order they occurred.  Any iterable of
            strings is accepted (NumPy array, list, pandas Series).

    Returns:
        numpy.ndarray: labels taken from "prompt", "user", "empty-call",
        "gpt3-call" and "modify-gpt3".

    Raises:
        KeyError: if an event name survives steps 1-6 but has no label.
    """
    # Work on a plain list so that lists, arrays and Series behave the same.
    # Tuples (not sets) are used for membership tests to keep plain ==
    # comparison semantics.
    seq = [
        event
        for event in events
        if event not in ("suggestion-open", "suggestion-hover")
    ]

    # Step 2: the insert right after a selection is dropped (presumably it is
    # the accepted suggestion being written into the text, not user typing).
    after_select = False
    kept = []
    for event in seq:
        if event == "suggestion-select":
            after_select = True
        elif event == "text-insert" and after_select:
            after_select = False
            continue
        kept.append(event)

    # Step 3: identify GPT-3 modifications.
    after_select = False
    marked = []
    for event in kept:
        if event == "suggestion-select":
            after_select = True
        elif event == "text-insert":
            after_select = False
        elif after_select and event in (
            "cursor-backward",
            "cursor-select",
            "text-delete",
        ):
            after_select = False
            event = "gpt3-modify"
        marked.append(event)

    # Step 4: cursor movement, deletions and suggestion-close carry no
    # information for the final sequence.
    seq = [
        event
        for event in marked
        if event
        not in (
            "cursor-forward",
            "cursor-backward",
            "cursor-select",
            "text-delete",
            "suggestion-close",
        )
    ]

    # Step 5: identify GPT-3 calls.  A request that leads to a selection is
    # represented by the selection alone, so the request and anything between
    # it and the selection are removed.  Requests without a selection stay.
    drop = set()
    request_idx = None
    for idx, event in enumerate(seq):
        if event == "suggestion-get":
            request_idx = idx
        elif event == "suggestion-select" and request_idx is not None:
            drop.update(range(request_idx, idx))
            request_idx = None
    seq = [event for idx, event in enumerate(seq) if idx not in drop]

    # Step 6: group together consecutive text-inserts.
    collapsed = []
    for event in seq:
        if event == "text-insert" and collapsed and collapsed[-1] == "text-insert":
            continue
        collapsed.append(event)

    # Step 7: rename sequences.
    seq_name_dict = {
        "system-initialize": "prompt",
        "text-insert": "user",
        "suggestion-get": "empty-call",
        "suggestion-select": "gpt3-call",
        "gpt3-modify": "modify-gpt3",
    }
    return np.array([seq_name_dict[event] for event in collapsed])

In [None]:
# Print the condensed event sequence of every sentence-count group.
for num in np.unique(df["num_sentences"]):
    group = df[df["num_sentences"] == num]
    # Last text snapshot of the group (not used further in this cell).
    sent = np.array(group["text_buffer"])[-1]
    event_seq = np.array(group["event_name"])
    print(num, compute_seq(event_seq))

## Generate Graph

In [None]:
from PIL import Image, ImageDraw, ImageFont
import math

In [None]:
# Layout constants (pixels).  The drawing helpers place rows and columns at
# an 80-pixel pitch, which is why 80 appears in the size computation below.
margin = 25
circle_size = 50
sentence_margin = 80
line_size = 30

# Size the canvas from the data.  With a fixed 600x4500 canvas, a row with
# more than six markers or a figure with more than 56 rows was clipped
# silently.  The original size is kept as the minimum, so figures that used
# to fit look the same as before.
_row_keys = np.unique(df["num_sentences"])
_longest_row = max(
    (
        len(compute_seq(np.array(df[df["num_sentences"] == num]["event_name"])))
        for num in _row_keys
    ),
    default=0,
)
canvas_width = max(
    600,
    sentence_margin + 2 * margin + circle_size + 80 * max(_longest_row - 1, 0),
)
canvas_height = max(
    4500,
    2 * margin + circle_size + 80 * max(len(_row_keys) - 1, 0),
)

image = Image.new("RGB", (canvas_width, canvas_height), "white")
draw = ImageDraw.Draw(image)
font = ImageFont.load_default()

In [None]:
def _draw_circle(x, y, fill):
    """Draw one marker at the 1-based grid position (column x, row y).

    Uses the module-level `draw` object and layout constants; columns and
    rows are spaced 80 pixels apart.
    """
    left = sentence_margin + margin + 80 * (x - 1)
    top = margin + 80 * (y - 1)
    draw.ellipse(
        (left, top, left + circle_size, top + circle_size),
        fill=fill,
        outline="black",
    )


def _draw_connector(x, y, fill):
    """Draw a horizontal line of length `line_size` for grid position (x, y).

    The line sits at the vertical centre of row y.  With the layout constants
    this notebook defines (sentence_margin == 80, circle_size + line_size
    == 80) it ends exactly at the left edge of the marker in column x.
    """
    left = margin + circle_size + 80 * (x - 1)
    mid_y = margin + (circle_size / 2) + 80 * (y - 1)
    draw.line([(left, mid_y), (left + line_size, mid_y)], fill=fill, width=3)


def user_circle(x, y):
    """Draw the green marker used for "user" entries."""
    _draw_circle(x, y, (100, 255, 100, 255))


def text(text, y):
    """Write the label `text` at the left margin of 1-based row `y`."""
    # NOTE: the parameter shadows the function name inside this body; it is
    # kept because it is part of the signature.
    top = margin + (circle_size * 2 / 5) + 80 * (y - 1)
    draw.text((margin, top), text, fill="black", font=font)


def gpt_circle(x, y):
    """Draw the red marker used for "gpt3-call" entries."""
    _draw_circle(x, y, "red")


def prompt_circle(x, y):
    """Draw the black marker used for "prompt" entries."""
    _draw_circle(x, y, "black")


def modified_circle(x, y):
    """Draw the yellow marker used for "modify-gpt3" entries."""
    _draw_circle(x, y, "yellow")


def empty_circle(x, y):
    """Draw the white marker used for "empty-call" entries."""
    _draw_circle(x, y, "white")


def suggestion_open(x, y):
    """Draw the red connector line for the marker at (x, y)."""
    _draw_connector(x, y, "red")


def user_change(x, y):
    """Draw the green connector line for the marker at (x, y)."""
    _draw_connector(x, y, (100, 255, 100, 255))

In [None]:
# Draw one row per sentence-count group: a label on the left, then one marker
# per entry of the condensed sequence.  Each marker kind has a connector that
# is drawn first, except for the first entry of a row; "prompt" has none.
markers = {
    "gpt3-call": (suggestion_open, gpt_circle),
    "user": (user_change, user_circle),
    "prompt": (None, prompt_circle),
    "modify-gpt3": (user_change, modified_circle),
    "empty-call": (suggestion_open, empty_circle),
}

for idx, num in enumerate(np.unique(df["num_sentences"])):
    row = idx + 1
    event_seq = np.array(df[df["num_sentences"] == num]["event_name"])
    image_text = "Sentence " + str(row)
    text(image_text, row)
    for i, op in enumerate(compute_seq(event_seq)):
        if op not in markers:
            continue
        connector, marker = markers[op]
        column = i + 1
        if connector is not None and i != 0:
            connector(column, row)
        marker(column, row)

In [None]:
# Show the finished image inline.  `display` is not imported in this file;
# presumably it is the one the notebook environment provides.
display(image)

In [None]:
# LEGEND: CIRCLES
# Prompt     -> black
# GPT-3      -> red (change color; shape triangle)
# User       -> green (change color; shape rectangle)
# Empty call -> white
# Modify     -> yellow

# LEGEND: LINES
# API Call    -> red
# User Edits  -> green (dashed line)

In [None]:
# https://www.color-blindness.com/coblis-color-blindness-simulator/