# Build sentences from JSONL files and export to CSV

In [1]:
# from tqdm import tqdm
# import pandas as pd

# from utils import load_sessions, read_session
# from main import generate_buffer

# sessions = load_sessions()

# file_name = []
# text = []

# err = []

# for sess in tqdm(sessions):
#     events = read_session(sess, verbose=0)
#     try:
#         text_buffer = generate_buffer(events)
#     except:
#         err.append(str(sess.split('/')[-1]) + " is throwing an error!")
#         continue
#     file_name.append(sess.split('/')[-1])
#     text.append(text_buffer[-1])
    
# for e in err:
#     print(e)
    
# df = pd.DataFrame()
# df["file_name"] = file_name
# df["text"] = text
# df.to_csv("writing_sessions.csv")

# Export sentence and summary statistics to CSV

In [4]:
from tqdm import tqdm
import pandas as pd

from utils import load_sessions, read_session
from main import generate_buffer
from events import generate_event_seq
from summary import stats

sessions = load_sessions()

# Parallel per-session accumulators: index i of every list describes the same
# successfully processed session. The statistics cells further down read
# `sentence_metrics_list`, `api_metrics_list` and `df`, so these names are kept.
file_name = []
text = []
sentence_metrics_list = []
api_metrics_list = []

# Human-readable messages for sessions whose buffer could not be generated.
err = []

for sess in tqdm(sessions):
    session_name = sess.split('/')[-1]
    events = read_session(sess, verbose=0)
    try:
        text_buffer = generate_buffer(events)
    except Exception as exc:
        # Catch `Exception` rather than using a bare `except:` so that a
        # kernel interrupt (KeyboardInterrupt) or SystemExit still stops the
        # loop instead of being logged as a broken session. The exception type
        # and message are recorded so the failure can be diagnosed later.
        err.append(
            f"{session_name} is throwing an error! "
            f"({type(exc).__name__}: {exc})"
        )
        continue

    event_seq_dict = generate_event_seq(buffer=text_buffer,
                                        events=events)
    sentence_metrics, api_metrics = stats(event_seq_dict)

    # Append to all four lists only after every step succeeded, so the lists
    # can never end up with different lengths if a later step raises.
    file_name.append(session_name)
    # The last element of the buffer is stored as the session's text
    # (presumably the final document state -- confirm against generate_buffer).
    text.append(text_buffer[-1])
    sentence_metrics_list.append(sentence_metrics)
    api_metrics_list.append(api_metrics)

for e in err:
    print(e)

# Assemble every column first and build the frame once. Column order matches
# the original layout: file_name, text, sentence metrics, API metrics.
columns = {"file_name": file_name, "text": text}
for metrics_list in (sentence_metrics_list, api_metrics_list):
    if not metrics_list:
        # No session was processed successfully; there is no first record to
        # take the metric names from, so skip instead of raising IndexError.
        continue
    # The metric names of the first session define the columns; every other
    # session is expected to expose the same keys.
    for col in metrics_list[0]:
        columns[str(col)] = [x[col] for x in metrics_list]

df = pd.DataFrame(columns)

df.to_csv("writing_session_stats.csv")

# Mean

In [7]:
import numpy as np

# Print the mean of every metric column, grouped by metric family.
# The metric names are taken from the first session's record of each family.
for heading, per_session_metrics in (
    ("Sentence Metrics", sentence_metrics_list),
    ("\nAPI Metrics", api_metrics_list),
):
    print(heading)
    for metric_name in per_session_metrics[0]:
        print("Mean of", metric_name, ":", np.mean(df[metric_name]))

Sentence Metrics
Mean of Total number of sentences : 28.96265560165975
Mean of Number of sentences of initial prompt : 4.421161825726141
Mean of Number of sentences completely authored by the user : 15.452973720608576
Mean of Number of sentences completely authored by GPT-3 : 0.6853388658367912
Mean of Number of sentences authored by GPT-3 with user modification : 23.17081604426003

API Metrics
Mean of Total number of GPT-3 calls made : 12.531120331950207
Mean of Number of times GPT-3 suggestion is used : 8.857538035961273
Mean of Number of times user rejected GPT-3 suggestion : 3.673582295988935
Mean of Number of times GPT-3 suggestion is modified : 1.586445366528354
Mean of Number of times GPT-3 suggestion is used as is : 7.271092669432918


# Median

In [8]:
import numpy as np

# Print the median of every metric column, grouped by metric family.
# The metric names are taken from the first session's record of each family.
for heading, per_session_metrics in (
    ("Sentence Metrics", sentence_metrics_list),
    ("\nAPI Metrics", api_metrics_list),
):
    print(heading)
    for metric_name in per_session_metrics[0]:
        print("Median of", metric_name, ":", np.median(df[metric_name]))

Sentence Metrics
Median of Total number of sentences : 27.0
Median of Number of sentences of initial prompt : 4.0
Median of Number of sentences completely authored by the user : 14.0
Median of Number of sentences completely authored by GPT-3 : 0.0
Median of Number of sentences authored by GPT-3 with user modification : 21.0

API Metrics
Median of Total number of GPT-3 calls made : 10.0
Median of Number of times GPT-3 suggestion is used : 7.0
Median of Number of times user rejected GPT-3 suggestion : 3.0
Median of Number of times GPT-3 suggestion is modified : 1.0
Median of Number of times GPT-3 suggestion is used as is : 5.0


# Standard Deviation

In [10]:
import numpy as np

# Print the standard deviation of every metric column, grouped by metric
# family. np.std is called with its defaults; NumPy documents the default as
# ddof=0 (population form), which differs from pandas' Series.std default.
for heading, per_session_metrics in (
    ("Sentence Metrics", sentence_metrics_list),
    ("\nAPI Metrics", api_metrics_list),
):
    print(heading)
    for metric_name in per_session_metrics[0]:
        print("Standard Deviation of", metric_name, ":", np.std(df[metric_name]))

Sentence Metrics
Standard Deviation of Total number of sentences : 10.388909909258523
Standard Deviation of Number of sentences of initial prompt : 2.3909859029112486
Standard Deviation of Number of sentences completely authored by the user : 9.545418191261138
Standard Deviation of Number of sentences completely authored by GPT-3 : 1.8864423445191325
Standard Deviation of Number of sentences authored by GPT-3 with user modification : 10.77324462494979

API Metrics
Standard Deviation of Total number of GPT-3 calls made : 9.204158194377401
Standard Deviation of Number of times GPT-3 suggestion is used : 7.424057788661343
Standard Deviation of Number of times user rejected GPT-3 suggestion : 3.530339833311101
Standard Deviation of Number of times GPT-3 suggestion is modified : 1.796857239727531
Standard Deviation of Number of times GPT-3 suggestion is used as is : 7.233591709071117


# Correlation

In [11]:
# Pairwise correlation between the metric columns.
# `df` also holds the string columns "file_name" and "text"; calling
# df.corr() on them emits a FutureWarning on pandas 1.5 and raises on
# pandas >= 2.0, so restrict the frame to its numeric columns first.
df.select_dtypes(include="number").corr()

  df.corr()


Unnamed: 0,Total number of sentences,Number of sentences of initial prompt,Number of sentences completely authored by the user,Number of sentences completely authored by GPT-3,Number of sentences authored by GPT-3 with user modification,Total number of GPT-3 calls made,Number of times GPT-3 suggestion is used,Number of times user rejected GPT-3 suggestion,Number of times GPT-3 suggestion is modified,Number of times GPT-3 suggestion is used as is
Total number of sentences,1.0,-0.129523,0.768386,0.218429,0.916576,0.3651,0.454037,-0.002934,0.05189,0.453102
Number of sentences of initial prompt,-0.129523,1.0,-0.293402,-0.062613,-0.324912,-0.067734,-0.096746,0.026855,-0.056362,-0.085293
Number of sentences completely authored by the user,0.768386,-0.293402,1.0,-0.076538,0.832895,-0.250058,-0.157757,-0.320189,-0.087056,-0.140286
Number of sentences completely authored by GPT-3,0.218429,-0.062613,-0.076538,1.0,-0.125676,0.422258,0.451338,0.151763,0.199498,0.413665
Number of sentences authored by GPT-3 with user modification,0.916576,-0.324912,0.832895,-0.125676,1.0,0.219229,0.301248,-0.061939,-0.007318,0.310998
Total number of GPT-3 calls made,0.3651,-0.067734,-0.250058,0.422258,0.219229,1.0,0.93199,0.647249,0.192459,0.908722
Number of times GPT-3 suggestion is used,0.454037,-0.096746,-0.157757,0.451338,0.301248,0.93199,1.0,0.326916,0.225656,0.970277
Number of times user rejected GPT-3 suggestion,-0.002934,0.026855,-0.320189,0.151763,-0.061939,0.647249,0.326916,1.0,0.027233,0.328759
Number of times GPT-3 suggestion is modified,0.05189,-0.056362,-0.087056,0.199498,-0.007318,0.192459,0.225656,0.027233,1.0,-0.016807
Number of times GPT-3 suggestion is used as is,0.453102,-0.085293,-0.140286,0.413665,0.310998,0.908722,0.970277,0.328759,-0.016807,1.0
