# Evaluate Pilot with Multiple-Choice Questions

In [1]:
# import
import json
import numpy as np
from util import filters
from datetime import datetime
from util.filters import FORMAT, LIKERT_CONVERTER, CONDITIONS

from scipy.stats import f_oneway
from statsmodels.stats.multicomp import pairwise_tukeyhsd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Load the 09/06 pilot export and flatten its "logs" mapping into a list of
# session records (one record per key, in file order).
file = "pilot_09_06.json"
with open("./prolific_logs/" + file) as f:
    raw_export = json.load(f)
logs = list(raw_export["logs"].values())

### Add Initial Pilot
The initial pilot was identical to the following 60 samples, except that it had one fewer multiple-choice question.

In [3]:
# OPTIONAL: INTEGRATE OLD LOGS INTO DATA
# Flip this switch to False to skip merging the earlier pilot export.
INTEGRATE_OLD_LOGS = True

if INTEGRATE_OLD_LOGS:
    file = "pilot_09_05_complete.json"
    with open("./prolific_logs/" + file) as f:
        old_export = json.load(f)

    # Flatten the "logs" mapping and keep only sessions flagged as completed.
    old_logs = [
        old_log
        for old_log in old_export["logs"].values()
        if old_log["completedSurvey"] == True
    ]

    # Renumber questions: drop q1, then shift q2..q9 down to q1..q8.
    for old_log in old_logs:
        answers = old_log["knowledgeAnswers"]
        del answers["q1"]
        for idx in range(1, 9):
            answers["q" + str(idx)] = answers["q" + str(idx + 1)]
        del answers["q9"]

    # Add into new logs (in place, so the existing list object is extended).
    logs.extend(old_logs)

#### -------------------------------------------------------------------------

In [4]:
# helper function
def get_avg_std_err(vals):
    """Return ``(mean, standard_error)`` for a sequence of numbers.

    The standard error is the sample standard deviation (``ddof=1``) divided
    by the square root of the number of values in ``vals``.
    """
    n_obs = len(vals)
    sample_sd = np.std(vals, ddof=1)
    return np.mean(vals), sample_sd / np.sqrt(n_obs)

In [5]:
### OVERWRITE WITH MORE RECENT DATA
# Replaces whatever `logs` held before with the records from the final export,
# flattened from its "logs" mapping into a list.
file = "pilot_09_10_final.json"
with open("./prolific_logs/" + file) as f:
    final_export = json.load(f)
logs = list(final_export["logs"].values())

## Data Preprocessing

In [6]:
# only consider completed sessions
def _count_in_condition(entries, condition):
    """Count the entries whose "condition" field equals ``condition``."""
    return sum(1 for entry in entries if entry["condition"] == condition)


print("Before filtering")
print("all logs:", len(logs))
for c in CONDITIONS:
    print(c, _count_in_condition(logs, c))

# Keep only the sessions whose completedSurvey flag equals True.
logs = [entry for entry in logs if entry["completedSurvey"] == True]

print("\nAfter filtering")
print("completed logs:", len(logs))
for c in CONDITIONS:
    print(c, _count_in_condition(logs, c))

Before filtering
all logs: 166
reading 41
teacher-qa-bot 42
llm-qa-bot 41
llm-chatbot 42

After filtering
completed logs: 100
reading 19
teacher-qa-bot 29
llm-qa-bot 21
llm-chatbot 31


In [7]:
def compile_dialog(log):
    """Print the main chat messages of one session log in chronological order.

    Reads the message list at ``log["chatLog"]["current"]["main"]``. Each
    message's ``"date"`` is parsed with ``FORMAT`` and its ``"text"`` is
    printed, ordered by the parsed timestamp. Messages for which
    ``"Student (to the teacher)" in m["text"]`` is true are skipped.

    Returns nothing; the dialog is written to stdout.
    """
    messages = []
    for m in log["chatLog"]["current"]["main"]:
        time = datetime.strptime(m["date"], FORMAT)
        # NOTE(review): the recorded notebook output shows some texts printed
        # as lists; for a list, `in` tests element equality rather than
        # substring, so such entries are not skipped here -- confirm intended.
        if "Student (to the teacher)" in m["text"]:
            continue
        messages.append((time, m["text"]))
    # Sort on the timestamp only. Sorting the bare tuples would fall back to
    # comparing the texts whenever two timestamps are equal, which raises
    # TypeError if one text is a str and the other a list. list.sort is
    # stable, so messages with equal timestamps keep their order from the log.
    messages.sort(key=lambda entry: entry[0])
    for _, text in messages:
        print(text)

In [8]:
# compile dialog
# Dump every session's dialog for the selected condition(s), each preceded by
# a banner showing its index within that condition.
divider = "==========================================================="
for c in ['llm-chatbot']:  # 'llm-chatbot'  'llm-qa-bot' 'teacher-qa-bot'
    print(c)
    group = [entry for entry in logs if entry["condition"] == c]
    for i, log in enumerate(group):
        print(divider, i, divider, sep="\n")
        compile_dialog(log)
        # Two blank lines between consecutive dialogs.
        print("\n")

llm-chatbot
0
Teacher (to the student): Hi Ruffle, I am happy to teach you! What would you like to learn about?
['Student (to the teacher): Hello! I\'m excited to learn from you. I\'d like to start with the phrase "form follows function" in the context of cell biology. Could you explain what it means?']
Teacher (to the student): form follows function is a philosophy that industries follow 
Teacher (to the student): That components are directly related to what they are housed in  
Teacher (to the student): An example ribosomes, the Organelles responsible for protein synthesis are found in high numbers in eucalyptic cells and that synthesizes larger amounts of protein 
Teacher (to the student): An example would be ribosomes, the organelles responsible for protein synthesis, are found in high numbers in Eukayotic cells and that synthesizes larger amounts of protein 
Teacher (to the student): ribosomes are responsible for protein synthesis They translate the code provided by the sequence o

In [9]:
# Display the condition names imported from util.filters.
CONDITIONS

['reading', 'teacher-qa-bot', 'llm-qa-bot', 'llm-chatbot']