# Peter results
Please note that this code is intended to be run with Python 3.13.

In [None]:
import pymongo as pm
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import re

## Connect to db

In [None]:
# No connection arguments are given, so the driver's default (local host) is used.
client = pm.MongoClient()
# Handle to the "experiment" database; attribute access is equivalent to client["experiment"].
db = client.experiment

##  Analysis

### Fetch data

In [None]:
# Trials bucketed by the variable-naming style encoded in the trial's file name.
full_name = []
single_letter = []
abbreviation = []
# Trials bucketed by the kind of problem encoded in the trial's file name.
semantic = []
syntatic = []

# Raw strings: "\." inside a normal string literal is an invalid escape
# sequence and triggers a SyntaxWarning on Python 3.12+.
# NOTE(review): the "." before "html" is unescaped and therefore matches any
# character, not only a literal dot - confirm whether that is intended.
abbreviation_regex = r"abbrev.html$"
full_name_regex = r"normal.html$"
single_letter_regex = r"single.html$"
semantic_regex = r"..\.semantic.."
syntatic_regex = r"..\.syntactic.."


sessions = db["sessions"]
# Only sessions flagged as done are analysed.
for session in sessions.find({"done": True}):
    for trial in session["trials"]:
        file_name = trial["file"]

        # A trial lands in at most one naming-style bucket ...
        if re.search(pattern=full_name_regex, string=file_name):
            full_name.append(trial)
        elif re.search(pattern=abbreviation_regex, string=file_name):
            abbreviation.append(trial)
        elif re.search(pattern=single_letter_regex, string=file_name):
            single_letter.append(trial)

        # ... and, independently, in at most one problem-type bucket.
        if re.search(pattern=semantic_regex, string=file_name):
            semantic.append(trial)
        elif re.search(pattern=syntatic_regex, string=file_name):
            syntatic.append(trial)

### Update trials with indicators for type
(The extra information written here is not currently used.)

In [None]:
def update_trials_with_indecators(collection_name) -> None:
    """Tag each trial of every finished session with its variable and challenge type.

    Every session in ``db[collection_name]`` whose ``done`` flag is true is
    scanned.  For each of its trials the ``file`` field is matched against the
    file-name patterns below, and the resulting labels are stored on the trial
    as ``variable_type`` and ``challenge_type``.  A label is the empty string
    when no pattern matches.

    Args:
        collection_name: Name of the collection in ``db`` to read sessions
            from and write the labels back to.
    """
    # Raw strings: "\." inside a normal string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on Python 3.12+.
    abbreviation_regex = r"abbrev.html$"
    full_name_regex = r"normal.html$"
    single_letter_regex = r"single.html$"
    semantic_regex = r"..\.semantic.."
    syntatic_regex = r"..\.syntactic.."

    collection = db[collection_name]
    for session in collection.find({"done": True}):
        for trial in session["trials"]:
            file_name = trial["file"]

            variable_type = ""
            if re.search(pattern=full_name_regex, string=file_name):
                variable_type = "Full variable name"
            elif re.search(pattern=abbreviation_regex, string=file_name):
                variable_type = "Abbreviated variable name"
            elif re.search(pattern=single_letter_regex, string=file_name):
                variable_type = "Single letter variable name"

            challenge_type = ""
            if re.search(pattern=semantic_regex, string=file_name):
                challenge_type = "Semantic error"
            elif re.search(pattern=syntatic_regex, string=file_name):
                challenge_type = "Syntatic error"

            # The positional "$" refers to the trial selected by the
            # "trials.created" condition of the query.
            query = {
                "USER_SESSION_ID": session["USER_SESSION_ID"],
                "trials.created": trial["created"],
            }
            # Both labels are written in one $set, and to the collection that
            # was actually requested rather than a hard-coded one.
            update = {
                "$set": {
                    "trials.$.variable_type": variable_type,
                    "trials.$.challenge_type": challenge_type,
                }
            }
            collection.update_one(query, update)

In [None]:
# Write the variable_type / challenge_type labels onto the trials of the finished sessions.
update_trials_with_indecators("sessions")

### Begin analysis

In [None]:
def success_rate(bucket: list) -> list[list]:
    """Count successful and failed trials in *bucket*.

    Args:
        bucket: Trials, each a mapping with a ``"state"`` key.

    Returns:
        ``[[n_success, n_failure], ["success", "failure"]]`` - the counts and
        their matching labels.  Trials in any other state are ignored.

    Raises:
        KeyError: If a trial has no ``"state"`` key.
    """
    states = [drop["state"] for drop in bucket]
    counts = [states.count("success"), states.count("failure")]
    return [counts, ["success", "failure"]]

In [None]:
# One DataFrame per naming-style bucket (one row per trial).
full_name_DF, abbreviation_DF, single_letter_DF = map(
    pd.DataFrame, (full_name, abbreviation, single_letter)
)

# All naming-style buckets combined, in the order full name, abbreviation, single letter.
all_DF = pd.DataFrame([*full_name, *abbreviation, *single_letter])

In [None]:
palette_color = sns.color_palette('bright')
figure, axes = plt.subplots(1, 3)

# One pie chart per naming style, drawn left to right.
charts = (
    (full_name, "Full names"),
    (abbreviation, "Abbreviated names"),
    (single_letter, "Single letter names"),
)
for axis, (bucket, title) in zip(axes, charts):
    data, keys = success_rate(bucket)
    axis.pie(data, autopct='%.0f%%')
    axis.set_title(title)

# Figure-wide layout, legend (labels from the last success_rate call) and title.
figure.tight_layout()
figure.legend(labels=keys)
figure.suptitle("Comparison between different variable names")
plt.show()

figure.savefig("./figures/Variable-names.png", bbox_inches='tight')

In [None]:
palette_color = sns.color_palette('bright')
figure, axes = plt.subplots(1, 2)

# Creating chart for semantic problems
data, keys = success_rate(semantic)
axes[0].pie(data, autopct='%.0f%%')
axes[0].set_title("Semantic problem")

# Creating chart for syntactic problems
data, keys = success_rate(syntatic)
axes[1].pie(data, autopct='%.0f%%')
axes[1].set_title("Syntactic problem")

# Setup figure specific information
figure.tight_layout()
figure.legend(labels=keys)
# This figure compares problem types, not variable names.
figure.suptitle("Comparison between different problem types")
plt.show()

figure.savefig("./figures/Problem-type.png", bbox_inches='tight')