In [137]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import json, csv
from pprint import pprint

In [14]:
reports = []
for i in range(0,30):
    with open('report'+str(i)+".json", mode='r') as infile:
        report = json.load(infile)
        reports.append(report)

In [19]:
# Identifying patterns of common mistakes

In [74]:
rs = pd.Series(reports)

In [105]:
df = pd.DataFrame(rs,columns=['all'])

In [108]:
df['mode']= df['all'].map(lambda x: x['mode'])

In [109]:
df['log'] = df['all'].map(lambda x: x['log'])

In [134]:
# extracting all file_messages
# per task
tid_list = ["tutorial_1", "tutorial_arith_1", "tutorial_arith_2", "tutorial_sum", "tutorial_text_extraction", "tutorial_filter_1", "task_three_step_arith", "task_number_sort", "task_filter_words_by_length", "task_filter_numbers", "task_extract_and_filter"]
code_list = ['EMPTY_CELL','INCONSISTENT_TYPE','FILTER_WITHOUT_TF','FILTER_AND_EXTRACT','TF_WITHOUT_NUMBER','NO_PROGRAM_FOUND']
df_fails = pd.DataFrame(0, index=tid_list, columns=code_list)
# fails_per_task = {tid:{} for tid in tid_list}
for report in reports: # report per participant
    if report['mode']=="baseline": 
        continue
    for tid in tid_list:
        temp_codes = {code:False for code in code_list}
        for ev in report['log']:
            if ev['tid']==tid and ev['event']=="FAIL_MESSAGE":
                for cd in ev['detail']['code']:
                     temp_codes[cd] = True
        for code in temp_codes:
            if temp_codes[code]==True:
                df_fails[code][tid] += 1

In [135]:
df_fails

Unnamed: 0,EMPTY_CELL,INCONSISTENT_TYPE,FILTER_WITHOUT_TF,FILTER_AND_EXTRACT,TF_WITHOUT_NUMBER,NO_PROGRAM_FOUND
tutorial_1,0,0,0,0,0,0
tutorial_arith_1,0,0,0,0,0,0
tutorial_arith_2,2,0,0,0,0,5
tutorial_sum,0,0,0,0,0,1
tutorial_text_extraction,0,0,0,0,0,0
tutorial_filter_1,2,0,11,0,3,8
task_three_step_arith,2,0,0,0,0,5
task_number_sort,0,0,0,0,0,2
task_filter_words_by_length,1,0,9,0,12,4
task_filter_numbers,0,0,11,0,0,5


In [172]:
mistakes = []
with open('mistakes.csv', mode='r') as infile:
    reader = csv.DictReader(infile, delimiter=',')
    for row in reader:
        mistakes.append(row)

In [173]:
dfm = pd.DataFrame(mistakes)

In [174]:
dfm

Unnamed: 0,detail,feedback,isCritical,mistake,mode,pid,tid,tii
0,"For input+1, user provided 1->2 twice",,,Ambiguous cases,b,25,tutorial_arith_1,2
1,"For sorting, all outputs are reversed input",,,Ambiguous cases,e,15,task_number_sort,8
2,"For sorting, all outputs are reversed input",,,Ambiguous cases,b,19,task_number_sort,8
3,"For sorting, all outputs are reversed input",,,Ambiguous cases,e,27,task_number_sort,8
4,"For sum, all outputs are counts of input",,,Ambiguous cases,e,7,tutorial_sum,4
5,Insufficient to find the correct conditional,,TRUE,Ambiguous cases,b,8,task_filter_numbers,10
6,Insufficient to find the correct conditional,,TRUE,Ambiguous cases,b,12,task_filter_numbers,10
7,Insufficient to find the correct conditional,,TRUE,Ambiguous cases,b,30,task_filter_numbers,10
8,Insufficient to find the correct conditional,,,Ambiguous cases,b,19,task_filter_numbers,10
9,Insufficient to find the correct conditional,,,Ambiguous cases,b,23,task_filter_numbers,10


In [268]:
# COUNTING MISTAKES PER TASK
mistakesPerTask = []
for i in range(7,12):
    mistakesPerTask.append(len(dfm[dfm.tii==str(i)]))
series = pd.Series(mistakesPerTask, index=range(7,12))
print series
# series.describe()

7     30
8     15
9     44
10    42
11    49
dtype: int64


In [280]:
# COUNTING % OF TASKS(tii*pi) THAT WE FOUND MISTAKES
numTasks = 0
numTasksWithMistake = 0
numMistakePerTask = []
for tii in range(7,12):
    row = []
    for pi in range(0,30):
        num = len(dfm[(dfm.tii==str(tii)) & (dfm.pid==str(pi))])
        row.append(num)
        if num>0:
            numTasksWithMistake += 1
        numTasks += 1
    numMistakePerTask.append(row)
print numTasks
print numTasksWithMistake
print "% OF TASKS(TASK*PARTICIPANT) THAT HAS AT LEAST ONE MISTAKE: ", float(numTasksWithMistake) / numTasks

150
111
% OF TASKS(TASK*PARTICIPANT) THAT HAS AT LEAST ONE MISTAKE:  0.74


In [281]:
# COUNTING HOW MANY DATA POINTS (150 = 30 participants done 5 tasks) 
count = 0
datapoints = []
for tii in range(1,12):
    for pi in range(0,30):
        datapoints.append(len(dfm[(dfm.tii==str(tii)) & (dfm.pid==str(pi))]))
        if len(dfm[(dfm.tii==str(tii)) & (dfm.pid==str(pi))])>0:
            count += 1
print count
print float(count) / 150.0
print pd.Series(datapoints).describe()
# mistakesPerDataPoints = {tii:{ pi:len(dfm[(dfm.tii==str(tii)) & (dfm.pid==str(pi))]) for pi in range(0,30) } for tii in range(1,11)}
# percDataPointsContainingMistakes = 

154
1.02666666667
count    330.000000
mean       0.706061
std        0.876037
min        0.000000
25%        0.000000
50%        0.000000
75%        1.000000
max        4.000000
dtype: float64


In [282]:
# MAKING DATAFRAME OF TASK and MISTAKE TYPE
mistakes = dfm['mistake'].unique()
dict_mistake = {tii:{m:0 for m in mistakes} for tii in range(7,12)}
critical_mistakes = {m:0 for m in mistakes} 
for entry in dfm.T.to_dict().values():
    mistake = entry['mistake']
    tii = int(entry['tii'])
    if tii>6:
        dict_mistake[tii][mistake] += 1
    if entry['isCritical']:
        critical_mistakes[mistake] += 1
print critical_mistakes
        
# print dict_mistake
df_mistakes = pd.DataFrame(dict_mistake)
df_mistakes['total'] = df_mistakes.sum(axis=1)
print df_mistakes

{'Trying formula': 7, 'Unnecessary steps': 6, 'Ambiguous cases': 11, 'Wrong input': 1, 'Wrong step': 7, 'Wrong program': 2, 'Empty cases': 0, 'Missing step': 30}
                    7  8   9  10  11  total
Ambiguous cases    10  7   0  12   0     29
Empty cases         1  0   1   0   0      2
Missing step        4  0  33  17  38     92
Trying formula      3  3   2   2   1     11
Unnecessary steps   3  1   3   5   3     15
Wrong input         0  0   0   0   1      1
Wrong program       2  0   0   0   1      3
Wrong step          7  4   5   6   5     27


In [259]:
dfm['mistake'].value_counts()
len(dfm['mistake'])

250

In [261]:
dfm[dfm.isCritical=='TRUE']['mistake'].value_counts()
float(len(dfm[dfm.isCritical=='TRUE']['mistake'])) / len(dfm['mistake'])

0.256