In [139]:
import pandas as pd
import seaborn as sns
from statsmodels.stats.inter_rater import fleiss_kappa, aggregate_raters

# Grade Analysis

In [140]:
# Load the recorded grades; the first CSV column becomes the row index.
grade_df = pd.read_csv(
    filepath_or_buffer='Menagerie/data/grades.csv',
    index_col=0,
)
grade_df

Unnamed: 0,assignment_number,comments,skill,participant_id,batch,grade
0,18.0,The code use meaningful identifier names and a...,Readability,15,1,B+
1,620.0,Code is well organized into logical classes an...,Readability,15,1,A+
2,466.0,"The code is well-organized, consistently inden...",Readability,15,1,A-
3,591.0,The code is well organized and readable. Inden...,Readability,15,1,A-
4,587.0,The code is well organized and readable. Inden...,Readability,15,1,A-
...,...,...,...,...,...,...
15,58.0,,Documentation,14,1,A++
16,353.0,,Documentation,14,1,A++
17,422.0,,Documentation,14,1,A++
18,480.0,,Documentation,14,1,A++


In [147]:
# Keep only the rows that actually carry a grade (drop null grades).
grade_df = grade_df[grade_df['grade'].notna()]
grade_df

Unnamed: 0,assignment_number,comments,skill,participant_id,batch,grade
0,18.0,The code use meaningful identifier names and a...,Readability,15,1,B+
1,620.0,Code is well organized into logical classes an...,Readability,15,1,A+
2,466.0,"The code is well-organized, consistently inden...",Readability,15,1,A-
3,591.0,The code is well organized and readable. Inden...,Readability,15,1,A-
4,587.0,The code is well organized and readable. Inden...,Readability,15,1,A-
...,...,...,...,...,...,...
15,58.0,,Documentation,14,1,A++
16,353.0,,Documentation,14,1,A++
17,422.0,,Documentation,14,1,A++
18,480.0,,Documentation,14,1,A++


## Inter-Rater Analysis

In [148]:
# TODO: Discuss with Neil what to do about this mistake.
# Assignment 105 was accidentally duplicated between the two batches, in
# addition to the purposeful duplication, so it shows up twice per rater below.
skill_df = grade_df.loc[
    grade_df['skill'] == 'Readability',
    ['participant_id', 'assignment_number', 'grade'],
]
group_df = skill_df.loc[skill_df['participant_id'].isin([21, 22, 23, 24])].sort_values(by='participant_id')
group_df.loc[group_df['assignment_number'] == 105]
# Left disabled on purpose while the duplicate rows are unresolved:
# pivot = group_df.pivot(index='assignment_number', columns='participant_id', values='grade')

Unnamed: 0,participant_id,assignment_number,grade
4,21,105.0,A+
8,21,105.0,A+
4,22,105.0,A
8,22,105.0,A
4,23,105.0,B
8,23,105.0,C-
4,24,105.0,D+
8,24,105.0,B-


In [175]:
def calculate_fleiss_kappa(skill, participant_list):
    """Compute Fleiss' kappa for one skill across a group of raters.

    Reads the notebook-level ``grade_df``, keeps the rows whose ``skill``
    equals ``skill`` and whose ``participant_id`` is in ``participant_list``,
    and reshapes them into one row per ``assignment_number`` and one column
    per ``participant_id`` holding the ``grade``.

    Assignments that lack a grade from any rater in the group are printed
    and then dropped before the statistic is computed.

    Parameters
    ----------
    skill : value compared against ``grade_df['skill']``
    participant_list : list of participant ids forming the rater group

    Returns
    -------
    The value returned by ``fleiss_kappa`` for the aggregated rating table.

    Raises
    ------
    ValueError
        Propagated from ``DataFrame.pivot`` when an
        (assignment_number, participant_id) pair occurs more than once.
    """
    wanted_columns = ['participant_id', 'assignment_number', 'grade']
    row_mask = (grade_df['skill'] == skill) & grade_df['participant_id'].isin(participant_list)
    ratings = grade_df.loc[row_mask, wanted_columns].sort_values(by='participant_id')

    pivot = ratings.pivot(index='assignment_number', columns='participant_id', values='grade')

    # Rows where at least one rater in the group has no grade.
    incomplete = pivot.isna().any(axis=1)
    if incomplete.any():
        print('Missing Grade: ', pivot[incomplete])
        pivot = pivot.dropna()

    # aggregate_raters returns (count table, categories); only the table is needed.
    counts = aggregate_raters(pivot)[0]
    return fleiss_kappa(counts, 'fleiss')

In [177]:
# Fleiss' kappa for 'Correctness', computed per group of four consecutive
# participant ids (1-4, 5-8, ..., 25-28).
# NOTE: a group that raises ValueError is reported and skipped, so the list
# may hold fewer entries than there are groups.
correctness_fleiss_scores = []
for first_id in range(1, 28, 4):
    raters = list(range(first_id, first_id + 4))
    try:
        kappa = calculate_fleiss_kappa('Correctness', raters)
    except ValueError:
        print('Error duplicate entries check data for: ', raters)
    else:
        correctness_fleiss_scores.append(kappa)

correctness_fleiss_scores

Missing Grade:  participant_id      13  14  15 16
assignment_number                
306.0              NaN  A+  B+  A
Error duplicate entries check data for:  [21, 22, 23, 24]


[0.042672741078208086,
 -0.024103909079555397,
 0.0034396207120944264,
 0.041807014067762986,
 0.11335983616431755,
 0.0464963981663392]

In [145]:
# Fleiss' kappa for 'Readability', computed per group of four consecutive
# participant ids (1-4, 5-8, ..., 25-28).
# NOTE: a group that raises ValueError is reported and skipped, so the list
# may hold fewer entries than there are groups.
readability_fleiss_scores = []
for first_id in range(1, 28, 4):
    raters = list(range(first_id, first_id + 4))
    try:
        kappa = calculate_fleiss_kappa('Readability', raters)
    except ValueError:
        print('Error duplicate entries check data for: ', raters)
    else:
        readability_fleiss_scores.append(kappa)

readability_fleiss_scores

Error duplicate entries check data for:  [21, 22, 23, 24]


[-0.0028740903809698452,
 -0.07043300501765469,
 -0.06931542965756152,
 -0.057512084443129115,
 -0.010287685578759318,
 0.07704985578903992]

In [178]:
# Fleiss' kappa for 'Code Elegance', computed per group of four consecutive
# participant ids (1-4, 5-8, ..., 25-28).
# NOTE: a group that raises ValueError is reported and skipped, so the list
# may hold fewer entries than there are groups.
code_elegance_fleiss_scores = []
for first_id in range(1, 28, 4):
    raters = list(range(first_id, first_id + 4))
    try:
        kappa = calculate_fleiss_kappa('Code Elegance', raters)
    except ValueError:
        print('Error duplicate entries check data for: ', raters)
    else:
        code_elegance_fleiss_scores.append(kappa)

code_elegance_fleiss_scores

Error duplicate entries check data for:  [21, 22, 23, 24]


[-0.0032569323958238114,
 -0.013182330197582748,
 -0.026385862987170158,
 0.01619600012677883,
 0.03511883646683222,
 0.08114856429463169]

In [179]:
# Fleiss' kappa for 'Documentation', computed per group of four consecutive
# participant ids (1-4, 5-8, ..., 25-28).
# NOTE: a group that raises ValueError is reported and skipped, so the list
# may hold fewer entries than there are groups.
documentation_fleiss_scores = []
for first_id in range(1, 28, 4):
    raters = list(range(first_id, first_id + 4))
    try:
        kappa = calculate_fleiss_kappa('Documentation', raters)
    except ValueError:
        print('Error duplicate entries check data for: ', raters)
    else:
        documentation_fleiss_scores.append(kappa)

documentation_fleiss_scores

Missing Grade:  participant_id      9  10  11  12
assignment_number                
117.0              NaN  B  A+  A-
Error duplicate entries check data for:  [21, 22, 23, 24]
Missing Grade:  participant_id      25   26   27   28
assignment_number                    
394.0              A++  A++  NaN  A++


[0.00179555068626558,
 -0.07136305265088976,
 -0.05906313645621182,
 -0.0011551790527532205,
 0.0038314176245210336,
 0.0632548618219037]