In [None]:
from helper import *

# Identifiers for this run: a version tag and a prompt tag, joined with '-'.
versionControl = 'V1'
promptVersion = 'P5'
fullName = '-'.join([versionControl, promptVersion])

# saveName carries the same label; it is used as the prefix of exported files.
saveName = fullName

# Project configuration object (Config comes from the helper star-import).
config = Config()
config.setFromEnv()
config.setSaveDetails(fullName)

config.simpleCourseName = 'Movement Science'

In [None]:
# Ask the helper for two tables, one per record kind ('pickle' first, then 'error').
# NOTE(review): the exact schema returned by convertPicklesToDF is not visible here.
resultsDF, errorDF = (
    convertPicklesToDF(recordKind, config) for recordKind in ('pickle', 'error')
)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_theme(style="whitegrid", palette="deep")

# Every result row is plotted. To leave an assignment out, narrow filterDF here,
# e.g. resultsDF[resultsDF['assignment_id']!=1916709].
filterDF = resultsDF

# Joint plot of the 'score' column against 'peerGPT_score_real', coloured by
# assignment; the palette is restricted to the first six colours of the theme.
jointKwargs = dict(
    x='score',
    y='peerGPT_score_real',
    hue='assignment_id',
    height=5,
    marker=".",
    s=50,
    palette=sns.color_palette()[:6],
)
sns.jointplot(data=filterDF, **jointKwargs)

# Dashed y = x reference line from 0 to 40, drawn on the current axes.
plt.plot([0, 40], [0, 40], lw=1, color='#313232', linestyle='dashed')
plt.show()


In [None]:
# Flatten the per-row criterion tables held in resultsDF['data_peerGPT'] into one
# long table, tagging every criterion row with its submitter and assignment.
criterionFrames = []
for index, row in resultsDF.iterrows():
    # Tag a copy: the object in the row is the same one stored in resultsDF, so
    # assigning columns to it directly would silently modify the source data.
    criterionData = row['data_peerGPT'].copy()
    for col in ['submitter_id', 'assignment_id']:
        criterionData[col] = row[col]
    criterionFrames.append(criterionData)

# Concatenate once rather than growing a DataFrame inside the loop (which re-copies
# all accumulated rows on every iteration). pd.concat rejects an empty list, so an
# empty resultsDF falls back to an empty frame.
mergedCriterionData = pd.concat(criterionFrames) if criterionFrames else pd.DataFrame()

mergedCriterionData.to_excel(os.path.join(config.ExcelDumpFolder, saveName+'CriterionData.xlsx'))

# Score-level export: the same rows without the nested criterion tables.
saveDF = resultsDF.drop(columns=['data_peerGPT'])
saveDF.to_excel(os.path.join(config.ExcelDumpFolder, saveName+'ScoreData.xlsx'))
saveDF

## Generate Histogram spread of scores by Assignment

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
sns.set_theme(style="darkgrid", palette="dark")
rcParams['figure.figsize'] = 5,3

# Build one long table with a single 'score' column and a 'Grader Type' label:
# - peerGPT rows: 'peerGPT_score_real' renamed to 'score'
# - grader rows:  the original 'score', labelled with the grader id
# Both halves keep 'grader_id', so each panel can pair a grader with the peerGPT
# scores recorded on the same rows.
peerGPTGradesDF = saveDF.drop(['score', 'peerGPT_score'], axis=1, errors='ignore')
peerGPTGradesDF = peerGPTGradesDF.rename(columns={'peerGPT_score_real':'score'})
peerGPTGradesDF['Grader Type'] = 'peerGPT'

gradersDF = saveDF.drop(['peerGPT_score', 'peerGPT_score_real'], axis=1, errors='ignore')
gradersDF['Grader Type'] = gradersDF['grader_id'].apply(lambda graderKey: f'Grader ID: {graderKey}')

allGradesDF = pd.concat([gradersDF, peerGPTGradesDF])

# Grid layout: one row per grader (sorted), one column per assignment (first-seen order).
assignmentIDs = saveDF['assignment_id'].unique()
graderIDs = sorted(saveDF['grader_id'].unique())

# squeeze=False keeps `axes` two-dimensional even with a single grader or a single
# assignment; otherwise axes[row,col] and axes[:,0] below raise an IndexError.
fig, axes = plt.subplots(nrows=len(graderIDs), ncols=len(assignmentIDs),
                         figsize=(18,9), layout="constrained", squeeze=False)

for col, assignmentID in enumerate(assignmentIDs):
    # Lower x-limit: lowest 'score' for the assignment, rounded down to an even
    # number. It does not depend on the grader, so it is computed once per column.
    minScore = min(saveDF[(saveDF['assignment_id']==assignmentID)]['score'])
    lowerX = int(minScore-minScore%2)

    for row, graderID in enumerate(graderIDs):
        subsetDF = allGradesDF[(allGradesDF['assignment_id']==assignmentID) & (allGradesDF['grader_id']==graderID)]
        if subsetDF.empty:
            # No rows for this grader/assignment pair: leave the panel blank
            # instead of failing on .iloc[0] below.
            continue

        upperX = int(subsetDF['points_possible'].iloc[0])
        xTickStep = 1 if upperX-lowerX < 10 else 2

        g = sns.histplot(ax=axes[row,col], data=subsetDF, x='score', hue='Grader Type', kde=True, multiple="dodge", palette=sns.color_palette()[:2])
        g.set_xlim(lowerX,upperX)
        g.set_xticks(range(lowerX,upperX+1, xTickStep))
        g.set_xlabel('Score Distribution', fontsize=8)
        g.set_ylabel('')
        # Hide the per-panel legend.
        g.legend([],[], frameon=False)

# Column titles on the top row, row labels to the left of the first column.
pad = 5
for ax, col in zip(axes[0], assignmentIDs):
    ax.set_title(f'Assignment ID: {col}')
for ax, row in zip(axes[:,0], graderIDs):
    ax.annotate(f'Grader ID: {row}', xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords=ax.yaxis.label, textcoords='offset points',
                size='large', ha='right', va='center', rotation=90)

fig.savefig(os.path.join(config.ChartDumpFolder, saveName+'-HistogramPlotSpread.png'), dpi=300, bbox_inches='tight')


## Generate Scatterplot spread of scores by Assignment

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
sns.set_theme(style="darkgrid", palette="dark")
rcParams['figure.figsize'] = 4,4

# A point is flagged as an outlier when |peerGPT - grader| exceeds this fraction
# of the assignment's points_possible.
outlierFactor = 0.15

gradesDF = saveDF.copy()
gradesDF['Score Difference'] = gradesDF['peerGPT_score_real']-gradesDF['score']
gradesDF['Outlier'] = (gradesDF['Score Difference']/(gradesDF['points_possible']*outlierFactor)).apply(lambda scoreDiff: 'Outlier' if abs(scoreDiff)>1 else 'In range')

# Fixed label order so each label always gets the same colour. Deriving the order
# from .unique() made the colour depend on which label appears first in the data.
outlierOrder = ['In range', 'Outlier']

# Grid layout: one row per grader (sorted), one column per assignment (first-seen order).
assignmentIDs = saveDF['assignment_id'].unique()
graderIDs = sorted(saveDF['grader_id'].unique())

# squeeze=False keeps `axes` two-dimensional even with a single grader or a single
# assignment; otherwise axes[row,col] and axes[:,0] below raise an IndexError.
fig, axes = plt.subplots(nrows=len(graderIDs), ncols=len(assignmentIDs),
                         figsize=(15,10), layout="constrained", squeeze=False)

for col, assignmentID in enumerate(assignmentIDs):
    # Lower axis limit: lowest 'score' for the assignment, rounded down to an even
    # number. It does not depend on the grader, so it is computed once per column.
    minScore = min(saveDF[(saveDF['assignment_id']==assignmentID)]['score'])
    lowerX = int(minScore-minScore%2)

    for row, graderID in enumerate(graderIDs):
        subsetDF = gradesDF[(gradesDF['assignment_id']==assignmentID) & (gradesDF['grader_id']==graderID)]
        if subsetDF.empty:
            # No rows for this grader/assignment pair: leave the panel blank
            # instead of failing on .iloc[0] below.
            continue

        upperX = int(subsetDF['points_possible'].iloc[0])
        xTickStep = 1 if upperX-lowerX < 10 else 2
        # Width of the "in range" band around the diagonal, in score points.
        shiftValue = upperX*outlierFactor

        g = sns.scatterplot(ax=axes[row,col], data=subsetDF, x='score', y='peerGPT_score_real', hue='Outlier', hue_order=outlierOrder, palette=sns.color_palette()[:2])
        # Dark dashed line: perfect agreement (y = x). Grey dashed lines: the
        # diagonal shifted by shiftValue to either side.
        axes[row,col].plot([lowerX,upperX],[lowerX,upperX], lw=1, color='#313232', linestyle='dashed')
        axes[row,col].plot([lowerX+shiftValue,upperX+shiftValue],[lowerX,upperX], lw=1, color='#aaaaaa', linestyle='dashed')
        axes[row,col].plot([lowerX,upperX],[lowerX+shiftValue,upperX+shiftValue], lw=1, color='#aaaaaa', linestyle='dashed')
        g.set_xlim(lowerX,upperX+0.5)
        g.set_xticks(range(lowerX,upperX+1, xTickStep))
        g.set_ylim(lowerX,upperX+0.5)
        g.set_yticks(range(lowerX,upperX+1, xTickStep))
        g.set_xlabel('Grader Score',fontsize=8)
        g.set_ylabel('peerGPT Score', fontsize=8, rotation=90)
        # Hide the per-panel legend.
        g.legend([],[], frameon=False)

# Column titles on the top row, row labels to the left of the first column.
pad = 5
for ax, col in zip(axes[0], assignmentIDs):
    ax.set_title(f'Assignment ID: {col}')
for ax, row in zip(axes[:,0], graderIDs):
    ax.annotate(f'Grader ID: {row}', xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                xycoords=ax.yaxis.label, textcoords='offset points',
                size='large', ha='right', va='center', rotation=90)

fig.savefig(os.path.join(config.ChartDumpFolder, saveName+'-ScatterPlotSpread.png'), dpi=300, bbox_inches='tight')

## Generate Histogram spread of scores by Criterion per Assignment

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
import textwrap
sns.set_theme(style="darkgrid", palette="dark")

pad = 5

# All per-assignment criterion charts are written to one sub-folder named after the
# run. makedirs(exist_ok=True) also creates a missing parent folder and is safe to
# re-run, so it is done once here instead of on every loop iteration.
saveFolder = os.path.join(config.ChartDumpFolder, saveName)
os.makedirs(saveFolder, exist_ok=True)

for assignmentID in resultsDF['assignment_id'].unique():

    # Collect the criterion-level tables of this assignment, tagging each with the
    # ids of the row it came from.
    assgnDF = resultsDF[(resultsDF['assignment_id']==assignmentID)]
    criterionFrames = []
    for index, row in assgnDF.iterrows():
        # Tag a copy so the tables stored inside resultsDF are not modified.
        criterionData = row['data_peerGPT'].copy()
        for tagCol in ['submitter_id', 'assignment_id', 'grader_id']:
            criterionData[tagCol] = row[tagCol]
        criterionFrames.append(criterionData)
    # Single concat instead of growing a DataFrame inside the loop.
    critDataDF = pd.concat(criterionFrames) if criterionFrames else pd.DataFrame()

    # Long table with one 'points_grade' column and a 'Grader Type' label:
    # peerGPT rows take 'peerGPT_criterion_score' as their 'points_grade'.
    peerGPTGradesDF = critDataDF.drop(['points_grade'], axis=1, errors='ignore')
    peerGPTGradesDF = peerGPTGradesDF.rename(columns={'peerGPT_criterion_score':'points_grade'})
    peerGPTGradesDF['Grader Type'] = 'peerGPT'

    gradersDF = critDataDF.drop(['peerGPT_criterion_score'], axis=1, errors='ignore')
    gradersDF['Grader Type'] = gradersDF['grader_id'].apply(lambda graderKey: f'Grader ID: {graderKey}')

    allCritDF = pd.concat([gradersDF, peerGPTGradesDF])

    # Grid layout: one row per grader, one column per rubric criterion (both sorted),
    # three inches per panel.
    rubricDescs = sorted(allCritDF['description_rubric'].unique())
    graderIDs = sorted(allCritDF['grader_id'].unique())

    # squeeze=False keeps `axes` two-dimensional even with one grader or one
    # criterion; otherwise axes[row,col] and axes[:,0] below raise an IndexError.
    fig, axes = plt.subplots(nrows=len(graderIDs), ncols=len(rubricDescs),
                             figsize=(len(rubricDescs)*3, len(graderIDs)*3),
                             layout="constrained", squeeze=False)
    fig.suptitle(f'Assignment ID: {assignmentID}')

    for col, descRubric in enumerate(rubricDescs):
        # Lower x-limit: lowest 'points_grade' recorded for this criterion; it is
        # the same for every grader, so it is computed once per column.
        lowerX = int(min(allCritDF[(allCritDF['description_rubric']==descRubric)]['points_grade']))
        xTickStep = 1

        for row, graderID in enumerate(graderIDs):
            subsetDF = allCritDF[(allCritDF['grader_id']==graderID) & (allCritDF['description_rubric']==descRubric)].fillna(0)
            if subsetDF.empty:
                # No rows for this grader/criterion pair: leave the panel blank
                # instead of failing on .iloc[0] below.
                continue

            upperX = int(subsetDF['points_rubric'].iloc[0])

            try:
                g = sns.histplot(ax=axes[row,col], data=subsetDF, x='points_grade', hue='Grader Type', kde=True, multiple="dodge", palette=sns.color_palette()[:2])
            except Exception:
                # Retry without the KDE overlay when the first attempt raises
                # (presumably the density fit failing on degenerate data -- TODO
                # confirm). `except Exception` rather than a bare `except` so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                g = sns.histplot(ax=axes[row,col], data=subsetDF, x='points_grade', hue='Grader Type', kde=False, multiple="dodge", palette=sns.color_palette()[:2])
            g.set_xlim(lowerX,upperX)
            g.set_xticks(range(lowerX,upperX+1, xTickStep))
            g.set_xlabel('Score Distribution', fontsize=8)
            g.set_ylabel('')
            # Hide the per-panel legend.
            g.legend([],[], frameon=False)

    # Column titles: the rubric description wrapped at 24 characters.
    for ax, col in zip(axes[0], rubricDescs):
        newLine = '\n'.join(textwrap.wrap(col, width=24))
        ax.set_title(f'{newLine}')
    # Row labels to the left of the first column.
    for ax, row in zip(axes[:,0], graderIDs):
        ax.annotate(f'Grader ID: {row}', xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                    xycoords=ax.yaxis.label, textcoords='offset points',
                    size='large', ha='right', va='center', rotation=90)

    fig.savefig(os.path.join(saveFolder, f'{assignmentID}-HistogramPlotSpread.png'), dpi=300, bbox_inches='tight')

## Generate Scatterplot spread of scores by Criterion per Assignment

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
import textwrap
sns.set_theme(style="darkgrid", palette="dark")

# A point is flagged as an outlier when |peerGPT - grader| exceeds this fraction
# of the criterion's points_rubric.
outlierFactor = 0.15
pad = 5

# Fixed label order so each label always gets the same colour. Deriving the order
# from .unique() made the colours differ from one assignment's figure to the next.
outlierOrder = ['In range', 'Outlier']

# All per-assignment criterion charts are written to one sub-folder named after the
# run. makedirs(exist_ok=True) also creates a missing parent folder and is safe to
# re-run, so it is done once here instead of on every loop iteration.
saveFolder = os.path.join(config.ChartDumpFolder, saveName)
os.makedirs(saveFolder, exist_ok=True)

for assignmentID in resultsDF['assignment_id'].unique():

    # Collect the criterion-level tables of this assignment, tagging each with the
    # ids of the row it came from.
    assgnDF = resultsDF[(resultsDF['assignment_id']==assignmentID)]
    criterionFrames = []
    for index, row in assgnDF.iterrows():
        # Tag a copy so the tables stored inside resultsDF are not modified.
        criterionData = row['data_peerGPT'].copy()
        for tagCol in ['submitter_id', 'assignment_id', 'grader_id']:
            criterionData[tagCol] = row[tagCol]
        criterionFrames.append(criterionData)
    # Single concat instead of growing a DataFrame inside the loop.
    critDataDF = pd.concat(criterionFrames) if criterionFrames else pd.DataFrame()

    allCritDF = critDataDF.copy()
    allCritDF['Score Difference'] = allCritDF['peerGPT_criterion_score']-allCritDF['points_grade']
    allCritDF['Outlier'] = (allCritDF['Score Difference']/(allCritDF['points_rubric']*outlierFactor)).apply(lambda scoreDiff: 'Outlier' if abs(scoreDiff)>1 else 'In range')

    # Grid layout: one row per grader, one column per rubric criterion (both sorted),
    # three inches per panel.
    rubricDescs = sorted(allCritDF['description_rubric'].unique())
    graderIDs = sorted(allCritDF['grader_id'].unique())

    # squeeze=False keeps `axes` two-dimensional even with one grader or one
    # criterion; otherwise axes[row,col] and axes[:,0] below raise an IndexError.
    fig, axes = plt.subplots(nrows=len(graderIDs), ncols=len(rubricDescs),
                             figsize=(len(rubricDescs)*3, len(graderIDs)*3),
                             layout="constrained", squeeze=False)
    fig.suptitle(f'Assignment ID: {assignmentID}')

    for col, descRubric in enumerate(rubricDescs):
        # Each column shows ONE criterion, so both the data and the axis range are
        # restricted to it. Previously the subset was filtered by assignment and
        # grader only, so every column repeated the same points for all criteria.
        criterionDF = allCritDF[(allCritDF['description_rubric']==descRubric)]
        lowerX = int(min(criterionDF['points_grade']))
        xTickStep = 1

        for row, graderID in enumerate(graderIDs):
            subsetDF = criterionDF[(criterionDF['grader_id']==graderID)]
            if subsetDF.empty:
                # No rows for this grader/criterion pair: leave the panel blank
                # instead of failing on .iloc[0] below.
                continue

            pointsRubric = int(subsetDF['points_rubric'].iloc[0])
            # One extra unit of headroom above the criterion's maximum.
            upperX = pointsRubric+1
            # Width of the "in range" band around the diagonal, in points.
            shiftValue = pointsRubric*outlierFactor

            g = sns.scatterplot(ax=axes[row,col], data=subsetDF, x='points_grade', y='peerGPT_criterion_score', hue='Outlier', hue_order=outlierOrder, palette=sns.color_palette()[:2])
            # Dark dashed line: perfect agreement (y = x). Grey dashed lines: the
            # diagonal shifted by shiftValue to either side.
            axes[row,col].plot([lowerX,upperX],[lowerX,upperX], lw=1, color='#313232', linestyle='dashed')
            axes[row,col].plot([lowerX+shiftValue,upperX+shiftValue],[lowerX,upperX], lw=1, color='#aaaaaa', linestyle='dashed')
            axes[row,col].plot([lowerX,upperX],[lowerX+shiftValue,upperX+shiftValue], lw=1, color='#aaaaaa', linestyle='dashed')
            g.set_xlim(lowerX-0.1,upperX+0.1)
            g.set_xticks(range(lowerX,upperX+1, xTickStep))
            g.set_ylim(lowerX-0.1,upperX+0.1)
            g.set_yticks(range(lowerX,upperX+1, xTickStep))
            g.set_xlabel('Grader Score',fontsize=8)
            g.set_ylabel('peerGPT Score', fontsize=8, rotation=90)
            # Hide the per-panel legend.
            g.legend([],[], frameon=False)

    # Column titles: the rubric description wrapped at 24 characters.
    for ax, col in zip(axes[0], rubricDescs):
        newLine = '\n'.join(textwrap.wrap(col, width=24))
        ax.set_title(f'{newLine}')
    # Row labels to the left of the first column.
    for ax, row in zip(axes[:,0], graderIDs):
        ax.annotate(f'Grader ID: {row}', xy=(0, 0.5), xytext=(-ax.yaxis.labelpad - pad, 0),
                    xycoords=ax.yaxis.label, textcoords='offset points',
                    size='large', ha='right', va='center', rotation=90)

    fig.savefig(os.path.join(saveFolder, f'{assignmentID}-ScatterPlotSpread.png'), dpi=300, bbox_inches='tight')


## Track Difference in scores per Assignment

In [None]:
# Signed gap between the two score columns (peerGPT minus grader), stored on saveDF.
saveDF['Score Difference'] = saveDF['peerGPT_score_real'] - saveDF['score']
# Alias only -- nothing is excluded here; this is the place to filter rows if needed.
excludeDF = saveDF

# Nested mapping {grader_id: {assignment_id: mean score difference}}.
meanDiffDF = {}
for (graderKey, assignmentKey), pairDF in excludeDF.groupby(['grader_id', 'assignment_id']):
    meanDiffDF.setdefault(graderKey, {})[assignmentKey] = pairDF["Score Difference"].mean()

# Built from the nested dict: outer keys (graders) become columns, inner keys
# (assignments) become the index. Written to Excel, then shown as the cell output.
meanDiffTable = pd.DataFrame(meanDiffDF)
meanDiffTable.to_excel(os.path.join(config.ExcelDumpFolder, saveName+' - Grader - peerGPT Score Difference.xlsx'))
meanDiffTable

In [None]:
# Fresh configuration object. NOTE: this replaces the `config` set up at the top of
# the notebook, so setSaveDetails / simpleCourseName are not applied to this one.
config = Config()
config.setFromEnv()

# The three CSV exports share the course name as a file-name prefix.
gradeCSVFile = f'{config.courseName}gradings.csv'
rubricCSVFile = f'{config.courseName}rubrics.csv'
assignmentCSVFile = f'{config.courseName}assignments.csv'

csvFolder = config.CSVDataFolder
gradeDataDF = pd.read_csv(os.path.join(csvFolder, gradeCSVFile))
rubricDataDF = pd.read_csv(os.path.join(csvFolder, rubricCSVFile))
assignmentDataDF = pd.read_csv(os.path.join(csvFolder, assignmentCSVFile))

# The 'data' columns hold JSON text; decode each cell into Python objects.
gradeDataDF['data'] = gradeDataDF['data'].apply(json.loads)
rubricDataDF['data'] = rubricDataDF['data'].apply(json.loads)

# gradings -> rubrics on rubric_id (the two 'data' columns become data_grade and
# data_rubric), then -> assignments on assignment_id.
gradeRubricDF = gradeDataDF.merge(rubricDataDF, on='rubric_id', suffixes=('_grade', '_rubric'))
gradeRubricAssignmentDF = gradeRubricDF.merge(
    assignmentDataDF, on='assignment_id', suffixes=('', '_assignment')
)

keepColumns = [
    'submitter_id', 'grader_id', 'score', 'rubric_id',
    'assignment_id', 'assignment_title', 'data_grade',
    'data_rubric', 'points_possible',
    'assignment_description', 'cleaned_description',
]
gradeRubricAssignmentDF = gradeRubricAssignmentDF[keepColumns]

# Spot check: show the first row of assignment 1916709 that has a score of 40.0,
# together with its grade and rubric payloads rendered as tables.
isTargetAssignment = gradeRubricAssignmentDF['assignment_id']==1916709
hasTargetScore = gradeRubricAssignmentDF['score']==40.0
for index, row in gradeRubricAssignmentDF[isTargetAssignment & hasTargetScore].iterrows():
    print(row)
    display(pd.DataFrame(row['data_grade']))
    display(pd.DataFrame(row['data_rubric']))
    break  # only the first match is inspected