# Part 1: Prepping Submissions & Custom Criteria Descriptions

You should only need to run the code blocks in Part 1 once for an assignment.

In [None]:
!pip3 install openai

In [None]:
!pip3 install pdf2docx

## Build custom file for manual criterion descriptions

After you populate the `custom_description` column, manually re-save the file with UTF-8 encoding.
On Windows, the file may not load properly unless the encoding is set explicitly when saving.

In [None]:
from simpleHelper import *

# Paths, all under data/: the three Canvas export CSVs (assignments, gradings,
# rubrics) followed by the criterion CSV, which is passed to
# makeBlankCriterionFile as its last argument.
(pathToAssgnCSV,
 pathToGradingsCSV,
 pathToRubricsCSV,
 pathToCriterionCSV) = [
    os.path.join('data', csvName)
    for csvName in (
        'MOVESCI_110_FA_2023_2060736_assignmentsJK.csv',
        'MOVESCI_110_FA_2023_2060736_gradings.csv',
        'MOVESCI_110_FA_2023_2060736_rubrics.csv',
        '2060736_criterion.csv',
    )
]

# Presumably writes the blank criterion file that is then filled in by hand
# (see the markdown above) -- confirm against simpleHelper.
makeBlankCriterionFile(pathToAssgnCSV, pathToGradingsCSV, pathToRubricsCSV, pathToCriterionCSV)

## Generate text versions of submission files

You will need to run this notebook cell to convert the submissions into a format usable by the notebook.

The `data/submissions_xxxxxx` folder should contain the unzipped submission folders exported from Canvas. The folder name can be anything; you just need to specify it in the `originalSubmissionFolder` variable. You will also need to set the Assignment ID in the `assignmentID` variable.

A folder for the converted submissions, named `data/Converted submissions_xxxxxx` (where xxxxxx is the Assignment ID), is created automatically.

In [None]:
from simpleHelper import *

# Canvas Assignment ID of the submissions being converted.
assignmentID = 2060736
# Folder holding the unzipped submission folders exported from Canvas.
originalSubmissionFolder = os.path.join('data', 'submissions_2060736')

# Convert the raw submissions for this assignment into the form used by the
# rest of the notebook.
convertSubmissions(originalSubmissionFolder, assignmentID)

# Part 2: Getting ChatGPT responses for submissions

You need to provide the full file path to each of the 4 CSV files. You also need to specify your OpenAI key details, the course code, and the assignment ID.
The `courseName` variable is simply the real name of the course.

In [None]:
from simpleHelper import *
from tqdm import tqdm

# --- Inputs -----------------------------------------------------------------
# Canvas export CSVs and the criterion-description CSV.
# NOTE(review): these file names look like placeholders and differ from the
# concrete names used in Part 1 -- replace them with your real files.
pathToAssgnCSV = os.path.join('data','coursename_courseid_assignmentsJK.csv')
pathToGradingsCSV = os.path.join('data','coursename_courseid_gradings.csv')
pathToRubricsCSV = os.path.join('data','coursename_courseid__rubrics.csv')

pathToCriterionCSV = os.path.join('data','courseid__criterion.csv')

courseCode = 626158
assignmentID = 2060736
courseName = 'Movement Science'

# When False, submissions that checkIfSaved reports as already stored are skipped.
overWriteSave = False

customDescMode = True
critDescDF = pd.read_csv(pathToCriterionCSV).drop_duplicates()

# --- Output folders ---------------------------------------------------------
saveFolder = 'data/saves'
errorFolder = 'data/error'
# makedirs(exist_ok=True) also creates a missing parent folder and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
for outputFolder in (saveFolder, errorFolder):
    os.makedirs(outputFolder, exist_ok=True)

# --- Load the data and keep only the selected assignment ---------------------
gradeRubricAssignmentDF = getGRAData(pathToAssgnCSV, pathToGradingsCSV, pathToRubricsCSV)
# Show a preview only; dumping the whole frame floods the cell output.
display(gradeRubricAssignmentDF.head())
print(f"gradeRubricAssignmentDF size {gradeRubricAssignmentDF.shape[0]}")
print(assignmentID)

gradeRubricAssignmentDF = gradeRubricAssignmentDF[gradeRubricAssignmentDF['assignment_id']==assignmentID]

print(f"gradeRubricAssignmentDF size {gradeRubricAssignmentDF.shape[0]}")
if gradeRubricAssignmentDF.empty:
    # Without this message the loop below would silently do nothing.
    print(f"WARNING: no rows found for assignment {assignmentID}; nothing will be processed.")

# --- Process each row, saving the output of every run -------------------------
for index, row in (pbar := tqdm(gradeRubricAssignmentDF.iterrows(), total=gradeRubricAssignmentDF.shape[0])):
    pbar.set_description(f"Processing: {assignmentID}-{row['submitter_id']}")
    # checkIfSaved is deliberately evaluated first, exactly as before.
    if checkIfSaved(row['assignment_id'], row['submitter_id'], saveFolder, errorFolder) and not overWriteSave:
        continue
    dataDict, runSuccess = processGRARow(row, courseName, customDescMode, critDescDF)
    saveOutputasPickle(dataDict, runSuccess, saveFolder, errorFolder)


In [None]:
!pip3 install "pandas<2.0.0"

# Part 3: Analyzing the results and making charts and tables

In [None]:
# Load the pickled outputs written in Part 2: one frame from the save folder
# and one from the error folder.
resultsDF = convertPicklesToDF(saveFolder)
errorDF = convertPicklesToDF(errorFolder)

# Output folders for the Excel tables and the charts.
excelFolder = 'data/excelJK'
chartFolder = 'data/chartsJK'
# makedirs(exist_ok=True) also creates a missing parent folder and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
for outputFolder in (excelFolder, chartFolder):
    os.makedirs(outputFolder, exist_ok=True)

# Common prefix passed to the helpers that save output files.
saveName = f'{courseName}-{assignmentID}'

mergedCriterionData = getCriterionDataDF(resultsDF, saveName, excelFolder)

# Optional: uncomment to also run the score-spread step.
# getScoreSpread(resultsDF, saveName, chartFolder)

In [None]:
# Compute the grader vs. peerGPT mean score differences; the helper returns the
# absolute and the percentage table, in that order.
graderPeerGPTDiffs = saveGraderPeerGPTMeanScoreDiff(resultsDF, saveName, excelFolder)
meanDiffDF, meanDiffPercentDF = graderPeerGPTDiffs

# Show the percentage table in the notebook.
display(meanDiffPercentDF)

In [None]:
from simpleHelper import buildFullInfoDF
# The code below builds fullInfoDF and rubricOrderDict step by step; it is
# presumably the inlined equivalent of this helper call (confirm against simpleHelper):
#fullInfoDF, rubricOrderDict = buildFullInfoDF(gradeRubricAssignmentDF, resultsDF, saveName, excelFolder)

# --- 1. Stack the per-submission criterion tables ----------------------------
# Every row of resultsDF carries a table in 'data_peerGPT'. Tag a *copy* of it
# with the submission identifiers (so the tables stored inside resultsDF are
# not modified) and concatenate once at the end instead of once per row.
critFrames = []
for _, resultRow in resultsDF.iterrows():
    criterionData = resultRow['data_peerGPT'].copy()
    for col in ['submitter_id', 'assignment_id', 'grader_id']:
        criterionData[col] = resultRow[col]
    critFrames.append(criterionData)
if not critFrames:
    # Fail here with a clear message rather than with a KeyError further down.
    raise ValueError("resultsDF has no rows; generate results in Part 2 before running this cell.")
critDataDF = pd.concat(critFrames)
allCritDF = critDataDF.drop(['mastery_points','ignore_for_scoring','title','peerGPT_criterion_id','description_grade'],
                        axis=1, errors='ignore')

print(f"allCritDF size {allCritDF.shape[0]}")

# --- 2. Per-grader statistics for every (assignment, criterion) --------------
# .std() uses the pandas default (sample standard deviation).
meanInfoList = [
    {'assignment_id': assignmentKey, 'criterion_id': criterionKey, 'grader_id': graderKey,
     'Grader Mean': critGroup['points_grade'].mean(),
     'Grader Std. Dev.': critGroup['points_grade'].std(),
     'peerGPT Mean': critGroup['peerGPT_criterion_score'].mean(),
     'peerGPT Std. Dev.': critGroup['peerGPT_criterion_score'].std()}
    for (assignmentKey, criterionKey, graderKey), critGroup
    in allCritDF.groupby(['assignment_id','criterion_id','grader_id'])
]
meanInfoDF = pd.DataFrame(meanInfoList)
meanInfoDF['Mean Difference'] = meanInfoDF['peerGPT Mean'] - meanInfoDF['Grader Mean']

# --- 3. Assignment / rubric information shared by all graders ----------------
print(f"gradeRubricAssignmentDF size {gradeRubricAssignmentDF.shape[0]}")
assignmentDF = gradeRubricAssignmentDF[['assignment_id', 'assignment_title']].drop_duplicates()
print(f"assignmentDF size {assignmentDF.shape[0]}")

baseInfoDF = allCritDF[['assignment_id', 'criterion_id', 'description_rubric', 'points_rubric']].drop_duplicates()
baseInfoDF = baseInfoDF.merge(assignmentDF, on='assignment_id')
print(f"baseInfoDF size {baseInfoDF.shape[0]}")

# --- 4. Statistics over all graders for every (assignment, criterion) --------
globalMeanList = [
    {'assignment_id': assignmentKey, 'criterion_id': criterionKey,
     'All Graders Mean': critGroup['points_grade'].mean(),
     'All Graders Std. Dev.': critGroup['points_grade'].std(),
     'Global peerGPT Mean': critGroup['peerGPT_criterion_score'].mean(),
     'Global peerGPT Std. Dev.': critGroup['peerGPT_criterion_score'].std()}
    for (assignmentKey, criterionKey), critGroup
    in allCritDF.groupby(['assignment_id', 'criterion_id'])
]
globalMeanDF = pd.DataFrame(globalMeanList)

baseInfoDF = baseInfoDF.merge(globalMeanDF, on=['assignment_id', 'criterion_id'])

# --- 5. Combine and express the differences as a % of the rubric points ------
fullInfoDF = meanInfoDF.merge(baseInfoDF, on=['assignment_id', 'criterion_id'])
fullInfoDF['Mean Difference %'] = 100*fullInfoDF['Mean Difference'].div(fullInfoDF['points_rubric'])
fullInfoDF['Grader Mean Diff. %'] = 100*(fullInfoDF['All Graders Mean'] - fullInfoDF['Grader Mean']).div(fullInfoDF['points_rubric'])
# Preview once, after both derived columns exist.
display(fullInfoDF.head())

graderDiffTablePath = os.path.join(excelFolder, saveName+' - Grader Difference Table.xlsx')
print(f"Saving file at: {graderDiffTablePath}")
fullInfoDF.to_excel(graderDiffTablePath)

# --- 6. Rubric criterion descriptions per assignment, in rubric order --------
rubricInfo = gradeRubricAssignmentDF[['assignment_id', 'data_rubric']].drop_duplicates('assignment_id').reset_index(drop=True)
rubricOrderDict = {
    rubricRow['assignment_id']: pd.DataFrame(rubricRow['data_rubric'])['description'].tolist()
    for _, rubricRow in rubricInfo.iterrows()
}

In [None]:
# Run the z-score / confidence-interval step on the grader difference table.
# NOTE(review): confidence=0.93 is an unusual level (0.95 is more common) -- confirm it is intended.
getZScoreAndCI(fullInfoDF, saveName, excelFolder, confidence=0.93)

In [None]:
# Build the mean-difference-% charts from fullInfoDF, passing the per-assignment
# rubric order; presumably the charts are written to chartFolder -- confirm in simpleHelper.
getMeanDiffPercentCharts(fullInfoDF, rubricOrderDict, courseName, chartFolder)