## Select Assignment Number

Set the number of the current assignment so that the matching assignment-specific settings are loaded. 

In [1]:
# Number of the assignment to process; used below to index ASSIGNMENTS and passed to the resource and quality-check helpers.
ASSIGNMENT_NR = 2

## Load Packages and Global Settings

In [16]:
# Package imports
from canvasapi import Canvas
from canvasapi.requester import Requester
from canvas_connector.utils.canvas_utils import download_assignment_submissions
from collections import defaultdict
from openai import OpenAI
import os
import pickle as pkl
import zipfile
import shutil
import re

# Local imports
from scripts.jsonify import jsonify, jsonify_resources, analyze_jsonify_results
from scripts.utils import ensure_folder_exists, create_file_list, parsed_submissions_quality_check, deduplicate_files_with_manual_fixes, load_latest_jsonified_student_submission, load_jsonified_resources
from scripts.llm_utils import create_openai_message, prompt_gpt, format_prompt
from scripts.utils import extract_html_content

In [3]:
# Load global settings
from scripts.settings import *

# Pick the settings entry that belongs to the selected assignment number
ASSIGNMENT = ASSIGNMENTS[ASSIGNMENT_NR]

# All Canvas identifiers sit under the "canvas" key, so resolve that sub-dict once
_canvas_ids = ASSIGNMENT["canvas"]
ASSIGNMENT_ID = _canvas_ids["assignment_id"]
QUIZ_ID = _canvas_ids["quiz_id"]
R_QUIZ_QUESTION_ID = _canvas_ids["r_quiz_question_id"]
ADV_QUIZ_QUESTION_ID = _canvas_ids["adv_quiz_question_id"]

# Stored at the top level of the assignment settings rather than under "canvas"
LOCK_GRADES_DATE = ASSIGNMENT["lock_grades_date"]

In [4]:
# Canvas: the client and the requester are built from the same URL / key pair
canvas_api_url = os.getenv("CANVAS_API_URL")
canvas_api_key = os.getenv("CANVAS_API_KEY")
canvas_client = Canvas(canvas_api_url, canvas_api_key)
canvas_requester = Requester(canvas_api_url, canvas_api_key)

# OpenAI: use the default endpoint unless USE_UVA_OPENAI selects the one
# configured through the UVA_OPENAI_* environment variables
if not USE_UVA_OPENAI:
    openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
else:
    openai_client = OpenAI(
        api_key=os.getenv("UVA_OPENAI_API_KEY"),
        base_url=os.getenv("UVA_OPENAI_BASE_URL"),
    )
    # Only in this branch is "gpt-4o" rewritten to "gpt4o"
    if MODEL == "gpt-4o":
        MODEL = "gpt4o"

## Jsonify Resources

To ensure the latest changes to rubrics, assignment, example solutions, or goals are captured, the resources are jsonified.

In [5]:
# Re-jsonify the resources of this assignment, then summarise what was produced
jsonified_resources_result = jsonify_resources(ASSIGNMENT_NR, RESOURCES_PATH)
analyze_jsonify_results(jsonified_resources_result)

questions: 28 (R: 18, Radv: 2, Python: 8)
rubrics: 28 (R: 18, Radv: 2, Python: 8)
solutions: 28 (R: 18, Radv: 2, Python: 8)
goals: 28 (R: 18, Radv: 2, Python: 8)


## Download and Prepare Submissions

All assignment submissions are downloaded and jsonified.

In [6]:
# Download assignment submissions
# NOTE(review): the whitelist holds a single hard-coded user ID, presumably a
# test account. Confirm before running this for the whole course.
user_whitelist = [513294]
user_blacklist = []
out_paths = download_assignment_submissions(canvas_requester, COURSE_ID, ASSIGNMENT_ID, user_whitelist, user_blacklist)

# Jsonify submissions: the output goes next to each downloaded file, with the
# file extension swapped for ".json". os.path.splitext strips only a real file
# extension, so an extension-less file or a dot in a folder name cannot
# truncate the output path (the previous split(".") approach did).
for out_path in out_paths:
    jsonify(out_path, os.path.splitext(out_path)[0] + ".json")

Some submissions may be formatted incorrectly, despite students being instructed how to format them and to validate them here before submitting: https://lukekorthals.shinyapps.io/pips-submission-validator/ 

Therefore, perform a quality check to make sure submissions were correctly parsed.

In [7]:
quality_check_df = parsed_submissions_quality_check(ASSIGNMENT_NR, ASSIGNMENT_ID)

# Select the rows outside the f-strings: reusing double quotes inside an
# f-string replacement field is a SyntaxError on Python versions before 3.12.
complete_submissions = quality_check_df[quality_check_df["all_indicators_found"]]
incomplete_submissions = quality_check_df[~quality_check_df["all_indicators_found"]]
submissions_with_additional = quality_check_df[quality_check_df["contains_additional_indicators"]]

print("Found")
print(f"- {len(complete_submissions)} complete submissions")
print(f"- {len(incomplete_submissions)} incomplete submissions")
print(f"- {len(submissions_with_additional)} submissions with additional indicators")

Found
- 1 complete submissions
- 0 incomplete submissions
- 0 submissions with additional indicators


Open the raw submissions by students with missing indicators to check if they are really missing or just not recognized. 

In [8]:
# Rows of the quality check where not every expected indicator was found
has_missing_indicators = ~quality_check_df["all_indicators_found"]
quality_check_df[has_missing_indicators]

Unnamed: 0,user_id,found_indicators,missing_indicators,additional_indicators,all_indicators_found,contains_additional_indicators


Open the raw submissions by students with additional indicators and see if you understand what went wrong and if you can fix it. 

In [9]:
# Rows of the quality check that contain indicators beyond the expected ones
has_additional_indicators = quality_check_df["contains_additional_indicators"]
quality_check_df[has_additional_indicators]

Unnamed: 0,user_id,found_indicators,missing_indicators,additional_indicators,all_indicators_found,contains_additional_indicators


If you need to fix anything (e.g., because a student wrote #R 1 instead of #R1), copy the raw submission and append `_ManualFixes` before the file extension. Then rejsonify the manual fixes. The remainder of the pipeline will prefer files with ManualFixes over raw files. 

After jsonifying any files with ManualFixes, recheck the `quality_check_df`.

In [10]:
# Jsonify submissions with manual fixes
# NOTE(review): presumably the second argument lists substrings a file name must
# contain and the third lists those to skip; verify against create_file_list.
files_with_fixes = create_file_list(SUBMISSIONS_PATH, ["_ManualFixes"],[".json"])
for file in files_with_fixes:
    # os.path.splitext strips only a real file extension, so an extension-less
    # file or a dot in a folder name cannot truncate the output path.
    jsonify(file, os.path.splitext(file)[0] + ".json")

## Grade Submissions and Generate Feedback

In [19]:
def _request_completion(prompt_key, prompt_fields, pkl_out_path, n_choices, temperature):
    """Send one system + user prompt pair to the model and return the completion.

    Args:
        prompt_key: Key into PROMPTS; the entry's "system_prompt" and
            "user_prompt" are used.
        prompt_fields: Values handed to format_prompt together with the user prompt.
        pkl_out_path: Forwarded to prompt_gpt as ``pkl_out_path``.
        n_choices: Forwarded to prompt_gpt as ``n``.
        temperature: Forwarded to prompt_gpt as ``temperature``.

    Returns:
        Whatever prompt_gpt returns for the assembled messages.
    """
    formated_user_prompt = format_prompt(PROMPTS[prompt_key]["user_prompt"], prompt_fields)
    messages = create_openai_message("system", PROMPTS[prompt_key]["system_prompt"])
    messages += create_openai_message("user", formated_user_prompt)
    return prompt_gpt(openai_client,
                      MODEL,
                      messages,
                      pkl_out_path=pkl_out_path,
                      n=n_choices,
                      temperature=temperature)


# Get user IDs: entries in SUBMISSIONS_PATH that start with "user"; the ID is the part after the first "-"
user_ids = [user.split("-")[1] for user in os.listdir(SUBMISSIONS_PATH) if user.startswith("user")]

# Get jsonified resources for this week
resources = load_jsonified_resources(ASSIGNMENT_NR, RESOURCES_PATH)

# Loop over all users
for user_id in user_ids:
    # NOTE(review): only this one hard-coded user is processed; looks like a
    # development filter. Confirm and remove before grading everyone.
    if user_id != "513294":
        continue

    # Get student submission
    submission = load_latest_jsonified_student_submission(ASSIGNMENT_ID, user_id, SUBMISSIONS_PATH)

    # Common prefix of every pickled completion written for this user
    pkl_prefix = f"submissions/user-{user_id}/assignment-{ASSIGNMENT_ID}/llm_reports/pickled_completions/user-{user_id}_assignment-{ASSIGNMENT_ID}"

    # Raw model responses per question indicator
    grading_dict = {}
    feedback_dict = {}

    # Loop over all questions
    for indicator in resources["questions"]:

        # Extract relevant information
        question = resources["questions"][indicator]
        solution = resources["solutions"][indicator]
        rubric = resources["rubrics"][indicator]
        answer = submission[indicator]
        goal = resources["goals"][indicator]

        # Prompt for grading
        completion = _request_completion(
            "grading",
            {"task": question, "solution": solution, "rubric": rubric, "answer": answer},
            f"{pkl_prefix}_task-{indicator}_prompt-grading_completion.pkl",
            N_CHOICES_GRADING,
            GRADING_TEMPERATURE,
        )
        # TODO: decide which of the returned choices to extract; only the first is kept for now
        grading_dict[indicator] = completion.choices[0].message.content

        # Prompt for feedback
        completion = _request_completion(
            "feedback_questionwise",
            {"task": question, "answer": answer, "goal": goal},
            f"{pkl_prefix}_task-{indicator}_prompt-feedback-questionwise_completion.pkl",
            N_CHOICES_FEEDBACK,
            FEEDBACK_TEMPERATURE,
        )
        # TODO: decide which of the returned choices to extract; only the first is kept for now
        feedback_dict[indicator] = completion.choices[0].message.content

        # NOTE(review): this stops after the first question, so the summary
        # below is built from a single feedback entry. Presumably a development
        # shortcut; confirm and remove.
        break

    # Prompt for feedback summary
    feedback = "\n\n\n".join([f"{key}\n{extract_html_content(value, 'feedback')}" for key, value in feedback_dict.items()])
    # NOTE(review): the summary completion is not used further in this cell.
    completion = _request_completion(
        "feedback_summary",
        {"feedback": feedback},
        f"{pkl_prefix}_prompt-feedback-summary_completion.pkl",
        N_CHOICES_FEEDBACK,
        FEEDBACK_TEMPERATURE,
    )
