## Select Assignment Number

To load the correct settings, the number for the current assignment is set. 

In [1]:
# Number of the assignment to process; used below as the key into ASSIGNMENTS
# and passed to the jsonify / quality-check helpers.
ASSIGNMENT_NR = 2

## Load Packages and Global Settings

In [2]:
# Package imports
from canvasapi import Canvas
from canvasapi.requester import Requester
from canvas_connector.utils.canvas_utils import download_assignment_submissions
from collections import defaultdict
from datetime import datetime
import numpy as np
from openai import OpenAI
import os
import pandas as pd
import pickle as pkl
import zipfile
import shutil
import re

# Local imports
from scripts.canvas_utils import update_canvas_grade, post_canvas_comments
from scripts.jsonify import jsonify, jsonify_resources, analyze_jsonify_results
from scripts.utils import ensure_folder_exists, create_file_list, parsed_submissions_quality_check, deduplicate_files_with_manual_fixes, load_latest_jsonified_student_submission, load_jsonified_resources
from scripts.llm_utils import create_openai_message, prompt_gpt, format_with_default
from scripts.utils import extract_html_content, get_sum_points_for_pattern, get_weighted_points, deduplicate_highest_attempt
from scripts.llm_report_utils import start_report_with_header, add_messages_to_report, add_text_to_report

In [3]:
# Load global settings
# NOTE(review): the star import hides which names come from scripts.settings
# (presumably ASSIGNMENTS, MODEL, PROMPTS, the *_PATH constants, ...) — consider
# importing them explicitly once the full list is confirmed.
from scripts.settings import *

# Load assignment specific settings
ASSIGNMENT = ASSIGNMENTS[ASSIGNMENT_NR]

# All Canvas identifiers live under the "canvas" key of the assignment entry
_canvas_settings = ASSIGNMENT["canvas"]
ASSIGNMENT_ID = _canvas_settings["assignment_id"]
QUIZ_ID = _canvas_settings["quiz_id"]
R_QUIZ_QUESTION_ID = _canvas_settings["r_quiz_question_id"]
ADV_QUIZ_QUESTION_ID = _canvas_settings["adv_quiz_question_id"]

# Date string compared against today's date before grades are pushed to Canvas
LOCK_GRADES_DATE = ASSIGNMENT["lock_grades_date"]

In [4]:
# Initialize Canvas API (URL and key are read from the environment)
_canvas_api_url = os.getenv("CANVAS_API_URL")
_canvas_api_key = os.getenv("CANVAS_API_KEY")
canvas_client = Canvas(_canvas_api_url, _canvas_api_key)
canvas_requester = Requester(_canvas_api_url, _canvas_api_key)

# Initialize OpenAI API
if not USE_UVA_OPENAI:
    # Default OpenAI endpoint
    openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
else:
    # Alternative endpoint with its own key and base URL
    openai_client = OpenAI(api_key=os.getenv("UVA_OPENAI_API_KEY"),
                           base_url=os.getenv("UVA_OPENAI_BASE_URL"))
    if MODEL == "gpt-4o":
        # The model goes by a different name on this endpoint
        MODEL = "gpt4o"

## Jsonify Resources

To ensure the latest changes to rubrics, assignment, example solutions, or goals are captured, the resources are jsonified.

In [5]:
# Re-jsonify the resources of the current assignment, then summarize the result
jsonify_resources_result = jsonify_resources(ASSIGNMENT_NR, RESOURCES_PATH)
analyze_jsonify_results(jsonify_resources_result)

questions: 28 (R: 18, Radv: 2, Python: 8)
rubrics: 28 (R: 18, Radv: 2, Python: 8)
solutions: 28 (R: 18, Radv: 2, Python: 8)
goals: 28 (R: 18, Radv: 2, Python: 8)
weights: 28 (R: 18, Radv: 2, Python: 8)


## Download and Prepare Submissions

All assignment submissions are downloaded and jsonified.

In [6]:
# Download assignment submissions
# NOTE(review): the whitelist presumably restricts the download to the listed
# Canvas user ids (currently a single test user) — confirm and empty it for a
# full run.
user_whitelist = [513294]
user_blacklist = []
out_paths = download_assignment_submissions(canvas_requester, COURSE_ID, ASSIGNMENT_ID, user_whitelist, user_blacklist)

# Jsonify submissions
for out_path in out_paths:
    # os.path.splitext only strips the extension of the last path component, so
    # dots in directory names or extension-less files no longer yield a wrong
    # (or empty) target path.
    json_out_path = os.path.splitext(out_path)[0] + ".json"
    jsonify(out_path, json_out_path)

Some submissions may be formatted incorrectly, despite instructing students how to format them and to validate them here before submitting: https://lukekorthals.shinyapps.io/pips-submission-validator/ 

Therefore, perform a quality check to make sure submissions were correctly parsed.

In [7]:
quality_check_df = parsed_submissions_quality_check(ASSIGNMENT_NR, ASSIGNMENT_ID)

# Pull the boolean columns out first: nesting double quotes inside a
# double-quoted f-string is a SyntaxError on Python versions before 3.12.
all_indicators_found = quality_check_df["all_indicators_found"]
contains_additional_indicators = quality_check_df["contains_additional_indicators"]

print("Found")
print(f"- {len(quality_check_df[all_indicators_found])} complete submissions")
print(f"- {len(quality_check_df[~all_indicators_found])} incomplete submissions")
print(f"- {len(quality_check_df[contains_additional_indicators])} submissions with additional indicators")

Found
- 1 complete submissions
- 0 incomplete submissions
- 0 submissions with additional indicators


Open the raw submissions by students with missing indicators to check if they are really missing or just not recognized. 

In [8]:
# Rows of students for whom not all indicators were found
missing_indicator_mask = ~quality_check_df["all_indicators_found"]
quality_check_df.loc[missing_indicator_mask]

Unnamed: 0,user_id,found_indicators,missing_indicators,additional_indicators,all_indicators_found,contains_additional_indicators


Open the raw submissions by students with additional indicators and see if you understand what went wrong and if you can fix it. 

In [9]:
# Students with additional indicators
quality_check_df[quality_check_df["contains_additional_indicators"]]

Unnamed: 0,user_id,found_indicators,missing_indicators,additional_indicators,all_indicators_found,contains_additional_indicators


If you need to fix anything (e.g., because a student wrote #R 1 instead of #R1), copy the raw submission and append `_ManualFixes` before the file extension. Then rejsonify the manual fixes. The remainder of the pipeline will prefer files with ManualFixes over raw files. 

After jsonifying any files with ManualFixes, recheck the `quality_check_df`.

In [10]:
# Jsonify submissions with manual fixes
# NOTE(review): the arguments presumably mean "path contains _ManualFixes" and
# "skip files containing .json" — confirm against create_file_list.
files_with_fixes = create_file_list(SUBMISSIONS_PATH, ["_ManualFixes"],[".json"])
for file in files_with_fixes:
    # os.path.splitext only strips the extension of the last path component, so
    # dots in directory names or extension-less files no longer yield a wrong
    # (or empty) target path.
    jsonify(file, os.path.splitext(file)[0] + ".json")

## Prompt LLM for Grading and Feedback
The assignments of all students are graded and given feedback by the LLM.

In [14]:
def add_prompt_and_response_to_report(llm_report_out_path: str = None,
                                      level_2_header: str = None,
                                      details_label: str = "Details",
                                      prompt_messages: list = None,
                                      completion_messages: list = None,
                                      ):
    """Append one collapsible prompt/response section to the LLM report.

    Args:
        llm_report_out_path: Report path handed to the report helpers.
        level_2_header: If given, a "## Question <header>" line is written
            before the collapsible section.
        details_label: Text shown in the <summary> of the <details> element.
        prompt_messages: Messages written under "#### Prompts"; skipped if None.
        completion_messages: Messages written under "#### Completion Choices";
            skipped if None.
    """
    # Optional level 2 header in front of the collapsible section
    if level_2_header is not None:
        add_text_to_report(llm_report_out_path, f"## Question {level_2_header}\n")

    # Open the collapsible section
    opening_tag = f"<details>\n\t<summary>{details_label}</summary>\n\n"
    add_text_to_report(llm_report_out_path, opening_tag)

    # Write each message group that was provided, prompts before completions
    message_sections = (
        (prompt_messages, "#### Prompts\n"),
        (completion_messages, "#### Completion Choices\n"),
    )
    for section_messages, section_header in message_sections:
        if section_messages is None:
            continue
        add_messages_to_report(llm_report_out_path, section_messages, header=section_header)

    # Close the collapsible section
    closing_tag = "\n\n</details>\n\n"
    add_text_to_report(llm_report_out_path, closing_tag)

In [None]:
def _prompt_llm(prompt_key, prompt_fields, pkl_out_path, n_choices, temperature):
    """Send one system + user prompt pair to the LLM and collect every choice.

    Args:
        prompt_key: Key into PROMPTS selecting the "system_prompt" /
            "user_prompt" pair to use.
        prompt_fields: Values filled into the user prompt via format_with_default.
        pkl_out_path: Path handed to prompt_gpt as ``pkl_out_path``.
        n_choices: Number of completion choices requested (prompt_gpt's ``n``).
        temperature: Sampling temperature handed to prompt_gpt.

    Returns:
        Tuple ``(messages, completion, completion_messages)``: the prompt
        messages, the object returned by prompt_gpt, and one role/content dict
        per returned choice.
    """
    user_prompt = format_with_default(PROMPTS[prompt_key]["user_prompt"], prompt_fields)
    messages = create_openai_message("system", PROMPTS[prompt_key]["system_prompt"])
    messages += create_openai_message("user", user_prompt)
    completion = prompt_gpt(openai_client,
                            MODEL,
                            messages,
                            pkl_out_path=pkl_out_path,
                            n=n_choices,
                            temperature=temperature)
    completion_messages = [{"role": choice.message.role, "content": choice.message.content}
                           for choice in completion.choices]
    return messages, completion, completion_messages


# Get user IDs
user_ids = [user.split("-")[1] for user in os.listdir(SUBMISSIONS_PATH) if user.startswith("user")]

# Restrict processing to these users (currently a single test user).
# Set to None to process every user found in SUBMISSIONS_PATH.
ONLY_USER_IDS = {"513294"}

# Safety cap on the number of questions prompted per user
MAX_QUESTIONS_PER_USER = 200

# Get jsonified resources for this week
resources = load_jsonified_resources(ASSIGNMENT_NR,
                                     RESOURCES_PATH,
                                     ["questions", "solutions", "rubrics", "goals", "weights"])

# Load llm completion report templates (context manager closes the file handle)
with open("resources/llm_report/llm_report_header_template.txt", "r") as template_file:
    header_template = template_file.read()

# Loop over all users
for user_id in user_ids:
    if ONLY_USER_IDS is not None and user_id not in ONLY_USER_IDS:
        continue

    # Get student submission
    submission, attempt = load_latest_jsonified_student_submission(ASSIGNMENT_ID, user_id, SUBMISSIONS_PATH)

    # Initilize dicts
    grading_dict = {}
    feedback_dict = {}

    # All outputs of this user/assignment/attempt share a folder and file prefix
    llm_out_dir = f"submissions/user-{user_id}/assignment-{ASSIGNMENT_ID}/llm_outputs"
    file_prefix = f"user-{user_id}_ass-{ASSIGNMENT_ID}_try-{attempt}"
    pkl_dir = f"{llm_out_dir}/pickled_completions"

    # Initialize report
    llm_report_out_path = f"{llm_out_dir}/{file_prefix}_LLMCompletionReport.md"
    add_text_to_report(llm_report_out_path,
                       text=format_with_default(header_template,
                                                {"model": MODEL,
                                                 "grading_temperature": GRADING_TEMPERATURE,
                                                 "feedback_temperature": FEEDBACK_TEMPERATURE,
                                                 "n_choices_grading": N_CHOICES_GRADING,
                                                 "n_choices_feedback": N_CHOICES_FEEDBACK,
                                                 "student_id": user_id,
                                                 "assignment_id": ASSIGNMENT_ID}),
                       start_new=True)

    # Loop over all questions
    for question_count, indicator in enumerate(resources["questions"], start=1):
        if question_count > MAX_QUESTIONS_PER_USER:
            break
        print(indicator)

        # Extract relevant information
        question = resources["questions"][indicator]
        solution = resources["solutions"][indicator]
        rubric = resources["rubrics"][indicator]
        # NOTE(review): raises if the submission lacks this indicator; the quality
        # check above reports such incomplete submissions — decide how to grade them.
        answer = submission[indicator]
        goal = resources["goals"][indicator]

        # Prompt for grading
        messages, completion, completion_messages = _prompt_llm(
            "grading",
            {"task": question, "solution": solution, "rubric": rubric, "answer": answer},
            f"{pkl_dir}/{file_prefix}_task-{indicator}_prompt-grading_completion.pkl",
            N_CHOICES_GRADING,
            GRADING_TEMPERATURE)

        # Add first choice to grading dict
        grading_dict[indicator] = completion.choices[0].message.content # TODO which choice to extract?

        # Add chat completions to report
        add_prompt_and_response_to_report(llm_report_out_path,
                                          indicator,
                                          "Grading",
                                          messages,
                                          completion_messages)

        # Prompt for feedback
        messages, completion, completion_messages = _prompt_llm(
            "feedback_questionwise",
            {"task": question, "answer": answer, "goal": goal},
            f"{pkl_dir}/{file_prefix}_task-{indicator}_prompt-feedback-questionwise_completion.pkl",
            N_CHOICES_FEEDBACK,
            FEEDBACK_TEMPERATURE)

        # Add first choice to feedback dict
        feedback_dict[indicator] = completion.choices[0].message.content # TODO which choice to extract?

        # Add chat completions to report (no header: same question as the grading section)
        add_prompt_and_response_to_report(llm_report_out_path,
                                          None,
                                          "Feedback",
                                          messages,
                                          completion_messages)

    # Prompt for feedback summary
    feedback = "\n\n\n".join([f"{key}\n{extract_html_content(value, 'feedback')}" for key, value in feedback_dict.items()])
    messages, completion, completion_messages = _prompt_llm(
        "feedback_summary",
        {"feedback": feedback},
        f"{pkl_dir}/{file_prefix}_prompt-feedback-summary_completion.pkl",
        N_CHOICES_FEEDBACK,
        FEEDBACK_TEMPERATURE)

    # Add chat completions to report
    add_prompt_and_response_to_report(llm_report_out_path,
                                      "Feedback Summary",
                                      "Feedback",
                                      messages,
                                      completion_messages)

    # Get LLM grade
    points = {key: float(extract_html_content(value, "points")) for key, value in grading_dict.items()}
    points_w = get_weighted_points(points, resources["weights"])
    points_r = round(get_sum_points_for_pattern(points_w, r"#R(\d+)") * MAX_GRADE, 2)
    points_radv = round(get_sum_points_for_pattern(points_w, r"#Radv(\d+)") * MAX_GRADE, 2)
    points_py = round(get_sum_points_for_pattern(points_w, r"#Python(\d+)") * MAX_GRADE, 2)
    # The advanced part counts Radv when it earned any points, otherwise Python
    points_adv = points_radv if points_radv > 0 else points_py
    used_adv = "You were graded based on Radv." if points_radv > 0 else "You were graded based on Python."
    grade = round(points_r + points_adv, 2)

    # Save grade (one row per user: raw points per question plus the aggregates)
    df = pd.DataFrame({**points,
                       "points_r": points_r,
                       "points_radv": points_radv,
                       "points_py": points_py,
                       "points_adv": points_adv,
                       "used_adv": used_adv,
                       "grade": grade},
                      index=[user_id])
    df.to_csv(f"{llm_out_dir}/{file_prefix}_grader-llm_grades.csv")

#R1
#R2
#R3
#R4
#R5
#R6
#R7
#R8
#R9
#R10
#R11


In [None]:
# Debug cell: display the `completion` object left over from the last prompt of the grading loop above
completion

In [None]:
# Get Canvas objects
course = canvas_client.get_course(COURSE_ID)
assignment = course.get_assignment(ASSIGNMENT_ID)
quiz = course.get_quiz(QUIZ_ID)
quiz_submissions = list(quiz.get_submissions())

# Load text for comments to canvas (context managers close the file handles)
with open("resources/canvas_comments/canvas-comment_preliminary_grade.txt", "r") as comment_file:
    comment_preliminary_grade = comment_file.read()
with open("resources/canvas_comments/canvas-comment_feedback_received.txt", "r") as comment_file:
    comment_feedback_received = comment_file.read()

# Get grading files
grading_files = create_file_list(SUBMISSIONS_PATH, [f"ass-{ASSIGNMENT_ID}", "grader-llm_grades.csv"],[".json"])
grading_files = deduplicate_highest_attempt(grading_files)

# Test user that may still be updated after the grades are locked.
# Kept as int because user_id below is parsed to int; the previous comparison
# against the string "513294" was always unequal, so this user was skipped too.
TEST_USER_ID = 513294

user_id_pattern = re.compile(r"user-(\d+)")
grades_locked = datetime.today() >= datetime.strptime(LOCK_GRADES_DATE, "%Y-%m-%d")

for grading_file in grading_files:

    # The user id is taken from the "user-<digits>" part of the file path
    user_id = int(user_id_pattern.search(grading_file).group(1))
    if grades_locked and user_id != TEST_USER_ID:
        print("WARNING GRADES ARE LOCKED AND NO UPDATES TO CANVAS ARE MADE!")
        continue

    dat = pd.read_csv(grading_file)

    canvas_submission = assignment.get_submission(user = user_id)

    # Update Canvas grade (each grades file holds a single row, hence values[0])
    update_canvas_grade(user_id,
                        R_QUIZ_QUESTION_ID,
                        ADV_QUIZ_QUESTION_ID,
                        quiz_submissions,
                        dat.points_r.values[0],
                        dat.points_adv.values[0],
                        dat.used_adv.values[0],
                        dat.grade.values[0],
                        canvas_submission)

    # Post comments with grade and feedback
    post_canvas_comments(canvas_submission,
                         comments=[comment_preliminary_grade, comment_feedback_received])

