## Select Assignment Number

Set the number of the current assignment so that the matching settings are loaded.

In [1]:
# Key into ASSIGNMENTS that selects the assignment processed by the rest of this notebook.
ASSIGNMENT_NR = 1

## Load Packages and Global Settings

In [10]:
# Package imports
from canvasapi import Canvas
from canvasapi.requester import Requester
from canvas_connector.utils.canvas_utils import download_assignment_submissions
from collections import defaultdict
from datetime import datetime
from markdown_pdf import MarkdownPdf, Section
import numpy as np
from openai import OpenAI
import os
import pandas as pd
import pickle as pkl
import zipfile
import shutil
import re

# Local imports
from scripts.canvas_utils import update_canvas_grade, post_canvas_comments
from scripts.jsonify import jsonify, jsonify_resources, analyze_jsonify_results
from scripts.utils import ensure_folder_exists, create_file_list, parsed_submissions_quality_check, deduplicate_files_with_manual_fixes, load_latest_jsonified_student_submission, load_jsonified_resources
from scripts.llm_utils import create_openai_message, prompt_gpt, format_with_default, format_and_compile_openai_messages
from scripts.utils import extract_html_content, get_sum_points_for_pattern, get_weighted_points, deduplicate_highest_attempt
from scripts.llm_report_utils import start_report_with_header, add_messages_to_report, add_text_to_report, add_prompt_and_response_to_report

In [3]:
# Load global settings
from scripts.settings import *

# Resolve the settings of the assignment selected via ASSIGNMENT_NR
ASSIGNMENT = ASSIGNMENTS[ASSIGNMENT_NR]

# Canvas identifiers of the assignment, its quiz, and the two graded quiz questions
_canvas_ids = ASSIGNMENT["canvas"]
ASSIGNMENT_ID, QUIZ_ID, R_QUIZ_QUESTION_ID, ADV_QUIZ_QUESTION_ID = (
    _canvas_ids[key]
    for key in ("assignment_id", "quiz_id", "r_quiz_question_id", "adv_quiz_question_id")
)

# Date (parsed later as "%Y-%m-%d") from which grades are no longer pushed to Canvas
LOCK_GRADES_DATE = ASSIGNMENT["lock_grades_date"]

In [4]:
# Initialize Canvas API: the high-level client and the low-level requester share one set of credentials
canvas_api_url = os.getenv("CANVAS_API_URL")
canvas_api_key = os.getenv("CANVAS_API_KEY")
canvas_client = Canvas(canvas_api_url, canvas_api_key)
canvas_requester = Requester(canvas_api_url, canvas_api_key)

# Initialize OpenAI API: either the default endpoint or the UvA-hosted one
if not USE_UVA_OPENAI:
    openai_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
else:
    openai_client = OpenAI(
        api_key=os.getenv("UVA_OPENAI_API_KEY"),
        base_url=os.getenv("UVA_OPENAI_BASE_URL"),
    )
    # The UvA endpoint is addressed with a different model name (no hyphen)
    if MODEL == "gpt-4o":
        MODEL = "gpt4o"

## Jsonify Resources

To ensure the latest changes to rubrics, assignment, example solutions, or goals are captured, the resources are jsonified.

In [5]:
# Re-jsonify the resources of this assignment, then summarise the result
jsonified_resources = jsonify_resources(ASSIGNMENT_NR, RESOURCES_PATH)
analyze_jsonify_results(jsonified_resources)

questions: 48 (R: 30, Radv: 8, Python: 10)
rubrics: 48 (R: 30, Radv: 8, Python: 10)
solutions: 48 (R: 30, Radv: 8, Python: 10)
goals: 48 (R: 30, Radv: 8, Python: 10)
weights: 48 (R: 30, Radv: 8, Python: 10)


## Download and Prepare Submissions

All assignment submissions are downloaded and jsonified.

In [6]:
# Download assignment submissions
# NOTE(review): the whitelist holds one hard-coded Canvas user id, presumably a test
# account; confirm how download_assignment_submissions treats the white-/blacklist
# before running this for the whole course.
user_whitelist = [513294]
user_blacklist = []
submission_path_template = SUBMISSIONS_PATH + (
    "/user-{user_id}/assignment-{assignment_id}/"
    "user-{user_id}_ass-{assignment_id}_try-{attempt}_que-{question_id}_att-{attachment_id}"
)
out_paths = download_assignment_submissions(canvas_requester,
                                            COURSE_ID,
                                            ASSIGNMENT_ID,
                                            user_whitelist,
                                            user_blacklist,
                                            submission_path_template)

# Jsonify submissions: each JSON file is written next to its raw file, same name with a .json extension.
# os.path.splitext only strips a real file extension, so a path without an extension
# (or with a dot in a directory name) no longer yields a truncated or empty target name.
for out_path in out_paths:
    jsonify(out_path, os.path.splitext(out_path)[0] + ".json")

Some submissions may be formatted incorrectly, even though students were instructed how to format them and asked to validate them here before submitting: https://lukekorthals.shinyapps.io/pips-submission-validator/

Therefore, perform a quality check to make sure submissions were correctly parsed.

In [7]:
# One row per parsed submission with boolean quality flags
quality_check_df = parsed_submissions_quality_check(ASSIGNMENT_NR, ASSIGNMENT_ID)

# Pull the masks out of the f-strings: reusing double quotes inside a double-quoted
# f-string is only valid from Python 3.12 on and is a SyntaxError on older versions.
all_indicators_found = quality_check_df["all_indicators_found"]
contains_additional_indicators = quality_check_df["contains_additional_indicators"]

print("Found")
print(f"- {len(quality_check_df[all_indicators_found])} complete submissions")
print(f"- {len(quality_check_df[~all_indicators_found])} incomplete submissions")
print(f"- {len(quality_check_df[contains_additional_indicators])} submissions with additional indicators")

Found
- 1 complete submissions
- 0 incomplete submissions
- 0 submissions with additional indicators


Open the raw submissions by students with missing indicators to check if they are really missing or just not recognized. 

In [None]:
# Rows of the quality check where not every expected indicator was found
missing_indicator_mask = ~quality_check_df["all_indicators_found"]
quality_check_df[missing_indicator_mask]

Open the raw submissions by students with additional indicators and see if you understand what went wrong and if you can fix it.

In [None]:
# Students with additional (unexpected) indicators
quality_check_df[quality_check_df["contains_additional_indicators"]]

If you need to fix anything (e.g., because a student wrote #R 1 instead of #R1), copy the raw submission and append `_ManualFixes` before the file extension. Then rejsonify the manual fixes. The remainder of the pipeline will prefer files with ManualFixes over raw files. 

After jsonifying any files with ManualFixes, recheck the `quality_check_df`.

In [10]:
# Jsonify submissions with manual fixes
# Select the raw "_ManualFixes" copies; the ".json" filter skips files jsonified earlier.
files_with_fixes = create_file_list(SUBMISSIONS_PATH, ["_ManualFixes"], [".json"])
for file in files_with_fixes:
    # os.path.splitext strips only the real extension, so names without an extension
    # or with a dot in a directory name still get a correct .json target.
    jsonify(file, os.path.splitext(file)[0] + ".json")

## Prompt LLM for Grading and Feedback
The assignments of all students are graded by the LLM, which also generates feedback for each of them.

In [8]:
# Get user IDs from the per-user folders ("user-<id>") inside the submissions directory
user_ids = [user.split("-")[1] for user in os.listdir(SUBMISSIONS_PATH) if user.startswith("user")]

# Get jsonified resources for this week
resources = load_jsonified_resources(ASSIGNMENT_NR, 
                                     RESOURCES_PATH, 
                                     ["questions", "solutions", "rubrics", "goals", "weights"])

# Load llm completion report templates
# A context manager closes the file handle right after reading.
# NOTE(review): this path is hard-coded relative to the working directory, while other
# cells build resource paths from RESOURCES_PATH; confirm the two always point to the same folder.
with open("resources/llm_report/llm_report_header_template.txt", "r") as template_file:
    header_template = template_file.read()

# Prepare unformatted messages: (role, template) pairs that are filled in per question later
unformatted_grading_messages = [("system", PROMPTS["grading"]["system_prompt"]), 
                                ("user", PROMPTS["grading"]["user_prompt"])]
unformatted_feedback_qw_messages = [("system", PROMPTS["feedback_questionwise"]["system_prompt"]), 
                                    ("user", PROMPTS["feedback_questionwise"]["user_prompt"])]
unformatted_feedback_sum_messages = [("system", PROMPTS["feedback_summary"]["system_prompt"]), 
                                     ("user", PROMPTS["feedback_summary"]["user_prompt"])]

# Loop over all users
for user_id in user_ids:
    # Get the student's latest submission and the attempt it belongs to
    submission, attempt = load_latest_jsonified_student_submission(ASSIGNMENT_ID, user_id, SUBMISSIONS_PATH)

    # Every artefact of this user/attempt lives below one folder and shares one file-name stem
    llm_out_dir = f"{SUBMISSIONS_PATH}/user-{user_id}/assignment-{ASSIGNMENT_ID}/llm_outputs"
    file_stem = f"user-{user_id}_ass-{ASSIGNMENT_ID}_try-{attempt}"

    # Raw LLM responses (first choice only), keyed by question indicator
    grading_dict = {}
    feedback_dict = {}

    # Initialize report: start a fresh markdown file with the filled-in header template
    llm_report_out_path = f"{llm_out_dir}/{file_stem}_LLMCompletionReport.md"
    add_text_to_report(llm_report_out_path,
                       text=format_with_default(header_template,
                                                {"model": MODEL,
                                                 "grading_temperature": GRADING_TEMPERATURE,
                                                 "feedback_temperature": FEEDBACK_TEMPERATURE,
                                                 "n_choices_grading": N_CHOICES_GRADING,
                                                 "n_choices_feedback": N_CHOICES_FEEDBACK,
                                                 "student_id": user_id,
                                                 "assignment_id": ASSIGNMENT_ID}),
                       start_new=True)

    # Loop over all questions
    # NOTE(review): at most 200 questions are processed per user; this looks like a
    # leftover safety/debug cap. Confirm whether it is still wanted.
    i = 0
    for indicator in resources["questions"]:
        i += 1
        if i > 200:
            break
        print(indicator)

        # Extract relevant information for the prompt templates
        formatting_dict = {
            "task": resources["questions"][indicator],
            "solution": resources["solutions"][indicator],
            "rubric": resources["rubrics"][indicator],
            "answer": "\n".join(submission[indicator]),
            "goal": resources["goals"][indicator]
        }

        # Prompt for grading
        messages = format_and_compile_openai_messages(unformatted_grading_messages, formatting_dict)
        pkl_out_path = f"{llm_out_dir}/pickled_completions/{file_stem}_task-{indicator}_prompt-grading_completion.pkl"
        completion = prompt_gpt(openai_client,
                                MODEL,
                                messages,
                                pkl_out_path=pkl_out_path,
                                n=N_CHOICES_GRADING,
                                temperature=GRADING_TEMPERATURE)

        # Add first choice to grading dict
        grading_response = completion.choices[0].message.content  # TODO which choice to extract?
        grading_dict[indicator] = grading_response

        # Add chat completions to report
        add_prompt_and_response_to_report(llm_report_out_path,
                                          indicator,
                                          "Grading",
                                          messages,
                                          completion)

        # Save grading to file (one CSV per question)
        dat = pd.DataFrame({
            "user_id": [user_id],
            "assignment_id": [ASSIGNMENT_ID],
            "attempt": [attempt],
            "grader": [MODEL],
            "question": [indicator],
            "points": [float(extract_html_content(grading_response, "points"))],
            "explanation": [extract_html_content(grading_response, "explanation")]
        })
        file_name = f"{llm_out_dir}/grading/grading_{file_stem}_grader-{MODEL}_que-{indicator}.csv"
        ensure_folder_exists(file_name)
        dat.to_csv(file_name, index=False)

        # Prompt for feedback
        messages = format_and_compile_openai_messages(unformatted_feedback_qw_messages, formatting_dict)
        pkl_out_path = f"{llm_out_dir}/pickled_completions/{file_stem}_task-{indicator}_prompt-feedback-questionwise_completion.pkl"
        completion = prompt_gpt(openai_client,
                                MODEL,
                                messages,
                                pkl_out_path=pkl_out_path,
                                n=N_CHOICES_FEEDBACK,
                                temperature=FEEDBACK_TEMPERATURE)

        # Add first choice to feedback dict
        feedback_dict[indicator] = completion.choices[0].message.content  # TODO which choice to extract?

        # Add chat completions to report
        add_prompt_and_response_to_report(llm_report_out_path,
                                          None,
                                          "Feedback",
                                          messages,
                                          completion)

    # Prompt for feedback summary, based on the question-wise feedback of all questions
    feedback = "\n\n\n".join([f"{key}\n{extract_html_content(value, 'feedback')}" for key, value in feedback_dict.items()])
    messages = format_and_compile_openai_messages(unformatted_feedback_sum_messages, {"feedback": feedback})
    pkl_out_path = f"{llm_out_dir}/pickled_completions/{file_stem}_prompt-feedback-summary_completion.pkl"
    completion = prompt_gpt(openai_client,
                            MODEL,
                            messages,
                            pkl_out_path=pkl_out_path,
                            n=N_CHOICES_FEEDBACK,
                            temperature=FEEDBACK_TEMPERATURE)

    # Add chat completions to report
    add_prompt_and_response_to_report(llm_report_out_path,
                                      "Feedback Summary",
                                      "Feedback",
                                      messages,
                                      completion)

    # Get LLM grade
    # This calculation is specific to the PIPS 2025 course
    points = {key: float(extract_html_content(value, "points")) for key, value in grading_dict.items()}
    points_w = get_weighted_points(points, resources["weights"])
    points_r = round(get_sum_points_for_pattern(points_w, r"#R(\d+)") * MAX_GRADE, 2)
    points_radv = round(get_sum_points_for_pattern(points_w, r"#Radv(\d+)") * MAX_GRADE, 2)
    points_py = round(get_sum_points_for_pattern(points_w, r"#Python(\d+)") * MAX_GRADE, 2)
    # The advanced part counts Radv if it earned any points, otherwise Python
    points_adv = points_radv if points_radv > 0 else points_py
    used_adv = "You were graded based on Radv." if points_radv > 0 else "You were graded based on Python."
    grade = round(points_r + points_adv, 2)

    # Save grade (one combined CSV per user and attempt)
    dat = pd.DataFrame({"user": [user_id],
                        "assignment": [ASSIGNMENT_ID],
                        "attempt": [attempt],
                        "grader": [MODEL],
                        **points,
                        "points_r": [points_r],
                        "points_radv": [points_radv],
                        "points_py": [points_py],
                        "points_adv": [points_adv],
                        "used_adv": [used_adv],
                        "grade": [grade]})
    combined_file_name = f"{llm_out_dir}/grading/grading_{file_stem}_grader-{MODEL}_que-combined.csv"
    # Same handling as the per-question files: make sure the folder exists and do not
    # write the meaningless row index as an extra unnamed column.
    ensure_folder_exists(combined_file_name)
    dat.to_csv(combined_file_name, index=False)

    # Generate Feedback report
    pdf = MarkdownPdf()

    # Header, summary, coding challenge
    summary_response = completion.choices[0].message.content
    section = f"# Feedback Assignment {ASSIGNMENT_ID}\n\n"
    section += "## Summary\n"
    section += extract_html_content(summary_response, "summary")
    section += "\n\n"
    section += "## Coding Challenge\n"
    section += "We invite you to work on the following personalized coding challenge and submit your result on Canvas. Dont worry about being perfect, this is ungraded and just for your practice.\n\n"
    section += extract_html_content(summary_response, "coding-challenge")
    pdf.add_section(Section(section))

    # Questionwise feedback
    section = ""
    for key, value in feedback_dict.items():
        section += f"## {key}\n"
        section += extract_html_content(value, "feedback")
        section += "\n\n--\n\n"

    pdf.add_section(Section(section))

    # Save pdf
    student_feedback_report_out_path = f"{llm_out_dir}/{file_stem}_LLMFeedback.pdf"
    pdf.save(student_feedback_report_out_path)

    # Generate Grading report
    pdf = MarkdownPdf()

    # Header and preliminary grade
    # NOTE(review): the "/10" below is hard-coded while the points are scaled by MAX_GRADE; confirm they agree.
    section = f"# LLM Grading Assignment {ASSIGNMENT_ID}\n\n"
    section += f"**Points R:** {points_r}\n"
    section += f"**Points Radv:** {points_radv}\n"
    section += f"**Points Python:** {points_py}\n"
    section += f"{used_adv}\n"
    section += f"**Preliminary grade:** {grade}/10\n\n"

    # Questionwise grading
    for key, value in grading_dict.items():
        # Single quotes inside the f-string keep this valid on Python versions before 3.12
        section += f"## {key} ({extract_html_content(value, 'points')}/1)\n"
        section += extract_html_content(value, "explanation")
        section += "\n\n"
    pdf.add_section(Section(section))

    # Save pdf
    student_grading_report_out_path = f"{llm_out_dir}/{file_stem}_LLMGrading.pdf"
    pdf.save(student_grading_report_out_path)

#R1
#R2
#R3
#R4
#R5
#R6
#R7
#R8
#R9
#R10
#R11
#R12
#R13
#R14
#R15
#R16
#R17
#R18
#R19
#R20
#R21
#R22
#R23
#R24
#R25
#R26
#R27
#R28
#R29
#R30
#Radv1
#Radv2
#Radv3
#Radv4
#Radv5
#Radv6
#Radv7
#Radv8
#Python1
#Python2
#Python3
#Python4
#Python5
#Python6
#Python7
#Python8
#Python9
#Python10


In [32]:
# Debugging aid: show the raw grading responses the grading loop left in grading_dict
# (it is reset per user, so this holds the entries of the last user processed).
grading_dict

{'#R1': '<my-thoughts>\n- The code uses the getwd() function.\n- The code matches the example solution.\n- There are no additional issues.\n</my-thoughts>\n<explanation>No points are deducted as the student used the correct function getwd(). Calculation: 1 - 0 = 1</explanation><points>1</points>',
 '#R2': '<my-thoughts>\n- The code uses setwd, which matches the rubric.\n- The path does not end with the two specified folders .../r_course/week_1.\n</my-thoughts>\n<explanation>Subtracting 0.25 points because the path does not end with the two specified folders .../r_course/week_1. Calculation: 1 - 0.25 = 0.75</explanation><points>0.75</points>',
 '#R3': '<my-thoughts>\n- The code uses list.files() which matches the rubric.\n- The code uses ?list.files which matches the rubric.\n- The student included extra text that was not required.\n</my-thoughts>\n<explanation>No points are deducted as the student correctly used list.files and ?list.files. The extra text does not affect the functionali

In [33]:
# Generate Grading report
# NOTE: this cell re-renders the grading PDF outside the grading loop. It relies on the
# variables that loop leaves behind (grading_dict, points_*, used_adv, grade, user_id,
# attempt), i.e. it only covers the last user processed and fails on a fresh kernel.
pdf = MarkdownPdf()

# Header and preliminary grade
section = f"# LLM Grading Assignment {ASSIGNMENT_ID}\n\n"
section += f"**Points R:** {points_r}\n"
section += f"**Points Radv:** {points_radv}\n"
section += f"**Points Python:** {points_py}\n"
section += f"{used_adv}\n"
section += f"**Preliminary grade:** {grade}/10\n\n"


# Questionwise grading
for key, value in grading_dict.items():
    # Single quotes inside the f-string keep this valid on Python versions before 3.12
    section += f"## {key} ({extract_html_content(value, 'points')}/1)\n"
    section += extract_html_content(value, "explanation")
    section += "\n\n"
pdf.add_section(Section(section))

# Save pdf
student_grading_report_out_path = f"{SUBMISSIONS_PATH}/user-{user_id}/assignment-{ASSIGNMENT_ID}/llm_outputs/user-{user_id}_ass-{ASSIGNMENT_ID}_try-{attempt}_LLMGrading.pdf"
pdf.save(student_grading_report_out_path)

In [30]:
# Debugging aid: show the "summary" part of the most recent completion; when run right
# after the grading loop this is the feedback-summary response of the last user.
extract_html_content(completion.choices[0].message.content, "summary")


"This week was all about enhancing your skills in R, especially with regard to using built-in functions, handling data types, managing directories, and applying statistical functions. Overall, you are progressing well, especially in understanding variables and built-in packages. However, there are areas where you can improve, such as sorting data before indexing and managing factors and NA values. Make sure to work on adding more detailed comments to clarify your code and avoid minor mistakes.\n\nI recommend reviewing the feedback, particularly for questions R2, R11, R15, and R27 to refine your skills. To help address your current areas of improvement, I've created a personalized coding challenge for you."

## Upload LLM Grading and feedback to Canvas
For all students who were graded for this assignment, the LLM-generated grade is uploaded together with the grading explanations and some predetermined comments.

In [None]:
# Get Canvas objects
course = canvas_client.get_course(COURSE_ID)
assignment = course.get_assignment(ASSIGNMENT_ID)
quiz = course.get_quiz(QUIZ_ID)
quiz_submissions = list(quiz.get_submissions())

# Load text for comments to canvas (context managers close the files right after reading)
with open(f"{RESOURCES_PATH}/canvas_comments/canvas-comment_preliminary_grade.txt", "r") as comment_file:
    comment_preliminary_grade = comment_file.read()
with open(f"{RESOURCES_PATH}/canvas_comments/canvas-comment_feedback_received.txt", "r") as comment_file:
    comment_feedback_received = comment_file.read()

# Get grading files: the combined grade CSV of every user, highest attempt only
grading_files = create_file_list(SUBMISSIONS_PATH, [f"ass-{ASSIGNMENT_ID}", f"grader-{MODEL}_que-combined.csv"], [".json"])
grading_files = deduplicate_highest_attempt(grading_files)

# Loop-invariant helpers
user_id_pattern = re.compile(r"user-(\d+)")
attempt_pattern = re.compile(r"try-(\d+)")
grades_locked = datetime.today() >= datetime.strptime(LOCK_GRADES_DATE, "%Y-%m-%d")
# This user is still updated after the lock date; presumably a test account
# (the same id is whitelisted for the download step). Verify before a real run.
lock_exempt_user_id = 513294

for f in grading_files:

    user_id = int(user_id_pattern.search(f).group(1))
    if grades_locked and user_id != lock_exempt_user_id:
        print("WARNING GRADES ARE LOCKED AND NO UPDATES TO CANVAS ARE MADE!")
        continue

    # Collect the per-question grading files that belong to this combined file.
    # The trailing underscore after the user id prevents "user-12" from also matching
    # "user-123"; the attempt filter prevents explanations of older attempts from being
    # posted next to the grade of the highest attempt. Both fragments occur in the
    # file names written by the grading loop ("grading_user-<id>_ass-<id>_try-<n>_...").
    name_filters = [f"ass-{ASSIGNMENT_ID}", f"grader-{MODEL}", f"user-{user_id}_"]
    attempt_match = attempt_pattern.search(os.path.basename(f))
    if attempt_match:
        name_filters.append(f"try-{attempt_match.group(1)}_")
    file_list_indidividual_questions = create_file_list(SUBMISSIONS_PATH,
                                                        name_filters,
                                                        [".json", "que-combined"])
    explanations = []
    for file in file_list_indidividual_questions:
        df = pd.read_csv(file)
        explanations.append(f"{df.question.values[0]}\n{df.explanation.values[0]}")
    comment_explanation = "Explanations for grading:\n\n" + "\n\n".join(explanations)

    # Combined grade of this user (single-row CSV written by the grading loop)
    dat = pd.read_csv(f)

    canvas_submission = assignment.get_submission(user = user_id)

    # Update Canvas grade
    update_canvas_grade(user_id,
                        R_QUIZ_QUESTION_ID,
                        ADV_QUIZ_QUESTION_ID,
                        quiz_submissions,
                        dat.points_r.values[0],
                        dat.points_adv.values[0],
                        dat.used_adv.values[0],
                        dat.grade.values[0],
                        canvas_submission)

    # Post comments with grade and feedback
    post_canvas_comments(canvas_submission, comments=[comment_preliminary_grade,
                                                      comment_explanation,
                                                      comment_feedback_received])