# Testing of the basic grading functionality

In [1]:
import sys
import os
from config_manager import ConfigManager, LLMFactory
from ai_grading_agent import AIGradingAgent
from report_generator import create_combined_report


In [2]:
# Run configuration: where the student notebooks live, which rubric to load,
# and where the graded results should be written.
directory_path = "HW02/Students"
assignment_id = "hw2_california_housing"
# Derived from directory_path so the two locations cannot drift apart.
# A literal "/" (rather than os.path.join) keeps the value identical on every platform.
output_csv = f"{directory_path}/graded_results.csv"

In [3]:
# Configure the LLM: build the config object, then ask the factory for a client.
config = ConfigManager()
llm = LLMFactory.create_llm(config)
print(f"Using LLM: {llm.get_model_name()}")

# Read back the settings that were used, falling back to visible placeholders
# when a key is absent from the configuration.
provider = config.get('llm_settings.provider', 'not_set')
model = config.get('llm_settings.model', 'not_set')
# Only record whether a key is present; the key itself is never printed.
has_api_key = bool(config.get('llm_settings.api_key', ''))

# One print call, one line per field (sep="\n" yields the same text as separate prints).
print(
    "Configuration details:",
    f"  Provider: {provider}",
    f"  Model: {model}",
    f"  Has API key: {has_api_key}",
    sep="\n",
)

LLMFactory: Creating LLM with provider='anthropic', model='claude-3-5-haiku-latest'
✅ Creating Anthropic LLM
Using LLM: Anthropic-claude-3-5-haiku-latest
Configuration details:
  Provider: anthropic
  Model: claude-3-5-haiku-latest
  Has API key: True


In [4]:
# Build the grading agent around the configured LLM. Later cells use its
# `parser`, `rubric_manager` and `grader` attributes.
ai_grader = AIGradingAgent(llm)

In [5]:
# Check the parsed content of every submission in the directory.
# sorted() makes the processing order deterministic; os.listdir returns
# entries in arbitrary order.
notebook_files = sorted(f for f in os.listdir(directory_path) if f.endswith('.ipynb'))

# Fail early with a clear message instead of a NameError on `parsed_content` below.
if not notebook_files:
    raise FileNotFoundError(f"No .ipynb files found in {directory_path!r}")

# Keep every parse result, keyed by file name, so no notebook is lost when the
# loop variable is rebound on the next iteration.
parsed_notebooks = {}

for notebook_file in notebook_files:
    notebook_path = os.path.join(directory_path, notebook_file)
    parsed_content = ai_grader.parser.parse_notebook(notebook_path)
    parsed_notebooks[notebook_file] = parsed_content
    print(f"Parsed content for {notebook_file}:")
    print(parsed_content)

    # Print responses for this notebook (inside the loop, so every notebook
    # is reported rather than only the last one parsed).
    for response in parsed_content['responses']:
        print(f"Response for {response.problem_id}:")
        print(response)

# NOTE: `parsed_content` stays bound to the last notebook (in sorted order)
# after the loop, which is what cells that read `parsed_content` will see.

Parsed content for Silvestri_Luciano.ipynb:
{'student_name': 'Silvestri Luciano', 'notebook_path': 'HW02/Students/Silvestri_Luciano.ipynb', 'problems': [{'cell_index': 1, 'content': "---\n## Part 1: From Exploration to Engineering (30 points)\n\nRead chapter 2 of your textbook. It is a fairly long chapter so I would recommend making an outline to keep track of the many steps. There will be a lecture on this next Monday. You can be pretty sure that the next HW will ask you to make a detailed, step-by-step protocol for a general and complete ML workflow, so you might as well start making that now! \n\nStart by documenting the key insights from your in-class exploration. Using your team's findings and discussions:\n\n* What did the basic IDA reveal? What would you propose to do about any problems the IDA uncovered?\n* What did the EDA reveal? What quantitative metrics did you use? What qualitative metrics did you use? What visualizations did you use? \n* What were the raw features? Explai

In [6]:
# Load the rubric for this assignment and dump it for a visual check.
assignment_rubric = ai_grader.rubric_manager.load_assignment_rubric(assignment_id)
# Header and rubric go through a single print call, one per line.
print(f"Loaded rubric for {assignment_id}:", assignment_rubric, sep="\n")

Loaded rubric for hw2_california_housing:
{'part_1': ProblemRubric(problem_id='part_1', total_points=30, criteria=[GradingCriterion(name='basic_ida', max_points=5, description='Basic IDA findings and proposed solutions', guidelines='5 pts: Clearly describes key findings, identifies issues (missing data, anomalies), and proposes justified fixes\n3-4 pts: Identifies some findings/issues but limited or unclear proposed solutions\n1-2 pts: Minimal or incomplete description, vague fixes\nLook for specific observations about data structure, missing values, data types, and concrete solutions\n'), GradingCriterion(name='eda_summary', max_points=5, description='EDA summary with quantitative/qualitative metrics and visualizations', guidelines='5 pts: Clear explanation of quantitative metrics, qualitative observations, and visualizations used\n3-4 pts: Some metrics or visualizations missing or inadequately explained\n1-2 pts: Very superficial coverage\nAssess coverage of statistical analysis, pat

In [7]:
# Grade each parsed response that has a rubric entry; collect the results in order.
notebook_results = []

for response in parsed_content['responses']:
    # Guard clause: responses without a rubric entry are reported and skipped.
    if response.problem_id not in assignment_rubric:
        print(f"  Warning: No rubric found for {response.problem_id}")
        continue

    rubric = assignment_rubric[response.problem_id]

    # Assignment context handed to the grader alongside the response and rubric.
    context = f"Assignment: {assignment_id}"

    result = ai_grader.grader.grade_response(response, rubric, context)
    # NOTE(review): the student name is hard-coded here; parsed_content also
    # carries a 'student_name' entry. Confirm which format reports expect
    # before grading more than one student.
    result.student_name = "Silvestri, Luciano"
    notebook_results.append(result)

    print(f"  Graded {response.problem_id}: {result.total_score}/{result.max_possible} ({result.percentage:.1f}%)")


  Graded part_1: 29/30 (96.7%)
  Graded part_2: 27/30 (90.0%)
  Graded part_3: 36/40 (90.0%)


In [8]:
# Show the `feedback` attribute of the first graded result (bare expression,
# so the notebook displays it as the cell output).
notebook_results[0].feedback

'Exceptional work demonstrating comprehensive understanding of data analysis and feature engineering for the California Housing dataset. The student provided a thorough, well-structured response that effectively integrates technical implementation with clear, data-driven reasoning.'