In [3]:
from openai import OpenAI
import json
from dotenv import load_dotenv
import os

# 1. Load environment variables first
load_dotenv()  # Looks for .env file automatically 

# Initialize the OpenAI client
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 

# Define the grading rubric
GRADING_RUBRIC = """
You are an expert teacher grading student answers. Evaluate the response based on:
1. **Correctness** (0-4 points): Is the answer factually accurate?
2. **Clarity** (0-3 points): Is the explanation clear and well-structured?
3. **Completeness** (0-3 points): Does it fully address the question?

Question: "{question}"

Grade this answer strictly. Return a JSON format with 'score' (0-10) and 'feedback'.
"""

def grade_student_answer(question, student_answer):
    prompt = GRADING_RUBRIC.format(question=question) + f"\n\nStudent Answer: \"{student_answer}\""
    
    response = client.chat.completions.create(
        model="gpt-4o-mini",  # or "gpt-3.5-turbo" for lower cost
        messages=[
            {"role": "system", "content": "You are a fair and strict examiner."},
            {"role": "user", "content": prompt}
        ],
        response_format={"type": "json_object"}  # Force JSON output
    )
    
    feedback = json.loads(response.choices[0].message.content)
    return feedback

# Example usage
question = "Explain the causes of World War II."
student_answer = "Hitler invaded Poland because he wanted more land."

result = grade_student_answer(question, student_answer)
print(result)

{'score': 1, 'feedback': {'Correctness': 'The answer identifies a key event (the invasion of Poland) but lacks detail on why this invasion occurred and fails to mention other significant causes leading to World War II, such as the Treaty of Versailles, economic factors, or the rise of fascism.', 'Clarity': 'The explanation is very brief and lacks structure. It does not elaborate on the context of the statement, which makes it unclear to someone who might not be familiar with the history.', 'Completeness': 'The answer does not fully address the question. It provides one cause but neglects to mention other critical factors that contributed to the outbreak of World War II.'}}


In [4]:
# Example 2
question = "Explain the causes of World War II."
student_answer = """World War II was a conflict that involved virtually every part of the world during the years 1939–45. 
The principal belligerents were the Axis powers—Germany, Italy, and Japan—and the Allies—France, Great Britain, the United States, the Soviet Union, and, to a lesser extent, China.
It was the biggest conflict in history that had lasted almost six years. 
Nearly some 100 million people had been militarised, and 50 million had been killed (around 3% of the world's population).

The major causes of World War II were numerous. They include the impact of the Treaty of Versailles following WWI, the worldwide economic depression, failure of appeasement, the rise of militarism in Germany and Japan, and the failure of the League of Nations.
"""

result = grade_student_answer(question, student_answer)
print(result)

{'score': 8, 'feedback': {'Correctness': 4, 'Clarity': 2, 'Completeness': 2}, 'comments': {'Correctness': 'The answer accurately identifies the major belligerents and provides a correct list of significant causes of World War II, fulfilling the factual accuracy requirement.', 'Clarity': "While the initial context about World War II is informative, the structure could be improved. The transition from the conflict's overview to the causes feels abrupt. More clear delineation between general information and the causes would enhance clarity.", 'Completeness': 'While the answer lists several important causes, it lacks depth in explaining each cause. Expanding on how each factor contributed to the onset of the war would provide a fuller response.'}}


In [5]:
# Example for not strictly analysing the answer

LENIENT_GRADING_RUBRIC = """
You are a supportive teacher grading student answers. Evaluate the response generously:
1. **Effort** (0-5 points): Did the student attempt a meaningful answer?
2. **Partial Correctness** (0-3 points): Even partially correct ideas earn points.
3. **Clarity** (0-2 points): Basic readability is sufficient.

Question: "{question}"

Return JSON with 'score' (0-10) and 'feedback' (encouraging tone).
Example feedback: "Good attempt! You mentioned [X]. Next time, consider adding [Y]."
"""

def lenient_grade(question, student_answer):
    prompt = LENIENT_GRADING_RUBRIC.format(question=question) + f"\n\nStudent Answer: \"{student_answer}\""
    
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "You are a supportive teacher."},
            {"role": "user", "content": prompt}
        ],
        response_format={"type": "json_object"}
    )
    
    return json.loads(response.choices[0].message.content)

# Example usage
question = "Explain the causes of World War II."
student_answer = "There was a war because of Germany."

result = lenient_grade(question, student_answer)
print(result)

{'score': 4, 'feedback': 'Good attempt! You mentioned Germany as a key factor, which is an important part of understanding World War II. Next time, consider adding more details about other causes like the Treaty of Versailles or the rise of totalitarian regimes to strengthen your answer.'}


In [6]:

# Example usage
question = "Explain the causes of World War II."
student_answer = "There was a war because of Nepal."

result = lenient_grade(question, student_answer)
print(result)

{'score': 2, 'feedback': "Thank you for your effort! You made an interesting point about Nepal. Next time, try to connect it with more widely recognized causes of World War II, like the Treaty of Versailles or the rise of totalitarian regimes. You're on the right track!"}


In [7]:
# Example usage
question = "What is the largest ocean in the world"
student_answer = "Indian Ocean"

result = lenient_grade(question, student_answer)
print(result)

{'score': 4, 'feedback': 'Good attempt! You mentioned the Indian Ocean, which is one of the major oceans. However, the largest ocean in the world is actually the Pacific Ocean. Next time, consider exploring the sizes of the different oceans to enhance your answer!'}


In [8]:
# Example usage
question = "What is the largest ocean in the world"
student_answer = "Tilicho"

result = lenient_grade(question, student_answer)
print(result)

{'score': 2, 'feedback': "Thank you for your response! While 'Tilicho' is a notable lake, the largest ocean in the world is actually the Pacific Ocean. Great effort in sharing your thoughts! Next time, you could explore ocean-related topics more deeply to strengthen your answers."}


In [9]:
# Example usage
question = "What is the largest ocean in the world"
student_answer = "America"

result = lenient_grade(question, student_answer)
print(result)

{'score': 2, 'feedback': "Thank you for your response! While you mentioned 'America,' it seems there might have been a mix-up regarding the question. The largest ocean in the world is actually the Pacific Ocean. Keep up the effort, and don't hesitate to explore more about oceans in your studies!"}


In [10]:
# Example usage
question = "What is the largest ocean in the world"
student_answer = "America"

result = grade_student_answer(question, student_answer)
print(result)

{'score': 1, 'feedback': {'Correctness': 'The answer is factually incorrect. America is not an ocean; the largest ocean in the world is the Pacific Ocean.', 'Clarity': 'The response lacks clarity, as it does not provide any context or explanation related to oceans.', 'Completeness': 'The answer does not address the question at all, failing to provide the name of the largest ocean or any relevant information.'}}
