# Automatic Grader using LLMs

### import libraries

In [1]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.schema import BaseOutputParser

In [2]:
# import key
from dotenv import load_dotenv
import os

In [3]:
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

### define chain

In [4]:
# define chatbot
chat_model = ChatOpenAI(openai_api_key=api_key, temperature=0)
chat_model

ChatOpenAI(client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, temperature=0.0, openai_api_key='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', openai_proxy='')

In [5]:
# define prompt template
prompt_template_text = """You are a high school history teacher grading homework assignments. \
Based on the homework question indicated by “**Q:**” and the correct answer indicated by “**A:**”, your task is to determine whether the student's answer is correct. \
Grading is binary; therefore, student answers can be correct or wrong. \
Simple misspellings are okay.

**Q:** {question}
**A:** {correct_answer}

**Student's Answer:** {student_answer}
"""

prompt = PromptTemplate(input_variables=["question", "correct_answer", "student_answer"], template = prompt_template_text)

In [6]:
# define chain
chain = LLMChain(
    llm=chat_model,
    prompt=prompt,
)

### use chain

In [9]:
# define inputs
question = "Who was the 35th president of the United States of America?"
correct_answer = "John F. Kennedy"
student_answer =  "JFK"


In [10]:
# invoke chain
chain.invoke({'question':question, 'correct_answer':correct_answer, 'student_answer':student_answer})

{'question': 'Who was the 35th president of the United States of America?',
 'correct_answer': 'John F. Kennedy',
 'student_answer': 'JFK',
 'text': "Grade: Correct\n\nThe student's answer, JFK, is an abbreviation for John F. Kennedy, who was indeed the 35th president of the United States of America."}

In [11]:
# run chain
chain.run({'question':question, 'correct_answer':correct_answer, 'student_answer':student_answer})

"Grade: Correct\n\nThe student's answer, JFK, is an abbreviation for John F. Kennedy, who was indeed the 35th president of the United States of America."

In [12]:
# run chain in for loop
student_answer_list = ["John F. Kennedy", "JFK", "FDR", "John F. Kenedy", "John Kennedy", "Jack Kennedy", "Jacqueline Kennedy", "Robert F. Kenedy"]

for student_answer in student_answer_list:
    print(student_answer + " - " + str(chain.run({'question':question, 'correct_answer':correct_answer, 'student_answer':student_answer})))
    print('\n')

John F. Kennedy - Correct


JFK - Grade: Correct

The student's answer, JFK, is an abbreviation for John F. Kennedy, who was indeed the 35th president of the United States of America.


FDR - Student's Answer is wrong.


John F. Kenedy - Correct


John Kennedy - Correct


Jack Kennedy - Correct


Jacqueline Kennedy - Student's Answer: Jacqueline Kennedy

Grade: Wrong

Explanation: The student's answer is incorrect. The 35th president of the United States of America was John F. Kennedy, not Jacqueline Kennedy.


Robert F. Kenedy - Student's Answer: Robert F. Kenedy

Grade: Wrong

Explanation: The correct answer is John F. Kennedy, not Robert F. Kenedy.




### add output parser

In [13]:
# define output parser
class GradeOutputParser(BaseOutputParser):
    """Determine whether grade was correct or wrong"""

    def parse(self, text: str):
        """Parse the output of an LLM call."""
        return "wrong" not in text.lower()

In [14]:
# update chain
chain = LLMChain(
    llm=chat_model,
    prompt=prompt,
    output_parser=GradeOutputParser()
)

In [15]:
# grade student answers
for student_answer in student_answer_list:
    print(student_answer + " - " + str(chain.run({'question':question, 'correct_answer':correct_answer, 'student_answer':student_answer})))

John F. Kennedy - True
JFK - True
FDR - False
John F. Kenedy - True
John Kennedy - True
Jack Kennedy - True
Jacqueline Kennedy - False
Robert F. Kenedy - False
