# 02 - OpenAI Interview Response Evaluation
`Author: Abdlazeez Jimoh`

In [1]:
import json
import os
from pprint import pprint

from openai import OpenAI
from typing_extensions import Literal, TypedDict

In [2]:
# Prefer the OPENAI_API_KEY environment variable so a real secret never has to
# be saved inside the notebook; the placeholder is kept as the fallback value.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_API_KEY")

In [3]:
# Closed set of categories an interview question can be tagged with.
QuestionType = Literal["personal", "role-specific", "behavioral", "situational"]


class Question(TypedDict):
    """Dictionary shape for an interview question and its category tag."""

    question: str
    type: QuestionType


class Evaluation(TypedDict):
    evaluation: Literal["good", "average", "bad"]
    feedback: str | None
    reason: str | None
    samples: list[str] | None

In [4]:

class OpenAIResponseEvaluationAgent:
    """Evaluate a candidate's answer to an interview question with an OpenAI chat model.

    The agent sends a fixed system prompt plus the question/answer pair to the
    chat completions endpoint and parses the reply as a JSON object with the
    keys ``evaluation``, ``reason``, ``feedback`` and ``samples``.

    Instances are callable: ``agent(question, response)`` is equivalent to
    ``agent.run(question, response)``.
    """

    def __init__(self, model: str = "gpt-3.5-turbo-1106"):
        """
        Create the OpenAI client and the prompt templates.

        Args:
            model (str): Name of the chat model to query. JSON mode is requested
                on every call, so the model must support it.
        """
        super().__init__()
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.model = model
        self.system_prompt = """You are an interviewer evaluating a candidate's response to an interview question. Your task is to:
- Evaluate the candidate's response on the scale of "good", "average", and "bad".
- Provide a reason for why it's categorized as good, average, or bad.
- Offer constructive feedback or suggestions for improvement.
- Provide 2 samples of good responses.

You will be provided with an interview question and a candidate response.

Evaluate and provide output in the following JSON format:
{
    "evaluation": "good, average, or bad",
    "reason": "Reason why it's good, average, or bad",
    "feedback": "Feedback or suggestions for improvement",
    "samples": [
        "<Good response 1>", 
        "<Good response 2>"
    ]
}"""
        self.user_prompt = """QUESTION:
{question}

RESPONSE: 
{response}"""

    def __call__(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Shorthand for :meth:`run`.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or None if an error occurred.
        """
        return self.run(question, response)

    def run(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or None if an error occurred.
        """
        return self._generate(question, response)

    def _generate(self, question: str, response: str) -> Evaluation | None:
        """
        Query the chat model and parse its reply into an evaluation.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The parsed JSON object, or None if the request
            failed, the reply was empty, or the reply was not a JSON object.
            Failures are printed rather than raised.
        """
        try:
            output = self.client.chat.completions.create(
                model=self.model,
                messages=[
                    {
                        "role": "system",
                        "content": self.system_prompt,
                    },
                    {
                        "role": "user",
                        "content": self.user_prompt.format(question=question, response=response),
                    },
                ],
                temperature=0.5,
                max_tokens=1024,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
                # JSON mode constrains the reply to valid JSON. The API requires
                # the word "JSON" to appear in the messages; the system prompt
                # above satisfies that.
                response_format={"type": "json_object"},
            )

            content = output.choices[0].message.content
            if not content:
                # An empty reply carries no evaluation; report it as a failure
                # instead of handing back an empty dict.
                print("The model returned an empty response.")
                return None

            evaluation = json.loads(content)
        except Exception as e:
            # Deliberate catch-all boundary: callers rely on getting None
            # (never an exception) for API, network and parsing failures alike.
            print(e)
            return None

        if not isinstance(evaluation, dict):
            # Valid JSON that is not an object (e.g. a list or a bare string)
            # cannot be an Evaluation.
            print(f"Expected a JSON object but got {type(evaluation).__name__}.")
            return None

        return evaluation

In [5]:
response_evaluator = OpenAIResponseEvaluationAgent()

# The answer is built from adjacent string literals rather than backslash
# continuations, so none of the source indentation ends up inside the text
# that is sent to the model.
evaluation = response_evaluator.run(
    "What motivated you to pursue a career in software engineering at a startup in San Francisco?",
    (
        "I've always been interested in technology and startups. I've been working in the tech industry "
        "for the past 5 years and I've always wanted to work at a startup. I'm really excited about the "
        "opportunity to work at a startup in San Francisco."
    ),
)

pprint(evaluation)

{'evaluation': 'average',
 'feedback': 'The candidate should elaborate on what specifically about '
             'startups and technology excites them. They could provide '
             'examples of successful startups they admire or specific '
             'technologies they are passionate about.',
 'reason': "The response provides a basic explanation of the candidate's "
           'interest in technology and startups, but it lacks depth and '
           'specific details.',
 'samples': ["I've always been fascinated by the innovative and fast-paced "
             "environment of startups, and I'm particularly drawn to the "
             "culture of creativity and problem-solving. I've closely followed "
             "the success stories of companies like Airbnb and Uber, and I'm "
             'eager to contribute to a similar journey of growth and '
             'innovation at a startup in San Francisco.',
             'My passion for software engineering stems from a desire to '
  

In [6]:
# Second sample: a team-leadership question answered with a short reply that
# repeats the same sentence twice.
evaluation = response_evaluator.run(
    question="How do you prioritize tasks when leading a team of data scientists and engineers on multiple projects?",
    response=(
        "I prioritize tasks by importance and urgency. "
        "I also make sure to communicate with my team about what needs to be done and when it needs to be done. "
        "I also make sure to communicate with my team about what needs to be done and when it needs to be done."
    ),
)

pprint(evaluation)

{'evaluation': 'average',
 'feedback': 'The candidate should provide more specific examples of how they '
             'assess importance and urgency, as well as how they communicate '
             'and coordinate with the team to ensure tasks are completed '
             'efficiently. Additionally, they should consider mentioning any '
             'tools or frameworks they use to aid in task prioritization.',
 'reason': "The candidate's response demonstrates a basic understanding of "
           'task prioritization but lacks depth and specific strategies.',
 'samples': ['I prioritize tasks by first assessing the impact of each task on '
             'our project goals and deadlines. I then consider the urgency of '
             'each task by evaluating the potential consequences of not '
             'completing it on time. To ensure efficient completion, I '
             'regularly meet with the team to discuss priorities, potential '
             'roadblocks, and resource allocati