# 02 - Interview Response Evaluation
`Author: Abdlazeez Jimoh`

In [9]:
import json
from typing import Optional

from openai import OpenAI

from lib.agents import BaseAgent
from lib.configs import OPENAI_API_KEY
from lib.types import Evaluation


class OpenAIResponseEvaluationAgent(BaseAgent):
    """Agent that grades a candidate's interview answer with an OpenAI chat model.

    The system prompt instructs the model to reply with a JSON object holding
    the keys ``evaluation``, ``reason``, ``feedback`` and ``samples``. The
    reply is parsed with ``json.loads`` and handed back to the caller.
    """

    def __init__(self):
        super().__init__()
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.system_prompt = """You are an interviewer evaluating a candidate's response to an interview question. Your task is to:
- Evaluate the candidate's response on the scale of "good", "average", and "bad".
- Provide a reason for why it's categorized as good, average, or bad.
- Offer constructive feedback or suggestions for improvement.
- Provide 2 samples of good responses.

You will be provided with an interview question and a candidate response.

Evaluate and provide output in the following JSON format:
{
    "evaluation": "good, average, or bad",
    "reason": "Reason why it's good, average, or bad",
    "feedback": "Feedback or suggestions for improvement",
    "samples": [
        "<Good response 1>", 
        "<Good response 2>"
    ]
}"""
        self.user_prompt = """QUESTION:
{question}

RESPONSE: 
{response}"""

    def __call__(self, question: str, response: str) -> Optional[Evaluation]:
        """
        Evaluate a candidate's response; equivalent to calling ``run``.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Optional[Evaluation]: The evaluation of the candidate's response or None if an error occurred.
        """

        return self.run(question, response)

    def run(self, question: str, response: str) -> Optional[Evaluation]:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Optional[Evaluation]: The evaluation of the candidate's response or None if an error occurred.
        """

        return self._generate(question, response)

    def _generate(self, question: str, response: str) -> Optional[Evaluation]:
        """
        Send the question/response pair to the chat model and parse its JSON reply.

        Failures are reported with ``print`` and turned into a ``None`` return
        rather than raised.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Optional[Evaluation]: The parsed JSON object, or None if the request
            failed, the reply was empty, or the reply was not a JSON object.
        """

        try:
            output = self.client.chat.completions.create(
                model="gpt-3.5-turbo-1106",
                messages=[
                    {
                        "role": "system",
                        "content": self.system_prompt,
                    },
                    {
                        "role": "user",
                        "content": self.user_prompt.format(question=question, response=response),
                    },
                ],
                # JSON mode: ask the API for a syntactically valid JSON object
                # instead of free text that might wrap the JSON in prose or
                # code fences. The system prompt already mentions "JSON".
                response_format={"type": "json_object"},
                temperature=0.5,
                max_tokens=1024,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )
            content = output.choices[0].message.content
        except Exception as e:
            # Boundary handler: any client/network/API failure maps to None.
            print(f"Response evaluation request failed: {e}")
            return None

        if not content:
            # An empty reply is an error, not an (empty) evaluation.
            print("Response evaluation failed: the model returned no content")
            return None

        try:
            evaluation = json.loads(content)
        except json.JSONDecodeError as e:
            # E.g. the reply was cut off by max_tokens before the JSON closed.
            print(f"Response evaluation failed: reply is not valid JSON: {e}")
            return None

        if not isinstance(evaluation, dict):
            print("Response evaluation failed: reply is not a JSON object")
            return None

        return evaluation

In [10]:
from pprint import pprint


# Build the evaluator once; it is reused for every evaluation in this notebook.
response_evaluator = OpenAIResponseEvaluationAgent()

# Grade a sample answer to a motivation question (arguments passed by keyword).
evaluation = response_evaluator.run(
    question="What motivated you to pursue a career in software engineering at a startup in San Francisco?",
    response="I've always been interested in technology and startups. I've been working in the tech industry for the past 5 years and I've always wanted to work at a startup. I'm really excited about the opportunity to work at a startup in San Francisco.",
)

# Pretty-print the returned evaluation (or None if the call failed).
pprint(evaluation)

{'evaluation': 'average',
 'feedback': 'The candidate could improve the response by including specific '
             'experiences or projects that sparked their interest in startups '
             'and technology. Sharing personal anecdotes or examples of '
             'successful projects in the tech industry would make the response '
             'more compelling.',
 'reason': "The response provides a basic explanation of the candidate's "
           'interest in technology and startups, but it lacks specific details '
           'or personal anecdotes.',
 'samples': ["I've always been fascinated by the fast-paced and innovative "
             'environment of startups, and I believe that my passion for '
             'technology and problem-solving aligns perfectly with the culture '
             'of a startup. For instance, during my previous role, I had the '
             'opportunity to work on a project that involved developing a '
             'cutting-edge software solution f

In [12]:
# Grade a second sample answer; this one deliberately repeats a sentence.
evaluation = response_evaluator.run(
    question="How do you prioritize tasks when leading a team of data scientists and engineers on multiple projects?",
    response="I prioritize tasks by importance and urgency. I also make sure to communicate with my team about what needs to be done and when it needs to be done. I also make sure to communicate with my team about what needs to be done and when it needs to be done.",
)

# Pretty-print the returned evaluation (or None if the call failed).
pprint(evaluation)

{'evaluation': 'average',
 'feedback': 'The candidate should provide more detailed examples of how they '
             'prioritize tasks, such as using specific project management '
             'tools or techniques. They should also demonstrate an '
             'understanding of balancing the skills and workload of their team '
             'members. Additionally, the candidate should avoid repeating the '
             'same point in their response.',
 'reason': "The candidate's response shows a basic understanding of task "
           'prioritization but lacks depth and specific examples.',
 'samples': ["I prioritize tasks based on the project's overall impact on our "
             'goals and deadlines. I also take into consideration the skills '
             'and availability of my team members to ensure a balanced '
             'workload. For example, for urgent tasks, I may reassign '
             'resources or adjust timelines to meet the deadline.',
             'When leading 