# Interview Response Evaluation
`Author: Abdlazeez Jimoh`

In [1]:
import json
from pprint import pprint
from types import ModuleType
from typing import Any, Literal

import cohere  # type: ignore
import google.generativeai as GooglePalm  # type: ignore
from openai import OpenAI
from openai.types.chat.chat_completion import ChatCompletion
from pydantic import BaseModel

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
COHERE_API_KEY = "YOUR_COHERE_API_KEY"
OPENAI_API_KEY = "YOUR_OPENAI_API_KEY"
PALM_API_KEY = "YOUR_PALM_API_KEY"

In [3]:
class Question(BaseModel):
    question: str
    type: Literal["personal", "role-specific", "behavioral", "situational"]


class Evaluation(BaseModel):
    evaluation: Literal["good", "average", "bad"]
    feedback: str | None
    reason: str | None
    samples: list[str] | None

## OpenAI Interview Response Evaluation

In [4]:
class OpenAIResponseEvaluationAgent:
    def __init__(self) -> None:
        self.client = OpenAI(api_key=OPENAI_API_KEY)
        self.system_prompt = """You are an interviewer evaluating a \
candidate's response to an interview question. Your task is to:
- Evaluate the candidate's response on the scale of "good", "average", and "bad".
- Provide a reason for why it's categorized as good, average, or bad.
- Offer constructive feedback or suggestions for improvement.
- Provide 2 samples of good responses.

You will be provided with an interview question and a candidate response.

Evaluate and provide output in the following JSON format:
{{
    "evaluation": "good, average, or bad",
    "reason": "Reason why it's good, average, or bad",
    "feedback": "Feedback or suggestions for improvement",
    "samples": [
        "<Good response 1>", 
        "<Good response 2>"
    ]
}}"""
        self.user_prompt = """QUESTION:
{question}

RESPONSE: 
{response}"""

    def __call__(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        # Generate questions
        evaluation: Evaluation | None = self._generate(question, response)

        return evaluation

    def run(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        # Generate questions
        evaluation: Evaluation | None = self._generate(question, response)

        return evaluation

    def _generate(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        try:
            output: ChatCompletion = self.client.chat.completions.create(
                model="gpt-3.5-turbo-1106",
                messages=[
                    {
                        "role": "system",
                        "content": self.system_prompt,
                    },
                    {
                        "role": "user",
                        "content": self.user_prompt.format(
                            question=question, response=response
                        ),
                    },
                ],
                temperature=0.5,
                max_tokens=1024,
                top_p=1,
                frequency_penalty=0,
                presence_penalty=0,
            )

            if not output.choices[0].message.content:
                return None

            questions: Evaluation = json.loads(output.choices[0].message.content)

            return questions
        except Exception:
            return None

In [5]:
response_evaluator = OpenAIResponseEvaluationAgent()
evaluation: Evaluation | None = response_evaluator.run(
    "What motivated you to pursue a career in software engineering at a "
    "startup in San Francisco?",
    "I've always been interested in technology and startups. I've been "
    "working in the tech industry for the past 5 years and I've always "
    "wanted to work at a startup. I'm really excited about the "
    "opportunity to work at a startup in San Francisco.",
)

pprint(evaluation)

{'evaluation': 'average',
 'feedback': 'The candidate should provide more specific reasons for their '
             'interest in working at a startup in San Francisco. They could '
             'talk about the innovative environment, the opportunity to make a '
             'big impact, or their passion for working with cutting-edge '
             'technologies.',
 'reason': "The candidate's response shows a basic level of interest in "
           'technology and startups, but it lacks specific details or personal '
           'motivations.',
 'samples': ["I've always been drawn to the fast-paced and innovative "
             "environment of startups, and I'm particularly excited about the "
             'opportunity to work in San Francisco, known for its tech culture '
             'and vibrant startup scene.',
             "I'm motivated to pursue a career in software engineering at a "
             'startup in San Francisco because I thrive in dynamic and agile '
             "work

In [6]:
evaluation: Evaluation | None = response_evaluator.run(
    "How do you prioritize tasks when leading a team of data scientists "
    "and engineers on multiple projects?",
    "I prioritize tasks by importance and urgency. I also make sure to "
    "communicate with my team about what needs to be done and when it "
    "needs to be done. I also make sure to communicate with my team about "
    "what needs to be done and when it needs to be done.",
)

pprint(evaluation)

{'evaluation': 'average',
 'feedback': 'The candidate should provide more specific examples of how they '
             'prioritize tasks, such as using tools like the Eisenhower Matrix '
             'or Agile methodologies. They should also mention considering '
             "team members' strengths and workload when delegating tasks.",
 'reason': "The candidate's response demonstrates a basic understanding of "
           'task prioritization but lacks depth and specific examples.',
 'samples': ['I prioritize tasks by evaluating their impact on project '
             'deadlines, resource availability, and strategic goals. For '
             'example, if a task directly impacts a critical project '
             'milestone, it receives top priority. I also consider team '
             "members' expertise and workload when assigning tasks to ensure a "
             'balanced distribution of work.',
             'When leading a team on multiple projects, I prioritize tasks by '
         

## Google Palm Interview Response Evaluation

In [7]:
class PalmResponseEvaluationAgent:
    def __init__(self) -> None:
        self.client: ModuleType = GooglePalm
        self.client.configure(api_key=PALM_API_KEY)  # type: ignore
        self.system_prompt = """You are an interviewer evaluating a \
candidate's response to an interview question. Your task is to:
- Evaluate the candidate's response on the scale of "good", "average", and "bad".
- Provide a reason for why it's categorized as good, average, or bad.
- Offer constructive feedback or suggestions for improvement.
- Provide 2 samples of good responses.

You will be provided with an interview question and a candidate response.

Evaluate and provide output in the following JSON format:
{{
    "evaluation": "good, average, or bad",
    "reason": "Reason why it's good, average, or bad",
    "feedback": "Feedback or suggestions for improvement",
    "samples": [
        "Good response 1", 
        "Good response 2"
    ]
}}


===
QUESTION:
{question}

RESPONSE: 
{response}"""

    def __call__(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        # Generate questions
        evaluation: Evaluation | None = self._generate(question, response)

        return evaluation

    def run(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        # Generate questions
        evaluation: Evaluation | None = self._generate(question, response)

        return evaluation

    def _generate(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        try:
            output: Any = self.client.generate_text(
                model="models/text-bison-001",
                prompt=self.system_prompt.format(question=question, response=response),
                temperature=1,
                max_output_tokens=1024,
            )

            if not output.result:
                return None

            evaluations: Evaluation = json.loads(output.result)

            return evaluations
        except Exception:
            return None

In [8]:
response_evaluator = PalmResponseEvaluationAgent()
evaluation: Evaluation | None = response_evaluator.run(
    "What motivated you to pursue a career in software engineering at a "
    "startup in San Francisco?",
    "I've always been interested in technology and startups. I've been "
    "working in the tech industry for the past 5 years and I've always "
    "wanted to work at a startup. I'm really excited about the "
    "opportunity to work at a startup in San Francisco.",
)

pprint(evaluation)

{'evaluation': 'good',
 'feedback': 'None',
 'reason': 'The response is well-written and provides a clear and concise '
           'answer to the question. The candidate also provides some '
           'additional information about their experience in the tech industry '
           'and their excitement about the opportunity to work at a startup in '
           'San Francisco.',
 'samples': ["I've always been passionate about technology and I'm excited "
             'about the opportunity to work at a startup where I can have a '
             'direct impact on the product.',
             "I'm looking for a fast-paced environment where I can learn and "
             'grow quickly. I believe that a startup in San Francisco is the '
             'best place for me to achieve my career goals.']}


In [9]:
evaluation: Evaluation | None = response_evaluator.run(
    "How do you prioritize tasks when leading a team of data scientists "
    "and engineers on multiple projects?",
    "I prioritize tasks by importance and urgency. I also make sure to "
    "communicate with my team about what needs to be done and when it "
    "needs to be done. I also make sure to communicate with my team about "
    "what needs to be done and when it needs to be done.",
)

pprint(evaluation)

{'evaluation': 'good',
 'feedback': 'The candidate could provide more detail on how they specifically '
             'prioritize tasks. For example, they could mention using a '
             'specific prioritization framework or tool.',
 'reason': 'The candidate provides a clear and concise answer that addresses '
           'the question. They also mention the importance of communication, '
           'which is a key skill for any leader.',
 'samples': ['I prioritize tasks by using a prioritization framework that '
             'takes into account the importance, urgency, and feasibility of '
             'each task. I also make sure to communicate with my team about '
             'what needs to be done and when it needs to be done.',
             'I use a Kanban board to visualize my tasks and track their '
             "progress. This helps me to stay organized and ensures that I'm "
             'not missing any important deadlines.']}


## Cohere Interview Response Evaluation

In [10]:
class CohereResponseEvaluationAgent:
    def __init__(self) -> None:
        self.client: Any = cohere.Client(api_key=COHERE_API_KEY)  # type: ignore
        self.system_prompt = """You are an interviewer evaluating a \
candidate's response to an interview question. Your task is to:
- Evaluate the candidate's response on the scale of "good", "average", and "bad".
- Provide a reason for why it's categorized as good, average, or bad.
- Offer constructive feedback or suggestions for improvement.
- Provide 2 samples of good responses.

You will be provided with an interview question and a candidate response.

Evaluate and provide output in the following JSON format:
{{
    "evaluation": "good, average, or bad",
    "reason": "Reason why it's good, average, or bad",
    "feedback": "Feedback or suggestions for improvement",
    "samples": [
        "Good response 1", 
        "Good response 2"
    ]
}}


===
QUESTION:
{question}

RESPONSE: 
{response}"""

    def __call__(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        # Generate questions
        evaluation: Evaluation | None = self._generate(question, response)

        return evaluation

    def run(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        # Generate questions
        evaluation: Evaluation | None = self._generate(question, response)

        return evaluation

    def _generate(self, question: str, response: str) -> Evaluation | None:
        """
        Evaluate a candidate's response to an interview question.

        Args:
            question (str): The interview question.
            response (str): The candidate's response.

        Returns:
            Evaluation | None: The evaluation of the candidate's response or \
                None if an error occurred.
        """

        try:
            output: Any = self.client.generate(
                model="command",
                prompt=self.system_prompt.format(question=question, response=response),
                temperature=1,
                max_tokens=1024,
                stream=False,
            )

            if not output.generations:
                return None

            if not output.generations[0].text:
                return None

            evaluations: Evaluation = json.loads(output.generations[0].text)

            return evaluations
        except Exception:
            return None

In [11]:
response_evaluator = PalmResponseEvaluationAgent()
evaluation: Evaluation | None = response_evaluator.run(
    "What motivated you to pursue a career in software engineering at a "
    "startup in San Francisco?",
    "I've always been interested in technology and startups. I've been "
    "working in the tech industry for the past 5 years and I've always "
    "wanted to work at a startup. I'm really excited about the "
    "opportunity to work at a startup in San Francisco.",
)

pprint(evaluation)

{'evaluation': 'good',
 'feedback': 'The response could be improved by providing more details about '
             "the candidate's experience in the tech industry and why they are "
             'excited about the opportunity to work at a startup in San '
             'Francisco.',
 'reason': 'The response is well-written and provides a clear and concise '
           'answer to the question.',
 'samples': ["I've always been passionate about technology and startups. I've "
             "been working in the tech industry for the past 5 years, and I've "
             "always wanted to work at a startup because I'm drawn to the "
             "fast-paced, innovative environment. I'm also excited about the "
             'opportunity to work in San Francisco, which is a hub for tech '
             'startups.',
             "I'm a software engineer with a strong track record of success in "
             "the tech industry. I've worked at a variety of startups, and I'm "
             "excite

In [12]:
evaluation: Evaluation | None = response_evaluator.run(
    "How do you prioritize tasks when leading a team of data scientists "
    "and engineers on multiple projects?",
    "I prioritize tasks by importance and urgency. I also make sure to "
    "communicate with my team about what needs to be done and when it "
    "needs to be done. I also make sure to communicate with my team about "
    "what needs to be done and when it needs to be done.",
)

pprint(evaluation)

{'evaluation': 'good',
 'feedback': 'The candidate could provide more detail on how they specifically '
             'prioritize tasks and communicate with their team.',
 'reason': 'The candidate provides a clear and concise answer that '
           'demonstrates their understanding of the importance of '
           'prioritization and communication.',
 'samples': ['I prioritize tasks based on a number of factors, including the '
             'impact of the task, the urgency of the task, and the '
             'availability of resources. I also make sure to communicate with '
             'my team regularly to ensure that everyone is on the same page '
             'and that we are working towards the same goals.',
             'I use a number of tools and techniques to help me prioritize '
             'tasks, including a Kanban board, a project management software, '
             'and regular meetings with my team. I also make sure to stay '
             'up-to-date on the latest ind