# Automating Interviews

## Importing requisities

In [None]:
%pip install langchain google-generativeai pydantic

In [36]:
from pydantic import BaseModel, Field
from typing import Dict, List, Optional, Any
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.messages import HumanMessage
from PIL import Image
import os
import base64
from pathlib import Path
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import ChatPromptTemplate
import json
import fitz  
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
api_key = os.getenv('GOOGLE_API_KEY')

## Pydantic Class

In [None]:
class PersonalInfo(BaseModel):
    name: str = Field(None, description='Name of the candidate')
    email: str = Field(None, description='Email ID of the candidate')
    phone: str = Field(None, description='Contact number of the candidate')
    address: str = Field(None, description='Location (city or state)')

class Education(BaseModel):
    institution: str = Field(None, description='Name of school, college, or university')
    degree: str = Field(None, description='Degree obtained (e.g., Bachelor’s, Master’s, etc.)')
    field_of_study: str = Field(None, description='Major or field of study')
    start_date: str = Field(None, description='Start date of the course (format: YYYY-MM)')
    end_date: str = Field(None, description='End date of the course (format: YYYY-MM)')

class Experience(BaseModel):
    organization: str = Field(None, description='Name of the company')
    role: str = Field(None, description='Position at the company')
    responsibilities: str = Field(None, description='Projects or work description')

class Project(BaseModel):
    title: Optional[str] = Field(None, description='Title of the project')
    outcome: Optional[str] = Field(None, description='Result or outcome of the project')
    approach: Optional[str] = Field(None, description='Methodology or approach used in the project')


class Resume(BaseModel):
    personal_info: PersonalInfo = Field(None, description='Personal details of the candidate')
    educational_qualifications:List[Education] = Field(default_factory=list, description='List of educational qualifications')
    skills: List[str] = Field(description='List of skills relevant to the candidate’s expertise')
    projects: List[Project] = Field(description='List of projects the candidate has worked on')
    


In [None]:
# initializing model
model = ChatGoogleGenerativeAI(model='gemini-2.0-flash')

# pydantic parser
parser = PydanticOutputParser(pydantic_object=Resume)

## Resume Parsing - Image as Input

In [22]:
job_description = """
Job Responsibilities:
Algorithm Development: Assist in the design, implementation, and testing of machine learning models to improve and extend our AI tools.
Data Analysis: Participate in the collection, cleaning, and analysis of data to inform model adjustments and new feature development.
Research: Stay abreast of the latest advancements in machine learning and data science, suggesting applications for new technologies and methodologies within our projects.
Collaboration: Work closely with our AI and product teams to integrate machine learning models seamlessly into our platform, enhancing user experience and product value.
Performance Monitoring: Monitor the performance of our AI tools, conducting rigorous testing and making recommendations for enhancements based on your findings.
Innovation: Contribute to brainstorming sessions aimed at identifying new opportunities for AI application within our services.
"""

In [None]:
# loading images and encoding
def encode_image(image_path):
    with open(image_path, 'rb') as image_file:
        return base64.b64encode(image_file.read()).decode('utf-8')
# prompt template
resume_prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the resume information in structured JSON format as per the provided schema.\n'{format_instructions}'\n"),
    ("human", [
        {
            "type": "text",
            "text": "Extract structured resume data from this image.",
        },
        {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,{image_data}"},
        },
    ]),
])

# prompt for most relevant project selection
extract_relevant_project_prompt = ChatPromptTemplate.from_messages([
    ('system', 'Based on the provided job description, select a project which is most relevant to the roles and responsbilities provided in the job description'),
    ('human', [
        {'type': 'text', 'text': 'Job description: {job_description} Projects: {projects}. Return the title of the most relevant project'}
    ])
])
def process_resume_with_job_description(resume_images, job_description):
    final_result = {}
    for image_path in resume_images:
        image_data = encode_image(image_path)
        resume_data = (resume_prompt | model | parser).invoke({
            "format_instructions": parser.get_format_instructions(),
            "image_data": image_data
        })
    projects = resume_data.projects
    if projects:
        project_list = '\n'.join([f'Title: {proj.title}, Outcome: {proj.outcome}, Approach: {proj.approach}' for proj in projects if proj.title])
        relevant_project_response = (extract_relevant_project_prompt | model).invoke({
                "job_description": job_description,
                "projects": project_list
            })
        relevant_project_title = relevant_project_response.content.strip()
        relevant_project = next((proj for proj in projects if proj.title == relevant_project_title), None)
    else:
        relevant_project = None

    final_result = {
        "personal_info": resume_data.personal_info.model_dump() if resume_data.personal_info else {},
        "educational_qualifications": [edu.model_dump() for edu in resume_data.educational_qualifications] if resume_data.educational_qualifications else [],
        "skills": resume_data.skills if resume_data.skills else [],
        "most_relevant_project": relevant_project.model_dump() if relevant_project else {}
    }

    return final_result


resume_images = ["resume/page 1.png", "resume/page 2.png"]


final_output = process_resume_with_job_description(resume_images, job_description)
print(json.dumps(final_output,indent=4))

{
    "personal_info": {},
    "educational_qualifications": [],
    "skills": [
        "Programming: Python, R, SQL",
        "Machine Learning",
        "Deep Learning",
        "Natural Language Processing",
        "Big Data: Spark",
        "Data Handling: Web Scraping, Data Manipulation",
        "Statistics: Hypothesis Testing, Regression Analysis"
    ],
    "most_relevant_project": {}
}


## Resume Parsing - PDF as input

In [24]:
def extract_text_from_pdf(pdf_path):
    document = fitz.open(pdf_path)
    text = ""
    for page in document:
        text += page.get_text()
    document.close()
    return text

resume_prompt = ChatPromptTemplate.from_messages([
    ("system", "Extract the resume information in structured JSON format as per the provided schema.\n'{format_instructions}'\n"),
    ("human", "Extract structured resume data from this text:\n\n{text}"),
])

project_prompt_template = ChatPromptTemplate.from_messages([
    ("system", "Based on the provided job description, select the most relevant project from the list."),
    ("human", "Job Description:\n{job_description}\n\nProjects:\n{projects}\n\nReturn the title of the most relevant project."),
])

def process_resume_with_job_description(pdf_path, job_description):
    extracted_text = extract_text_from_pdf(pdf_path)
    resume_data = (resume_prompt | model | parser).invoke({
        "format_instructions": parser.get_format_instructions(),
        "text": extracted_text
    })

    projects = resume_data.projects
    if projects:
        project_list = "\n".join([f"Title: {proj.title}, Outcome: {proj.outcome}, Approach: {proj.approach}" for proj in projects if proj.title])
    
        relevant_project_response = (project_prompt_template | model).invoke({
            "job_description": job_description,
            "projects": project_list
        })
        
        relevant_project_title = relevant_project_response.content.strip()
        relevant_project = next((proj for proj in projects if proj.title == relevant_project_title), None)
    else:
        relevant_project = None

    final_result = {
       "personal_info": resume_data.personal_info.model_dump() if resume_data.personal_info else {},
"educational_qualifications": [edu.model_dump() for edu in resume_data.educational_qualifications] if resume_data.educational_qualifications else [],
"most_relevant_project": relevant_project.model_dump() if relevant_project else {}

    }
    return final_result



In [27]:
result = process_resume_with_job_description('resume/cv2.pdf',job_description)

In [28]:
print(json.dumps(result, indent=4))

{
    "personal_info": {
        "name": "SPOORTHY S SWAMY",
        "email": "spoorthysswamy@gmail.com",
        "phone": "6362288247",
        "address": "Bangalore, Karnataka"
    },
    "educational_qualifications": [
        {
            "institution": "NMKRV College for Women, Jayanagar, Bangalore",
            "degree": "M.Sc. in Data Science",
            "field_of_study": null,
            "start_date": "2022",
            "end_date": "present"
        },
        {
            "institution": "The National College, Jayanagar, Bangalore",
            "degree": "B.Sc. in PMC",
            "field_of_study": null,
            "start_date": "2018",
            "end_date": "2021"
        },
        {
            "institution": "The Presidency PU College, Sira, Tumkur",
            "degree": null,
            "field_of_study": null,
            "start_date": "2016",
            "end_date": "2018"
        },
        {
            "institution": "The Presidency Public School, Sira, Tum

## Project Interview

In [None]:
def conduct_project_interview(selected_project: Dict[str, Any], job_description: str, max_questions: int = 5) -> Dict[str, Any]:
    conversation_history = []
    system_prompt = """
    You are an expert technical interviewer. Generate relevant follow-up questions based on candidate responses using the
    'peeling the onion' technique. Focus on techniques used, problem-solving, and project impact. Keep the responses natural and contextual.
    Ask one question at a time.
    """

    # initiating the interview - prompt for initial question
    initial_prompt_template = ChatPromptTemplate.from_messages([
        ("system", system_prompt),
        ("human", 
         "Project Details: {project_details}\n\n"
         "Job Description: {job_description}\n\n"
         "Generate an initial open-ended question about this project that allows the candidate to explain their role and contribution. "
         "The question should be conversational and natural."
        ),
    ])

    # generation - first question
    result = (initial_prompt_template | model).invoke({
        "project_details": json.dumps(selected_project),
        "job_description": job_description
    })
    
    initial_question = result.content.strip()
    conversation_history.append({'role': 'interviewer', 'question': initial_question})

    # follow up questions
    for i in range(max_questions - 1):
        print(f"\nInterviewer: {conversation_history[-1]['question']}")
        candidate_response = input("Candidate response (or type 'exit' to end): ").strip()
        # empty input?
        while not candidate_response:
            candidate_response = input("Please provide a response or type 'exit' to end: ").strip()
        if candidate_response.lower() == 'exit':
            break
        conversation_history.append({"role": "candidate", "response": candidate_response})
        # follow up template question
        follow_up_prompt_template = ChatPromptTemplate.from_messages([
            ("system", system_prompt),
            ("human", 
             "Project Details: {project_details}\n\n"
             "Job Description: {job_description}\n\n"
             "Previous Conversation:\n{conversation_history}\n\n"
             "Generate a natural follow-up question that digs deeper into:\n"
             "1. Technical challenges and solutions\n"
             "2. Decision-making process\n"
             "3. Impact and outcomes\n"
             "4. Role and responsibilities\n\n"
             "The question should flow naturally from the candidate's last response."
            ),
        ])
        # follow up question generation
        follow_up_result = (follow_up_prompt_template | model).invoke({
            "project_details": json.dumps(selected_project),
            "job_description": job_description,
            "conversation_history": json.dumps(conversation_history, indent=2)
        })
        follow_up_question = follow_up_result.content.strip()
        conversation_history.append({"role": "interviewer", "question": follow_up_question})

    # concluding interview
    print("\nInterviewer: Thank you for sharing your insights on this project. Lets proceed to the next part of our interview")

    # output
    return {
        "project_name": selected_project.get("title", "Unknown Project"),
        "conversation_history": conversation_history,
        "total_questions": len([x for x in conversation_history if x["role"] == "interviewer"]),
        "duration": len(conversation_history)
    }


selected_project = result.get("most_relevant_project", {})

if selected_project:
    interview_result = conduct_project_interview(selected_project, job_description)

# history
    for exchange in interview_result['conversation_history']:
        role = 'Interviewer' if exchange['role'] == 'interviewer' else 'Candidate'
        print(f"{role}: {exchange.get('question') or exchange.get('response')}")
else:
    print("No relevant project found...")


Interviewer: So, tell me about your experience working on the healthcare chatbot project.  What was your specific role, and what were some of your key contributions?

Interviewer: That's a crucial aspect of building a robust chatbot.  Can you elaborate on a specific challenge you faced during the data cleaning and preprocessing stage, and walk me through your decision-making process in choosing a solution?  I'm particularly interested in the trade-offs you considered.

Interviewer: Okay, so you used schema validation to identify missing medical terms. That's a good approach.  Can you explain how you handled those missing values after identifying them? Did you simply remove the entries, impute values, or use a different strategy? And what were the considerations that led to your chosen method – particularly regarding the potential impact on the model's accuracy and the chatbot's overall performance?

Interviewer: What specific imputation technique did you choose (e.g., mean, median, mo