<a href="https://colab.research.google.com/github/shardul2512/AI-Interview-coach/blob/main/AI_Coding_Interview_Agent_(Python).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import json
import sys # Added for Colab check
from dotenv import load_dotenv
from typing import List, Optional

# Langchain components
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
# Use pydantic.v1 for compatibility
from pydantic.v1 import BaseModel, Field, validator
from langchain_community.document_loaders import PyPDFLoader
# from langchain.text_splitter import RecursiveCharacterTextSplitter # Optional

# --- Pydantic Models for Structured Output ---

class Skill(BaseModel):
    """Represents a single skill extracted from the resume."""
    # NOTE: the class docstring and the Field descriptions are part of the
    # JSON schema pydantic generates for this model, so their text is kept as-is.

    # Required: the skill as it is named on the resume.
    name: str = Field(
        description="Name of the skill (e.g., Python, React, SQL)",
    )
    # Optional grouping; "Uncategorized" is used when the key is omitted.
    category: Optional[str] = Field(
        default="Uncategorized",
        description="Category (e.g., Language, Framework, Tool, Database)",
    )

class Project(BaseModel):
    """Represents a project described in the resume."""
    # NOTE: the class docstring and the Field descriptions are part of the
    # JSON schema pydantic generates for this model, so their text is kept as-is.

    # Required: project title.
    name: str = Field(
        description="Name of the project",
    )
    # Required: free-text summary of the project and the candidate's part in it.
    description: str = Field(
        description="Brief description of the project and the candidate's role/contributions",
    )
    # Optional: technology names; empty list when the key is omitted.
    technologies: Optional[List[str]] = Field(
        default=[],
        description="List of key technologies used",
    )

class WorkExperience(BaseModel):
    """Represents a work experience entry from the resume."""
    # NOTE: the class docstring and the Field descriptions are part of the
    # JSON schema pydantic generates for this model, so their text is kept as-is.

    # Required: employer.
    company: str = Field(
        description="Company name",
    )
    # Required: position held.
    role: str = Field(
        description="Job title/role",
    )
    # Optional: free-text date range; "N/A" when the key is omitted.
    duration: Optional[str] = Field(
        default="N/A",
        description="Dates of employment (e.g., 'Jan 2020 - Dec 2022')",
    )
    # Optional: bullet-style responsibilities; empty list when the key is omitted.
    responsibilities: Optional[List[str]] = Field(
        default=[],
        description="List of key responsibilities or achievements",
    )

class ResumeData(BaseModel):
    """Overall structure for the parsed resume data."""
    # NOTE: the class docstring and the Field descriptions are part of the
    # JSON schema pydantic generates for this model, so their text is kept as-is.

    # Optional free-text summary; empty string when the key is omitted.
    summary: Optional[str] = Field(
        default="",
        description="Brief professional summary if available, otherwise empty string",
    )
    # Optional collections of nested models; each defaults to an empty list.
    skills: Optional[List[Skill]] = Field(
        default=[],
        description="List of technical skills",
    )
    projects: Optional[List[Project]] = Field(
        default=[],
        description="List of personal or academic projects",
    )
    work_experience: Optional[List[WorkExperience]] = Field(
        default=[],
        description="List of professional work experiences",
    )
    # Optional display name; "Candidate" when the key is omitted.
    candidate_name: Optional[str] = Field(
        default="Candidate",
        description="Name of the candidate if found",
    )

    # Runs before type validation (pre=True) and even when the key is absent
    # (always=True), so an explicit null for a list field becomes [].
    @validator('skills', 'projects', 'work_experience', pre=True, always=True)
    def ensure_list(cls, v):
        if v is None:
            return []
        return v

# --- Core Functions ---

# Resolve the Google API key: environment / .env file first, Colab secrets as a fallback.
def load_api_key():
    """Return the Google API key, trying the environment/.env file and then Colab secrets.

    Returns:
        The key string found under ``GOOGLE_API_KEY``.

    Raises:
        ValueError: if neither source yields a non-empty key.
    """
    key = None

    # First source: process environment, after giving python-dotenv a chance to populate it.
    try:
        load_dotenv()
        key = os.getenv("GOOGLE_API_KEY")
    except Exception as e:
        print(f"Note: Error loading .env file (this is expected in Colab if no .env exists): {e}")

    if key:
        return key

    # Second source: Colab's per-notebook secret store.
    print("API key not found in environment variables or .env file. Trying Colab secrets...")
    try:
        from google.colab import userdata
        key = userdata.get('GOOGLE_API_KEY')
        if not key:
            print("API key not found in Colab secrets.")
        else:
            print("API key loaded successfully from Colab secrets.")
            # Mirror the secret into the environment for any later os.getenv lookups.
            os.environ['GOOGLE_API_KEY'] = key
    except ImportError:
        # google.colab is not importable here.
        print("Not in a Colab environment or google.colab.userdata failed.")
        key = None
    except Exception as e:
        print(f"Error loading from Colab secrets: {e}")
        key = None

    if key:
        return key

    raise ValueError("GOOGLE_API_KEY not found. Please create a .env file or add the secret 'GOOGLE_API_KEY' in Colab with Notebook access enabled.")


# Build the Gemini chat model instance that is passed to resume parsing and the interview session.
def initialize_llm(api_key, model="gemini-2.0-flash"):
    """Create the Gemini chat model used by the rest of the script.

    Args:
        api_key: Google API key, forwarded to the client as ``google_api_key``.
        model: Gemini model identifier. Defaults to ``"gemini-2.0-flash"``,
            the model this script was previously hard-coded to use.

    Returns:
        A ``ChatGoogleGenerativeAI`` instance created with ``request_timeout=120``.
    """
    # No temperature is passed, so the client's default applies to every chain
    # (extraction and question generation alike). Pass one here if the tasks
    # need different sampling behaviour.
    print(f"Initializing LLM with model: {model}")
    return ChatGoogleGenerativeAI(model=model, google_api_key=api_key, request_timeout=120)

def load_resume_text(pdf_path):
    """Return the text of every page of the PDF at ``pdf_path``, joined by newlines.

    Raises:
        ValueError: if the extracted text is empty or whitespace-only.
        Exception: any error from the loader is printed and then re-raised unchanged.
    """
    try:
        # load() returns one document per page; chunking is not needed here.
        pages = PyPDFLoader(pdf_path).load()
        text = "\n".join(page.page_content for page in pages)
        if text.strip():
            return text
        raise ValueError("Could not extract text from PDF. Ensure it's text-based.")
    except Exception as e:
        print(f"Error loading PDF {pdf_path}: {e}")
        raise

def create_extraction_chain(llm):
    """Build the prompt -> LLM -> JSON-parser pipeline that turns resume text into a dict.

    The returned chain expects a single input key, ``resume_text``; the
    ``schema`` placeholder is pre-filled with the JSON schema of ``ResumeData``.
    """
    template_text = """
    You are an expert resume parser. Analyze the following resume text and extract the information precisely according to the provided JSON schema.
    Focus on:
    - Candidate's Name (if clearly identifiable)
    - Professional Summary/Objective (if present)
    - Work Experiences (Company, Role, Duration, Key Responsibilities/Achievements)
    - Projects (Name, Description, Technologies Used)
    - Technical Skills (Name and attempt to categorize: Language, Framework, Tool, Database, Concept, etc.)

    If information for a field is missing or unclear, use default values specified in the schema (like empty lists or strings, or 'N/A', 'Uncategorized'). Be accurate.

    Schema:
    {schema}

    Resume Text:
    ---
    {resume_text}
    ---

    Extracted JSON:
    """

    # The schema is bound up front so callers only have to supply the resume text.
    schema_text = ResumeData.schema_json(indent=2)
    prompt = ChatPromptTemplate.from_template(
        template_text,
        partial_variables={"schema": schema_text},
    )
    json_parser = JsonOutputParser(pydantic_object=ResumeData)

    chain = prompt | llm | json_parser
    return chain

def parse_resume(pdf_path, llm):
    """Read the resume PDF, run the extraction chain on it and return a ``ResumeData``.

    Errors while loading the PDF propagate directly. Errors from the chain or
    from model validation are printed and then re-raised.
    """
    print(f"Loading resume: {pdf_path}")
    text = load_resume_text(pdf_path)
    print("Resume text loaded, starting extraction...")
    chain = create_extraction_chain(llm)
    try:
        print("Invoking extraction chain...")
        raw = chain.invoke({"resume_text": text})
        print("Extraction chain finished.")
        # Re-validate the parser's dict against the pydantic model.
        parsed = ResumeData(**raw)
    except Exception as e:
        print(f"Error during resume parsing or validation: {e}")
        # Propagate so the caller stops instead of interviewing on bad data.
        raise
    else:
        print("Resume parsed successfully.")
        return parsed


# --- Interview Session Class ---

class InterviewSession:
    """Manages the state and flow of the simulated interview.

    Attributes set by ``__init__``:
        resume_data: the parsed resume the questions are based on.
        llm: the chat model shared by every chain.
        history: list of dicts with keys ``agent``, ``question``, ``answer``
            and ``evaluation``, one per completed question.
        candidate_name: name used in prompts and console output.
    """

    def __init__(self, resume_data: "ResumeData", llm):
        """Initializes the session with resume data and the LLM."""
        self.resume_data = resume_data
        self.llm = llm  # Shared LLM instance used by all chains
        self.history = []  # One dict per question: agent, question, answer, evaluation
        self.candidate_name = resume_data.candidate_name or "Candidate"

        self._setup_agent_chains()

    def _setup_agent_chains(self):
        """Initializes Langchain chains for each interview agent."""
        # --- Behavioral Question Generation Chain ---
        behavioral_question_prompt = ChatPromptTemplate.from_template(
            """You are a professional interviewer starting a behavioral interview section.
            Based on the candidate's resume details below, generate ONE relevant behavioral question.
            Focus on their experiences and projects. Ask "Tell me about a time..." or "Describe a situation..." questions.
            Alternatively, ask a standard behavioral question (strengths, weaknesses, teamwork, conflict resolution).
            AVOID asking questions that were already asked in the 'Previous Questions' list.

            Candidate Name: {candidate_name}
            Resume Summary: {summary}
            Work Experience: {work_experience}
            Projects: {projects}
            Previous Questions:
            {previous_questions}

            Generate ONE behavioral question for {candidate_name}:"""
        )
        self.behavioral_question_chain = behavioral_question_prompt | self.llm | StrOutputParser()

        # --- Coding Question Generation Chain ---
        coding_question_prompt = ChatPromptTemplate.from_template(
            """You are a technical interviewer preparing a coding-related question.
            Based on the candidate's skills, ask ONE conceptual question about algorithms, data structures, language features, or problem-solving approaches relevant to their skills.
            DO NOT ask for live code implementation. Focus on understanding and explanation.
            Example: "Considering your Python skills, explain the difference between lists and tuples and when you'd use each." or "How would you approach optimizing a database query if you noticed slow performance, given your SQL experience?"
            AVOID asking questions that were already asked in the 'Previous Questions' list.

            Candidate Name: {candidate_name}
            Skills: {skills}
            Previous Questions:
            {previous_questions}

            Generate ONE conceptual coding question for {candidate_name}:"""
        )
        self.coding_question_chain = coding_question_prompt | self.llm | StrOutputParser()

        # --- Answer Evaluation Chain ---
        evaluation_prompt = ChatPromptTemplate.from_template(
            """You are an interview coach evaluating a candidate's answer during an interview simulation.
            Provide brief, constructive feedback (2-3 sentences). Focus on clarity, relevance, structure (like STAR for behavioral), depth, and technical accuracy (where applicable).
            Be encouraging but also point out specific areas for improvement if needed.

            Interview Stage: {question_type}
            Question Asked: {question}
            Candidate's Answer: {answer}

            Provide feedback on the answer:"""
        )
        self.evaluation_chain = evaluation_prompt | self.llm | StrOutputParser()

        # --- Final Feedback Synthesis Chain ---
        # Only the stages this class can actually run (behavioral, coding) are
        # listed, so the model is not asked to report on a section with no history.
        final_feedback_prompt = ChatPromptTemplate.from_template(
            """You are an experienced hiring manager summarizing the performance of {candidate_name} in a simulated technical interview.
            Review the entire interview history provided below, including questions, answers, and individual evaluations.
            Synthesize this into comprehensive, constructive feedback.

            Structure the feedback clearly:
            1.  **Overall Summary:** Brief overview of performance.
            2.  **Behavioral Section:** Strengths and areas for improvement.
            3.  **Technical Concepts/Coding Section:** Strengths and areas for improvement.
            4.  **Key Recommendations:** Actionable advice for the candidate.

            Be professional, balanced, and encouraging.

            Full Interview History:
            ---
            {interview_history}
            ---

            Generate Comprehensive Final Feedback for {candidate_name}:"""
        )
        self.final_feedback_chain = final_feedback_prompt | self.llm | StrOutputParser()

    def _get_previous_questions(self, agent_type=None):
        """Return already-asked questions as a "- q" bullet list, or "None" if there are none.

        Args:
            agent_type: when given, only questions recorded under this agent
                label (e.g. 'behavioral', 'coding') are included.
        """
        asked = [
            item['question']
            for item in self.history
            if agent_type is None or item['agent'] == agent_type
        ]
        if not asked:
            return "None"
        return "\n".join(f"- {q}" for q in asked)

    def _format_skills(self):
        """Return the resume skills as "name (category), ..." for the coding prompt.

        A missing category is shown as "Uncategorized" rather than "None"; with
        no skills at all the placeholder "General Concepts" is returned.
        """
        skills = self.resume_data.skills or []
        if not skills:
            return "General Concepts"
        return ", ".join(f"{s.name} ({s.category or 'Uncategorized'})" for s in skills)

    def _feedback_borders(self):
        """Return ``(header, footer)`` lines for the final report, both the same width."""
        header = "=" * 20 + f" Final Interview Feedback for {self.candidate_name} " + "=" * 20
        return header, "=" * len(header)

    def add_interaction(self, agent_type, question, answer, evaluation):
        """Adds a question-answer-evaluation cycle to the history."""
        self.history.append({
            "agent": agent_type,
            "question": question,
            "answer": answer,
            "evaluation": evaluation,
        })
        print("-" * 20)  # Separator after each interaction feedback

    def evaluate_answer(self, question, answer, question_type):
        """Run the evaluation chain on one answer, print the feedback and return it."""
        print("\nInterviewer: Thinking...")
        feedback = self.evaluation_chain.invoke({
            "question_type": question_type,
            "question": question,
            "answer": answer,
        })
        print(f"Interviewer Feedback: {feedback}")
        return feedback

    def _run_stage(self, agent_type, stage_label, question_chain, context, answer_prompt):
        """Ask one generated question, read the answer from stdin, evaluate and record it.

        Args:
            agent_type: label stored in the history entry ('behavioral' / 'coding').
            stage_label: stage name passed to the evaluation prompt.
            question_chain: chain that produces the question text from ``context``.
            context: template variables for ``question_chain``.
            answer_prompt: text shown by ``input()`` when asking for the answer.
        """
        question = question_chain.invoke(context)
        print(f"Interviewer: {question}")
        answer = input(answer_prompt)
        evaluation = self.evaluate_answer(question, answer, stage_label)
        self.add_interaction(agent_type, question, answer, evaluation)

    # --- Agent Interaction Methods ---

    def ask_behavioral_question(self):
        """Generates and asks a behavioral question."""
        print("\n--- Behavioral Question ---")
        work_experience = self.resume_data.work_experience or []
        projects = self.resume_data.projects or []
        context = {
            "candidate_name": self.candidate_name,
            "summary": self.resume_data.summary or "N/A",
            # Nested models are serialized to JSON text so the prompt gets a readable listing.
            "work_experience": json.dumps([exp.dict(exclude_none=True) for exp in work_experience], indent=2),
            "projects": json.dumps([p.dict(exclude_none=True) for p in projects], indent=2),
            "previous_questions": self._get_previous_questions('behavioral'),
        }
        self._run_stage(
            "behavioral",
            "Behavioral",
            self.behavioral_question_chain,
            context,
            f"{self.candidate_name}'s Answer: ",
        )

    def ask_coding_question(self):
        """Generates and asks a conceptual coding question."""
        print("\n--- Technical/Coding Concept Question ---")
        context = {
            "candidate_name": self.candidate_name,
            "skills": self._format_skills(),
            "previous_questions": self._get_previous_questions('coding'),
        }
        self._run_stage(
            "coding",
            "Coding/Concepts",
            self.coding_question_chain,
            context,
            f"{self.candidate_name}'s Approach/Explanation: ",
        )

    # --- Final Feedback ---

    def generate_final_feedback(self):
        """Generate, print and return the overall interview feedback.

        Returns:
            The feedback text, or ``None`` when no interaction has been recorded.
        """
        print("\n" + "="*25 + " Generating Final Feedback " + "="*25)
        if not self.history:
            print("No interview interactions recorded to generate feedback.")
            return

        # One paragraph per interaction, separated by blank lines.
        history_str = "\n\n".join(
            f"**{item['agent'].upper()} Stage**\n"
            f"Q: {item['question']}\n"
            f"A: {item['answer']}\n"
            f"Feedback: {item['evaluation']}"
            for item in self.history
        )

        final_feedback = self.final_feedback_chain.invoke({
            "candidate_name": self.candidate_name,
            "interview_history": history_str,
        })

        header, footer = self._feedback_borders()
        print("\n" + header)
        print(final_feedback)
        print(footer)  # Same width as the header line
        return final_feedback


# --- Main Execution Logic ---

def run_interview(pdf_path):
    """Run one full simulated interview for the resume at ``pdf_path``.

    Steps: load the API key, build the LLM, parse the resume, run each
    interview stage in order, then print the final feedback. Errors are
    reported on stdout rather than raised.
    """
    try:
        # Key and model are created per call and shared by parsing and the session.
        llm = initialize_llm(load_api_key())

        resume_data = parse_resume(pdf_path, llm)

        session = InterviewSession(resume_data, llm)
        print(f"\nStarting Interview Simulation for {session.candidate_name}...")
        print("="*30)

        # Interview plan: edit this sequence to add, drop or reorder stages.
        stages = (
            session.ask_behavioral_question,
            session.ask_coding_question,
            session.ask_behavioral_question,
        )

        for ask in stages:
            try:
                ask()
            except Exception as e:
                # A failed stage is reported and skipped; the remaining stages still run.
                print(f"\n!! An error occurred during stage {ask.__name__}: {e}")
                print("Attempting to continue interview...")

        session.generate_final_feedback()

        print("\nInterview Simulation Complete.")

    except ValueError as ve:
        # Any ValueError raised during setup or parsing lands here, including a missing API key.
        print(f"\nConfiguration Error: {ve}")
    except FileNotFoundError:
        print(f"\nError: Resume PDF file not found at {pdf_path}")
    except Exception as e:
        # Everything else, e.g. errors returned by the model API.
        print(f"\nAn unexpected error occurred: {e}")


# --- Script Entry Point ---
if __name__ == "__main__":
    # --- Configuration ---
    # Path to the candidate's resume PDF.
    #   Colab, file uploaded to the root directory: "Sresume.pdf"
    #   Local machine: "/path/on/your/computer/Sresume.pdf"
    resume_pdf_path = "Sresume.pdf"
    # --- End Configuration ---

    # Run only once the template placeholder path has been replaced.
    if resume_pdf_path != "path/to/your/resume.pdf":
        run_interview(resume_pdf_path)
    else:
        print("="*60)
        print("!! PLEASE UPDATE 'resume_pdf_path' in the script with the actual path to the PDF file. !!")
        print("="*60)


Initializing LLM with model: gemini-2.0-flash
Loading resume: Sresume.pdf
Resume text loaded, starting extraction...
Invoking extraction chain...
Extraction chain finished.
Resume parsed successfully.

Starting Interview Simulation for Shardul Pande...

--- Behavioral Question ---
Interviewer: Tell me about a time you had to adapt your data analysis approach when you encountered unexpected data quality issues or limitations in the available data. What did you do and what was the outcome?
Shardul Pande's Answer: I removed missing data and employed data cleaning and pre processing techniques to encounter this issues

Interviewer: Thinking...
Interviewer Feedback: While you mentioned relevant actions like removing missing data and cleaning, the answer lacks specifics. Use the STAR method to structure your response – describe the Situation, Task, Action (with details on *how* you cleaned and what techniques you used), and Result (quantify the impact if possible). This will make your answer