In [2]:
%%capture
!pip install langchain langchain_core langchain_groq langchain_community langgraph gradio
!pip install gradio pandas uuid chromadb langchain langchain-community langchain-core pymupdf openai-whisper

In [None]:
import gradio as gr
import pandas as pd
import uuid
import random
import chromadb
import datetime
import fitz  # PyMuPDF for PDF reading
import mimetypes
import whisper
import re
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda

# -------------------- LLM Setup --------------------
import os  # the notebook's import cell does not bring in os; imported next to its only use

# The Groq credential is read from the environment instead of being embedded in
# the notebook, so it never ends up in version control or in a shared copy.
# A missing GROQ_API_KEY raises KeyError here, before any request is attempted.
# NOTE(review): any key that was previously committed in this cell should be
# treated as leaked and revoked.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile"
)

# Whisper speech-to-text model used by transcribe_audio().
model_whisper = whisper.load_model("base")

# -------------------- Question Bank --------------------
# Fixed pool of generic interview questions, grouped by theme.
# generate_questions() flattens every group into one list and samples from it
# at random, so the group names only serve as organisation for maintainers.
question_bank = {
    "General & Motivational": [
        "Why do you want to join this company?",
        "What motivates you to come to work every day?",
        "What makes you different from others?",
        "Where do you see yourself in 5 years?",
        "Why should we hire you?"
    ],
    "Situational & Skill-Based": [
        "Describe a time you handled failure.",
        "Tell me about a tight deadline situation.",
        "Describe a challenge at work and how you overcame it.",
        "Have you solved a problem without full info?",
        "How do you deal with stress?"
    ],
    "Teamwork & Leadership": [
        "How would teammates describe you?",
        "Tell me about a team conflict you resolved.",
        "Have you ever led a team? What was your approach?",
        "How do you give/receive feedback?"
    ],
    "Work Style & Personality": [
        "How do you stay organized?",
        "What's your strength and weakness?",
        "Do you prefer team or solo work?",
        "How do you keep up with trends?"
    ]
}

# -------------------- Loaders --------------------
def load_cv(cv_file=None, cv_url=None):
    """Return the candidate's CV as plain text.

    Args:
        cv_file: Uploaded file from the Gradio ``File`` component. Either an
            object exposing the temp-file path as ``.name`` or a plain path
            string.
        cv_url: Link to an online CV. Takes precedence over ``cv_file``.

    Returns:
        str: The CV text, ``""`` when neither source is given, or a short
        error message when the file cannot be read (callers treat the
        message as the CV text, as before).
    """
    if cv_url:
        return WebBaseLoader(cv_url).load().pop().page_content
    if not cv_file:
        return ""

    # Always go through the path on disk: path-style uploads have no read()
    # method, and the previous bare ``except`` turned that AttributeError into
    # a misleading "unsupported format" message for perfectly valid text CVs.
    path = getattr(cv_file, "name", cv_file)
    file_type, _ = mimetypes.guess_type(path)

    if file_type == 'application/pdf':
        try:
            # The context manager closes the document even if text
            # extraction fails half-way through.
            with fitz.open(path) as doc:
                return "\n".join(page.get_text() for page in doc)
        except Exception as e:
            return f"Failed to read PDF: {str(e)}"

    try:
        with open(path, "rb") as handle:
            return handle.read().decode("utf-8")
    except (OSError, UnicodeDecodeError):
        return "Unsupported file format or decoding failed."

def extract_job_info(job_text_or_url):
    """Turn a job posting into structured details using the LLM.

    Args:
        job_text_or_url: Raw job-description text, or a URL (anything that
            starts with ``"http"``) whose page content is downloaded first.

    Returns:
        The JSON structure parsed from the model's reply. If anything fails
        (download, model call, JSON parsing) a fallback dict with the keys
        ``role``, ``experience``, ``skills`` and ``description`` is returned
        instead of raising.
    """
    # Bound before the try block so the fallback below can always build a
    # description. Previously a failed download left ``page_data`` undefined
    # and the except handler itself crashed with UnboundLocalError.
    page_data = job_text_or_url or ""
    try:
        if page_data.startswith("http"):
            loader = WebBaseLoader(page_data)
            page_data = loader.load().pop().page_content

        parser = JsonOutputParser()

        prompt = PromptTemplate.from_template("""
        You are a hiring analyst. Analyze the following job description text and return structured job details in JSON format.

        ### INPUT:
        {page_data}

        ### OUTPUT FORMAT:
        {format_instructions}
        """).partial(format_instructions=parser.get_format_instructions())

        chain = prompt | llm | RunnableLambda(lambda x: x.content.strip())
        result = chain.invoke({'page_data': page_data})
        return parser.parse(result)

    except Exception as e:
        print("⚠️ Failed to extract job info:", str(e))
        return {
            "role": "Unknown",
            "experience": "Not specified",
            "skills": [],
            # Either the downloaded page or, if loading failed, the raw input.
            "description": page_data[:500] + "..."
        }

# -------------------- Interview Logic --------------------
# One record per graded answer: appended by submit_answer(), updated by
# submit_task_answer(), read by download_results() and
# generate_final_feedback_on_tasks().
# NOTE(review): this is a module-level list and nothing in this cell clears
# it, so records from every interview run against this app accumulate in the
# same list — confirm that is intended.
interview_data = []

def ask_next_question(question, answer):
    """Ask the LLM to grade one answer and return its parsed JSON verdict.

    Args:
        question: The interview question that was asked.
        answer: The candidate's answer text.

    Returns:
        The object parsed from the first ``{...}`` span of the model reply;
        the prompt requests the keys ``score`` and ``feedback``.

    Raises:
        ValueError: If the reply contains no ``{...}`` span at all.
    """
    grading_template = """
        ### QUESTION:
        {question}

        ### ANSWER:
        {answer}

        ### INSTRUCTION:
        Evaluate the candidate's answer and score it from 0 to 100.

        Then, based on the score, craft human-like, professional, and emotionally intelligent feedback:
        - If the score is >= 85, give praise and note what stood out.
        - If score is 70–84, mention it was solid and how to polish it further.
        - If 50–69, highlight the potential and suggest key improvements.
        - If below 50, be honest but empathetic, give constructive criticism, and encouragement.

        Return this JSON format:
        {{
            "score": 78,
            "feedback": "Your answer was structured and thoughtful. With more specific examples, it could be excellent."
        }}
    """
    to_text = RunnableLambda(lambda message: message.content.strip())
    grading_chain = PromptTemplate.from_template(grading_template) | llm | to_text
    reply = grading_chain.invoke({"question": question, "answer": answer})

    # The model may wrap the JSON in prose, so only the outermost braces are parsed.
    found = re.search(r"\{.*\}", reply, re.DOTALL)
    if found is None:
        raise ValueError(f"Invalid response format: {reply}")
    return JsonOutputParser().parse(found.group())

def transcribe_audio(audio_path):
    """Transcribe a recording with the module-level Whisper model.

    Returns the stripped transcript, or a parenthesised
    ``(Transcription failed: ...)`` message if anything goes wrong.
    """
    try:
        transcription = model_whisper.transcribe(audio_path)
        spoken_text = transcription["text"]
        return spoken_text.strip()
    except Exception as err:
        return f"(Transcription failed: {err})"

def generate_task_if_needed(score, question, job_data):
    """Create a follow-up task for a weak answer.

    Args:
        score: Score given to the answer; 50 or more means no task.
        question: The interview question that was answered.
        job_data: Job details, stringified into the prompt.

    Returns:
        str: ``""`` when the score is at least 50, otherwise the task text
        produced by the LLM.
    """
    if score >= 50:
        return ""

    template_text = """
        ### JOB DESCRIPTION:
        {job_data}

        ### INTERVIEW QUESTION:
        {question}

        ### INSTRUCTION:
        The candidate answered poorly (score < 50).
        Generate a single relevant task (coding/math/business) to evaluate their skills related to this question and the job role.

        Return ONLY the task instructions in plain text.
    """
    follow_up_chain = PromptTemplate.from_template(template_text) | llm
    llm_reply = follow_up_chain.invoke({"job_data": str(job_data), "question": question})
    return llm_reply.content.strip()

def generate_questions(cv_text, job_data):
    """Build the ordered list of interview questions.

    The list is: one fixed introduction question, up to five questions sampled
    at random from ``question_bank``, then LLM-generated questions interleaved
    as two job-based questions followed by one CV-based question.

    Args:
        cv_text: CV text inserted into the CV prompt.
        job_data: Job details; converted with ``str()`` for the job prompt.

    Returns:
        list: Question strings in asking order.
    """
    # A list item has to *start* its line with "<number>." (or "<number>)"),
    # optionally preceded by markup such as "**" or "- ". The unanchored
    # pattern used before also fired inside ordinary sentences, so a preamble
    # mentioning e.g. "Python 3.11 developers" produced a bogus question.
    numbered_item = re.compile(r"^\W*\d+[.)]\s*(.+)", re.MULTILINE)
    to_text = RunnableLambda(lambda x: x.content.strip())

    intro_question = ["Can you introduce yourself?"]

    bank = [q for group in question_bank.values() for q in group]
    random_qs = random.sample(bank, min(5, len(bank)))

    job_prompt = PromptTemplate.from_template("""
    Based on this job description, generate 10 diverse interview questions.

    ### JOB DESCRIPTION:
    {job_data}

    ### OUTPUT:
    10 questions numbered as a list.
    """)

    job_output = (job_prompt | llm | to_text).invoke({"job_data": str(job_data)})
    job_questions = numbered_item.findall(job_output)

    cv_prompt = PromptTemplate.from_template("""
    Based on this CV, generate 5 interview questions to assess the candidate's relevance and fit.

    ### CV:
    {cv_text}

    ### OUTPUT:
    5 questions numbered as a list.
    """)

    cv_output = (cv_prompt | llm | to_text).invoke({"cv_text": cv_text})
    cv_questions = numbered_item.findall(cv_output)

    # Interleave: two job questions, then one CV question, five rounds.
    combined_qs = []
    for i in range(0, 10, 2):
        combined_qs.extend(job_questions[i:i+2])
        if i // 2 < len(cv_questions):
            combined_qs.append(cv_questions[i // 2])

    return intro_question + random_qs + combined_qs

def download_results():
    """Write every record in ``interview_data`` to a timestamped CSV.

    Returns:
        str: Name of the CSV file created in the working directory.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"interview_results_{stamp}.csv"
    pd.DataFrame(interview_data).to_csv(filename, index=False)
    return filename

def submit_task_answer(task_answer, state):
    """Attach the candidate's task answer to the most recent interview record.

    ``state`` is accepted because the UI passes it, but it is not read here.

    Returns:
        str: A status line for the task-status box.
    """
    try:
        if interview_data:
            interview_data[-1]["task_answer"] = task_answer
            return "✅ Task answer submitted and saved."
        return "⚠️ No interview data found."
    except Exception as err:
        return f"❌ Error: {err}"

def generate_final_feedback_on_tasks(state):
    """Summarise how the candidate did on the extra tasks.

    Only records that have both a task and a task answer are sent to the LLM.
    ``state`` is accepted for the caller's convenience and not read here.

    Returns:
        str: The LLM's assessment, a fixed notice when no answered task
        exists, or a failure message if anything raises.
    """
    try:
        sections = []
        for entry in interview_data:
            if entry.get("task") and entry.get("task_answer"):
                sections.append(
                    f"Question: {entry['question']}\n"
                    f"Task: {entry['task']}\n"
                    f"Candidate's Answer: {entry['task_answer']}"
                )
        if not sections:
            return "No extra tasks were assigned, so no final feedback is needed."

        task_summaries = "\n\n".join(sections)

        template = """
        The following are extra tasks given to the candidate during the interview and their submitted answers:

        {task_summaries}

        ### INSTRUCTION:
        Write a final assessment focused ONLY on their extra task performance. Evaluate the quality, correctness, effort, and relevance of the task answers. End with a clear hiring recommendation.
        """
        to_text = RunnableLambda(lambda msg: msg.content.strip())
        summary_chain = PromptTemplate.from_template(template) | llm | to_text
        return summary_chain.invoke({"task_summaries": task_summaries})
    except Exception as err:
        return f"⚠️ Failed to generate final feedback: {err}"

# -------------------- Gradio UI --------------------
# Page layout: every component below is created inside the Blocks context
# that is bound to ``app``.
with gr.Blocks(theme='Respair/Shiki@1.2.1') as app:
    gr.Markdown("""
        # 🎤 AI Interviewer
        Upload your CV, paste a link, add job requirements, and begin an interactive mock interview with voice and text.
    """)

    # CV source: an uploaded file or a link (load_cv() prefers the link when both are given).
    with gr.Row():
        cv_file = gr.File(label="📄 Upload CV")
        cv_url = gr.Textbox(label="🌐 Or paste CV Link")

    job_input = gr.Textbox(label="🏢 Job Requirements or URL")
    start_btn = gr.Button("🚀 Start Interview")

    # Current question plus the two ways of answering it; audio arrives as a file path.
    question_output = gr.Textbox(label="🧠 Interview Question", lines=2)
    mic_input = gr.Audio(type="filepath", label="🎙️ Speak your answer")
    text_input = gr.Textbox(label="✍️ Or type your answer")
    submit_btn = gr.Button("✅ Submit Answer")
    next_btn = gr.Button("⏭️ Next Question")

    # Grading result and the optional follow-up task with its answer box.
    score_output = gr.Textbox(label="📊 Score & Feedback")
    task_output = gr.Textbox(label="🧪 Extra Task (if any)")
    task_answer_input = gr.Textbox(label="✍️ Your Answer to the Task", lines=4)
    submit_task_btn = gr.Button("📨 Submit Task Answer")
    task_status = gr.Textbox(label="📬 Task Submission Status", interactive=False)

    # End-of-interview summary, CSV export and error display.
    feedback_output = gr.Textbox(label="🧾 Final Feedback")
    download_btn = gr.Button("📥 Download Full Interview Report")
    download_file = gr.File()
    error_box = gr.Textbox(label="🚨 Error Log (if any)", lines=2, interactive=False)
    # Starts as an empty dict; start() replaces it with the interview state.
    state = gr.State({})

    def start(cv_file, cv_url, job_input):
        """Load the CV and job details, generate questions, open the interview.

        Returns:
            tuple: ``(first question, new state dict, "")`` on success, or
            ``(error notice, {}, error text)`` if any step raises.
        """
        try:
            resume_text = load_cv(cv_file, cv_url)
            job_details = extract_job_info(job_input)
            question_list = generate_questions(resume_text, job_details)
            first_question = question_list[0]
        except Exception as err:
            return "❌ Error occurred while starting interview.", {}, str(err)

        session = dict(
            cv_text=resume_text,
            job_data=job_details,
            questions=question_list,
            current=0,
            answers=[],
        )
        return first_question, session, ""

    def submit_answer(audio_path, text, state):
        """Grade the answer to the current question and record the result.

        A typed answer wins over a recording; audio is only transcribed when
        the text box is empty.

        Args:
            audio_path: Path of the recorded answer, or a falsy value.
            text: Typed answer, or a falsy value.
            state: Interview state dict created by ``start``.

        Returns:
            tuple: ``(score text, extra task, "", state, error text)``. The
            third slot clears the final-feedback box this handler is wired to.
        """
        try:
            questions = state.get("questions") if state else None
            if not questions:
                # Fresh state is an empty dict: say so plainly instead of
                # surfacing a bare KeyError message in the error box.
                return "", "", "", state, "Please start the interview before submitting an answer."
            if state["current"] >= len(questions):
                # "Next" was pressed past the last question; indexing would
                # raise IndexError.
                return "", "", "", state, "The interview is already complete."

            question = questions[state["current"]]
            if not text and audio_path:
                text = transcribe_audio(audio_path)

            answer = text or "(No valid response)"
            result = ask_next_question(question, answer)
            score = result["score"]
            feedback = result["feedback"]
            task_instruction = generate_task_if_needed(score, question, state["job_data"])

            interview_data.append({
                "question": question,
                "answer": answer,
                "score": score,
                "feedback": feedback,
                "task": task_instruction,
                "task_answer": ""  # Always include this for consistency
            })

            state["answers"].append((question, answer, score))
            return f"Score: {score}\nFeedback: {feedback}", task_instruction, "", state, ""
        except Exception as e:
            return "", "", "", state, str(e)

    def go_to_next_question(state):
        """Advance to the next question, or finish the interview.

        Returns:
            tuple: ``(question or completion notice, state, final feedback,
            error text)``; final feedback is only filled once the questions
            are exhausted.
        """
        try:
            state["current"] += 1
            position = state["current"]
            all_questions = state["questions"]
            if position >= len(all_questions):
                closing_feedback = generate_final_feedback_on_tasks(state)
                return "✅ Interview Complete", state, closing_feedback, ""
            return all_questions[position], state, "", ""
        except Exception as err:
            return "❌ Error moving to next question", state, "", str(err)

    def download():
        """Export the interview records and return the CSV path for download.

        Raises:
            gr.Error: If the report cannot be written. The handler's only
                output is a ``gr.File``, which would interpret a returned
                error message as a file path, so the failure is raised for
                Gradio to display instead.
        """
        try:
            return download_results()
        except Exception as e:
            raise gr.Error(f"Could not create the interview report: {e}") from e

    # Event wiring: the order of each ``outputs`` list must match the order of
    # the tuple returned by the corresponding handler.
    start_btn.click(fn=start, inputs=[cv_file, cv_url, job_input], outputs=[question_output, state, error_box])
    submit_btn.click(fn=submit_answer, inputs=[mic_input, text_input, state], outputs=[score_output, task_output, feedback_output, state, error_box])
    next_btn.click(fn=go_to_next_question, inputs=[state], outputs=[question_output, state, feedback_output, error_box])
    submit_task_btn.click(fn=submit_task_answer, inputs=[task_answer_input, state], outputs=[task_status])
    download_btn.click(fn=download, outputs=download_file)

# Start the Gradio app with default launch settings.
app.launch()

100%|███████████████████████████████████████| 139M/139M [00:02<00:00, 56.7MiB/s]
Error while fetching `HF_TOKEN` secret value from your vault: 'Requesting secret HF_TOKEN timed out. Secrets can only be fetched when running from the Colab UI.'.
You are not authenticated with the Hugging Face Hub in this notebook.
If the error persists, please let us know by opening an issue on GitHub (https://github.com/huggingface/huggingface_hub/issues/new).


theme_schema%401.2.1.json:   0%|          | 0.00/14.5k [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://31e6472ae7d7371047.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




# FastAPI


In [None]:
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.responses import FileResponse, JSONResponse
from pydantic import BaseModel
import uuid
import pandas as pd
import datetime
import os
import mimetypes
import fitz  # PyMuPDF
import whisper
import re
import random
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda

app = FastAPI()

# -------------------- Global Vars --------------------
# The Groq credential is read from the environment instead of being embedded in
# the source, so it never ends up in version control. A missing GROQ_API_KEY
# raises KeyError at import time, before the service accepts any request.
# NOTE(review): any key that was previously committed here should be treated
# as leaked and revoked.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile"
)

# Whisper speech-to-text model used by transcribe_audio().
model_whisper = whisper.load_model("base")
# session_id -> interview state dict created by /start.
sessions = {}
# session_id -> list of graded answer records, exported by /download.
interview_data = {}

# Fixed pool of generic interview questions, grouped by theme;
# generate_questions() samples from the flattened pool.
# The former ``[...]`` placeholders were real lists holding the Ellipsis
# object, so the sampled "questions" were not strings.
question_bank = {
    "General & Motivational": [
        "Why do you want to join this company?",
        "What motivates you to come to work every day?",
        "What makes you different from others?",
        "Where do you see yourself in 5 years?",
        "Why should we hire you?",
    ],
    "Situational & Skill-Based": [
        "Describe a time you handled failure.",
        "Tell me about a tight deadline situation.",
        "Describe a challenge at work and how you overcame it.",
        "Have you solved a problem without full info?",
        "How do you deal with stress?",
    ],
    "Teamwork & Leadership": [
        "How would teammates describe you?",
        "Tell me about a team conflict you resolved.",
        "Have you ever led a team? What was your approach?",
        "How do you give/receive feedback?",
    ],
    "Work Style & Personality": [
        "How do you stay organized?",
        "What's your strength and weakness?",
        "Do you prefer team or solo work?",
        "How do you keep up with trends?",
    ],
}

# -------------------- Utils --------------------
def load_cv(cv_file, cv_url):
    """Return the candidate's CV as plain text.

    Args:
        cv_file: Uploaded file exposing ``.filename`` and a binary ``.file``
            object, or ``None``.
        cv_url: Link to an online CV. Takes precedence over ``cv_file``.

    Returns:
        str: The CV text, ``""`` when neither source is given, or a short
        error message when the upload cannot be read (callers treat the
        message as the CV text, as before).
    """
    if cv_url:
        return WebBaseLoader(cv_url).load().pop().page_content
    if not cv_file:
        return ""

    # An upload may arrive without a filename; guess_type(None) would raise,
    # so such uploads are handled as plain text.
    file_type, _ = mimetypes.guess_type(cv_file.filename or "")

    if file_type == 'application/pdf':
        try:
            # The context manager closes the document even if text
            # extraction fails half-way through.
            with fitz.open(stream=cv_file.file.read(), filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)
        except Exception as e:
            return f"Failed to read PDF: {str(e)}"

    try:
        return cv_file.file.read().decode("utf-8")
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError and reads on a closed file.
        return "Unsupported file format or decoding failed."

def extract_job_info(job_text_or_url):
    """Turn a job posting into structured details using the LLM.

    Args:
        job_text_or_url: Raw job-description text, or a URL (anything that
            starts with ``"http"``) whose page content is downloaded first.

    Returns:
        The JSON structure parsed from the model's reply. If anything fails
        (download, model call, JSON parsing) a fallback dict with the keys
        ``role``, ``experience``, ``skills`` and ``description`` is returned
        instead of raising.
    """
    import logging

    # Bound before the try block so the fallback below can always build a
    # description. Previously a failed download left ``page_data`` undefined
    # and the except handler itself crashed with UnboundLocalError.
    page_data = job_text_or_url or ""
    try:
        if page_data.startswith("http"):
            loader = WebBaseLoader(page_data)
            page_data = loader.load().pop().page_content

        parser = JsonOutputParser()

        prompt = PromptTemplate.from_template("""
        You are a hiring analyst. Analyze the following job description text and return structured job details in JSON format.

        ### INPUT:
        {page_data}

        ### OUTPUT FORMAT:
        {format_instructions}
        """).partial(format_instructions=parser.get_format_instructions())

        chain = prompt | llm | RunnableLambda(lambda x: x.content.strip())
        result = chain.invoke({'page_data': page_data})
        return parser.parse(result)

    except Exception as e:
        # Keep the best-effort fallback, but leave a trace of why it was used.
        logging.getLogger(__name__).warning("Failed to extract job info: %s", e)
        return {
            "role": "Unknown",
            "experience": "Not specified",
            "skills": [],
            # Either the downloaded page or, if loading failed, the raw input.
            "description": page_data[:500] + "..."
        }

def ask_next_question(question, answer):
    """Ask the LLM to grade one answer and return its parsed JSON verdict.

    Args:
        question: The interview question that was asked.
        answer: The candidate's answer text.

    Returns:
        The object parsed from the first ``{...}`` span of the model reply;
        the prompt requests the keys ``score`` and ``feedback``.

    Raises:
        ValueError: If the reply contains no ``{...}`` span at all.
    """
    grading_template = """
        ### QUESTION:
        {question}

        ### ANSWER:
        {answer}

        ### INSTRUCTION:
        Evaluate the candidate's answer and score it from 0 to 100.

        Then, based on the score, craft human-like, professional, and emotionally intelligent feedback:
        - If the score is >= 85, give praise and note what stood out.
        - If score is 70–84, mention it was solid and how to polish it further.
        - If 50–69, highlight the potential and suggest key improvements.
        - If below 50, be honest but empathetic, give constructive criticism, and encouragement.

        Return this JSON format:
        {{
            "score": 78,
            "feedback": "Your answer was structured and thoughtful. With more specific examples, it could be excellent."
        }}
    """
    to_text = RunnableLambda(lambda message: message.content.strip())
    grading_chain = PromptTemplate.from_template(grading_template) | llm | to_text
    reply = grading_chain.invoke({"question": question, "answer": answer})

    # The model may wrap the JSON in prose, so only the outermost braces are parsed.
    found = re.search(r"\{.*\}", reply, re.DOTALL)
    if found is None:
        raise ValueError(f"Invalid response format: {reply}")
    return JsonOutputParser().parse(found.group())

def transcribe_audio(audio_path):
    """Transcribe a recording with the module-level Whisper model.

    Returns the stripped transcript, or a parenthesised
    ``(Transcription failed: ...)`` message if anything goes wrong.
    """
    try:
        transcription = model_whisper.transcribe(audio_path)
        spoken_text = transcription["text"]
        return spoken_text.strip()
    except Exception as err:
        return f"(Transcription failed: {err})"

def generate_task_if_needed(score, question, job_data):
    """Create a follow-up task for a weak answer.

    Args:
        score: Score given to the answer; 50 or more means no task.
        question: The interview question that was answered.
        job_data: Job details, stringified into the prompt.

    Returns:
        str: ``""`` when the score is at least 50, otherwise the task text
        produced by the LLM.
    """
    if score >= 50:
        return ""

    template_text = """
        ### JOB DESCRIPTION:
        {job_data}

        ### INTERVIEW QUESTION:
        {question}

        ### INSTRUCTION:
        The candidate answered poorly (score < 50).
        Generate a single relevant task (coding/math/business) to evaluate their skills related to this question and the job role.

        Return ONLY the task instructions in plain text.
    """
    follow_up_chain = PromptTemplate.from_template(template_text) | llm
    llm_reply = follow_up_chain.invoke({"job_data": str(job_data), "question": question})
    return llm_reply.content.strip()

def generate_questions(cv_text, job_data):
    """Build the ordered list of interview questions.

    The list is: one fixed introduction question, up to five questions sampled
    at random from ``question_bank``, then LLM-generated questions interleaved
    as two job-based questions followed by one CV-based question.

    Args:
        cv_text: CV text inserted into the CV prompt.
        job_data: Job details; converted with ``str()`` for the job prompt.

    Returns:
        list: Questions in asking order.
    """
    # A list item has to *start* its line with "<number>." (or "<number>)"),
    # optionally preceded by markup such as "**" or "- ". The unanchored
    # pattern used before also fired inside ordinary sentences, so a preamble
    # mentioning e.g. "Python 3.11 developers" produced a bogus question.
    numbered_item = re.compile(r"^\W*\d+[.)]\s*(.+)", re.MULTILINE)
    to_text = RunnableLambda(lambda x: x.content.strip())

    intro_question = ["Can you introduce yourself?"]
    bank = [q for group in question_bank.values() for q in group]
    random_qs = random.sample(bank, min(5, len(bank)))

    job_prompt = PromptTemplate.from_template("""
    Based on this job description, generate 10 diverse interview questions.

    ### JOB DESCRIPTION:
    {job_data}

    ### OUTPUT:
    10 questions numbered as a list.
    """)
    job_output = (job_prompt | llm | to_text).invoke({"job_data": str(job_data)})
    job_questions = numbered_item.findall(job_output)

    cv_prompt = PromptTemplate.from_template("""
    Based on this CV, generate 5 interview questions to assess the candidate's relevance and fit.

    ### CV:
    {cv_text}

    ### OUTPUT:
    5 questions numbered as a list.
    """)
    cv_output = (cv_prompt | llm | to_text).invoke({"cv_text": cv_text})
    cv_questions = numbered_item.findall(cv_output)

    # Interleave: two job questions, then one CV question, five rounds.
    combined_qs = []
    for i in range(0, 10, 2):
        combined_qs.extend(job_questions[i:i+2])
        if i // 2 < len(cv_questions):
            combined_qs.append(cv_questions[i // 2])
    return intro_question + random_qs + combined_qs

def save_interview_csv(session_id):
    """Write one session's answer records to ``interview_results_<id>.csv``.

    A session id with no records produces a CSV built from an empty list.

    Returns:
        str: Name of the CSV file created in the working directory.
    """
    filename = f"interview_results_{session_id}.csv"
    records = interview_data.get(session_id, [])
    pd.DataFrame(records).to_csv(filename, index=False)
    return filename

# -------------------- FastAPI Endpoints --------------------

# Request body of the /answer endpoint.
class AnswerRequest(BaseModel):
    # Identifier returned by /start; used as the key into ``sessions``.
    session_id: str
    # Passed to transcribe_audio() when ``text`` is empty.
    # NOTE(review): this is a client-supplied path that is opened on the
    # server — confirm that is intended.
    audio_path: str = ""
    # Typed answer; when non-empty it is used instead of the audio.
    text: str = ""

@app.post("/start")
async def start_interview(
    job_input: str = Form(...),
    cv_file: UploadFile = File(None),
    cv_url: str = Form(None)
):
    # Prepare the interview material: CV text, structured job info, questions.
    resume_text = load_cv(cv_file, cv_url)
    job_details = extract_job_info(job_input)
    question_list = generate_questions(resume_text, job_details)

    # Register a fresh session and its (still empty) list of answer records.
    new_session_id = str(uuid.uuid4())
    interview_data[new_session_id] = []
    sessions[new_session_id] = dict(
        cv_text=resume_text,
        job_data=job_details,
        questions=question_list,
        current=0,
        answers=[],
    )
    return dict(session_id=new_session_id, question=question_list[0])

@app.post("/answer")
async def submit_answer(data: AnswerRequest):
    """Grade the answer to the session's current question and record it.

    Returns the score, feedback and optional follow-up task. Responds with
    404 for an unknown session and 400 when the interview is already finished
    or the request carries neither text nor an audio path.
    """
    # Same not-found contract as /next instead of an unhandled KeyError (500).
    state = sessions.get(data.session_id)
    if not state:
        return JSONResponse(content={"error": "Session not found"}, status_code=404)

    # /next may already have moved past the last question; indexing would
    # raise IndexError.
    if state["current"] >= len(state["questions"]):
        return JSONResponse(content={"error": "Interview already complete"}, status_code=400)

    # Without this check an empty request was "transcribed" from an empty
    # path and the resulting failure message was graded as the answer.
    if not data.text and not data.audio_path:
        return JSONResponse(content={"error": "No answer provided"}, status_code=400)

    question = state["questions"][state["current"]]
    answer = data.text or transcribe_audio(data.audio_path)
    result = ask_next_question(question, answer)
    score = result["score"]
    feedback = result["feedback"]
    task = generate_task_if_needed(score, question, state["job_data"])
    interview_data[data.session_id].append({
        "question": question,
        "answer": answer,
        "score": score,
        "feedback": feedback,
        "task": task
    })
    state["answers"].append((question, answer, score))
    return {"score": score, "feedback": feedback, "task": task}

@app.get("/next/{session_id}")
async def next_question(session_id: str):
    # Advance the session's cursor and hand out the question it now points at.
    session = sessions.get(session_id)
    if session:
        session["current"] += 1
        position = session["current"]
        if position < len(session["questions"]):
            return {"question": session["questions"][position]}
        return {"message": "Interview complete."}
    return JSONResponse(content={"error": "Session not found"}, status_code=404)

@app.get("/download/{session_id}")
async def download_results(session_id: str):
    """Export a session's answer records as a CSV download.

    Responds with 404 for an unknown session, matching /next.
    """
    # Only ids issued by /start are accepted. Previously any string produced
    # an empty CSV on disk whose name was built from the client's input.
    if session_id not in interview_data:
        return JSONResponse(content={"error": "Session not found"}, status_code=404)
    filename = save_interview_csv(session_id)
    return FileResponse(filename, media_type='text/csv', filename=filename)

# Everything FastAPI


In [None]:
# main.py
from fastapi import FastAPI, UploadFile, File, Form
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import Optional
import uuid
from utils.interview_logic import (
    load_cv,
    extract_job_info,
    generate_questions,
    ask_next_question,
    generate_task_if_needed,
    transcribe_audio,
    download_results,
    session_store
)

app = FastAPI()

# Allow cross-origin calls from any origin, method and header.
# NOTE(review): a wildcard origin combined with allow_credentials=True is a
# very permissive CORS policy — confirm it is intended before deploying
# beyond local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# JSON shape for starting an interview.
# NOTE(review): not referenced by the endpoints visible in this module, which
# read form fields instead — confirm whether it is still needed.
class StartInterviewRequest(BaseModel):
    # Job description text or URL.
    job_input: str
    # Optional link to the candidate's CV.
    cv_url: Optional[str] = None

# JSON shape for submitting an answer.
# NOTE(review): not referenced by the endpoints visible in this module, which
# read form fields instead — confirm whether it is still needed.
class SubmitAnswerRequest(BaseModel):
    # Identifier returned by /start.
    session_id: str
    # Typed answer, if any.
    answer_text: Optional[str] = None

@app.post("/start")
async def start_interview(
    job_input: str = Form(...),
    cv_file: Optional[UploadFile] = File(None),
    cv_url: Optional[str] = Form(None)
):
    # Any failure is reported to the client as {"error": <message>}.
    try:
        resume_text = await load_cv(cv_file, cv_url)
        job_details = extract_job_info(job_input)
        question_list = generate_questions(resume_text, job_details)

        # Register a fresh session keyed by a random UUID.
        new_session_id = str(uuid.uuid4())
        session_store[new_session_id] = dict(
            cv_text=resume_text,
            job_data=job_details,
            questions=question_list,
            current=0,
            answers=[],
            results=[],
        )
        return dict(session_id=new_session_id, question=question_list[0])
    except Exception as err:
        return {"error": str(err)}

@app.post("/answer")
async def submit_answer(
    session_id: str = Form(...),
    answer_text: Optional[str] = Form(None),
    audio_file: Optional[UploadFile] = File(None)
):
    """Grade the answer to the session's current question and record it.

    A typed answer wins over an uploaded recording; audio is only transcribed
    when no text is given. Returns the score, feedback and optional follow-up
    task, or ``{"error": <message>}`` on any problem.
    """
    import os
    import tempfile

    try:
        session = session_store.get(session_id)
        if not session:
            return {"error": "Invalid session ID"}

        # /next may already have moved past the last question; say so instead
        # of reporting "list index out of range".
        if session["current"] >= len(session["questions"]):
            return {"error": "Interview already complete"}

        question = session["questions"][session["current"]]

        if not answer_text and audio_file:
            # Use a real temporary file: the former fixed "temp_audio/"
            # directory was never created, and the uploads written there were
            # never removed.
            suffix = os.path.splitext(audio_file.filename or "")[1] or ".mp3"
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
                tmp.write(await audio_file.read())
                audio_path = tmp.name
            try:
                answer_text = transcribe_audio(audio_path)
            finally:
                os.remove(audio_path)

        if not answer_text:
            return {"error": "No answer provided"}

        result = ask_next_question(question, answer_text)
        task = generate_task_if_needed(result["score"], question, session["job_data"])

        session["results"].append({
            "question": question,
            "answer": answer_text,
            "score": result["score"],
            "feedback": result["feedback"],
            "task": task
        })

        session["answers"].append((question, answer_text, result["score"]))

        return {
            "score": result["score"],
            "feedback": result["feedback"],
            "task": task
        }
    except Exception as e:
        return {"error": str(e)}

@app.get("/next/{session_id}")
async def next_question(session_id: str):
    # Advance the session's cursor; problems are reported as {"error": ...}.
    try:
        session = session_store.get(session_id)
        if session:
            session["current"] += 1
            position = session["current"]
            if position >= len(session["questions"]):
                return {"message": "Interview Complete"}
            return {"question": session["questions"][position]}
        return {"error": "Invalid session ID"}
    except Exception as err:
        return {"error": str(err)}

@app.get("/download/{session_id}")
async def download(session_id: str):
    # Export the session's results and report where the CSV was written.
    try:
        session = session_store.get(session_id)
        if session:
            return {"file_path": download_results(session["results"])}
        return {"error": "Invalid session ID"}
    except Exception as err:
        return {"error": str(err)}


# utils/interview_logic.py
import mimetypes
import fitz
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda
import whisper
import re
import random
import pandas as pd
import datetime

import os  # not part of this module's import block; imported next to its only use

# The key comes from the GROQ_API_KEY environment variable so deployments can
# supply it without editing source. The placeholder is kept as the fallback,
# which leaves behaviour unchanged when the variable is not set.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ.get("GROQ_API_KEY", "your-groq-key"),
    model_name="llama-3.3-70b-versatile"
)
# Whisper speech-to-text model used by transcribe_audio().
model_whisper = whisper.load_model("base")

# Fixed pool of generic interview questions, grouped by theme.
# generate_questions() flattens every group into one list and samples five
# questions from it at random.
question_bank = {
    "General & Motivational": [
        "Why do you want to join this company?",
        "What motivates you to come to work every day?",
        "What makes you different from others?",
        "Where do you see yourself in 5 years?",
        "Why should we hire you?"
    ],
    "Situational & Skill-Based": [
        "Describe a time you handled failure.",
        "Tell me about a tight deadline situation.",
        "Describe a challenge at work and how you overcame it.",
        "Have you solved a problem without full info?",
        "How do you deal with stress?"
    ],
    "Teamwork & Leadership": [
        "How would teammates describe you?",
        "Tell me about a team conflict you resolved.",
        "Have you ever led a team? What was your approach?",
        "How do you give/receive feedback?"
    ],
    "Work Style & Personality": [
        "How do you stay organized?",
        "What's your strength and weakness?",
        "Do you prefer team or solo work?",
        "How do you keep up with trends?"
    ]
}

# In-memory map of session_id -> interview state, imported and filled by main.py.
# NOTE(review): nothing visible here removes entries or persists them —
# presumably sessions live until the process restarts; confirm.
session_store = {}

async def load_cv(cv_file=None, cv_url=None):
    """Return the candidate's CV as plain text.

    Args:
        cv_file: Uploaded file exposing ``.filename`` and an awaitable
            ``read()``, or ``None``.
        cv_url: Link to an online CV. Takes precedence over ``cv_file``.

    Returns:
        str: The CV text, ``""`` when neither source is given, or a short
        error message when the upload cannot be read (callers treat the
        message as the CV text, as before).
    """
    if cv_url:
        return WebBaseLoader(cv_url).load().pop().page_content
    if not cv_file:
        return ""

    # An upload may arrive without a filename; guess_type(None) would raise,
    # so such uploads are handled as plain text.
    file_type, _ = mimetypes.guess_type(cv_file.filename or "")

    if file_type == 'application/pdf':
        try:
            # The context manager closes the document even if text
            # extraction fails half-way through.
            with fitz.open(stream=await cv_file.read(), filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)
        except Exception as e:
            return f"Failed to read PDF: {str(e)}"

    try:
        return (await cv_file.read()).decode("utf-8")
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError and reads on a closed file.
        return "Unsupported file format or decoding failed."

def extract_job_info(job_text_or_url):
    """Turn a job posting (text or URL) into structured details via the LLM.

    Returns the parsed JSON from the model, or a fallback dict with the keys
    ``role``, ``experience``, ``skills`` and ``description`` when any step
    raises.
    """
    try:
        is_link = job_text_or_url.startswith("http")
        page_data = (
            WebBaseLoader(job_text_or_url).load().pop().page_content
            if is_link
            else job_text_or_url
        )

        parser = JsonOutputParser()
        template = """
        You are a hiring analyst. Analyze the job description and return structured job details in JSON.
        ### INPUT:
        {page_data}
        ### OUTPUT FORMAT:
        {format_instructions}
        """
        prompt = PromptTemplate.from_template(template).partial(
            format_instructions=parser.get_format_instructions()
        )

        to_text = RunnableLambda(lambda message: message.content.strip())
        raw_json = (prompt | llm | to_text).invoke({'page_data': page_data})
        return parser.parse(raw_json)

    except Exception:
        # Best-effort fallback built from the raw input.
        fallback = {
            "role": "Unknown",
            "experience": "Not specified",
            "skills": [],
            "description": job_text_or_url[:500] + "...",
        }
        return fallback

def generate_questions(cv_text, job_data):
    """Build the ordered list of interview questions.

    The list is: one fixed introduction question, five questions sampled at
    random from ``question_bank``, then LLM-generated questions interleaved
    as two job-based questions followed by one CV-based question.

    Args:
        cv_text: CV text inserted into the CV prompt.
        job_data: Job details; converted with ``str()`` for the job prompt.

    Returns:
        list: Question strings in asking order.
    """
    # A list item has to *start* its line with "<number>." (or "<number>)"),
    # optionally preceded by markup such as "**" or "- ". The unanchored
    # pattern used before also fired inside ordinary sentences, so a preamble
    # mentioning e.g. "Python 3.11 developers" produced a bogus question.
    numbered_item = re.compile(r"^\W*\d+[.)]\s*(.+)", re.MULTILINE)
    to_text = RunnableLambda(lambda x: x.content.strip())

    intro = ["Can you introduce yourself?"]
    random_qs = random.sample([q for g in question_bank.values() for q in g], 5)

    job_prompt = PromptTemplate.from_template("""
    Based on the job description, generate 10 diverse interview questions.
    ### JOB:
    {job_data}
    ### OUTPUT:
    1. ...
    """)
    job_output = (job_prompt | llm | to_text).invoke({"job_data": str(job_data)})
    job_qs = numbered_item.findall(job_output)

    cv_prompt = PromptTemplate.from_template("""
    Based on the CV, generate 5 interview questions to assess fit.
    ### CV:
    {cv_text}
    ### OUTPUT:
    1. ...
    """)
    cv_output = (cv_prompt | llm | to_text).invoke({"cv_text": cv_text})
    cv_qs = numbered_item.findall(cv_output)

    # Interleave: two job questions, then one CV question, five rounds.
    combined = []
    for i in range(0, 10, 2):
        combined.extend(job_qs[i:i+2])
        if i // 2 < len(cv_qs):
            combined.append(cv_qs[i // 2])

    return intro + random_qs + combined

def ask_next_question(question, answer):
    """Ask the LLM to score one answer and return its parsed JSON verdict.

    Args:
        question: The interview question that was asked.
        answer: The candidate's answer text.

    Returns:
        The object parsed from the first ``{...}`` span of the model reply
        (the prompt requests ``score`` and ``feedback``), or
        ``{"score": 0, "feedback": "Invalid format"}`` when the reply
        contains no such span.
    """
    # The braces of the JSON example are doubled: PromptTemplate reads single
    # braces as placeholders, so the unescaped example was parsed as a
    # template variable instead of being sent to the model as literal text.
    prompt = PromptTemplate.from_template("""
    ### QUESTION:
    {question}

    ### ANSWER:
    {answer}

    ### INSTRUCTION:
    Score (0–100) and give professional feedback.

    Return JSON:
    {{
        "score": 78,
        "feedback": "Your answer was structured and thoughtful. Add examples."
    }}
    """)

    chain = prompt | llm | RunnableLambda(lambda x: x.content.strip())
    output = chain.invoke({"question": question, "answer": answer})

    # The model may wrap the JSON in prose, so only the outermost braces are parsed.
    match = re.search(r"\{.*\}", output, re.DOTALL)
    if match is None:
        return {"score": 0, "feedback": "Invalid format"}
    return JsonOutputParser().parse(match.group())

def generate_task_if_needed(score, question, job_data):
    """Create a follow-up task for a weak answer.

    Returns ``""`` when ``score`` is at least 50; otherwise the stripped text
    of the LLM's reply to the task prompt.
    """
    if score >= 50:
        return ""

    template_text = """
    ### JOB:
    {job_data}
    ### QUESTION:
    {question}
    ### INSTRUCTION:
    Candidate scored < 50. Give one task to test their skill.
    """
    task_chain = PromptTemplate.from_template(template_text) | llm
    llm_reply = task_chain.invoke({"job_data": str(job_data), "question": question})
    return llm_reply.content.strip()

def transcribe_audio(audio_path):
    """Transcribe a recording with the module-level Whisper model.

    Returns the stripped transcript, or a parenthesised
    ``(Transcription failed: ...)`` message if anything goes wrong.
    """
    try:
        transcription = model_whisper.transcribe(audio_path)
        spoken_text = transcription["text"]
        return spoken_text.strip()
    except Exception as err:
        return f"(Transcription failed: {err})"

def download_results(results):
    """Write the given answer records to a timestamped CSV.

    Args:
        results: Records accepted by ``pandas.DataFrame`` (the caller passes
            a list of dicts).

    Returns:
        str: Name of the CSV file created in the working directory.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"interview_results_{stamp}.csv"
    pd.DataFrame(results).to_csv(filename, index=False)
    return filename
