In [None]:
%%capture
!pip install langchain langchain_core langchain_groq langchain_community langgraph gradio
!pip install gradio pandas uuid chromadb langchain langchain-community langchain-core pymupdf openai-whisper

In [None]:
import os

from langchain_groq import ChatGroq

# SECURITY: a Groq API key used to be hard-coded in this cell. Anything
# committed in a notebook must be treated as leaked, so that key should be
# revoked; the key is now read from the GROQ_API_KEY environment variable.
# A missing variable fails fast with KeyError instead of a late auth error.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile"
)


In [None]:
import gradio as gr
import pandas as pd
import uuid
import random
import chromadb
import datetime
import fitz
import mimetypes
import whisper
import re
import time
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda

# -------------------- LLM Setup --------------------
import os

# SECURITY: a Groq API key used to be hard-coded here. Anything committed in
# a notebook must be treated as leaked, so that key should be revoked; the
# key is now read from the GROQ_API_KEY environment variable. A missing
# variable fails fast with KeyError instead of a late auth error.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile"
)

# "base" Whisper checkpoint, used by transcribe_audio() for spoken answers.
model_whisper = whisper.load_model("base")

# -------------------- Question Bank --------------------
# Canned interview questions grouped by theme. Keys are category names and
# values are the question lists in their original order.
question_bank = {
    "General & Motivational": [
        "Can you introduce yourself?",
        "Why do you want to join this company?",
        "What motivates you to come to work every day?",
        "What makes you different from others?",
        "Where do you see yourself in 5 years?",
        "Why should we hire you?",
    ],
    "Situational & Skill-Based": [
        "Describe a time you handled failure.",
        "Tell me about a tight deadline situation.",
        "Describe a challenge at work and how you overcame it.",
        "Have you solved a problem without full info?",
        "How do you deal with stress?",
    ],
    "Teamwork & Leadership": [
        "How would teammates describe you?",
        "Tell me about a team conflict you resolved.",
        "Have you ever led a team? What was your approach?",
        "How do you give/receive feedback?",
    ],
    "Work Style & Personality": [
        "How do you stay organized?",
        "What's your strength and weakness?",
        "Do you prefer team or solo work?",
        "How do you keep up with trends?",
    ],
}

# -------------------- Utilities --------------------
def load_cv(cv_file=None, cv_url=None):
    """Return the candidate's CV as plain text.

    Args:
        cv_file: Uploaded CV. Accepted forms are a filesystem path (``str``),
            an object exposing the path as ``.name``, or a readable
            file-like object.
        cv_url: URL of an online CV. Takes precedence over ``cv_file``.

    Returns:
        The extracted text, or ``""`` when neither source is given. Read
        failures are reported as a human-readable message string instead of
        being raised, so the caller always receives a ``str``.
    """
    if cv_url:
        return WebBaseLoader(cv_url).load().pop().page_content
    if not cv_file:
        return ""

    # NOTE(review): depending on the Gradio version the upload may arrive as
    # a plain path or as a wrapper with `.name` - handle both; confirm
    # against the installed gradio release.
    path = str(getattr(cv_file, "name", cv_file))
    file_type, _ = mimetypes.guess_type(path)

    if file_type == 'application/pdf':
        try:
            doc = fitz.open(path)
            try:
                return "\n".join(page.get_text() for page in doc)
            finally:
                # Release the document even when text extraction fails.
                doc.close()
        except Exception as e:
            return f"Failed to read PDF: {e}"

    try:
        if hasattr(cv_file, "read"):
            raw = cv_file.read()
        else:
            # Path-only upload: read the bytes from disk ourselves.
            with open(path, "rb") as handle:
                raw = handle.read()
        return raw.decode("utf-8") if isinstance(raw, bytes) else raw
    except (OSError, ValueError, AttributeError, TypeError):
        # ValueError covers UnicodeDecodeError and reads on closed files.
        return "Unsupported file format or decoding failed."

def extract_job_info(job_text_or_url):
    """Turn a job description (raw text or URL) into structured job details.

    Args:
        job_text_or_url: The job description itself, or a string starting
            with ``"http"`` whose page content is fetched and used instead.

    Returns:
        The JSON object parsed from the LLM reply. On any failure (missing
        or empty input, page load error, LLM or parsing error) a fallback
        dict with the keys ``role``, ``experience``, ``skills`` and
        ``description`` is returned instead of raising.
    """
    # Bound before the try block so the fallback below can always use it,
    # even when the input is None or the page download fails.
    page_data = job_text_or_url if isinstance(job_text_or_url, str) else ""
    try:
        if not page_data.strip():
            raise ValueError("no job description provided")

        if page_data.startswith("http"):
            loader = WebBaseLoader(page_data)
            page_data = loader.load().pop().page_content

        parser = JsonOutputParser()

        prompt = PromptTemplate.from_template("""
        You are a hiring analyst. Analyze the following job description text and return structured job details in JSON format.

        ### INPUT:
        {page_data}

        ### OUTPUT FORMAT:
        {format_instructions}
        """).partial(format_instructions=parser.get_format_instructions())

        chain = prompt | llm | RunnableLambda(lambda x: x.content.strip())
        result = chain.invoke({'page_data': page_data})
        return parser.parse(result)

    except Exception as e:
        print("⚠️ Failed to extract job info:", str(e))
        return {
            "role": "Unknown",
            "experience": "Not specified",
            "skills": [],
            "description": page_data[:500] + "..."
        }

def generate_contextual_question(state):
    """Ask the LLM for a new interview question tailored to the job and CV.

    Reads ``state["questions"]`` (joined with newlines as the "already
    asked" list), ``state["job_data"]`` and ``state["cv_text"]``, and
    returns the stripped text of the model's reply.
    """
    template = PromptTemplate.from_template("""
        Based on the job description and the candidate's resume, generate a unique and non-repetitive interview question
        that hasn't already been asked. Focus on assessing suitability for the role.

        ### JOB DESCRIPTION:
        {job_data}

        ### CV TEXT:
        {cv_text}

        ### ALREADY ASKED:
        {already_asked}

        Return only the new question.
    """)

    asked_so_far = "\n".join(state["questions"])
    pipeline = template | llm | RunnableLambda(lambda reply: reply.content.strip())
    inputs = {
        "job_data": str(state["job_data"]),
        "cv_text": state["cv_text"],
        "already_asked": asked_so_far,
    }
    return pipeline.invoke(inputs)

def transcribe_audio(audio_path):
    """Transcribe ``audio_path`` with the module-level Whisper model.

    Returns the stripped transcript, or a parenthesised
    "(Transcription failed: ...)" message if anything goes wrong.
    """
    try:
        transcription = model_whisper.transcribe(audio_path)
        spoken_text = transcription["text"]
        return spoken_text.strip()
    except Exception as err:
        return f"(Transcription failed: {err!s})"

def ask_next_question(question, answer):
    """Have the LLM grade ``answer`` to ``question``.

    Returns the JSON object parsed from the model's reply (the prompt asks
    for ``score`` and ``feedback`` keys). Raises ``ValueError`` when the
    reply contains no ``{...}`` section.
    """
    grading_prompt = PromptTemplate.from_template("""
        ### QUESTION:
        {question}

        ### ANSWER:
        {answer}

        ### INSTRUCTION:
        Score the answer from 0 to 100 based on clarity, relevance, and confidence.
        Give short feedback and return JSON:
        {{"score": 85, "feedback": "Good explanation but lacked structure."}}
    """)
    grader = grading_prompt | llm | RunnableLambda(lambda reply: reply.content.strip())
    raw_output = grader.invoke({"question": question, "answer": answer})

    # Keep only the outermost {...} span so surrounding chatter is ignored.
    found = re.search(r"\{.*\}", raw_output, re.DOTALL)
    if not found:
        raise ValueError(f"Invalid response format: {raw_output}")
    return JsonOutputParser().parse(found.group())

def generate_follow_up(score, job_data, cv_text):
    """Produce an extra task and CV feedback for low-scoring candidates.

    When ``score`` is below 60, the LLM is asked for a role-appropriate task
    (parsed from JSON) and for three lines of improvement feedback on the
    CV; the pair ``(task_dict, feedback_text)`` is returned. Otherwise the
    result is ``({}, "")`` and no LLM call is made.
    """
    if not (score < 60):
        return {}, ""

    # The template text, including its indentation, is kept verbatim.
    task_prompt = PromptTemplate.from_template("""
            ### JOB:
            {job_data}

            ### INSTRUCTION:
            Generate a task (coding/math/business) suitable for this role.
            Return JSON: {{"task_type": "math", "instructions": "Solve this..."}}
        """)
    task_chain = task_prompt | llm | RunnableLambda(lambda reply: reply.content.strip())
    task_reply = task_chain.invoke({"job_data": str(job_data)})
    task_data = JsonOutputParser().parse(task_reply)

    feedback_prompt = PromptTemplate.from_template("""
            ### CV:
            {cv_text}

            ### INSTRUCTION:
            Give feedback on areas of improvement in 3 lines.
        """)
    feedback_reply = (feedback_prompt | llm).invoke({"cv_text": cv_text})
    return task_data, feedback_reply.content.strip()

def download_results():
    """Write the collected interview records to a timestamped CSV file.

    The file is created in the current working directory and its name is
    returned.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"interview_results_{stamp}.csv"
    pd.DataFrame(interview_data).to_csv(filename, index=False)
    return filename

# -------------------- Interview Logic --------------------
# Module-level log of graded answers: submit_answer() appends one dict per
# answer and download_results() exports the list as CSV.
# NOTE(review): nothing visible here clears this list, so it presumably
# accumulates across interviews and is shared by every session - confirm.
interview_data = []

def start(cv_file, cv_url, job_input, timer_minutes):
    """Initialise an interview session.

    Loads the CV, extracts the job details, and picks the opening questions:
    the fixed introduction followed by three random picks from the rest of
    the "General & Motivational" list. It also records the start time and
    the duration in seconds.

    Returns:
        ``(first_prompt, session_state, error_text)``. On any exception the
        state is ``{}`` and ``error_text`` carries the exception message.
    """
    try:
        cv_text = load_cv(cv_file, cv_url)
        job_data = extract_job_info(job_input)

        motivational = question_bank["General & Motivational"]
        opening_questions = ["Can you introduce yourself?"]
        opening_questions = opening_questions + random.sample(motivational[1:], 3)

        started_at = time.time()
        allowed_seconds = int(timer_minutes) * 60

        session = {
            "cv_text": cv_text,
            "job_data": job_data,
            "questions": opening_questions,
            "current": 0,
            "answers": [],
            "start_time": started_at,
            "duration": allowed_seconds,
        }
    except Exception as err:
        return "❌ Error occurred while starting interview.", {}, str(err)
    else:
        return "👋 Hello! Let's start with an easy one: Can you introduce yourself?", session, ""

def submit_answer(audio_path, text, state):
    """Grade the answer to the current question and update the session.

    Typed ``text`` wins; when it is empty and ``audio_path`` is given, the
    recording is transcribed instead. The graded record is appended to the
    module-level ``interview_data`` and to ``state["answers"]``, then a
    follow-up is requested based on the running average score.

    Returns:
        ``(score_and_feedback, task_instructions, final_feedback, state,
        error_text)``. On any exception the first three are empty strings
        and ``error_text`` holds the exception message.
    """
    try:
        question = state["questions"][state["current"]]
        if audio_path and not text:
            text = transcribe_audio(audio_path)

        answer = text if text else "(No valid response)"
        grading = ask_next_question(question, answer)
        score = grading["score"]
        feedback = grading["feedback"]

        record = {
            "question": question,
            "answer": answer,
            "score": score,
            "feedback": feedback,
        }
        interview_data.append(record)

        state["answers"].append((question, answer, score))

        total = sum(points for _q, _a, points in state["answers"])
        avg_score = total / len(state["answers"])
        task, final_feedback = generate_follow_up(avg_score, state["job_data"], state["cv_text"])

        summary = f"Score: {score}\nFeedback: {feedback}"
        return summary, task.get("instructions", ""), final_feedback, state, ""
    except Exception as err:
        return "", "", "", state, str(err)

def go_to_next_question(state):
    """Advance the interview to the next question.

    If the session's time budget is used up, a closing message is returned
    and the state is left untouched. Otherwise ``state["current"]`` is
    incremented; once the prepared questions run out, a new one is generated
    and appended to ``state["questions"]``.

    Returns:
        ``(question_or_message, state, error_text)``.
    """
    try:
        elapsed = time.time() - state["start_time"]
        if elapsed > state["duration"]:
            return "⏳ Interview time is over. Thank you!", state, ""

        state["current"] += 1
        position = state["current"]

        if position >= len(state["questions"]):
            fresh_question = generate_contextual_question(state)
            state["questions"].append(fresh_question)
            return fresh_question, state, ""
        return state["questions"][position], state, ""
    except Exception as err:
        return "❌ Error getting next question", state, str(err)

def download():
    """Gradio handler: export the results CSV and return its filename.

    If the export raises, the exception message is returned instead.
    """
    try:
        report_path = download_results()
    except Exception as err:
        return str(err)
    return report_path

# -------------------- Gradio UI --------------------
# Build the interview UI. Components are created in display order, so the
# statement order below is the page layout.
with gr.Blocks(theme='Respair/Shiki@1.2.1') as app:
    gr.Markdown("""
        # 🎤 AI Interviewer
        Upload your CV, paste a link, add job requirements, and begin an interactive mock interview with voice and text.
    """)

    # CV sources: an uploaded file or a URL (load_cv prefers the URL).
    with gr.Row():
        cv_file = gr.File(label="📄 Upload CV")
        cv_url = gr.Textbox(label="🌐 Or paste CV Link")

    job_input = gr.Textbox(label="🏢 Job Requirements or URL")
    # Choices are strings; start() converts the selection with int().
    timer_dropdown = gr.Dropdown(choices=["20", "40", "60", "80"], label="⏱️ Interview Duration (minutes)", value="20")

    start_btn = gr.Button("🚀 Start Interview")

    # Question display plus the two ways of answering (voice or text).
    question_output = gr.Textbox(label="🧠 Interview Question", lines=2)
    mic_input = gr.Audio(type="filepath", label="🎙️ Speak your answer")
    text_input = gr.Textbox(label="✍️ Or type your answer")
    submit_btn = gr.Button("✅ Submit Answer")
    next_btn = gr.Button("➡️ Next Question")
    pass_btn = gr.Button("⏭️ Pass This Question")

    # Result panels filled by submit_answer().
    score_output = gr.Textbox(label="📊 Score & Feedback")
    task_output = gr.Textbox(label="🧪 Extra Task (if any)")
    feedback_output = gr.Textbox(label="🧾 Final Feedback")
    download_btn = gr.Button("📥 Download Full Interview Report")
    download_file = gr.File()
    error_box = gr.Textbox(label="🚨 Error Log (if any)", lines=2, interactive=False)
    # Session dict produced by start() and passed through the handlers.
    state = gr.State({})

    # Event wiring. "Next" and "Pass" share one handler: both advance to the
    # next question without grading anything.
    start_btn.click(fn=start, inputs=[cv_file, cv_url, job_input, timer_dropdown], outputs=[question_output, state, error_box])
    submit_btn.click(fn=submit_answer, inputs=[mic_input, text_input, state], outputs=[score_output, task_output, feedback_output, state, error_box])
    next_btn.click(fn=go_to_next_question, inputs=[state], outputs=[question_output, state, error_box])
    pass_btn.click(fn=go_to_next_question, inputs=[state], outputs=[question_output, state, error_box])
    download_btn.click(fn=download, outputs=download_file)

# Start the Gradio app.
app.launch()


100%|███████████████████████████████████████| 139M/139M [00:03<00:00, 40.8MiB/s]
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


theme_schema%401.2.1.json:   0%|          | 0.00/14.5k [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://fbfcf3ac24c8d48e2c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import gradio as gr
import pandas as pd
import uuid
import random
import chromadb
import datetime
import fitz  # PyMuPDF for PDF reading
import mimetypes
import whisper
import re
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.runnables import RunnableLambda

# -------------------- LLM Setup --------------------
import os

# SECURITY: a Groq API key used to be hard-coded here. Anything committed in
# a notebook must be treated as leaked, so that key should be revoked; the
# key is now read from the GROQ_API_KEY environment variable. A missing
# variable fails fast with KeyError instead of a late auth error.
llm = ChatGroq(
    temperature=0,
    groq_api_key=os.environ["GROQ_API_KEY"],
    model_name="llama-3.3-70b-versatile"
)

# "base" Whisper checkpoint, used by transcribe_audio() for spoken answers.
model_whisper = whisper.load_model("base")

# -------------------- Question Bank --------------------
# Canned interview questions grouped by theme. Keys are category names and
# values are the question lists in their original order.
question_bank = {
    "General & Motivational": [
        "Can you introduce yourself?",
        "Why do you want to join this company?",
        "What motivates you to come to work every day?",
        "What makes you different from others?",
        "Where do you see yourself in 5 years?",
        "Why should we hire you?",
    ],
    "Situational & Skill-Based": [
        "Describe a time you handled failure.",
        "Tell me about a tight deadline situation.",
        "Describe a challenge at work and how you overcame it.",
        "Have you solved a problem without full info?",
        "How do you deal with stress?",
    ],
    "Teamwork & Leadership": [
        "How would teammates describe you?",
        "Tell me about a team conflict you resolved.",
        "Have you ever led a team? What was your approach?",
        "How do you give/receive feedback?",
    ],
    "Work Style & Personality": [
        "How do you stay organized?",
        "What's your strength and weakness?",
        "Do you prefer team or solo work?",
        "How do you keep up with trends?",
    ],
}

def get_random_questions(count=6):
    """Return up to ``count`` distinct questions drawn at random from every
    category of ``question_bank``."""
    pool = []
    for category_questions in question_bank.values():
        pool.extend(category_questions)
    sample_size = min(count, len(pool))
    return random.sample(pool, sample_size)

# -------------------- Loaders --------------------
def load_cv(cv_file=None, cv_url=None):
    """Return the candidate's CV as plain text.

    Args:
        cv_file: Uploaded CV. Accepted forms are a filesystem path (``str``),
            an object exposing the path as ``.name``, or a readable
            file-like object.
        cv_url: URL of an online CV. Takes precedence over ``cv_file``.

    Returns:
        The extracted text, or ``""`` when neither source is given. Read
        failures are reported as a human-readable message string instead of
        being raised, so the caller always receives a ``str``.
    """
    if cv_url:
        return WebBaseLoader(cv_url).load().pop().page_content
    if not cv_file:
        return ""

    # NOTE(review): depending on the Gradio version the upload may arrive as
    # a plain path or as a wrapper with `.name` - handle both; confirm
    # against the installed gradio release.
    path = str(getattr(cv_file, "name", cv_file))
    file_type, _ = mimetypes.guess_type(path)

    if file_type == 'application/pdf':
        try:
            doc = fitz.open(path)
            try:
                return "\n".join(page.get_text() for page in doc)
            finally:
                # Release the document even when text extraction fails.
                doc.close()
        except Exception as e:
            return f"Failed to read PDF: {e}"

    try:
        if hasattr(cv_file, "read"):
            raw = cv_file.read()
        else:
            # Path-only upload: read the bytes from disk ourselves.
            with open(path, "rb") as handle:
                raw = handle.read()
        return raw.decode("utf-8") if isinstance(raw, bytes) else raw
    except (OSError, ValueError, AttributeError, TypeError):
        # ValueError covers UnicodeDecodeError and reads on closed files.
        return "Unsupported file format or decoding failed."

def extract_job_info(job_text_or_url):
    """Turn a job description (raw text or URL) into structured job details.

    Args:
        job_text_or_url: The job description itself, or a string starting
            with ``"http"`` whose page content is fetched and used instead.

    Returns:
        The JSON object parsed from the LLM reply. On any failure (missing
        or empty input, page load error, LLM or parsing error) a fallback
        dict with the keys ``role``, ``experience``, ``skills`` and
        ``description`` is returned instead of raising.
    """
    # Bound before the try block so the fallback below can always use it,
    # even when the input is None or the page download fails.
    page_data = job_text_or_url if isinstance(job_text_or_url, str) else ""
    try:
        if not page_data.strip():
            raise ValueError("no job description provided")

        if page_data.startswith("http"):
            loader = WebBaseLoader(page_data)
            page_data = loader.load().pop().page_content

        parser = JsonOutputParser()

        prompt = PromptTemplate.from_template("""
        You are a hiring analyst. Analyze the following job description text and return structured job details in JSON format.

        ### INPUT:
        {page_data}

        ### OUTPUT FORMAT:
        {format_instructions}
        """).partial(format_instructions=parser.get_format_instructions())

        chain = prompt | llm | RunnableLambda(lambda x: x.content.strip())
        result = chain.invoke({'page_data': page_data})
        return parser.parse(result)

    except Exception as e:
        print("⚠️ Failed to extract job info:", str(e))
        return {
            "role": "Unknown",
            "experience": "Not specified",
            "skills": [],
            "description": page_data[:500] + "..."
        }

# -------------------- Interview Logic --------------------
# Module-level log of graded answers: submit_answer() appends one dict per
# answer and download_results() exports the list as CSV.
# NOTE(review): nothing visible here clears this list, so it presumably
# accumulates across interviews and is shared by every session - confirm.
interview_data = []

def ask_next_question(question, answer):
    """Have the LLM grade ``answer`` to ``question``.

    Returns the JSON object parsed from the model's reply (the prompt asks
    for ``score`` and ``feedback`` keys). Raises ``ValueError`` when the
    reply contains no ``{...}`` section.
    """
    grading_prompt = PromptTemplate.from_template("""
        ### QUESTION:
        {question}

        ### ANSWER:
        {answer}

        ### INSTRUCTION:
        Score the answer from 0 to 100 based on clarity, relevance, and confidence.
        Give short feedback and return JSON:
        {{"score": 85, "feedback": "Good explanation but lacked structure."}}
    """)
    grader = grading_prompt | llm | RunnableLambda(lambda reply: reply.content.strip())
    raw_output = grader.invoke({"question": question, "answer": answer})

    # Keep only the outermost {...} span so surrounding chatter is ignored.
    found = re.search(r"\{.*\}", raw_output, re.DOTALL)
    if not found:
        raise ValueError(f"Invalid response format: {raw_output}")
    return JsonOutputParser().parse(found.group())

def transcribe_audio(audio_path):
    """Transcribe ``audio_path`` with the module-level Whisper model.

    Returns the stripped transcript, or a parenthesised
    "(Transcription failed: ...)" message if anything goes wrong.
    """
    try:
        transcription = model_whisper.transcribe(audio_path)
        spoken_text = transcription["text"]
        return spoken_text.strip()
    except Exception as err:
        return f"(Transcription failed: {err!s})"

def generate_follow_up(score, job_data, cv_text):
    """Produce an extra task and CV feedback for low-scoring candidates.

    When ``score`` is below 60, the LLM is asked for a role-appropriate task
    (parsed from JSON) and for three lines of improvement feedback on the
    CV; the pair ``(task_dict, feedback_text)`` is returned. Otherwise the
    result is ``({}, "")`` and no LLM call is made.
    """
    if not (score < 60):
        return {}, ""

    # The template text, including its indentation, is kept verbatim.
    task_prompt = PromptTemplate.from_template("""
            ### JOB:
            {job_data}

            ### INSTRUCTION:
            Generate a task (coding/math/business) suitable for this role.
            Return JSON: {{"task_type": "math", "instructions": "Solve this..."}}
        """)
    task_chain = task_prompt | llm | RunnableLambda(lambda reply: reply.content.strip())
    task_reply = task_chain.invoke({"job_data": str(job_data)})
    task_data = JsonOutputParser().parse(task_reply)

    feedback_prompt = PromptTemplate.from_template("""
            ### CV:
            {cv_text}

            ### INSTRUCTION:
            Give feedback on areas of improvement in 3 lines.
        """)
    feedback_reply = (feedback_prompt | llm).invoke({"cv_text": cv_text})
    return task_data, feedback_reply.content.strip()

def download_results():
    """Write the collected interview records to a timestamped CSV file.

    The file is created in the current working directory and its name is
    returned.
    """
    stamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    filename = f"interview_results_{stamp}.csv"
    pd.DataFrame(interview_data).to_csv(filename, index=False)
    return filename

# -------------------- Gradio UI --------------------
# Build the interview UI. Components are created in display order, so the
# statement order below is the page layout.
with gr.Blocks(theme='Respair/Shiki@1.2.1') as app:
    gr.Markdown("""
        # 🎤 AI Interviewer
        Upload your CV, paste a link, add job requirements, and begin an interactive mock interview with voice and text.
    """)

    # CV sources: an uploaded file or a URL (load_cv prefers the URL).
    with gr.Row():
        cv_file = gr.File(label="📄 Upload CV")
        cv_url = gr.Textbox(label="🌐 Or paste CV Link")

    job_input = gr.Textbox(label="🏢 Job Requirements or URL")
    start_btn = gr.Button("🚀 Start Interview")

    # Question display plus the two ways of answering (voice or text).
    question_output = gr.Textbox(label="🧠 Interview Question", lines=2)
    mic_input = gr.Audio(type="filepath", label="🎙️ Speak your answer")
    text_input = gr.Textbox(label="✍️ Or type your answer")
    submit_btn = gr.Button("✅ Submit Answer")
    next_btn = gr.Button("⏭️ Next Question")

    # Result panels filled by the submit handler.
    score_output = gr.Textbox(label="📊 Score & Feedback")
    task_output = gr.Textbox(label="🧪 Extra Task (if any)")
    feedback_output = gr.Textbox(label="🧾 Final Feedback")
    download_btn = gr.Button("📥 Download Full Interview Report")
    download_file = gr.File()
    error_box = gr.Textbox(label="🚨 Error Log (if any)", lines=2, interactive=False)
    # Session dict produced by the start handler and passed through the others.
    state = gr.State({})

    def start(cv_file, cv_url, job_input):
        """Initialise an interview session with eight random bank questions.

        Returns ``(first_question, session_state, error_text)``. On any
        exception the state is ``{}`` and ``error_text`` carries the
        exception message.
        """
        try:
            cv_text = load_cv(cv_file, cv_url)
            job_data = extract_job_info(job_input)
            questions = get_random_questions(8)
            session = {
                "cv_text": cv_text,
                "job_data": job_data,
                "questions": questions,
                "current": 0,
                "answers": [],
            }
            if questions:
                opening = questions[0]
            else:
                opening = "No questions available."
        except Exception as err:
            return "❌ Error occurred while starting interview.", {}, str(err)
        else:
            return opening, session, ""

    def submit_answer(audio_path, text, state):
        """Grade the answer to the current question and update the session.

        Typed ``text`` wins; when it is empty and ``audio_path`` is given,
        the recording is transcribed instead. The graded record is appended
        to the module-level ``interview_data`` and to ``state["answers"]``,
        then a follow-up is requested based on the running average score.

        Returns ``(score_and_feedback, task_instructions, final_feedback,
        state, error_text)``. On any exception the first three are empty
        strings and ``error_text`` holds the exception message.
        """
        try:
            question = state["questions"][state["current"]]
            if audio_path and not text:
                text = transcribe_audio(audio_path)

            answer = text if text else "(No valid response)"
            grading = ask_next_question(question, answer)
            score = grading["score"]
            feedback = grading["feedback"]

            record = {
                "question": question,
                "answer": answer,
                "score": score,
                "feedback": feedback,
            }
            interview_data.append(record)

            state["answers"].append((question, answer, score))

            total = sum(points for _q, _a, points in state["answers"])
            avg_score = total / len(state["answers"])
            task, final_feedback = generate_follow_up(avg_score, state["job_data"], state["cv_text"])

            summary = f"Score: {score}\nFeedback: {feedback}"
            return summary, task.get("instructions", ""), final_feedback, state, ""
        except Exception as err:
            return "", "", "", state, str(err)

    def go_to_next_question(state):
        """Advance ``state["current"]`` and return the next question.

        Returns ``(question_or_message, state, error_text)``; once the
        question list is exhausted the message is the completion notice.
        """
        try:
            state["current"] += 1
            position = state["current"]
            if position >= len(state["questions"]):
                return "✅ Interview Complete", state, ""
            return state["questions"][position], state, ""
        except Exception as err:
            return "❌ Error moving to next question", state, str(err)

    def download():
        """Export the results CSV and return its filename.

        If the export raises, the exception message is returned instead.
        """
        try:
            report_path = download_results()
        except Exception as err:
            return str(err)
        return report_path

    # Event wiring: each button calls its handler, and the handler's return
    # values are written to the listed output components in order.
    start_btn.click(fn=start, inputs=[cv_file, cv_url, job_input], outputs=[question_output, state, error_box])
    submit_btn.click(fn=submit_answer, inputs=[mic_input, text_input, state], outputs=[score_output, task_output, feedback_output, state, error_box])
    next_btn.click(fn=go_to_next_question, inputs=[state], outputs=[question_output, state, error_box])
    download_btn.click(fn=download, outputs=download_file)

# Start the Gradio app.
app.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://4867b73982fd3886c9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


