In [1]:
import os, time, queue, re, contextlib, threading, sys
import sounddevice as sd
from scipy.io.wavfile import write
import cv2, numpy as np

try:
    import whisper
except:
    whisper = None

try:
    import pyttsx3
    TTS_AVAILABLE = True
except:
    pyttsx3 = None
    TTS_AVAILABLE = False

try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except:
    genai = None
    GEMINI_AVAILABLE = False

import PyPDF2, docx
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# Usage instructions shown once at start-up.
print("Instructions:")
print("🎙️ Speak your answers naturally. The system records automatically.")
print("⏱️ If you stay silent for 10 seconds or press 'q', it moves to next question.")
print("🚪 Press 'Esc' anytime to exit.\n")

# The Gemini API key is read from the GEMINI_API_KEY environment variable so that
# no credential lives in the source. NOTE(review): the key that used to be
# embedded here should be treated as leaked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
GENIE_MODEL_NAME = "gemini-2.0-flash"
WHISPER_MODEL_NAME = "medium"

AUDIO_FILENAME = "candidate_answer.wav"
SAMPLE_RATE = 16000        # microphone sample rate passed to the input stream
SILENCE_DURATION = 10.0    # seconds of quiet after speech that end an answer
SILENCE_THRESHOLD = 0.01   # RMS level above which an audio chunk counts as speech
WIN_W, WIN_H = 960, 540    # size of the preview window

MAX_QUESTION_WORDS = 14
TECH_QUESTIONS = 6
HR_QUESTIONS = 3
# One fixed introduction question plus the technical and HR rounds.
TOTAL_QUESTIONS = 1 + TECH_QUESTIONS + HR_QUESTIONS
GEN_RETRY = 1

# Gemini client; stays None when the package or the key is missing.
gen_model = None
if GEMINI_AVAILABLE and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gen_model = genai.GenerativeModel(GENIE_MODEL_NAME)
        print("Gemini configured.")
    except Exception as e:
        print("Gemini init failed:", e)
elif GEMINI_AVAILABLE:
    print("GEMINI_API_KEY is not set; Gemini features are disabled.")

# Haar cascades used to annotate the webcam preview.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_smile.xml")

# Whisper speech-to-text model; stays None when the package or the load fails.
whisper_model = None
if whisper:
    try:
        print("Loading Whisper model:", WHISPER_MODEL_NAME)
        whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
        print("Whisper loaded.")
    except Exception as e:
        print("Whisper load failed:", e)

def speak_blocking(text):
    """Speak *text* with pyttsx3 and wait until it finishes.

    Falls back to printing the text prefixed with "AI:" when TTS is
    unavailable or the engine raises.
    """
    spoken = False
    if TTS_AVAILABLE:
        try:
            tts = pyttsx3.init()
            tts.setProperty('rate', 170)
            tts.say(str(text))
            tts.runAndWait()
            spoken = True
        except Exception:
            spoken = False
    if not spoken:
        print("AI:", text)

def clean_text(t):
    """Return *t* as a string with non-ASCII runs removed and whitespace collapsed.

    None becomes the empty string; any other value is converted with str().
    """
    if t is None:
        return ""
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', str(t))
    single_spaced = re.sub(r'\s+', ' ', ascii_only)
    return single_spaced.strip()

def wrap_text(text, max_chars=40):
    """Greedily wrap *text* into lines of at most *max_chars* characters.

    Words are never split: a single word longer than *max_chars* gets a line
    of its own. Always returns at least one line; empty, None or
    whitespace-only input yields [""].
    """
    lines, cur = [], ""
    for w in (text or "").split():
        candidate = f"{cur} {w}" if cur else w
        if len(candidate) <= max_chars:
            cur = candidate
        else:
            # Only flush a non-empty line; otherwise an over-long first word
            # would produce a blank leading line.
            if cur:
                lines.append(cur)
            cur = w
    if cur:
        lines.append(cur)
    return lines or [""]

def extract_text_from_pdf(path):
    """Return the cleaned text of every page of the PDF at *path*.

    Best-effort: on any read or parse error the reason is printed and the
    text gathered up to that point (possibly empty) is returned.
    """
    pages = []
    try:
        with open(path, "rb") as f:
            for page in PyPDF2.PdfReader(f).pages:
                pages.append((page.extract_text() or "") + "\n")
    except Exception as e:
        # Keep the pages already read, but do not hide why extraction stopped.
        print("PDF read failed:", e)
    return clean_text("".join(pages))

def extract_text_from_docx(path):
    """Return the cleaned text of the non-blank paragraphs of the document at *path*.

    Best-effort: on any error the reason is printed and "" is returned.
    """
    try:
        d = docx.Document(path)
        return clean_text("\n".join(p.text for p in d.paragraphs if p.text.strip()))
    except Exception as e:
        print("DOCX read failed:", e)
        return ""

def extract_text_from_file(path):
    """Extract resume text from *path*, choosing the reader by file extension.

    ".pdf" goes to the PDF reader, ".doc"/".docx" to the DOCX reader; an
    empty path or any other extension yields "".
    """
    if not path:
        return ""
    lowered = path.lower()
    is_pdf = lowered.endswith(".pdf")
    is_word = lowered.endswith((".doc", ".docx"))
    if is_pdf:
        return extract_text_from_pdf(path)
    return extract_text_from_docx(path) if is_word else ""

def record_audio(stop_flag, filename=AUDIO_FILENAME):
    """Record microphone audio to *filename* until told to stop.

    Recording ends when stop_flag["next"] becomes true, or when
    SILENCE_DURATION seconds pass without a loud chunk after speech has
    started (in which case this function sets stop_flag["next"] itself).
    The captured audio is peak-normalised and written as 16-bit WAV.
    If nothing was captured, no file is left behind.
    """
    # Remove any recording left by a previous question so that a failed or
    # empty capture cannot be transcribed as the current answer.
    with contextlib.suppress(OSError):
        os.remove(filename)

    q_audio = queue.Queue()
    frames = []
    last_loud = time.time()
    speech_started = False

    def _cb(indata, frames_count, time_info, status):
        # Copy the chunk before queueing it for the loop below.
        q_audio.put(indata.copy())

    try:
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=_cb):
            while not stop_flag["next"]:
                try:
                    data = q_audio.get(timeout=0.05)
                except queue.Empty:
                    continue
                frames.append(data)
                rms = np.sqrt(np.mean(data**2))
                if rms > SILENCE_THRESHOLD:
                    speech_started = True
                    last_loud = time.time()
                elif speech_started and (time.time() - last_loud) > SILENCE_DURATION:
                    stop_flag["next"] = True
    except Exception as e:
        print("Audio issue:", e)

    if frames:
        audio = np.concatenate(frames, axis=0).flatten()
        # Peak-normalise; the epsilon avoids dividing by zero on pure silence.
        audio /= (np.max(np.abs(audio)) + 1e-9)
        write(filename, SAMPLE_RATE, (audio * 32767).astype(np.int16))

def transcribe_audio(filename=AUDIO_FILENAME):
    """Transcribe *filename* with the loaded Whisper model and return cleaned text.

    Returns "" when no model is loaded, the file does not exist, or
    transcription fails (the error is printed).
    """
    if whisper_model is None or not os.path.exists(filename):
        return ""
    try:
        # Silence Whisper's own stdout output while it runs.
        with contextlib.redirect_stdout(None):
            r = whisper_model.transcribe(filename, fp16=False, language="en")
    except Exception as e:
        # Report outside the redirect: print() is a no-op while stdout is None.
        print("Whisper error:", e)
        return ""
    return clean_text(r.get("text", ""))

def generate_next_question_background(jd_summary, resume_text, used_set, container, role):
    """Ask Gemini for one new interview question and store it in container[0].

    The job description and resume are truncated before being placed in the
    prompt. The first line of the reply is cleaned, stripped of a leading
    "Question ..." label and cut at the first period. A non-empty question
    not already in *used_set* is added to *used_set* and stored; otherwise
    (no model, empty or duplicate reply, or an API error on every one of
    the GEN_RETRY attempts) container[0] is set to None.
    """
    if gen_model is None:
        container[0] = None
        return
    jd_short = jd_summary if len(jd_summary) <= 800 else jd_summary[:800] + "..."
    resume_short = resume_text if len(resume_text) <= 1200 else resume_text[:1200] + "..."
    prompt = (
        f"Generate ONE {role} interview question (max {MAX_QUESTION_WORDS} words). "
        f"JD: {jd_short}. Resume: {resume_short}. "
        f"Only output the question. Avoid repeated or nonsensical questions. "
        f"Previous questions: {list(used_set)}."
    )
    for _ in range(GEN_RETRY):
        try:
            r = gen_model.generate_content(prompt)
            q = clean_text(r.text.split("\n")[0])
            q = re.sub(r'^(question\s*(\d*|once|for you)\s*[:\-]?)', '', q, flags=re.I).strip()
            q = q.split('.')[0].strip()
            if q and q not in used_set:
                used_set.add(q)
                container[0] = q
                return
        except Exception as e:
            # Catch Exception only, so Ctrl-C still interrupts; report and back off.
            print("Question generation failed:", e)
            time.sleep(0.4)
    container[0] = None

def generate_feedback(question, answer):
    """Return a short encouraging remark from Gemini about *answer* to *question*.

    Returns "" when no model is configured or the request fails (the error
    is printed).
    """
    if not gen_model:
        return ""
    prompt = (
        f"Provide a brief positive feedback or encouragement (max 20 words) "
        f"on this answer: '{answer}' "
        f"for the question: '{question}'. "
        f"Avoid negative comments. Only output the feedback."
    )
    try:
        r = gen_model.generate_content(prompt)
        return clean_text(r.text.split("\n")[0])
    except Exception as e:
        print("Feedback generation failed:", e)
        return ""

def run_interview():
    """Run the whole interview: resume upload, question loop, closing message.

    For each of TOTAL_QUESTIONS questions the question is spoken, the answer
    is recorded on a background thread while an annotated webcam preview is
    shown, the recording is transcribed, feedback is spoken and the next
    question is generated. Pressing 'q' moves on to the next question;
    pressing 'Esc' ends the interview. Returns None.
    """
    candidate = input("Enter your name: ").strip() or "Candidate"
    print(f"\nWelcome {candidate}! Please upload your resume.")
    speak_blocking("Please upload your resume.")

    # The hidden Tk root only hosts the file dialog; destroy it afterwards so
    # it does not linger for the rest of the session.
    root = Tk()
    root.withdraw()
    try:
        resume_path = askopenfilename(title="Select Resume", filetypes=[("Documents","*.pdf *.docx")])
    finally:
        root.destroy()
    resume_text = extract_text_from_file(resume_path) if resume_path else ""
    print("Resume uploaded.\n" if resume_path else "No resume selected.\n")

    jd_summary = (
        "We are looking for a motivated AI/ML Engineer with 1–3 years of hands-on experience "
        "in building and deploying machine learning models."
    )

    used = set()
    current_q = "Introduce yourself."
    used.add(current_q)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open webcam.")
        return

    all_answers = []
    total_qs = TOTAL_QUESTIONS
    exit_requested = False

    try:
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        cv2.namedWindow("AI Interview", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("AI Interview", WIN_W, WIN_H)

        for q_index in range(1, total_qs + 1):
            print(f"\nQuestion {q_index}/{total_qs}: {current_q}")
            speak_blocking(f"Question {q_index}. {current_q}")

            # The recorder sets stop_flag["next"] itself on prolonged silence.
            stop_flag = {"next": False}
            rec_thread = threading.Thread(target=record_audio, args=(stop_flag,), daemon=True)
            rec_thread.start()

            wrapped = wrap_text(current_q, 40)

            while not stop_flag["next"]:
                ret, frame = cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.3, 5)
                for (x, y, w, h) in faces:
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                    roi_gray = gray[y:y + h, x:x + w]
                    roi_color = frame[y:y + h, x:x + w]
                    eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 3)
                    for (ex, ey, ew, eh) in eyes:
                        cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
                    mouth = smile_cascade.detectMultiScale(roi_gray, 1.7, 20)
                    for (lx, ly, lw, lh) in mouth:
                        cv2.rectangle(roi_color, (lx, ly), (lx + lw, ly + lh), (0, 0, 255), 2)
                y0 = 30
                for line in wrapped:
                    cv2.putText(frame, line, (20, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                    y0 += 30

                cv2.imshow("AI Interview", frame)
                key = cv2.waitKey(1) & 0xFF
                if key == 27:
                    # Esc ends the interview, as the on-screen instructions promise.
                    exit_requested = True
                if key in (ord('q'), 27):
                    stop_flag["next"] = True
                    break

            # Wait until the recorder has finished writing the WAV file; a
            # timed join could let transcription read a half-written file.
            rec_thread.join()
            if exit_requested:
                break

            transcription = {"text": ""}
            trans_done_flag = threading.Event()

            def _transcribe():
                try:
                    transcription["text"] = transcribe_audio(AUDIO_FILENAME)
                finally:
                    # Always release the wait loop below, even on failure.
                    trans_done_flag.set()

            wt = threading.Thread(target=_transcribe, daemon=True)
            wt.start()

            # Keep the preview alive while the answer is being transcribed.
            while not trans_done_flag.is_set():
                ret, frame = cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                cv2.putText(frame, "Processing answer...", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
                cv2.imshow("AI Interview", frame)
                cv2.waitKey(1)

            answer_text = transcription["text"]
            print("Answer:", answer_text)
            all_answers.append(answer_text)

            # Generate feedback and speak it
            if q_index == 1:
                feedback = f"Nice to meet you, {candidate}."
            else:
                feedback = generate_feedback(current_q, answer_text)

            if feedback:
                print("Feedback:", feedback)
                speak_blocking(feedback)

            # Generate next question except if last question
            if q_index < total_qs:
                next_container = [None]
                # Questions 2..(1 + TECH_QUESTIONS) are technical, the rest HR.
                role = "Technical" if q_index <= TECH_QUESTIONS else "HR"
                generate_next_question_background(jd_summary, resume_text, used, next_container, role)
                current_q = next_container[0] or ("Tell me about a project from your resume." if role=="Technical" else "Why do you want to join our company?")
    finally:
        # Release the camera and close the window even if something above raised.
        cap.release()
        cv2.destroyAllWindows()

    print("\nInterview ended by user." if exit_requested else "\nInterview Completed.")
    speak_blocking(f"Thank you for the interview, {candidate}.")

if __name__ == "__main__":
    run_interview()


Instructions:
🎙️ Speak your answers naturally. The system records automatically.
⏱️ If you stay silent for 10 seconds or press 'q', it moves to next question.
🚪 Press 'Esc' anytime to exit.

Gemini configured.
Loading Whisper model: medium
Whisper loaded.


Enter your name:  Mahesh



Welcome Mahesh! Please upload your resume.
Resume uploaded.


Question 1/10: Introduce yourself.
Answer: Good morning, this is Mahesh and I am from Warangal. Thank you.
Feedback: Nice to meet you, Mahesh.

Question 2/10: Explain how you handled imbalanced classes in a previous ML project
Answer: Yeah, I used one technique that is SMOTE technique which is used to imbalance data set to balance. Thank you.
Feedback: Great job using SMOTE to address the class imbalance! It's a valuable technique to know.

Question 3/10: Explain a time you deployed a machine learning model and the challenges faced
Answer: Yeah, I usually like and don't know about this answer, sorry.
Feedback: Thanks for sharing! Getting started is the hardest part. Keep thinking about your experiences!

Question 4/10: Explain how you optimized your Naive Bayes spam detection model for deployment
Answer: First, I load the data. Second step is tokenization. That means the whole corpus or sentence are breaking down into parts

In [None]:
import os, time, queue, re, contextlib, threading, sys
import sounddevice as sd
from scipy.io.wavfile import write
import cv2, numpy as np

try:
    import whisper
except:
    whisper = None

try:
    import pyttsx3
    TTS_AVAILABLE = True
except:
    pyttsx3 = None
    TTS_AVAILABLE = False

try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except:
    genai = None
    GEMINI_AVAILABLE = False

import PyPDF2, docx
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# Usage instructions shown once at start-up.
print("Instructions:")
print("🎙️ Speak your answers naturally. The system records automatically.")
print("⏱️ If you stay silent for 10 seconds or press 'q', it moves to next question.")
print("🚪 Press 'Esc' anytime to exit.\n")

# The Gemini API key is read from the GEMINI_API_KEY environment variable so that
# no credential lives in the source. NOTE(review): the key that used to be
# embedded here should be treated as leaked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
GENIE_MODEL_NAME = "gemini-2.0-flash"
WHISPER_MODEL_NAME = "medium"

AUDIO_FILENAME = "candidate_answer.wav"
SAMPLE_RATE = 16000        # microphone sample rate passed to the input stream
SILENCE_DURATION = 10.0    # seconds of quiet after speech that end an answer
SILENCE_THRESHOLD = 0.01   # RMS level above which an audio chunk counts as speech
WIN_W, WIN_H = 960, 540    # size of the preview window

MAX_QUESTION_WORDS = 14
TECH_QUESTIONS = 6
HR_QUESTIONS = 3
# One fixed introduction question plus the technical and HR rounds.
TOTAL_QUESTIONS = 1 + TECH_QUESTIONS + HR_QUESTIONS
GEN_RETRY = 1

# Gemini client; stays None when the package or the key is missing.
gen_model = None
if GEMINI_AVAILABLE and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gen_model = genai.GenerativeModel(GENIE_MODEL_NAME)
        print("Gemini configured.")
    except Exception as e:
        print("Gemini init failed:", e)
elif GEMINI_AVAILABLE:
    print("GEMINI_API_KEY is not set; Gemini features are disabled.")

# Haar cascades used to annotate the webcam preview.
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_smile.xml")

# Whisper speech-to-text model; stays None when the package or the load fails.
whisper_model = None
if whisper:
    try:
        print("Loading Whisper model:", WHISPER_MODEL_NAME)
        whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
        print("Whisper loaded.")
    except Exception as e:
        print("Whisper load failed:", e)

def speak_blocking(text):
    """Speak *text* with pyttsx3 and wait until it finishes.

    Falls back to printing the text prefixed with "AI:" when TTS is
    unavailable or the engine raises.
    """
    spoken = False
    if TTS_AVAILABLE:
        try:
            tts = pyttsx3.init()
            tts.setProperty('rate', 170)
            tts.say(str(text))
            tts.runAndWait()
            spoken = True
        except Exception:
            spoken = False
    if not spoken:
        print("AI:", text)

def clean_text(t):
    """Return *t* as a string with non-ASCII runs removed and whitespace collapsed.

    None becomes the empty string; any other value is converted with str().
    """
    if t is None:
        return ""
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', str(t))
    single_spaced = re.sub(r'\s+', ' ', ascii_only)
    return single_spaced.strip()

def wrap_text(text, max_chars=40):
    """Greedily wrap *text* into lines of at most *max_chars* characters.

    Words are never split: a single word longer than *max_chars* gets a line
    of its own. Always returns at least one line; empty, None or
    whitespace-only input yields [""].
    """
    lines, cur = [], ""
    for w in (text or "").split():
        candidate = f"{cur} {w}" if cur else w
        if len(candidate) <= max_chars:
            cur = candidate
        else:
            # Only flush a non-empty line; otherwise an over-long first word
            # would produce a blank leading line.
            if cur:
                lines.append(cur)
            cur = w
    if cur:
        lines.append(cur)
    return lines or [""]

def extract_text_from_pdf(path):
    """Return the cleaned text of every page of the PDF at *path*.

    Best-effort: on any read or parse error the reason is printed and the
    text gathered up to that point (possibly empty) is returned.
    """
    pages = []
    try:
        with open(path, "rb") as f:
            for page in PyPDF2.PdfReader(f).pages:
                pages.append((page.extract_text() or "") + "\n")
    except Exception as e:
        # Keep the pages already read, but do not hide why extraction stopped.
        print("PDF read failed:", e)
    return clean_text("".join(pages))

def extract_text_from_docx(path):
    """Return the cleaned text of the non-blank paragraphs of the document at *path*.

    Best-effort: on any error the reason is printed and "" is returned.
    """
    try:
        d = docx.Document(path)
        return clean_text("\n".join(p.text for p in d.paragraphs if p.text.strip()))
    except Exception as e:
        print("DOCX read failed:", e)
        return ""

def extract_text_from_file(path):
    """Extract resume text from *path*, choosing the reader by file extension.

    ".pdf" goes to the PDF reader, ".doc"/".docx" to the DOCX reader; an
    empty path or any other extension yields "".
    """
    if not path:
        return ""
    lowered = path.lower()
    is_pdf = lowered.endswith(".pdf")
    is_word = lowered.endswith((".doc", ".docx"))
    if is_pdf:
        return extract_text_from_pdf(path)
    return extract_text_from_docx(path) if is_word else ""

def record_audio(stop_flag, filename=AUDIO_FILENAME):
    """Record microphone audio to *filename* until told to stop.

    Recording ends when stop_flag["next"] becomes true, or when
    SILENCE_DURATION seconds pass without a loud chunk after speech has
    started (in which case this function sets stop_flag["next"] itself).
    The captured audio is peak-normalised and written as 16-bit WAV.
    If nothing was captured, no file is left behind.
    """
    # Remove any recording left by a previous question so that a failed or
    # empty capture cannot be transcribed as the current answer.
    with contextlib.suppress(OSError):
        os.remove(filename)

    q_audio = queue.Queue()
    frames = []
    last_loud = time.time()
    speech_started = False

    def _cb(indata, frames_count, time_info, status):
        # Copy the chunk before queueing it for the loop below.
        q_audio.put(indata.copy())

    try:
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=_cb):
            while not stop_flag["next"]:
                try:
                    data = q_audio.get(timeout=0.05)
                except queue.Empty:
                    continue
                frames.append(data)
                rms = np.sqrt(np.mean(data**2))
                if rms > SILENCE_THRESHOLD:
                    speech_started = True
                    last_loud = time.time()
                elif speech_started and (time.time() - last_loud) > SILENCE_DURATION:
                    stop_flag["next"] = True
    except Exception as e:
        print("Audio issue:", e)

    if frames:
        audio = np.concatenate(frames, axis=0).flatten()
        # Peak-normalise; the epsilon avoids dividing by zero on pure silence.
        audio /= (np.max(np.abs(audio)) + 1e-9)
        write(filename, SAMPLE_RATE, (audio * 32767).astype(np.int16))

def transcribe_audio(filename=AUDIO_FILENAME):
    """Transcribe *filename* with the loaded Whisper model and return cleaned text.

    Returns "" when no model is loaded, the file does not exist, or
    transcription fails (the error is printed).
    """
    if whisper_model is None or not os.path.exists(filename):
        return ""
    try:
        # Silence Whisper's own stdout output while it runs.
        with contextlib.redirect_stdout(None):
            r = whisper_model.transcribe(filename, fp16=False, language="en")
    except Exception as e:
        # Report outside the redirect: print() is a no-op while stdout is None.
        print("Whisper error:", e)
        return ""
    return clean_text(r.get("text", ""))

def generate_next_question_background(jd_summary, resume_text, used_set, container, role):
    """Ask Gemini for one new interview question and store it in container[0].

    The job description and resume are truncated before being placed in the
    prompt. The first line of the reply is cleaned, stripped of a leading
    "Question ..." label and cut at the first period. A non-empty question
    not already in *used_set* is added to *used_set* and stored; otherwise
    (no model, empty or duplicate reply, or an API error on every one of
    the GEN_RETRY attempts) container[0] is set to None.
    """
    if gen_model is None:
        container[0] = None
        return
    jd_short = jd_summary if len(jd_summary) <= 800 else jd_summary[:800] + "..."
    resume_short = resume_text if len(resume_text) <= 1200 else resume_text[:1200] + "..."
    prompt = (
        f"Generate ONE {role} interview question (max {MAX_QUESTION_WORDS} words). "
        f"JD: {jd_short}. Resume: {resume_short}. "
        f"Only output the question. Avoid repeated or nonsensical questions. "
        f"Previous questions: {list(used_set)}."
    )
    for _ in range(GEN_RETRY):
        try:
            r = gen_model.generate_content(prompt)
            q = clean_text(r.text.split("\n")[0])
            q = re.sub(r'^(question\s*(\d*|once|for you)\s*[:\-]?)', '', q, flags=re.I).strip()
            q = q.split('.')[0].strip()
            if q and q not in used_set:
                used_set.add(q)
                container[0] = q
                return
        except Exception as e:
            # Catch Exception only, so Ctrl-C still interrupts; report and back off.
            print("Question generation failed:", e)
            time.sleep(0.4)
    container[0] = None

def generate_feedback(question, answer):
    """Return a short encouraging remark from Gemini about *answer* to *question*.

    Falls back to "Thank you for your answer." when no model is configured,
    the reply is empty, or the request fails (the error is printed).
    """
    fallback = "Thank you for your answer."
    if not gen_model:
        return fallback
    prompt = (
        f"Provide a brief positive feedback or encouragement (max 20 words) "
        f"on this answer: '{answer}' "
        f"for the question: '{question}'. "
        f"Avoid negative comments. Only output the feedback."
    )
    try:
        r = gen_model.generate_content(prompt)
        fb = clean_text(r.text.split("\n")[0])
    except Exception as e:
        print("Feedback generation failed:", e)
        return fallback
    return fb or fallback

def generate_first_answer_feedback(candidate_name, candidate_answer):
    """Return a short personalised greeting in reply to the candidate's introduction.

    Falls back to "Nice to meet you, <name>." when no model is configured,
    the reply is empty, or the request fails (the error is printed).
    """
    fallback = f"Nice to meet you, {candidate_name}."
    if not gen_model:
        return fallback
    prompt = (
        f"You are an AI assistant. The candidate said: '{candidate_answer}'. "
        f"Generate a warm, friendly, and natural greeting response including their name {candidate_name}. "
        f"Do NOT be generic. Keep it brief and positive."
    )
    try:
        r = gen_model.generate_content(prompt)
        # The reply can come back wrapped in double quotes; drop them so they
        # are neither printed nor passed to the speech engine.
        reply = clean_text(r.text.split("\n")[0]).strip('"').strip()
    except Exception as e:
        print("Greeting generation failed:", e)
        return fallback
    return reply or fallback

def run_interview():
    """Run the whole interview: resume upload, question loop, closing message.

    For each of TOTAL_QUESTIONS questions the question is spoken, the answer
    is recorded on a background thread while an annotated webcam preview is
    shown, the recording is transcribed, feedback is spoken and the next
    question is generated. Pressing 'q' moves on to the next question;
    pressing 'Esc' ends the interview. Returns None.
    """
    candidate = input("Enter your name: ").strip() or "Candidate"
    print(f"\nWelcome {candidate}! Please upload your resume.")
    speak_blocking("Please upload your resume.")

    # The hidden Tk root only hosts the file dialog; destroy it afterwards so
    # it does not linger for the rest of the session.
    root = Tk()
    root.withdraw()
    try:
        resume_path = askopenfilename(title="Select Resume", filetypes=[("Documents","*.pdf *.docx")])
    finally:
        root.destroy()
    resume_text = extract_text_from_file(resume_path) if resume_path else ""
    print("Resume uploaded.\n" if resume_path else "No resume selected.\n")

    jd_summary = (
        "We are looking for a motivated AI/ML Engineer with 1–3 years of hands-on experience "
        "in building and deploying machine learning models."
    )

    used = set()
    current_q = "Introduce yourself."
    used.add(current_q)

    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open webcam.")
        return

    all_answers = []
    total_qs = TOTAL_QUESTIONS
    exit_requested = False

    try:
        cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
        cv2.namedWindow("AI Interview", cv2.WINDOW_NORMAL)
        cv2.resizeWindow("AI Interview", WIN_W, WIN_H)

        for q_index in range(1, total_qs + 1):
            print(f"\nQuestion {q_index}/{total_qs}: {current_q}")
            speak_blocking(f"Question {q_index}. {current_q}")

            # The recorder sets stop_flag["next"] itself on prolonged silence.
            stop_flag = {"next": False}
            rec_thread = threading.Thread(target=record_audio, args=(stop_flag,), daemon=True)
            rec_thread.start()

            wrapped = wrap_text(current_q, 40)

            while not stop_flag["next"]:
                ret, frame = cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = face_cascade.detectMultiScale(gray, 1.3, 5)
                for (x, y, w, h) in faces:
                    cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
                    roi_gray = gray[y:y + h, x:x + w]
                    roi_color = frame[y:y + h, x:x + w]
                    eyes = eye_cascade.detectMultiScale(roi_gray, 1.1, 3)
                    for (ex, ey, ew, eh) in eyes:
                        cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
                    mouth = smile_cascade.detectMultiScale(roi_gray, 1.7, 20)
                    for (lx, ly, lw, lh) in mouth:
                        cv2.rectangle(roi_color, (lx, ly), (lx + lw, ly + lh), (0, 0, 255), 2)
                y0 = 30
                for line in wrapped:
                    cv2.putText(frame, line, (20, y0), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
                    y0 += 30
                cv2.imshow("AI Interview", frame)
                key = cv2.waitKey(1) & 0xFF
                if key == 27:
                    # Esc ends the interview, as the on-screen instructions promise.
                    exit_requested = True
                if key in (ord('q'), 27):
                    stop_flag["next"] = True
                    break

            # Wait until the recorder has finished writing the WAV file; a
            # timed join could let transcription read a half-written file.
            rec_thread.join()
            if exit_requested:
                break

            transcription = {"text": ""}
            trans_done_flag = threading.Event()

            def _transcribe():
                try:
                    transcription["text"] = transcribe_audio(AUDIO_FILENAME)
                finally:
                    # Always release the wait loop below, even on failure.
                    trans_done_flag.set()

            wt = threading.Thread(target=_transcribe, daemon=True)
            wt.start()

            # Keep the preview alive while the answer is being transcribed.
            while not trans_done_flag.is_set():
                ret, frame = cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                cv2.putText(frame, "Processing answer...", (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 255, 255), 2)
                cv2.imshow("AI Interview", frame)
                cv2.waitKey(1)

            answer_text = transcription["text"]
            print("Answer:", answer_text)
            all_answers.append(answer_text)

            # Generate dynamic AI feedback
            if q_index == 1:
                feedback = generate_first_answer_feedback(candidate, answer_text)
            else:
                feedback = generate_feedback(current_q, answer_text)
            if feedback:
                print("Feedback:", feedback)
                speak_blocking(feedback)

            # Generate next question except if last question
            if q_index < total_qs:
                next_container = [None]
                # Questions 2..(1 + TECH_QUESTIONS) are technical, the rest HR.
                role = "Technical" if q_index <= TECH_QUESTIONS else "HR"
                generate_next_question_background(jd_summary, resume_text, used, next_container, role)
                current_q = next_container[0] or ("Tell me about a project from your resume." if role=="Technical" else "Why do you want to join our company?")
    finally:
        # Release the camera and close the window even if something above raised.
        cap.release()
        cv2.destroyAllWindows()

    print("\nInterview ended by user." if exit_requested else "\nInterview Completed.")
    speak_blocking(f"Thank you for the interview, {candidate}.")

if __name__ == "__main__":
    run_interview()


Instructions:
🎙️ Speak your answers naturally. The system records automatically.
⏱️ If you stay silent for 10 seconds or press 'q', it moves to next question.
🚪 Press 'Esc' anytime to exit.

Gemini configured.
Loading Whisper model: medium
Whisper loaded.


Enter your name:  Mahesh



Welcome Mahesh! Please upload your resume.
Resume uploaded.


Question 1/10: Introduce yourself.
Answer: Good afternoon, this is Mahesh and I am from Warangal.
Feedback: "Good afternoon, Mahesh! Great to have you join us today."

Question 2/10: Explain how you handled imbalanced data in a previous ML project


In [None]:
# ================= AI INTERVIEW SYSTEM =================
# - Keeps webcam live while long tasks run
# - Feedback always spoken BEFORE next question generation
# - Next question is NOT spoken at the end of previous loop (prevents duplicate)
# - Uses background threads for transcription / generation / TTS
# ======================================================

import os
import time
import queue
import re
import contextlib
import threading
import sys
import sounddevice as sd
from scipy.io.wavfile import write
import cv2
import numpy as np
# ---------------- optional libraries ----------------
try:
    import whisper
except:
    whisper = None

try:
    import pyttsx3
    TTS_AVAILABLE = True
except:
    pyttsx3 = None
    TTS_AVAILABLE = False

try:
    import google.generativeai as genai
    GEMINI_AVAILABLE = True
except:
    genai = None
    GEMINI_AVAILABLE = False


import PyPDF2
import docx
from tkinter import Tk
from tkinter.filedialog import askopenfilename

# ---------------- basic instructions ----------------
print("Instructions:")
print("🎙️ Speak your answers naturally. The system records automatically.")
print("⏱️ If you stay silent for 10 seconds or press 'q', it moves to next question.")
print("🚪 Press 'Esc' anytime to exit.\n")

# ---------------- config values ----------------
# The Gemini API key is read from the GEMINI_API_KEY environment variable so that
# no credential lives in the source. NOTE(review): the key that used to be
# embedded here should be treated as leaked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
GENIE_MODEL_NAME = "gemini-2.0-flash"
WHISPER_MODEL_NAME = "medium"

AUDIO_FILENAME = "candidate_answer.wav"
SAMPLE_RATE = 16000        # microphone sample rate passed to the input stream
SILENCE_DURATION = 10.0    # seconds of quiet after speech that end an answer
SILENCE_THRESHOLD = 0.01   # RMS level above which an audio chunk counts as speech
WIN_W, WIN_H = 960, 540    # size of the preview window

MAX_QUESTION_WORDS = 14
TECH_QUESTIONS = 6
HR_QUESTIONS = 3
# One fixed introduction question plus the technical and HR rounds.
TOTAL_QUESTIONS = 1 + TECH_QUESTIONS + HR_QUESTIONS
GEN_RETRY = 1

# ---------------- init gemini (if available) ----------------
# gen_model stays None when the package or the key is missing.
gen_model = None
if GEMINI_AVAILABLE and GEMINI_API_KEY:
    try:
        genai.configure(api_key=GEMINI_API_KEY)
        gen_model = genai.GenerativeModel(GENIE_MODEL_NAME)
        print("Gemini configured.")
    except Exception as e:
        print("Gemini init failed:", e)
elif GEMINI_AVAILABLE:
    print("GEMINI_API_KEY is not set; Gemini features are disabled.")

# ---------------- load CV cascades ----------------
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")
smile_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_smile.xml")

# ---------------- load whisper ----------------
# whisper_model stays None when the package or the load fails.
whisper_model = None
if whisper:
    try:
        print("Loading Whisper model:", WHISPER_MODEL_NAME)
        whisper_model = whisper.load_model(WHISPER_MODEL_NAME)
        print("Whisper loaded.")
    except Exception as e:
        print("Whisper load failed:", e)

# ---------------- blocking TTS (used only at startup / final thank you) ----------------
def speak_blocking(text):
    """Speak *text* with pyttsx3 and wait until it finishes.

    Falls back to printing the text prefixed with "AI:" when TTS is
    unavailable or the engine raises.
    """
    spoken = False
    if TTS_AVAILABLE:
        try:
            tts = pyttsx3.init()
            tts.setProperty('rate', 170)
            tts.say(str(text))
            tts.runAndWait()
            spoken = True
        except Exception:
            spoken = False
    if not spoken:
        print("AI:", text)

# ---------------- non-blocking TTS with completion Event ----------------
def speak_async(text):
    """Speak *text* on a daemon thread and return an Event set when speaking ends.

    Without TTS the text is printed instead and the returned Event is
    already set. If the engine raises, the text is printed with an "AI:"
    prefix and the Event is still set.
    """
    finished = threading.Event()

    if TTS_AVAILABLE:
        def _worker():
            try:
                tts = pyttsx3.init()
                tts.setProperty('rate', 170)
                tts.say(str(text))
                tts.runAndWait()
            except Exception:
                print("AI:", text)
            finally:
                finished.set()

        speaker = threading.Thread(target=_worker, daemon=True)
        speaker.start()
    else:
        print("AI (no TTS):", text)
        finished.set()

    return finished

# ---------------- text cleanup & wrapping ----------------
def clean_text(t):
    """Return *t* as a string with non-ASCII runs removed and whitespace collapsed.

    None becomes the empty string; any other value is converted with str().
    """
    if t is not None:
        ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', str(t))
        single_spaced = re.sub(r'\s+', ' ', ascii_only)
        return single_spaced.strip()
    return ""

def wrap_text(text, max_chars=40):
    """Greedily wrap *text* into lines of at most *max_chars* characters.

    Used to lay the question out on the webcam frame. Words are never
    split: a single word longer than *max_chars* gets a line of its own.
    Always returns at least one line; empty, None or whitespace-only input
    yields [""].
    """
    lines, cur = [], ""
    for w in (text or "").split():
        candidate = f"{cur} {w}" if cur else w
        if len(candidate) <= max_chars:
            cur = candidate
        else:
            # Only flush a non-empty line; otherwise an over-long first word
            # would produce a blank leading line.
            if cur:
                lines.append(cur)
            cur = w
    if cur:
        lines.append(cur)
    return lines or [""]

# ---------------- resume extraction ----------------
def extract_text_from_pdf(path):
    """Return the cleaned text of every page of the PDF at *path*.

    Best-effort: on any read or parse error the reason is printed and the
    text gathered up to that point (possibly empty) is returned.
    """
    pages = []
    try:
        with open(path, "rb") as f:
            for page in PyPDF2.PdfReader(f).pages:
                pages.append((page.extract_text() or "") + "\n")
    except Exception as e:
        # Keep the pages already read, but do not hide why extraction stopped.
        print("PDF read failed:", e)
    return clean_text("".join(pages))

def extract_text_from_docx(path):
    """Return the cleaned text of the non-blank paragraphs of the document at *path*.

    Best-effort: on any error the reason is printed and "" is returned.
    """
    try:
        d = docx.Document(path)
        return clean_text("\n".join(p.text for p in d.paragraphs if p.text.strip()))
    except Exception as e:
        print("DOCX read failed:", e)
        return ""

def extract_text_from_file(path):
    """Extract resume text from *path*, choosing the reader by file extension.

    ".pdf" goes to the PDF reader, ".doc"/".docx" to the DOCX reader; an
    empty path or any other extension yields "".
    """
    if not path:
        return ""
    lowered = path.lower()
    is_pdf = lowered.endswith(".pdf")
    is_word = lowered.endswith((".doc", ".docx"))
    if is_pdf:
        return extract_text_from_pdf(path)
    return extract_text_from_docx(path) if is_word else ""

# ---------------- audio recording ----------------
def record_audio(stop_flag, filename=AUDIO_FILENAME):
    """Record microphone audio to *filename* until told to stop.

    Recording ends when stop_flag['next'] becomes true (e.g. the user
    pressed q), or when SILENCE_DURATION seconds pass without a loud chunk
    after speech has started (in which case this function sets
    stop_flag['next'] itself). The captured audio is peak-normalised and
    written as 16-bit WAV. If nothing was captured, no file is left behind.
    """
    # Remove any recording left by a previous question so that a failed or
    # empty capture cannot be transcribed as the current answer.
    with contextlib.suppress(OSError):
        os.remove(filename)

    q_audio = queue.Queue()
    frames = []
    last_loud = time.time()
    speech_started = False

    def _cb(indata, frames_count, time_info, status):
        # Copy the chunk before queueing it for the loop below.
        q_audio.put(indata.copy())

    try:
        with sd.InputStream(samplerate=SAMPLE_RATE, channels=1, callback=_cb):
            while not stop_flag["next"]:
                try:
                    data = q_audio.get(timeout=0.05)
                except queue.Empty:
                    continue
                frames.append(data)
                rms = np.sqrt(np.mean(data**2))
                if rms > SILENCE_THRESHOLD:
                    speech_started = True
                    last_loud = time.time()
                elif speech_started and (time.time() - last_loud) > SILENCE_DURATION:
                    stop_flag["next"] = True
    except Exception as e:
        print("Audio issue:", e)

    if frames:
        audio = np.concatenate(frames, axis=0).flatten()
        # Peak-normalise; the epsilon avoids dividing by zero on pure silence.
        audio /= (np.max(np.abs(audio)) + 1e-9)
        write(filename, SAMPLE_RATE, (audio * 32767).astype(np.int16))

# ---------------- transcription ----------------
def transcribe_audio(filename=AUDIO_FILENAME):
    """Transcribe *filename* with the loaded Whisper model and return cleaned text.

    Returns "" when no model is loaded, the file does not exist, or
    transcription fails (the error is printed).
    """
    if whisper_model is None or not os.path.exists(filename):
        return ""
    try:
        # suppress whisper printing to stdout
        with contextlib.redirect_stdout(None):
            r = whisper_model.transcribe(filename, fp16=False, language="en")
    except Exception as e:
        # Report outside the redirect: print() is a no-op while stdout is None.
        print("Whisper error:", e)
        return ""
    return clean_text(r.get("text", ""))

# ---------------- gemini helpers ----------------
def generate_next_question_background(jd_summary, resume_text, used_set, container, role):
    """Generate one new interview question with Gemini.

    The result is delivered through ``container[0]`` (the function itself
    returns None) so it can be run on a worker thread.

    Args:
        jd_summary: job-description text; truncated to 800 characters in the prompt.
        resume_text: resume text; truncated to 1200 characters in the prompt.
        used_set: set of questions already asked. It is listed in the prompt
            and the accepted question is added to it.
        container: one-element list; ``container[0]`` receives the question,
            or None when Gemini is unavailable or no new question was produced.
        role: question category inserted into the prompt (e.g. "Technical", "HR").
    """
    if gen_model is None:
        container[0] = None
        return

    jd_short = jd_summary if len(jd_summary) <= 800 else jd_summary[:800] + "..."
    resume_short = resume_text if len(resume_text) <= 1200 else resume_text[:1200] + "..."
    prompt = (
        f"Generate ONE {role} interview question (max {MAX_QUESTION_WORDS} words). "
        f"JD: {jd_short}. Resume: {resume_short}. "
        f"Only output the question. Avoid repeated or nonsensical questions. "
        f"Previous questions: {list(used_set)}."
    )

    for attempt in range(GEN_RETRY):
        try:
            r = gen_model.generate_content(prompt)
            q = clean_text(r.text.split("\n")[0])
            # Drop a leading "Question 3:" style label.
            q = re.sub(r'^(question\s*\d*[:\-]?)', '', q, flags=re.I).strip()
            # Keep only the first sentence. Split on a terminator that is
            # followed by whitespace so dotted terms such as "Node.js" or
            # "ASP.NET" are not cut in half, then drop a trailing period.
            q = re.split(r'(?<=[.?!])\s+', q, maxsplit=1)[0].rstrip('.').strip()
            if q and q not in used_set:
                used_set.add(q)
                container[0] = q
                return
        except Exception as e:
            print("Question generation error:", e)
            # Brief back-off, but only if another attempt will follow.
            if attempt + 1 < GEN_RETRY:
                time.sleep(0.4)
    container[0] = None

def generate_feedback(question, answer):
    """Return a short, encouraging remark about ``answer`` to ``question``.

    Gemini is asked for at most 20 words of positive feedback and only the
    first line of its reply is used. A fixed thank-you sentence is returned
    instead when Gemini is not configured, the call fails, or the cleaned
    reply is empty.
    """
    fallback = "Thank you for your answer."
    if not gen_model:
        return fallback

    prompt_parts = [
        f"Provide a brief positive feedback or encouragement (max 20 words) ",
        f"on this answer: '{answer}' ",
        f"for the question: '{question}'. ",
        f"Avoid negative comments. Only output the feedback.",
    ]
    try:
        response = gen_model.generate_content("".join(prompt_parts))
        first_line = response.text.split("\n")[0]
        cleaned = clean_text(first_line)
    except Exception:
        return fallback
    return cleaned if cleaned else fallback

def generate_first_answer_feedback(candidate_name, candidate_answer):
    """Return a warm greeting in response to the candidate's introduction.

    Gemini is prompted with the candidate's words and name; only the first
    line of its reply is used. When Gemini is not configured, the call
    fails, or the cleaned reply is empty, a plain "Nice to meet you"
    sentence containing ``candidate_name`` is returned instead.
    """
    default_greeting = f"Nice to meet you, {candidate_name}."
    if not gen_model:
        return default_greeting

    prompt_parts = [
        f"You are an AI assistant. The candidate said: '{candidate_answer}'. ",
        f"Generate a warm, friendly, and natural greeting response including their name {candidate_name}. ",
        f"Do NOT be generic. Keep it brief and positive.",
    ]
    try:
        response = gen_model.generate_content("".join(prompt_parts))
        first_line = response.text.split("\n")[0]
        greeting = clean_text(first_line)
    except Exception:
        return default_greeting
    return greeting if greeting else default_greeting

# ================= main interview flow =================
class _InterviewExit(Exception):
    """Internal signal: the candidate pressed Esc to leave the interview."""


_KEY_ESC = 27  # key code compared against cv2.waitKey(...) & 0xFF for Esc


def _draw_question(frame, lines):
    """Overlay the wrapped question text, one line per row, at the top of ``frame``."""
    y = 30
    for line in lines:
        cv2.putText(frame, line, (20, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2)
        y += 30


def _annotate_faces(frame):
    """Draw rectangles around detected faces, eyes and smiles on ``frame`` in place."""
    gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    for (x, y, w, h) in face_cascade.detectMultiScale(gray, 1.3, 5):
        cv2.rectangle(frame, (x, y), (x + w, y + h), (255, 0, 0), 2)
        # Eyes and smile are searched only inside the face region.
        roi_gray = gray[y:y + h, x:x + w]
        roi_color = frame[y:y + h, x:x + w]
        for (ex, ey, ew, eh) in eye_cascade.detectMultiScale(roi_gray, 1.1, 3):
            cv2.rectangle(roi_color, (ex, ey), (ex + ew, ey + eh), (0, 255, 0), 2)
        for (lx, ly, lw, lh) in smile_cascade.detectMultiScale(roi_gray, 1.7, 20):
            cv2.rectangle(roi_color, (lx, ly), (lx + lw, ly + lh), (0, 0, 255), 2)


def _start_background(error_label, fn, *args):
    """Run ``fn(*args)`` on a daemon thread.

    Returns ``(box, done)``: ``box[0]`` holds the return value once ``done``
    (a threading.Event) is set. If ``fn`` raises, the error is printed with
    ``error_label`` and ``box[0]`` stays None; ``done`` is set either way.
    """
    box = [None]
    done = threading.Event()

    def _worker():
        try:
            box[0] = fn(*args)
        except Exception as e:
            print(error_label, e)
        finally:
            done.set()

    threading.Thread(target=_worker, daemon=True).start()
    return box, done


def _wait_with_camera(cap, done, caption, color):
    """Keep the webcam window live, showing ``caption``, until ``done`` is set.

    Raises:
        _InterviewExit: if the candidate presses Esc while waiting.
    """
    while not done.is_set():
        ret, frame = cap.read()
        if not ret:
            time.sleep(0.01)
            continue
        cv2.putText(frame, caption, (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)
        cv2.imshow("AI Interview", frame)
        if cv2.waitKey(1) & 0xFF == _KEY_ESC:
            raise _InterviewExit


def _next_role(q_index):
    """Return the category of the question that follows question ``q_index``.

    Question 1 is the introduction; it is followed by TECH_QUESTIONS
    technical questions and then by HR questions.
    """
    return "Technical" if q_index <= TECH_QUESTIONS else "HR"


def run_interview():
    """Run one complete interview session on the console, webcam and microphone.

    Flow:
      1. Ask for the candidate's name and a resume file (PDF/DOCX).
      2. For each of TOTAL_QUESTIONS questions: speak the question, record the
         spoken answer, transcribe it, generate and speak feedback, then
         generate the next question - all while the webcam window stays live.
      3. Thank the candidate.

    Keys in the webcam window: 'q' skips the spoken question / stops the
    recording; Esc ends the interview at any stage.
    """
    # ---------------- candidate + resume ----------------
    candidate = input("Enter your name: ").strip() or "Candidate"
    print(f"\nWelcome {candidate}! Please upload your resume.")
    # initial prompt - a short blocking call is acceptable here
    speak_blocking("Please upload your resume.")
    # Hidden root window for the file dialog; destroyed afterwards so it does
    # not linger for the rest of the session.
    root = Tk()
    root.withdraw()
    try:
        resume_path = askopenfilename(title="Select Resume", filetypes=[("Documents", "*.pdf *.docx")])
    finally:
        root.destroy()
    resume_text = extract_text_from_file(resume_path) if resume_path else ""
    if resume_path:
        print("Resume uploaded.\n")
    else:
        print("No resume selected; continuing without one.\n")

    # example JD - can be replaced with user input
    jd_summary = (
        "We are looking for full-stack developer and skills required: Java and SQL."
    )

    current_q = "Introduce yourself."
    used = {current_q}

    # ---------------- camera setup ----------------
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Cannot open webcam.")
        return
    cap.set(cv2.CAP_PROP_BUFFERSIZE, 1)
    cv2.namedWindow("AI Interview", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("AI Interview", WIN_W, WIN_H)

    all_answers = []  # transcripts in question order (collected, not read here)
    total_qs = TOTAL_QUESTIONS

    try:
        for q_index in range(1, total_qs + 1):
            print(f"\nQuestion {q_index}/{total_qs}: {current_q}")
            wrapped = wrap_text(current_q, 40)

            # ---------------- speak the question, webcam stays live ----------------
            q_speak_evt = speak_async(f"Question {q_index}. {current_q}")
            while not q_speak_evt.is_set():
                ret, frame = cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                _draw_question(frame, wrapped)
                cv2.putText(frame, "Listening shortly... (press 'q' to skip)", (20, WIN_H - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (200, 200, 200), 1)
                cv2.imshow("AI Interview", frame)
                key = cv2.waitKey(1) & 0xFF
                if key == _KEY_ESC:
                    raise _InterviewExit
                if key == ord('q'):
                    # stop waiting for the spoken question and start recording
                    break

            # ---------------- recording ----------------
            stop_flag = {"next": False}
            rec_thread = threading.Thread(target=record_audio, args=(stop_flag,), daemon=True)
            rec_thread.start()

            exit_requested = False
            while not stop_flag["next"]:
                ret, frame = cap.read()
                if not ret:
                    time.sleep(0.01)
                    continue
                _annotate_faces(frame)
                _draw_question(frame, wrapped)
                cv2.putText(frame, "Recording... (press 'q' to stop)", (20, WIN_H - 30),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
                cv2.imshow("AI Interview", frame)
                key = cv2.waitKey(1) & 0xFF
                if key in (ord('q'), _KEY_ESC):
                    # Either key must stop the recorder thread first.
                    stop_flag["next"] = True
                    exit_requested = key == _KEY_ESC
                    break

            # give the recording thread a moment to finish saving the file
            rec_thread.join(timeout=1.0)
            if exit_requested:
                raise _InterviewExit

            # ---------------- transcription (background) ----------------
            trans_box, trans_done = _start_background(
                "Transcription error:", transcribe_audio, AUDIO_FILENAME)
            _wait_with_camera(cap, trans_done, "Processing answer...", (0, 255, 255))

            answer_text = trans_box[0] or ""
            print("Answer:", answer_text)
            all_answers.append(answer_text)

            # ---------------- generate feedback (background) ----------------
            # Arguments are passed explicitly so the worker never depends on
            # loop variables that change later.
            if q_index == 1:
                fb_box, fb_done = _start_background(
                    "Feedback generation error:",
                    generate_first_answer_feedback, candidate, answer_text)
            else:
                fb_box, fb_done = _start_background(
                    "Feedback generation error:",
                    generate_feedback, current_q, answer_text)
            _wait_with_camera(cap, fb_done, "Generating feedback...", (0, 200, 200))

            feedback = fb_box[0] or "Thank you for your answer."
            print("Feedback:", feedback)

            # ---------------- speak feedback, webcam stays live ----------------
            speak_evt = speak_async(feedback)
            _wait_with_camera(cap, speak_evt, "Speaking feedback...", (180, 180, 255))

            # ---------------- generate next question AFTER feedback is spoken ----------------
            if q_index == total_qs:
                # last question: nothing left to prepare
                break

            role = _next_role(q_index)
            next_container = [None]
            _, nq_done = _start_background(
                "Next question error:",
                generate_next_question_background,
                jd_summary, resume_text, used, next_container, role)
            _wait_with_camera(cap, nq_done, "Preparing next question...", (200, 200, 100))

            # Use the generated question or a role-specific fallback. It is not
            # spoken here; the next iteration speaks it (prevents duplicates).
            current_q = next_container[0] or (
                "Tell me about a project from your resume." if role == "Technical"
                else "Why do you want to join our company?"
            )
            # mark used (so Gemini avoids repeats)
            used.add(current_q)
    except _InterviewExit:
        print("\nInterview ended by user.")
        return
    finally:
        cap.release()
        cv2.destroyAllWindows()

    print("\nInterview Completed.")
    # final thank-you can be blocking
    speak_blocking(f"Thank you for the interview, {candidate}.")

# ---------------- run ----------------
# Start the interview only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    run_interview()
