In [None]:
import os
from collections import Counter
from datetime import datetime

import cv2
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wav
from keras.models import load_model
from keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
import speech_recognition as sr
import librosa
import librosa.display
from fpdf import FPDF

# Load your actual model (grayscale)
model = load_model(r"C:\Users\sagni\Downloads\Emotion Aware Virtual Interviewer\model.h5")
emotion_labels = ['Angry', 'Disgust', 'Fear', 'Happy', 'Sad', 'Surprise', 'Neutral']

# Output directory
save_dir = r"C:\Users\sagni\Downloads\Emotion Aware Virtual Interviewer"
os.makedirs(save_dir, exist_ok=True)

# Interview questions
questions = [
    "Tell me about yourself.",
    "What are your strengths?",
    "Describe a challenge you've overcome.",
    "Where do you see yourself in five years?",
    "Why should we hire you?"
]

# Preprocess face (grayscale version)
def preprocess_face_gray(face):
    face = cv2.resize(face, (48, 48))
    face = cv2.cvtColor(face, cv2.COLOR_BGR2GRAY)
    face = img_to_array(face)
    face = face.astype("float") / 255.0
    face = np.expand_dims(face, axis=0)
    face = np.expand_dims(face, axis=-1)
    return face

# Voice tone features
def extract_voice_features(filename):
    y, sr = librosa.load(filename)
    zcr = np.mean(librosa.feature.zero_crossing_rate(y).T)
    cent = np.mean(librosa.feature.spectral_centroid(y=y, sr=sr).T)
    rolloff = np.mean(librosa.feature.spectral_rolloff(y=y, sr=sr).T)
    return {'ZCR': round(zcr, 4), 'Centroid': round(cent, 2), 'Roll-off': round(rolloff, 2)}

# PDF setup
pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()
pdf.set_font("Arial", size=12)
pdf.cell(200, 10, txt="Emotion-Aware Virtual Interview Report", ln=True, align='C')
pdf.ln(10)

cap = cv2.VideoCapture(0)
rec = sr.Recognizer()

for idx, question in enumerate(questions):
    print(f"\nQuestion {idx+1}: {question}")
    pdf.set_font("Arial", style='B', size=12)
    pdf.multi_cell(0, 10, f"Q{idx+1}: {question}")
    
    print("Recording your voice and detecting facial emotion... Start speaking...")

    # Record audio
    duration = 7
    fs = 44100
    audio = sd.rec(int(duration * fs), samplerate=fs, channels=1)
    
    emotions = []
    for _ in range(int(duration * 5)):
        ret, frame = cap.read()
        if not ret:
            continue

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')
        faces = face_cascade.detectMultiScale(gray, 1.3, 5)

        for (x, y, w, h) in faces:
            face_img = frame[y:y+h, x:x+w]
            processed = preprocess_face_gray(face_img)
            pred = model.predict(processed, verbose=0)
            emotion = emotion_labels[np.argmax(pred)]
            emotions.append(emotion)

        cv2.imshow("Interview Feed", frame)
        if cv2.waitKey(50) & 0xFF == ord('q'):
            break

    sd.wait()
    audio_path = os.path.join(save_dir, f"audio_q{idx+1}.wav")
    wav.write(audio_path, fs, audio)

    # Transcribe
    try:
        with sr.AudioFile(audio_path) as source:
            audio_data = rec.record(source)
            text = rec.recognize_google(audio_data)
    except:
        text = "Could not transcribe."

    # Voice tone
    features = extract_voice_features(audio_path)

    # Most frequent emotion
    final_emotion = max(set(emotions), key=emotions.count) if emotions else "Unknown"

    # Add to PDF
    pdf.set_font("Arial", size=12)
    pdf.multi_cell(0, 10, f"Response: {text}")
    pdf.cell(0, 10, f"Detected Emotion: {final_emotion}", ln=True)
    pdf.cell(0, 10, f"Voice Tone - ZCR: {features['ZCR']}, Centroid: {features['Centroid']}, Roll-off: {features['Roll-off']}", ln=True)
    pdf.ln(5)

cap.release()
cv2.destroyAllWindows()

# Save PDF
report_path = os.path.join(save_dir, "interview_report.pdf")
pdf.output(report_path)
print(f"\n✅ Interview completed. Report saved at:\n{report_path}")





Question 1: Tell me about yourself.
Recording your voice and detecting facial emotion... Start speaking...

Question 2: What are your strengths?
Recording your voice and detecting facial emotion... Start speaking...

Question 3: Describe a challenge you've overcome.
Recording your voice and detecting facial emotion... Start speaking...
