In [1]:
pip install sounddevice librosa joblib numpy

Collecting sounddevice
  Using cached sounddevice-0.5.2-py3-none-win_amd64.whl.metadata (1.6 kB)
Collecting librosa
  Using cached librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Using cached audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Using cached soundfile-0.13.1-py2.py3-none-win_amd64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Using cached pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Using cached soxr-1.0.0-cp312-abi3-win_amd64.whl.metadata (5.6 kB)
Collecting standard-sunau (from librosa)
  Using cached standard_sunau-3.13.0-py3-none-any.whl.metadata (914 bytes)
Using cached sounddevice-0.5.2-py3-none-win_amd64.whl (363 kB)
Using cached librosa-0.11.0-py3-none-any.whl (260 kB)
Using cached audioread-3.0.1-py3-none-any.whl (23 kB)
Using cached pooch-1.8.2-py3-none-any.whl (64 kB)
Using cached soundfile-0.13.1-py2.py3-none-win_a

In [2]:
pip install sounddevice librosa joblib soundfile numpy

Note: you may need to restart the kernel to use updated packages.


In [4]:
pip install sounddevice soundfile numpy librosa joblib scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [3]:
import joblib
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import os

model_path = "voice_liveness_model.pkl"
expected_features = 6960  # same as used in feature extraction

if not os.path.exists(model_path):
    # Create a dummy model with random data
    X_dummy = np.random.rand(10, expected_features)
    y_dummy = np.random.randint(0, 2, 10)  # 0 = Fake, 1 = Real
    model = RandomForestClassifier()
    model.fit(X_dummy, y_dummy)
    joblib.dump(model, model_path)
    print("‚úÖ Dummy voice model created.")
else:
    print("‚úÖ Model already exists.")


‚úÖ Dummy voice model created.


In [4]:
import sounddevice as sd
import soundfile as sf

duration = 2  # seconds
samplerate = 16000
temp_audio_file = "temp_voice.wav"

print("üé§ Speak now (recording for 2 seconds)...")
audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1)
sd.wait()
sf.write(temp_audio_file, audio, samplerate)


üé§ Speak now (recording for 2 seconds)...


In [5]:
import librosa

y, sr = librosa.load(temp_audio_file, sr=samplerate)
mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
features = mfcc.flatten()

# Pad or truncate to expected features
if len(features) < expected_features:
    features = np.pad(features, (0, expected_features - len(features)), mode='constant')
elif len(features) > expected_features:
    features = features[:expected_features]

features = features.reshape(1, -1)


In [6]:
model = joblib.load(model_path)

try:
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(features)[0]
        fake_score = proba[0] * 100
        real_score = proba[1] * 100
    else:
        pred = model.predict(features)[0]
        real_score = 100 if pred == 1 else 0
        fake_score = 100 - real_score

    label = "Real" if real_score > fake_score else "Fake"
    conf = max(real_score, fake_score)

    print(f"\nüîé Voice Detected: {label} ({conf:.2f}%)")

except Exception as e:
    print("‚ùå Error during prediction:", e)

# Clean up temporary audio file
import os
if os.path.exists(temp_audio_file):
    os.remove(temp_audio_file)



üîé Voice Detected: Fake (50.00%)


In [7]:
# ------------------ Imports ------------------
import os
import numpy as np
import sounddevice as sd
import soundfile as sf
import librosa
import joblib
from sklearn.ensemble import RandomForestClassifier

# ------------------ Parameters ------------------
model_path = "voice_liveness_model.pkl"
temp_audio_file = "temp_voice.wav"
duration = 2          # seconds to record
samplerate = 16000    # audio sample rate
expected_features = 6960  # number of features model expects

# ------------------ Create Dummy Model if Not Found ------------------
if not os.path.exists(model_path):
    print("‚¨áÔ∏è Creating dummy voice model...")
    X_dummy = np.random.rand(10, expected_features)
    y_dummy = np.random.randint(0, 2, 10)  # 0 = Fake, 1 = Real
    model = RandomForestClassifier()
    model.fit(X_dummy, y_dummy)
    joblib.dump(model, model_path)
    print("‚úÖ Dummy voice model created.")
else:
    print("‚úÖ Voice model already exists.")

# Load model
model = joblib.load(model_path)

# ------------------ Record Audio ------------------
print(f"\nüé§ Speak now (recording for {duration} seconds)...")
audio = sd.rec(int(duration * samplerate), samplerate=samplerate, channels=1)
sd.wait()
sf.write(temp_audio_file, audio, samplerate)

# ------------------ Extract Features ------------------
y, sr = librosa.load(temp_audio_file, sr=samplerate)
if len(y) < 0.3 * sr:
    print("‚ö†Ô∏è Voice too short. Try speaking louder or longer.")
    os.remove(temp_audio_file)
    exit()

mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
features = mfcc.flatten()

# Pad or truncate features
if len(features) < expected_features:
    features = np.pad(features, (0, expected_features - len(features)), mode='constant')
elif len(features) > expected_features:
    features = features[:expected_features]

features = features.reshape(1, -1)

# ------------------ Predict ------------------
try:
    if hasattr(model, "predict_proba"):
        proba = model.predict_proba(features)[0]
        fake_score = proba[0] * 100
        real_score = proba[1] * 100
    else:
        pred = model.predict(features)[0]
        real_score = 100 if pred == 1 else 0
        fake_score = 100 - real_score

    label = "Real" if real_score > fake_score else "Fake"
    conf = max(real_score, fake_score)
    print(f"\nüîé Voice Detected: {label} ({conf:.2f}%)")

except Exception as e:
    print("‚ùå Error during prediction:", e)

# ------------------ Clean Up ------------------
if os.path.exists(temp_audio_file):
    os.remove(temp_audio_file)


‚úÖ Voice model already exists.

üé§ Speak now (recording for 2 seconds)...

üîé Voice Detected: Real (53.00%)


In [8]:
import sounddevice as sd
import soundfile as sf
import numpy as np
import librosa
import joblib
import os
import time

model_path = "voice_liveness_model.pkl"
temp_audio_file = "temp_voice.wav"
record_duration = 2  # seconds of voice input
gap_between_checks = 5  # seconds between detections
samplerate = 16000
expected_features = 6960  # Model input size

if not os.path.exists(model_path):
    print("‚ùå Model file 'voice_liveness_model.pkl' not found.")
    exit()
model = joblib.load(model_path)
print("‚úÖ Voice model loaded.\n")

try:
    while True:
        print("üé§ Listening for voice (recording for 2 seconds)...")
        audio = sd.rec(int(record_duration * samplerate), samplerate=samplerate, channels=1)
        sd.wait()
        sf.write(temp_audio_file, audio, samplerate)

        y, sr = librosa.load(temp_audio_file, sr=samplerate)
        if len(y) < 0.3 * sr:
            print("‚ö†Ô∏è Voice too short or silent. Skipping...\n")
            time.sleep(gap_between_checks)
            continue

        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        features = mfcc.flatten()

        if len(features) < expected_features:
            features = np.pad(features, (0, expected_features - len(features)), mode='constant')
        elif len(features) > expected_features:
            features = features[:expected_features]

        features = features.reshape(1, -1)

        try:
            if hasattr(model, "predict_proba"):
                proba = model.predict_proba(features)[0]
                fake_score = proba[0] * 100
                real_score = proba[1] * 100
            else:
                pred = model.predict(features)[0]
                real_score = 100 if pred == 1 else 0
                fake_score = 100 - real_score

            label = "Real" if real_score > fake_score else "Fake"
            conf = max(real_score, fake_score)

            print(f"üîé Voice Detected: {label} ({conf:.2f}%)\n")

        except Exception as e:
            print("‚ùå Error during prediction:", e)

        if os.path.exists(temp_audio_file):
            os.remove(temp_audio_file)

        time.sleep(gap_between_checks)

except KeyboardInterrupt:
    print("\nüõë Detection stopped by user.")


‚úÖ Voice model loaded.

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Real (52.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Real (51.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Fake (50.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Real (52.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Real (52.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Fake (50.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Fake (50.00%)

üé§ Listening for voice (recording for 2 seconds)...
üîé Voice Detected: Real (51.00%)


üõë Detection stopped by user.


In [10]:
import cv2
import numpy as np
import time
import sounddevice as sd
import queue
import librosa
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from scipy.spatial import distance as dist
from joblib import load as joblib_load

# ------------------ Models ------------------
face_model = load_model("liveness_model.h5")  # face liveness model
voice_model = joblib_load("voice_liveness_model.pkl")  # voice liveness model

IMG_SIZE = 64
EAR_THRESHOLD = 0.21
CONSEC_FRAMES = 3
TEXTURE_THRESHOLD = 5.0
sr = 16000  # audio sample rate

q_audio = queue.Queue()
audio_buffer = []

# ------------------ Load OpenCV face & eye detectors ------------------
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")

blink_counter = 0
total_blinks = 0
start_time = time.time()

# ------------------ Audio recording callback ------------------
def audio_callback(indata, frames, time_info, status):
    if status:
        print("[Audio Callback Warning]", status)
    q_audio.put(indata.copy())

# ------------------ MFCC extraction ------------------
def extract_mfcc(audio, sr):
    try:
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        if mfcc.shape[1] < 174:
            mfcc = np.pad(mfcc, ((0, 0), (0, 174 - mfcc.shape[1])), mode='constant')
        return mfcc[:, :174].flatten().reshape(1, -1)
    except Exception as e:
        print("[MFCC Error]", e)
        return None

# ------------------ Eye Aspect Ratio ------------------
def eye_aspect_ratio(eye):
    A = dist.euclidean(eye[1], eye[5])
    B = dist.euclidean(eye[2], eye[4])
    C = dist.euclidean(eye[0], eye[3])
    return (A + B) / (2.0 * C)

# ------------------ Texture calculation ------------------
def calculate_texture(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return np.std(gray)

# ------------------ Start video & audio ------------------
cap = cv2.VideoCapture(0)
stream = sd.InputStream(samplerate=sr, channels=1, callback=audio_callback)
stream.start()

print("[INFO] Press 'q' to quit...")

voice_label = "Waiting..."
voice_confidence = 0.0

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))
    
    face_label = "No Face"
    face_confidence = 0.0

    for (x, y, w, h) in faces:
        face_crop = frame[y:y+h, x:x+w]
        resized = cv2.resize(face_crop, (IMG_SIZE, IMG_SIZE))
        face_input = img_to_array(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)) / 255.0
        face_input = np.expand_dims(face_input, axis=0)

        # ------------------ Face prediction ------------------
        try:
            prediction = face_model.predict(face_input, verbose=0)[0][0]
            texture = calculate_texture(face_crop)

            # ------------------ Eye detection ------------------
            eyes = eye_cascade.detectMultiScale(cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY))
            ear = 1.0  # default
            if len(eyes) >= 2:
                # pick first two eyes
                eye_pts = []
                for ex, ey, ew, eh in eyes[:2]:
                    eye_pts.append((ex, ey))
                    eye_pts.append((ex+ew, ey+eh))
                ear = eye_aspect_ratio(eye_pts)

            if prediction > 0.5 and ear < EAR_THRESHOLD and texture > TEXTURE_THRESHOLD:
                is_real = True
            else:
                is_real = prediction > 0.5 and texture > TEXTURE_THRESHOLD

            face_label = "Real" if is_real else "Fake"
            face_confidence = prediction * 100

        except Exception as e:
            print("[Face Error]", e)

        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0) if face_label=="Real" else (0,0,255), 2)

    # ------------------ Audio processing ------------------
    if not q_audio.empty():
        audio_chunk = q_audio.get().flatten()
        audio_buffer.extend(audio_chunk.tolist())

    if len(audio_buffer) >= sr:
        audio_np = np.array(audio_buffer[:sr])
        audio_buffer = audio_buffer[sr:]
        mfcc = extract_mfcc(audio_np, sr)
        if mfcc is not None:
            try:
                prediction = voice_model.predict(mfcc)[0]
                voice_label = "Real" if prediction == 1 else "Fake"
                voice_confidence = 100.0 if prediction == 1 else 0.0
            except Exception as e:
                voice_label = "Model Error"
                voice_confidence = 0.0
                print("[Voice Model Error]", e)

    # ------------------ Display ------------------
    cv2.putText(frame, f"Face: {face_label} ({face_confidence:.1f}%)", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0) if face_label=="Real" else (0,0,255), 2)
    cv2.putText(frame, f"Voice: {voice_label} ({voice_confidence:.1f}%)", (10, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0) if voice_label=="Real" else (0,0,255), 2)

    cv2.imshow("Face & Voice Liveness Detection", frame)

    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
stream.stop()
cv2.destroyAllWindows()




[INFO] Press 'q' to quit...
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range
[Face Error] list index out of range


In [12]:
import cv2
import numpy as np
import time
import sounddevice as sd
import queue
import librosa
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from scipy.spatial import distance as dist
from joblib import load as joblib_load

# ---------------- Models ----------------
face_model = load_model("liveness_model.h5")
voice_model = joblib_load("voice_liveness_model.pkl")

IMG_SIZE = 64
EAR_THRESHOLD = 0.21
CONSEC_FRAMES = 3
TEXTURE_THRESHOLD = 5.0
BRIGHTNESS_THRESHOLD = 50.0
sr = 16000

q_audio = queue.Queue()
audio_buffer = []

# ---------------- OpenCV cascades ----------------
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")

blink_counter = 0
total_blinks = 0
start_time = time.time()

voice_label = "Waiting..."
voice_confidence = 0.0

# ---------------- Audio callback ----------------
def audio_callback(indata, frames, time_info, status):
    if status:
        print("[Audio Warning]", status)
    q_audio.put(indata.copy())

# ---------------- MFCC extraction ----------------
def extract_mfcc(audio, sr):
    try:
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        if mfcc.shape[1] < 174:
            mfcc = np.pad(mfcc, ((0, 0), (0, 174 - mfcc.shape[1])), mode='constant')
        return mfcc[:, :174].flatten().reshape(1, -1)
    except Exception as e:
        print("[MFCC Error]", e)
        return None

# ---------------- Eye Aspect Ratio ----------------
def eye_aspect_ratio(eye_pts):
    if len(eye_pts) < 6:
        return 1.0
    A = dist.euclidean(eye_pts[1], eye_pts[5])
    B = dist.euclidean(eye_pts[2], eye_pts[4])
    C = dist.euclidean(eye_pts[0], eye_pts[3])
    return (A + B) / (2.0 * C)

# ---------------- Texture, Blur, Brightness ----------------
def calculate_texture(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return np.std(gray)

def calculate_blur(image):
    return cv2.Laplacian(image, cv2.CV_64F).var()

def calculate_brightness(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    _, _, v = cv2.split(hsv)
    return np.mean(v)

# ---------------- Video & Audio ----------------
cap = cv2.VideoCapture(0)
stream = sd.InputStream(samplerate=sr, channels=1, callback=audio_callback)
stream.start()

print("[INFO] Press 'q' to quit...")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(60, 60))

    face_label = "No Face"
    face_confidence = 0.0
    blink_rate = 0
    blur_val = 0
    texture_val = 0
    brightness_val = 0

    for (x, y, w, h) in faces:
        face_crop = frame[y:y+h, x:x+w]

        # ------------- Face Liveness ----------------
        try:
            resized = cv2.resize(face_crop, (IMG_SIZE, IMG_SIZE))
            face_input = img_to_array(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)) / 255.0
            face_input = np.expand_dims(face_input, axis=0)
            prediction = face_model.predict(face_input, verbose=0)[0][0]

            texture_val = calculate_texture(face_crop)
            blur_val = calculate_blur(face_crop)
            brightness_val = calculate_brightness(face_crop)

            # ------------- Eye Blink Detection ------------
            eyes = eye_cascade.detectMultiScale(cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY))
            ear = 1.0
            if len(eyes) >= 2:
                eye_pts = []
                for ex, ey, ew, eh in eyes[:2]:
                    eye_pts.append((ex, ey))
                    eye_pts.append((ex+ew, ey+eh))
                ear = eye_aspect_ratio(eye_pts)

            if ear < EAR_THRESHOLD:
                blink_counter += 1
            else:
                if blink_counter >= CONSEC_FRAMES:
                    total_blinks += 1
                blink_counter = 0

            elapsed = time.time() - start_time
            blink_rate = total_blinks / (elapsed / 60) if elapsed > 0 else 0

            # ------------- Final Face Decision -------------
            is_real = prediction > 0.5 and blink_rate >= 1 and texture_val > TEXTURE_THRESHOLD
            face_label = "Real" if is_real else "Fake"
            face_confidence = prediction * 100

        except Exception as e:
            print("[Face Error]", e)

        cv2.rectangle(frame, (x, y), (x+w, y+h), (0, 255, 0) if face_label=="Real" else (0,0,255), 2)

    # ---------------- Audio Processing ----------------
    if not q_audio.empty():
        audio_chunk = q_audio.get().flatten()
        audio_buffer.extend(audio_chunk.tolist())

    if len(audio_buffer) >= sr:
        audio_np = np.array(audio_buffer[:sr])
        audio_buffer = audio_buffer[sr:]
        mfcc = extract_mfcc(audio_np, sr)
        if mfcc is not None:
            try:
                prediction = voice_model.predict(mfcc)[0]
                voice_label = "Real" if prediction == 1 else "Fake"
                voice_confidence = 100.0 if prediction == 1 else 0.0
            except Exception as e:
                voice_label = "Model Error"
                voice_confidence = 0.0
                print("[Voice Model Error]", e)

    # ---------------- Display Info ----------------
    cv2.putText(frame, f"Face: {face_label} ({face_confidence:.1f}%)", (10, 30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0) if face_label=="Real" else (0,0,255), 2)
    cv2.putText(frame, f"Voice: {voice_label} ({voice_confidence:.1f}%)", (10, 60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0) if voice_label=="Real" else (0,0,255), 2)
    cv2.putText(frame, f"Blinks: {total_blinks}", (10, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Blink Rate: {blink_rate:.2f}/min", (10, 110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Blur: {blur_val:.2f}", (10, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Texture: {texture_val:.2f}", (10, 150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Brightness: {brightness_val:.2f}", (10, 170), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,255), 1)

    cv2.imshow("Face & Voice Liveness Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
stream.stop()
cv2.destroyAllWindows()




[INFO] Press 'q' to quit...


In [2]:
import cv2
import numpy as np
import time
import sounddevice as sd
import queue
import librosa
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import img_to_array
from scipy.spatial import distance as dist
from joblib import load as joblib_load

# ---------------- Models ----------------
face_model = load_model("liveness_model.h5")
voice_model = joblib_load("voice_liveness_model.pkl")

IMG_SIZE = 64
EAR_THRESHOLD = 0.21
CONSEC_FRAMES = 3
TEXTURE_THRESHOLD = 5.0
VOICE_CONFIDENCE_THRESHOLD = 0.5  # for voice prediction
sr = 16000

q_audio = queue.Queue()
audio_buffer = []

# ---------------- OpenCV cascades ----------------
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_frontalface_default.xml")
eye_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + "haarcascade_eye.xml")

blink_counter = 0
total_blinks = 0
start_time = time.time()

voice_label = "Waiting..."
voice_confidence = 0.0

# ---------------- Audio callback ----------------
def audio_callback(indata, frames, time_info, status):
    if status:
        print("[Audio Warning]", status)
    q_audio.put(indata.copy())

# ---------------- MFCC extraction ----------------
def extract_mfcc(audio, sr):
    try:
        energy = np.sum(audio ** 2)
        if energy < 0.001:
            return None  # ignore silence
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=40)
        if mfcc.shape[1] < 174:
            mfcc = np.pad(mfcc, ((0,0),(0,174-mfcc.shape[1])), mode='constant')
        return mfcc[:, :174].flatten().reshape(1,-1)
    except Exception as e:
        print("[MFCC Error]", e)
        return None

# ---------------- Eye Aspect Ratio ----------------
def eye_aspect_ratio(eye_pts):
    if len(eye_pts) < 6:
        return 1.0
    A = dist.euclidean(eye_pts[1], eye_pts[5])
    B = dist.euclidean(eye_pts[2], eye_pts[4])
    C = dist.euclidean(eye_pts[0], eye_pts[3])
    return (A + B) / (2.0 * C)

# ---------------- Texture, Blur, Brightness ----------------
def calculate_texture(image):
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return np.std(gray)

def calculate_blur(image):
    return cv2.Laplacian(image, cv2.CV_64F).var()

def calculate_brightness(image):
    hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    _, _, v = cv2.split(hsv)
    return np.mean(v)

# ---------------- Video & Audio ----------------
cap = cv2.VideoCapture(0)
stream = sd.InputStream(samplerate=sr, channels=1, callback=audio_callback)
stream.start()

print("[INFO] Press 'q' to quit...")

while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray_frame, scaleFactor=1.1, minNeighbors=5, minSize=(60,60))

    face_label = "No Face"
    face_confidence = 0.0
    blink_rate = 0
    blur_val = 0
    texture_val = 0
    brightness_val = 0

    for (x, y, w, h) in faces:
        face_crop = frame[y:y+h, x:x+w]

        # ------------- Face Liveness ----------------
        try:
            resized = cv2.resize(face_crop, (IMG_SIZE, IMG_SIZE))
            face_input = img_to_array(cv2.cvtColor(resized, cv2.COLOR_BGR2RGB))/255.0
            face_input = np.expand_dims(face_input, axis=0)
            prediction = face_model.predict(face_input, verbose=0)[0][0]

            texture_val = calculate_texture(face_crop)
            blur_val = calculate_blur(face_crop)
            brightness_val = calculate_brightness(face_crop)

            # ------------- Eye Blink Detection ------------
            eyes = eye_cascade.detectMultiScale(cv2.cvtColor(face_crop, cv2.COLOR_BGR2GRAY))
            ear = 1.0
            if len(eyes) >= 2:
                eye_pts = []
                for ex, ey, ew, eh in eyes[:2]:
                    eye_pts.append((ex, ey))
                    eye_pts.append((ex+ew, ey+eh))
                ear = eye_aspect_ratio(eye_pts)

            if ear < EAR_THRESHOLD:
                blink_counter += 1
            else:
                if blink_counter >= CONSEC_FRAMES:
                    total_blinks += 1
                blink_counter = 0

            elapsed = time.time() - start_time
            blink_rate = total_blinks / (elapsed / 60) if elapsed > 0 else 0

            # ------------- Final Face Decision -------------
            is_real = prediction > 0.5 and blink_rate >= 1 and texture_val > TEXTURE_THRESHOLD
            face_label = "Real" if is_real else "Fake"
            face_confidence = prediction * 100

        except Exception as e:
            print("[Face Error]", e)

        cv2.rectangle(frame, (x,y), (x+w,y+h), (0,255,0) if face_label=="Real" else (0,0,255), 2)

    # ---------------- Audio Processing ----------------
    if not q_audio.empty():
        audio_chunk = q_audio.get().flatten()
        audio_buffer.extend(audio_chunk.tolist())

    if len(audio_buffer) >= sr:
        audio_np = np.array(audio_buffer[:sr])
        audio_buffer = audio_buffer[sr:]
        mfcc = extract_mfcc(audio_np, sr)
        if mfcc is not None:
            try:
                prediction = voice_model.predict_proba(mfcc)[0][1]  # probability of Real
                voice_label = "Real" if prediction > VOICE_CONFIDENCE_THRESHOLD else "Fake"
                voice_confidence = prediction * 100
            except Exception as e:
                voice_label = "Model Error"
                voice_confidence = 0.0
                print("[Voice Model Error]", e)
        else:
            voice_label = "Silent"
            voice_confidence = 0.0

    # ---------------- Display Info ----------------
    cv2.putText(frame, f"Face: {face_label} ({face_confidence:.1f}%)", (10,30),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0) if face_label=="Real" else (0,0,255), 2)
    cv2.putText(frame, f"Voice: {voice_label} ({voice_confidence:.1f}%)", (10,60),
                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0,255,0) if voice_label=="Real" else (0,0,255), 2)
    cv2.putText(frame, f"Blinks: {total_blinks}", (10,90), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Blink Rate: {blink_rate:.2f}/min", (10,110), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Blur: {blur_val:.2f}", (10,130), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Texture: {texture_val:.2f}", (10,150), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255,255,0), 1)
    cv2.putText(frame, f"Brightness: {brightness_val:.2f}", (10,170), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0,255,255), 1)

    cv2.imshow("Face & Voice Liveness Detection", frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
stream.stop()
cv2.destroyAllWindows()




[INFO] Press 'q' to quit...
