In [1]:
import numpy as np

def calculate_smile_intensity(landmarks, image_width, image_height):
    left = landmarks[61]
    right = landmarks[291]
    top = landmarks[13]
    bottom = landmarks[14]

    def denorm(pt):
        return np.array([pt.x * image_width, pt.y * image_height])

    left_pt = denorm(left)
    right_pt = denorm(right)
    top_pt = denorm(top)
    bottom_pt = denorm(bottom)

    mouth_width = np.linalg.norm(right_pt - left_pt)
    mouth_height = np.linalg.norm(bottom_pt - top_pt)

    if mouth_width == 0:
        return 0.0
    
    intensity = (mouth_height / mouth_width) * 2.0
    return round(intensity)


In [2]:
import numpy as np

def check_eye_contact(landmarks, image_width, image_height, threshold=0.15):
    def denorm(pt):
        return np.array([pt.x * image_width, pt.y * image_height])
    left_eye_outer = denorm(landmarks[33])
    left_eye_inner = denorm(landmarks[133])
    left_iris = denorm(landmarks[468])

    right_eye_inner = denorm(landmarks[362])
    right_eye_outer = denorm(landmarks[263])
    right_iris = denorm(landmarks[473])

    left_eye_width = np.linalg.norm(left_eye_outer - left_eye_inner)
    right_eye_width = np.linalg.norm(right_eye_outer - right_eye_inner)

    left_eye_center = (left_eye_outer + left_eye_inner) / 2
    right_eye_center = (right_eye_outer + right_eye_inner) / 2

    left_offset = np.linalg.norm(left_iris - left_eye_center) / left_eye_width
    right_offset = np.linalg.norm(right_iris - right_eye_center) / right_eye_width

    avg_offset = (left_offset + right_offset) / 2
    
    return avg_offset < threshold, round(avg_offset, 3)


In [3]:
import numpy as np

def get_eye_gaze_direction(landmarks, image_width, image_height, side="left"):
    def denorm(pt):
        return np.array([pt.x * image_width, pt.y * image_height])

    if side == "left":
        outer = denorm(landmarks[33])
        inner = denorm(landmarks[133])
        iris = denorm(landmarks[468])
    else:
        outer = denorm(landmarks[263])
        inner = denorm(landmarks[362])
        iris = denorm(landmarks[473])

    eye_width = np.linalg.norm(outer - inner)
    if eye_width == 0:
        return "Unknown"

    eye_center = (outer + inner) / 2
    iris_offset = (iris - eye_center)[0]  

    normalized_offset = iris_offset / eye_width

    if normalized_offset < -0.12:
        return "Right"  
    elif normalized_offset > 0.12:
        return "Left"   
    else:
        return "Center"


In [4]:
import numpy as np

def detect_blink(landmarks, image_width, image_height, prev_state, threshold=0.22):
    def denorm(pt): return np.array([pt.x * image_width, pt.y * image_height])

    top = denorm(landmarks[159])
    bottom = denorm(landmarks[145])
    left = denorm(landmarks[33])
    right = denorm(landmarks[133])

    vertical = np.linalg.norm(top - bottom)
    horizontal = np.linalg.norm(left - right)

    if horizontal == 0:
        return False, prev_state

    ear = vertical / horizontal

    if ear < threshold and prev_state == "open":
        return True, "closed"
    elif ear >= threshold:
        return False, "open"
    else:
        return False, prev_state


In [5]:
def check_head_position(landmarks, w, h, threshold=10):
    left_eye_outer = np.array([landmarks[33].x * w, landmarks[33].y * h])
    right_eye_outer = np.array([landmarks[263].x * w, landmarks[263].y * h])

    dx = right_eye_outer[0] - left_eye_outer[0]
    dy = right_eye_outer[1] - left_eye_outer[1]
    angle = np.degrees(np.arctan2(dy, dx))  

    tilt_score = max(0, 1 - abs(angle) / threshold)
    tilt_score = round(min(tilt_score, 1.0), 2)

    return tilt_score, round(angle, 2)


In [6]:
def score_smile(smile_intensity):
    if smile_intensity > 0.4:
        return 1.0
    elif smile_intensity > 0.25:
        return 0.7
    elif smile_intensity > 0.15:
        return 0.4
    else:
        return 0.0

def score_eye_contact(eye_contact):
    return 1.0 if eye_contact else 0.0

def score_gaze(gaze_direction):
    if gaze_direction == "Center":
        return 1.0
    elif gaze_direction == "Uncertain":
        return 0.5
    else:
        return 0.0

def score_blink_rate(blink_rate):
    if 10 <= blink_rate <= 25:
        return 1.0
    elif 6 <= blink_rate <= 30:
        return 0.5
    else:
        return 0.0
def score_head_position(head_score):
    return head_score

In [7]:
def evaluate_confidence(smile_intensity, eye_contact, gaze_direction, blink_rate, head_pos):
    s = score_smile(smile_intensity)
    e = score_eye_contact(eye_contact)
    g = score_gaze(gaze_direction)
    b = score_blink_rate(blink_rate)
    h = score_head_position(head_pos)

    final_score = (0.22 * s) + (0.25 * e) + (0.20 * g) + (0.15 * b) + (0.18 * h)
    return round(final_score * 100)


In [8]:
'''def evaluate_confidence(smile_intensity, eye_contact, gaze_direction, blink_rate):
    s = score_smile(smile_intensity)
    e = score_eye_contact(eye_contact)
    g = score_gaze(gaze_direction)
    b = score_blink_rate(blink_rate)


    final_score = (0.25 * s) + (0.35 * e) + (0.25 * g) + (0.15 * b)
    return round(final_score * 100)
'''

'def evaluate_confidence(smile_intensity, eye_contact, gaze_direction, blink_rate):\n    s = score_smile(smile_intensity)\n    e = score_eye_contact(eye_contact)\n    g = score_gaze(gaze_direction)\n    b = score_blink_rate(blink_rate)\n\n\n    final_score = (0.25 * s) + (0.35 * e) + (0.25 * g) + (0.15 * b)\n    return round(final_score * 100)\n'

In [9]:
import cv2
import mediapipe as mp
import time

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True
)
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

blink_count = 0
blink_state = "open"
start_time = time.time()

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(frame_rgb)

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            h, w, _ = frame.shape
            landmarks = face_landmarks.landmark

            # Smile
            intensity = calculate_smile_intensity(landmarks, w, h)
            if intensity < 0.15:
                smile_label = "No Smile"
            elif intensity < 0.25:
                smile_label = "Light Smile"
            elif intensity < 0.4:
                smile_label = "Moderate Smile"
            else:
                smile_label = "Big Smile"

            cv2.putText(frame, f"Smile Intensity: {intensity} ({smile_label})",
                        (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

            # Eye Contact
            eye_contact, offset = check_eye_contact(landmarks, w, h)
            eye_label = "Looking at Camera" if eye_contact else "Not Looking"
            eye_color = (0, 200, 0) if eye_contact else (0, 0, 200)

            cv2.putText(frame, f"Eye Contact: {eye_label} (Offset: {offset})",
                        (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, eye_color, 2)

            # Gaze Direction
            left_gaze = get_eye_gaze_direction(landmarks, w, h, side="left")
            right_gaze = get_eye_gaze_direction(landmarks, w, h, side="right")
            gaze_label = left_gaze if left_gaze == right_gaze else "Uncertain"
            gaze_score = 1.0 if gaze_label == "Center" else 0.5 if gaze_label != "Uncertain" else 0.0

            cv2.putText(frame, f"Gaze: {gaze_label}",
                        (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

            # Blink Detection
            blink, blink_state = detect_blink(landmarks, w, h, blink_state)
            if blink:
                blink_count += 1

            elapsed_minutes = (time.time() - start_time) / 60
            blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0
            blink_score = 1.0 if 10 <= blink_rate <= 25 else 0.5 if blink_rate < 40 else 0.2  # Adjust ranges if needed

            cv2.putText(frame, f"Blinks: {blink_count}", (30, 120),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 100, 255), 2)
            cv2.putText(frame, f"Blink Rate: {blink_rate:.2f} blinks/min", (30, 150),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (150, 255, 150), 2)

            # Head Position
            score_head, head_angle = check_head_position(landmarks, w, h)

            cv2.putText(frame, f"Head Tilt Angle: {head_angle}°", (30, 240),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (100, 255, 255), 2)

            # Confidence Evaluation
            confidence_score = evaluate_confidence(
                smile_intensity=intensity,
                eye_contact=1.0 if eye_contact else 0.0,
                gaze_direction=gaze_score,
                blink_rate=blink_score,
                head_pos=score_head
            )

            # Label
            if confidence_score > 75:
                conf_label = "High Confidence"
            elif confidence_score > 50:
                conf_label = "Moderate Confidence"
            else:
                conf_label = "Low Confidence"

            cv2.putText(frame, f"Confidence: {confidence_score}%", (30, 180),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 200, 100), 2)
            cv2.putText(frame, f"Evaluation: {conf_label}", (30, 210),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 150), 2)

    cv2.imshow('Real-time Confidence Estimator', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()


error: OpenCV(4.12.0) D:\a\opencv-python\opencv-python\opencv\modules\highgui\src\window.cpp:1301: error: (-2:Unspecified error) The function is not implemented. Rebuild the library with Windows, GTK+ 2.x or Cocoa support. If you are on Ubuntu or Debian, install libgtk2.0-dev and pkg-config, then re-run cmake or configure script in function 'cvShowImage'


In [None]:
import ffmpeg
import os

def extract_audio(video_path, audio_path):
    ffmpeg.input(video_path).output(audio_path, ac=1, ar='16000').run(overwrite_output=True)

In [None]:
import ffmpeg
import os

def extract_audio(video_path, audio_path):
    try:
        out, err = ffmpeg.input(video_path).output(audio_path, ac=1, ar='16000').run(overwrite_output=True, capture_stdout=True, capture_stderr=True)
        print('Audio extraction succeeded.')
        print('ffmpeg stdout:', out.decode('utf-8') if out else '')
        print('ffmpeg stderr:', err.decode('utf-8') if err else '')
    except ffmpeg.Error as e:
        print('Audio extraction failed:')
        print('ffmpeg stderr:', e.stderr.decode('utf-8') if e.stderr else str(e))

video_path = 'media\\video_sample.mp4'
audio_path = 'media\\extracted_audio\\audio_sample.wav'
extract_audio(video_path=video_path, audio_path=audio_path)

Audio extraction failed:
ffmpeg stderr: ffmpeg version 7.1.1-essentials_build-www.gyan.dev Copyright (c) 2000-2025 the FFmpeg developers
  built with gcc 14.2.0 (Rev1, Built by MSYS2 project)
  configuration: --enable-gpl --enable-version3 --enable-static --disable-w32threads --disable-autodetect --enable-fontconfig --enable-iconv --enable-gnutls --enable-libxml2 --enable-gmp --enable-bzlib --enable-lzma --enable-zlib --enable-libsrt --enable-libssh --enable-libzmq --enable-avisynth --enable-sdl2 --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxvid --enable-libaom --enable-libopenjpeg --enable-libvpx --enable-mediafoundation --enable-libass --enable-libfreetype --enable-libfribidi --enable-libharfbuzz --enable-libvidstab --enable-libvmaf --enable-libzimg --enable-amf --enable-cuda-llvm --enable-cuvid --enable-dxva2 --enable-d3d11va --enable-d3d12va --enable-ffnvcodec --enable-libvpl --enable-nvdec --enable-nvenc --enable-vaapi --enable-libgme --enable-libopenmpt --enable

In [None]:
import os

video_path = os.path.join(os.path.dirname(__file__), 'media', 'video_sample.mp4')
print(video_path)
print(os.path.isfile(video_path))

NameError: name '__file__' is not defined

In [None]:
import os
video_path = os.path.join(os.path.dirname(__file__), 'media','video_sample.mp4')
print(os.path.isfile(video_path))

NameError: name '__file__' is not defined

In [None]:
'''import cv2
import mediapipe as mp
import time

mp_face_mesh = mp.solutions.face_mesh
face_mesh = mp_face_mesh.FaceMesh(
    static_image_mode=False,
    max_num_faces=1,
    refine_landmarks=True
)
mp_drawing = mp.solutions.drawing_utils

cap = cv2.VideoCapture(0)

blink_count = 0
blink_state = "open"
start_time = time.time()

while cap.isOpened():
    success, frame = cap.read()
    if not success:
        break
    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    results = face_mesh.process(frame_rgb)

    if results.multi_face_landmarks:
        for face_landmarks in results.multi_face_landmarks:
            h, w, _ = frame.shape
            landmarks = face_landmarks.landmark

            # Smile
            intensity = calculate_smile_intensity(landmarks, w, h)
            if intensity < 0.15:
                smile_label = "No Smile"
            elif intensity < 0.25:
                smile_label = "Light Smile"
            elif intensity < 0.4:
                smile_label = "Moderate Smile"
            else:
                smile_label = "Big Smile"

            cv2.putText(frame, f"Smile Intensity: {intensity} ({smile_label})",
                        (30, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 255), 2)

            # Eye Contact
            eye_contact, offset = check_eye_contact(landmarks, w, h)
            eye_label = "Looking at Camera" if eye_contact else "Not Looking"
            eye_color = (0, 200, 0) if eye_contact else (0, 0, 200)

            cv2.putText(frame, f"Eye Contact: {eye_label} (Offset: {offset})",
                        (30, 60), cv2.FONT_HERSHEY_SIMPLEX, 0.7, eye_color, 2)

            # Gaze Direction
            left_gaze = get_eye_gaze_direction(landmarks, w, h, side="left")
            right_gaze = get_eye_gaze_direction(landmarks, w, h, side="right")
            gaze_label = left_gaze if left_gaze == right_gaze else "Uncertain"

            cv2.putText(frame, f"Gaze: {gaze_label}",
                        (30, 90), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 0), 2)

            # Blink Detection
            blink, blink_state = detect_blink(landmarks, w, h, blink_state)
            if blink:
                blink_count += 1

            elapsed_minutes = (time.time() - start_time) / 60
            blink_rate = blink_count / elapsed_minutes if elapsed_minutes > 0 else 0

            cv2.putText(frame, f"Blinks: {blink_count}", (30, 120),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (200, 100, 255), 2)
            cv2.putText(frame, f"Blink Rate: {blink_rate:.2f} blinks/min", (30, 150),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (150, 255, 150), 2)

            # Confidence Evaluation
            confidence_score = evaluate_confidence(
                smile_intensity=intensity,
                eye_contact=eye_contact,
                gaze_direction=gaze_label,
                blink_rate=blink_rate
            )

            # Label
            if confidence_score > 75:
                conf_label = "High Confidence"
            elif confidence_score > 50:
                conf_label = "Moderate Confidence"
            else:
                conf_label = "Low Confidence"

            cv2.putText(frame, f"Confidence: {confidence_score}%", (30, 180),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 200, 100), 2)
            cv2.putText(frame, f"Evaluation: {conf_label}", (30, 210),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 255, 150), 2)


    cv2.imshow('Real-time Confidence Estimator', frame)
    if cv2.waitKey(1) & 0xFF == 27:
        break

cap.release()
cv2.destroyAllWindows()
'''

'import cv2\nimport mediapipe as mp\nimport time\n\nmp_face_mesh = mp.solutions.face_mesh\nface_mesh = mp_face_mesh.FaceMesh(\n    static_image_mode=False,\n    max_num_faces=1,\n    refine_landmarks=True\n)\nmp_drawing = mp.solutions.drawing_utils\n\ncap = cv2.VideoCapture(0)\n\nblink_count = 0\nblink_state = "open"\nstart_time = time.time()\n\nwhile cap.isOpened():\n    success, frame = cap.read()\n    if not success:\n        break\n    frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)\n    results = face_mesh.process(frame_rgb)\n\n    if results.multi_face_landmarks:\n        for face_landmarks in results.multi_face_landmarks:\n            h, w, _ = frame.shape\n            landmarks = face_landmarks.landmark\n\n            # Smile\n            intensity = calculate_smile_intensity(landmarks, w, h)\n            if intensity < 0.15:\n                smile_label = "No Smile"\n            elif intensity < 0.25:\n                smile_label = "Light Smile"\n            elif intensity 