In [1]:
import cv2
import dlib
from imutils import face_utils
from scipy.spatial import distance as dist
from scipy.spatial import ConvexHull
from deepface import DeepFace
import numpy as np




In [2]:
# dlib's frontal face detector; later cells call it on a grayscale frame to get face rectangles.
detector = dlib.get_frontal_face_detector()
# Landmark predictor loaded from a model file resolved relative to the current working
# directory; later cells index its output as landmark points 0-67.
predictor = dlib.shape_predictor('shape_predictor_68_face_landmarks.dat')


In [3]:
def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye landmark points.

    The ratio is the sum of the two vertical lid distances
    (points 1-5 and 2-4) divided by twice the corner-to-corner
    distance (points 0-3).

    Args:
        eye: Indexable collection of at least six points.

    Returns:
        The aspect ratio as a float.
    """
    first_vertical = dist.euclidean(eye[1], eye[5])
    second_vertical = dist.euclidean(eye[2], eye[4])
    corner_span = dist.euclidean(eye[0], eye[3])

    vertical_total = first_vertical + second_vertical
    return vertical_total / (2.0 * corner_span)

In [4]:
def mouth_aspect_ratio(mouth):
    """Compute the mouth aspect ratio (MAR) from mouth landmark points.

    Uses points 13-19 and 15-17 as the two vertical openings and
    points 12-16 as the horizontal span, all indexed within ``mouth``.

    Args:
        mouth: Indexable collection of at least twenty points.

    Returns:
        Sum of the two vertical distances divided by twice the
        horizontal distance.
    """
    first_opening = dist.euclidean(mouth[13], mouth[19])
    second_opening = dist.euclidean(mouth[15], mouth[17])
    horizontal_span = dist.euclidean(mouth[12], mouth[16])

    opening_total = first_opening + second_opening
    return opening_total / (2.0 * horizontal_span)

In [5]:
def calculate_area(landmarks):
    """Return the area enclosed by the convex hull of 2-D landmark points.

    For 2-D input, ``ConvexHull.volume`` is the enclosed area
    (``ConvexHull.area`` would be the perimeter).

    Args:
        landmarks: Sequence or array of 2-D points.

    Returns:
        The hull area, or ``0.0`` when the points are degenerate
        (collinear or too few to span a polygon), since such a set
        encloses no area.
    """
    # Imported locally so this cell does not depend on an extra top-level import.
    from scipy.spatial import QhullError

    try:
        hull = ConvexHull(landmarks)
    except QhullError:
        # Qhull cannot build a hull from a flat point set (e.g. a fully
        # closed eye); report zero area instead of aborting the caller.
        return 0.0
    return hull.volume



In [6]:
def eyebrow_distance(left_eyebrow, right_eyebrow):
    """Return the Euclidean gap between the two eyebrows.

    The gap is measured from the last point of ``left_eyebrow`` to the
    first point of ``right_eyebrow``.
    """
    left_end = left_eyebrow[-1]
    right_start = right_eyebrow[0]
    return dist.euclidean(left_end, right_start)


In [7]:
def head_tilt_angle(shape):
    """Return the vertical offset between landmark 33 and landmark 8.

    Despite the name, the result is not an angle: it is the y
    coordinate of point 33 (bound to ``nose`` in the original code)
    minus the y coordinate of point 8 (bound to ``chin``).

    Args:
        shape: Indexable collection of (x, y) points with at least 34 entries.
    """
    nose_y = shape[33][1]
    chin_y = shape[8][1]
    return nose_y - chin_y



In [8]:
def gaze_direction(eye):
    """Return the width-to-height ratio of an eye from its six landmarks.

    The width is the x distance between points 0 and 3; the height is the
    y distance between the midpoint of points 1-2 and the midpoint of
    points 4-5. Note that, despite the name, this is an eye-shape ratio
    rather than a measured gaze direction.

    Args:
        eye: Six (x, y) points, as an array or any nested sequence.

    Returns:
        ``width / height`` as a float, or NaN when the height is zero
        (e.g. a fully closed eye), so the row can be treated as a missing
        value instead of carrying ``inf`` into the feature table.
    """
    # Coerce to a float array so plain lists/tuples work too; without this,
    # ``eye[1] + eye[2]`` on tuples would concatenate instead of adding.
    points = np.asarray(eye, dtype=float)

    left_corner = points[0]
    right_corner = points[3]
    top_center = (points[1] + points[2]) / 2
    bottom_center = (points[4] + points[5]) / 2

    eye_width = right_corner[0] - left_corner[0]
    eye_height = bottom_center[1] - top_center[1]
    if eye_height == 0:
        # Avoid a divide-by-zero RuntimeWarning and an inf/nan artefact.
        return np.nan

    horizontal_ratio = eye_width / eye_height
    return horizontal_ratio



In [9]:
def extract_emotion(face_roi):
    """Return the dominant emotion and its score for a cropped face image.

    Args:
        face_roi: Image crop passed to ``DeepFace.analyze``.

    Returns:
        A ``(dominant_emotion, confidence)`` tuple. Falls back to
        ``("Neutral", 0.0)`` when the crop is missing/empty or the
        analysis fails, so one bad frame does not stop feature extraction.
    """
    fallback = ("Neutral", 0.0)

    # An empty crop (for example a face box lying outside the frame)
    # cannot be analysed; skip the model call entirely.
    if face_roi is None or getattr(face_roi, "size", 1) == 0:
        return fallback

    try:
        result = DeepFace.analyze(face_roi, actions=['emotion'], enforce_detection=False)
        # Recent DeepFace releases return a list with one dict per face,
        # older ones a single dict; accept both. Indexing the list with a
        # string key would raise and silently force the fallback every time.
        if isinstance(result, (list, tuple)):
            if not result:
                return fallback
            result = result[0]
        dominant_emotion = result['dominant_emotion']
        emotion_confidence = result['emotion'][dominant_emotion]
        return dominant_emotion, emotion_confidence
    except Exception:
        # Best-effort by design, but no longer a bare ``except`` so that
        # KeyboardInterrupt/SystemExit can still stop a long-running loop.
        return fallback

In [10]:
import pandas as pd

def extract_features_from_video(video_path, label, fps=30, num_samples=10):
    """Extract per-face facial features from evenly spaced frames of a video.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        label: Value appended as the last element of every feature row.
        fps: Unused; kept so existing callers that pass it keep working.
        num_samples: Approximate number of frames to sample. The frame step
            is ``frame_count // num_samples`` (at least 1), so a few more
            frames may be read when the count is not an exact multiple.

    Returns:
        A list of rows ``[ear, mar, head_tilt, mouth_area, eye_area,
        eyebrow_dist, gaze, emotion, emotion_conf, label]``, one per face
        detected in each sampled frame. The list is empty when the reported
        frame count is zero or no face is found.
    """
    features = []
    cap = cv2.VideoCapture(video_path)
    try:
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_interval = max(1, frame_count // max(1, num_samples))

        for i in range(0, frame_count, frame_interval):
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            frame_h, frame_w = frame.shape[:2]

            for face in detector(gray):
                shape = face_utils.shape_to_np(predictor(gray, face))

                # Landmark groups, sliced out of the full landmark array
                left_eye = shape[36:42]
                right_eye = shape[42:48]
                mouth = shape[48:68]
                left_eyebrow = shape[17:22]
                right_eyebrow = shape[22:27]

                # Geometric features
                ear = (eye_aspect_ratio(left_eye) + eye_aspect_ratio(right_eye)) / 2.0
                mar = mouth_aspect_ratio(mouth)
                head_tilt = head_tilt_angle(shape)
                mouth_area = calculate_area(mouth)
                eye_area = calculate_area(left_eye) + calculate_area(right_eye)
                eyebrow_dist = eyebrow_distance(left_eyebrow, right_eyebrow)
                gaze = gaze_direction(left_eye)

                # Emotion detection on the face crop. The detector's box can
                # extend past the frame; a negative start index would wrap
                # around in NumPy slicing, so clamp the box to the frame.
                (x, y, w, h) = face_utils.rect_to_bb(face)
                x0, y0 = max(0, x), max(0, y)
                x1, y1 = min(frame_w, x + w), min(frame_h, y + h)
                face_roi = frame[y0:y1, x0:x1]
                emotion, emotion_conf = extract_emotion(face_roi)

                features.append([
                    ear, mar, head_tilt, mouth_area, eye_area,
                    eyebrow_dist, gaze, emotion, emotion_conf, label
                ])
    finally:
        # Always free the capture handle, even if a frame fails mid-video.
        cap.release()
    return features


In [None]:
import pandas as pd

# Load the label table. ClipID is cast to str so it can be sliced into
# the folder names used to build each video path below.
dataset = pd.read_csv('DAiSEE/Labels/TrainLabels.csv')
dataset['ClipID'] = dataset['ClipID'].astype(str)

# Single source of truth for the output path, so the file written and the
# completion message cannot drift apart.
output_csv = 'balanced_extracted_features_v1.csv'

# Size of the smallest engagement class; every class is down-sampled to it.
min_videos_per_group = dataset['Engagement'].value_counts().min()
print(min_videos_per_group)

# Sample an equal number of videos from each engagement level
# (fixed random_state so the selection is reproducible).
balanced_videos = []
for engagement_level in dataset['Engagement'].unique():
    group_videos = dataset[dataset['Engagement'] == engagement_level]
    sampled_videos = group_videos.sample(min_videos_per_group, random_state=42)
    balanced_videos.append(sampled_videos)

# Concatenate the sampled videos into a single DataFrame
balanced_dataset = pd.concat(balanced_videos)

# Extract features for the balanced dataset
all_features = []
for _, row in balanced_dataset.iterrows():
    video_name = row['ClipID'].replace('.avi', '')
    label = row['Engagement']
    # Directory layout: <first 6 chars>/<first 10 chars>/<clip>.avi
    folder_1 = video_name[:6]
    folder_2 = video_name[:10]
    video_path = f"DAiSEE/DataSet/Train/{folder_1}/{folder_2}/{video_name}.avi"

    try:
        video_features = extract_features_from_video(video_path, label)
        all_features.extend(video_features)
    except Exception as e:
        # Keep going: one unreadable video should not lose the whole run.
        print(f"Error processing {video_name}: {e}")

# Save extracted features
columns = [
    'EAR', 'MAR', 'Head_Tilt', 'Mouth_Area', 'Eye_Area',
    'Eyebrow_Dist', 'Gaze', 'Emotion', 'Emotion_Conf', 'Engagement'
]
features_df = pd.DataFrame(all_features, columns=columns)
features_df.to_csv(output_csv, index=False)
# Report the path that was actually written (the old message named a different file).
print(f"Balanced feature extraction complete and saved to '{output_csv}'.")



34
Balanced feature extraction complete and saved to 'balanced_extracted_features.csv'.


In [12]:
# Load the label table. ClipID is cast to str so it can be sliced into
# the folder names used to build each video path below.
dataset = pd.read_csv('DAiSEE/Labels/TrainLabels.csv')
dataset['ClipID'] = dataset['ClipID'].astype(str)

# Tunable settings for this run
videos_per_class = 34   # limit the number of videos per class
frames_per_video = 88   # target number of sampled frames per video
output_csv = 'balanced_extracted_features_12000.csv'

balanced_videos = []
for engagement_level in dataset['Engagement'].unique():
    group_videos = dataset[dataset['Engagement'] == engagement_level]
    # head() takes the first rows in file order: deterministic, but not a random sample.
    sampled_videos = group_videos.head(videos_per_class)
    balanced_videos.append(sampled_videos)

# Concatenate the selected videos into a single DataFrame
balanced_dataset = pd.concat(balanced_videos)

# Feature extraction with increased frame sampling (geometric features only)
all_features = []
for _, row in balanced_dataset.iterrows():
    video_name = row['ClipID'].replace('.avi', '')
    label = row['Engagement']
    # Directory layout: <first 6 chars>/<first 10 chars>/<clip>.avi
    folder_1 = video_name[:6]
    folder_2 = video_name[:10]
    video_path = f"DAiSEE/DataSet/Train/{folder_1}/{folder_2}/{video_name}.avi"

    try:
        cap = cv2.VideoCapture(video_path)
        try:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            frame_interval = max(1, frame_count // frames_per_video)

            for i in range(0, frame_count, frame_interval):
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                ret, frame = cap.read()
                if not ret:
                    break

                gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
                faces = detector(gray)

                for face in faces:
                    shape = predictor(gray, face)
                    shape = face_utils.shape_to_np(shape)

                    left_eye = shape[36:42]
                    right_eye = shape[42:48]
                    mouth = shape[48:68]
                    left_eyebrow = shape[17:22]
                    right_eyebrow = shape[22:27]

                    # Compute features
                    ear = (eye_aspect_ratio(left_eye) + eye_aspect_ratio(right_eye)) / 2.0
                    mar = mouth_aspect_ratio(mouth)
                    # Same value as the former inline shape[33][1] - shape[8][1],
                    # computed through the shared helper for consistency.
                    head_tilt = head_tilt_angle(shape)
                    mouth_area = calculate_area(mouth)
                    eye_area = calculate_area(left_eye) + calculate_area(right_eye)
                    eyebrow_dist = eyebrow_distance(left_eyebrow, right_eyebrow)

                    all_features.append([ear, mar, head_tilt, mouth_area, eye_area, eyebrow_dist, label])
        finally:
            # Release the capture even when a frame fails mid-video;
            # previously an exception skipped the release and leaked the handle.
            cap.release()
    except Exception as e:
        # Keep going: one unreadable video should not lose the whole run.
        print(f"Error processing {video_name}: {e}")

# Save features
columns = ['EAR', 'MAR', 'Head_Tilt', 'Mouth_Area', 'Eye_Area', 'Eyebrow_Dist', 'Engagement']
features_df = pd.DataFrame(all_features, columns=columns)
features_df.to_csv(output_csv, index=False)
print(f"Enhanced balanced feature extraction complete and saved to '{output_csv}'.")


Enhanced balanced feature extraction complete and saved to 'balanced_extracted_features_12000.csv'.
