### Import Modules

In [1]:
!pip install cmake



In [2]:
!git clone https://github.com/davisking/dlib.git
%cd dlib
!mkdir build
%cd build
!cmake .. -DDLIB_USE_CUDA=0 -DUSE_AVX_INSTRUCTIONS=1
!cmake --build .
%cd ..
!python setup.py install --no DLIB_USE_CUDA

Cloning into 'dlib'...
remote: Enumerating objects: 52110, done.[K
remote: Counting objects: 100% (338/338), done.[K
remote: Compressing objects: 100% (220/220), done.[K
remote: Total 52110 (delta 237), reused 118 (delta 118), pack-reused 51772 (from 4)[K
Receiving objects: 100% (52110/52110), 49.11 MiB | 7.67 MiB/s, done.
Resolving deltas: 100% (36674/36674), done.
Updating files: 100% (2275/2275), done.
/content/dlib
/content/dlib/build
-- The C compiler identification is GNU 11.4.0
-- The CXX compiler identification is GNU 11.4.0
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Check for working C compiler: /usr/bin/cc - skipped
-- Detecting C compile features
-- Detecting C compile features - done
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Check for working CXX compiler: /usr/bin/c++ - skipped
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Using CMake version: 3.31.6
-- Compiling dlib

In [1]:
import os
import cv2
import dlib
import numpy as np
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [4]:
!wget http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
!bzip2 -d shape_predictor_68_face_landmarks.dat.bz2

--2025-04-10 05:32:36--  http://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Resolving dlib.net (dlib.net)... 107.180.26.78
Connecting to dlib.net (dlib.net)|107.180.26.78|:80... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2 [following]
--2025-04-10 05:32:36--  https://dlib.net/files/shape_predictor_68_face_landmarks.dat.bz2
Connecting to dlib.net (dlib.net)|107.180.26.78|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 64040097 (61M)
Saving to: ‘shape_predictor_68_face_landmarks.dat.bz2’


2025-04-10 05:32:42 (13.1 MB/s) - ‘shape_predictor_68_face_landmarks.dat.bz2’ saved [64040097/64040097]



### Download NTHUDDD Dataset

In [None]:
from google.colab import files
files.upload()

In [6]:
!mkdir -p ~/.kaggle
!mv kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [7]:
!kaggle datasets download -d faisal7/nthuddd -p /content

Dataset URL: https://www.kaggle.com/datasets/faisal7/nthuddd
License(s): unknown


In [8]:
!unzip -o /content/nthuddd.zip -d /content/nthu_ddd

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_1016_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_1020_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_1025_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_102_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_1034_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_1039_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/notdrowsy/001_noglasses_nonsleepyCombination_1043_notdrowsy.jpg  
  inflating: /content/nthu_ddd/archive (7)/validation_data/no

### Get Facial Landmarks

In [9]:
face_detector = dlib.get_frontal_face_detector()
predictor = dlib.shape_predictor("shape_predictor_68_face_landmarks.dat")

In [14]:
def get_video_id_from_filename(filename):
    return "_".join(filename.split("_")[:3])

In [11]:
EYE_AR_THRESH = 0.25

In [12]:
def eye_aspect_ratio(eye):
    A = np.linalg.norm(eye[1] - eye[5])
    B = np.linalg.norm(eye[2] - eye[4])
    C = np.linalg.norm(eye[0] - eye[3])
    return (A + B) / (2.0 * C)

In [17]:
def calculate_eyeline_tilt(left_eye, right_eye):
    left_point = left_eye[0]  # outer left
    right_point = right_eye[3]  # outer right
    delta_y = right_point[1] - left_point[1]
    delta_x = right_point[0] - left_point[0]
    angle = np.degrees(np.arctan2(delta_y, delta_x))
    return angle

In [20]:
def process_video_group(frames, label, video_id):
    closed_frames = 0
    total_frames = 0
    blink_count = 0
    current_eye_state = None
    streak = 0
    longest_streak = 0

    aol_values = []
    ov_values = []
    cv_values = []

    for frame_path in frames:
        frame = cv2.imread(frame_path)
        if frame is None:
            continue
        total_frames += 1

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = face_detector(gray)
        if len(faces) == 0:
            continue

        shape = predictor(gray, faces[0])
        shape_np = np.array([[pt.x, pt.y] for pt in shape.parts()])
        left_eye = shape_np[42:48]
        right_eye = shape_np[36:42]
        both_eyes = np.vstack((left_eye, right_eye))
        mouth = shape_np[48:60]

        ear_left = eye_aspect_ratio(left_eye)
        ear_right = eye_aspect_ratio(right_eye)
        ear = (ear_left + ear_right) / 2.0

        # AOL = tilt of eye line
        aol_values.append(calculate_eyeline_tilt(left_eye, right_eye))

        if ear < EYE_AR_THRESH:
            closed_frames += 1
            streak += 1
            longest_streak = max(longest_streak, streak)
            if current_eye_state != "closed":
                blink_count += 1
            current_eye_state = "closed"
        else:
            streak = 0
            current_eye_state = "open"

        # OV (variance in mouth height) — using original landmark indices
        mouth_height = np.mean([
            np.linalg.norm(shape_np[62] - shape_np[66]),  # upper inner lip to lower inner lip
            np.linalg.norm(shape_np[61] - shape_np[67])   # upper outer lip to lower outer lip
        ])
        ov_values.append(mouth_height)

        # CV (variance in eye opening / ear)
        cv_values.append(ear)

    if total_frames == 0:
        return None

    return {
        "video_id": video_id,
        "label": label,
        "PERCLOS": closed_frames / total_frames,
        "MCD": longest_streak / total_frames,
        "AOL": np.mean(aol_values),
        "BF": blink_count / total_frames,
        "OV": np.var(ov_values),
        "CV": np.var(cv_values)
    }

Train Data

In [21]:
base_path = "/content/nthu_ddd/archive (7)"
all_metrics = []
for split in ["train_data"]:
    for class_dir in ["drowsy", "notdrowsy"]:
        folder_path = os.path.join(base_path, split, class_dir)
        if not os.path.exists(folder_path):
            continue

        label = 0 if "notdrowsy" in class_dir else 1

        frame_groups = defaultdict(list)

        for filename in os.listdir(folder_path):
            if not filename.lower().endswith((".jpg", ".png")):
                continue
            video_id = get_video_id_from_filename(filename)
            frame_path = os.path.join(folder_path, filename)
            frame_groups[video_id].append(frame_path)

        for video_id, frames in tqdm(frame_groups.items()):
            frames = sorted(frames)
            result = process_video_group(frames, label, video_id)
            if result:
                all_metrics.append(result)

100%|██████████| 21/21 [21:31<00:00, 61.51s/it]
100%|██████████| 27/27 [18:11<00:00, 40.42s/it]


In [22]:
df = pd.DataFrame(all_metrics)
display(df)

Unnamed: 0,video_id,label,PERCLOS,MCD,AOL,BF,OV,CV
0,005_noglasses_sleepyCombination,1,0.678155,0.040291,130.982294,0.12233,139.853549,0.001667
1,002_noglasses_yawning,1,0.147059,0.016043,170.164519,0.053476,272.327835,0.002724
2,002_noglasses_sleepyCombination,1,0.29669,0.044917,173.539088,0.073877,203.282355,0.001724
3,005_glasses_sleepyCombination,1,0.506508,0.020607,168.603227,0.12039,226.705724,0.001663
4,002_glasses_sleepyCombination,1,0.267967,0.010585,96.325605,0.11532,114.554525,0.0033
5,002_glasses_slowBlinkWithNodding,1,0.481961,0.032377,90.203287,0.120259,10.773063,0.00445
6,001_glasses_sleepyCombination,1,0.378073,0.024585,173.434991,0.113621,28.985384,0.001821
7,001_noglasses_sleepyCombination,1,0.269946,0.095981,133.603014,0.034793,45.890907,0.002734
8,006_glasses_sleepyCombination,1,0.034464,0.002223,5.51005,0.024458,107.271867,0.00126
9,005_noglasses_slowBlinkWithNodding,1,0.660802,0.045742,131.51281,0.114708,0.359151,0.001692


In [109]:
df.to_csv("drowsiness_metrics.csv", index=False)

In [110]:
!mkdir -p /content/files_for_download

!cp drowsiness_metrics.csv /content/files_for_download/

%ls /content/files_for_download

drowsiness_metrics.csv


Test Data

In [129]:
base_path = "/content/nthu_ddd/archive (7)"
all_metrics = []
for split in ["test_data"]:
    for class_dir in ["drowsy", "notdrowsy"]:
        folder_path = os.path.join(base_path, split, class_dir)
        if not os.path.exists(folder_path):
            continue

        label = 0 if "notdrowsy" in class_dir else 1

        frame_groups = defaultdict(list)

        for filename in os.listdir(folder_path):
            if not filename.lower().endswith((".jpg", ".png")):
                continue
            video_id = get_video_id_from_filename(filename)
            frame_path = os.path.join(folder_path, filename)
            frame_groups[video_id].append(frame_path)

        for video_id, frames in tqdm(frame_groups.items()):
            frames = sorted(frames)
            result = process_video_group(frames, label, video_id)
            if result:
                all_metrics.append(result)

100%|██████████| 21/21 [03:13<00:00,  9.23s/it]
100%|██████████| 27/27 [02:33<00:00,  5.69s/it]


In [130]:
test_df = pd.DataFrame(all_metrics)
display(test_df)

Unnamed: 0,video_id,label,PERCLOS,MCD,AOL,BF,OV,CV
0,005_glasses_slowBlinkWithNodding,1,0.52356,0.057592,149.648637,0.198953,0.340279,0.001267
1,002_noglasses_yawning,1,0.149533,0.028037,170.208165,0.084112,278.555022,0.002845
2,002_glasses_yawning,1,0.552147,0.067485,142.975911,0.220859,72.714156,0.003269
3,001_noglasses_sleepyCombination,1,0.239496,0.10084,133.281832,0.071429,45.372084,0.002785
4,001_glasses_sleepyCombination,1,0.376744,0.032558,173.770878,0.2,29.334455,0.001905
5,001_glasses_slowBlinkWithNodding,1,0.338843,0.057851,168.043708,0.181818,0.597211,0.001777
6,002_glasses_sleepyCombination,1,0.311284,0.046693,93.849447,0.132296,114.931269,0.003226
7,005_glasses_sleepyCombination,1,0.526515,0.079545,171.17909,0.151515,227.256945,0.001511
8,002_glasses_slowBlinkWithNodding,1,0.467532,0.071429,89.967436,0.181818,8.509878,0.004865
9,005_noglasses_slowBlinkWithNodding,1,0.64532,0.1133,124.025223,0.147783,0.188485,0.001817


In [133]:
test_df.to_csv("test_drowsiness_metrics.csv", index=False)

In [134]:
!mkdir -p /content/files_for_download

!cp test_drowsiness_metrics.csv /content/files_for_download/

%ls /content/files_for_download

drowsiness_metrics.csv  test_drowsiness_metrics.csv


### Classification using Fischer Discriminant Functions

In [113]:
df = pd.read_csv("/content/drowsiness_metrics.csv")

In [114]:
df.drop([24, 25, 29, 41, 42, 43, 44, 45, 46], inplace=True)

In [115]:
X = df[["PERCLOS", "MCD", "AOL", "BF", "OV", "CV"]]
y = df["label"]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [116]:
lda = LinearDiscriminantAnalysis(solver='svd', store_covariance=True)
lda.fit(X_scaled, y)

In [117]:
# Class means and covariance
class_means = lda.means_
cov_inv = np.linalg.inv(lda.covariance_)

# Now compute coefficients for each class function: coef = mean @ cov_inv
class_coefs = class_means @ cov_inv.T

# Constant term for each function: -0.5 * mean @ cov_inv @ mean.T + log(prior)
constants = []
for i in range(len(class_means)):
    mean = class_means[i]
    const = -0.5 * mean @ cov_inv @ mean.T + np.log(lda.priors_[i])
    constants.append(const)

# Put into DataFrame
lda_coeffs = pd.DataFrame(class_coefs, columns=X.columns)
lda_coeffs["Constant"] = constants
lda_coeffs.index = [f"Function {i}" for i in range(len(class_means))]

print("Classification Function Coefficients:")
display(lda_coeffs)

Classification Function Coefficients:


Unnamed: 0,PERCLOS,MCD,AOL,BF,OV,CV,Constant
Function 0,-0.006073,-0.319034,0.159039,-0.098941,-0.781054,-0.055616,-1.01058
Function 1,0.005205,0.273458,-0.136319,0.084806,0.669475,0.047671,-0.793448


In [135]:
test_df = pd.read_csv("test_drowsiness_metrics.csv")

In [120]:
test_df.drop([24, 25, 29, 41, 42, 43, 44, 45, 46], inplace=True)

In [121]:
def compute_lda_scores(row, coeffs):
    score = 0
    for feature in ['PERCLOS', 'MCD', 'AOL', 'BF', 'OV', 'CV']:
        score += coeffs[feature] * row[feature]
    score += coeffs['Constant']
    return score

In [122]:
test_df['score_0'] = test_df.apply(lambda row: compute_lda_scores(row, lda_coeffs.iloc[0, :]), axis=1)
test_df['score_1'] = test_df.apply(lambda row: compute_lda_scores(row, lda_coeffs.iloc[1, :]), axis=1)

In [123]:
test_df['predicted_label'] = (test_df['score_1'] > test_df['score_0']).astype(int)

In [124]:
accuracy = accuracy_score(test_df['label'], test_df['predicted_label'])
print(f"Classification Accuracy: {accuracy * 100:.2f}%")

Classification Accuracy: 71.79%


### Classification using SVM

In [159]:
df = pd.read_csv("/content/drowsiness_metrics.csv")

In [160]:
test_df = pd.read_csv("test_drowsiness_metrics.csv")

In [161]:
# df.drop([24, 25, 29, 41, 42, 43, 44, 45, 46], inplace=True)
# test_df.drop([24, 25, 29, 41, 42, 43, 44, 45, 46], inplace=True)
# df.reset_index(drop=True, inplace=True)

In [162]:
X_train = df[["PERCLOS", "MCD", "AOL", "BF", "OV", "CV"]]
y_train = df["label"]
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

X_test = test_df[["PERCLOS", "MCD", "AOL", "BF", "OV", "CV"]]
y_test = test_df["label"]
X_test = scaler.fit_transform(X_test)

In [163]:
svm = SVC(kernel='linear', C=1.0, random_state=42)
svm.fit(X_train, y)

In [164]:
y_pred = svm.predict(X_test)

In [165]:
print(f"Accuracy: {accuracy_score(y, y_pred) * 100:.2f}%\n")
print("Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y, y_pred))

Accuracy: 77.08%

Classification Report:
              precision    recall  f1-score   support

           0       0.71      1.00      0.83        27
           1       1.00      0.48      0.65        21

    accuracy                           0.77        48
   macro avg       0.86      0.74      0.74        48
weighted avg       0.84      0.77      0.75        48

Confusion Matrix:
[[27  0]
 [11 10]]


### Classification using Decsion Tree

In [167]:
df = pd.read_csv("/content/drowsiness_metrics.csv")

In [168]:
test_df = pd.read_csv("test_drowsiness_metrics.csv")

In [169]:
X_train = df[["PERCLOS", "MCD", "AOL", "BF", "OV", "CV"]]
y_train = df["label"]
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

X_test = test_df[["PERCLOS", "MCD", "AOL", "BF", "OV", "CV"]]
y_test = test_df["label"]
X_test = scaler.fit_transform(X_test)

In [178]:
dt = DecisionTreeClassifier(random_state=42, max_depth=12)
dt.fit(X_train, y_train)

In [179]:
y_pred = dt.predict(X_test)

print(f"Accuracy: {accuracy_score(y, y_pred) * 100:.2f}%\n")
print("Classification Report:")
print(classification_report(y, y_pred))
print("Confusion Matrix:")
print(confusion_matrix(y, y_pred))

Accuracy: 85.42%

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.89      0.87        27
           1       0.85      0.81      0.83        21

    accuracy                           0.85        48
   macro avg       0.85      0.85      0.85        48
weighted avg       0.85      0.85      0.85        48

Confusion Matrix:
[[24  3]
 [ 4 17]]
