In [1]:
import numpy as np
from tqdm import tqdm

---

# Feature Processing

In [2]:
def calculate_perclos(ear_feats, threshold=0.2):
    if len(ear_feats) == 0:
        return 0.0
    closed = np.sum(np.array(ear_feats) < threshold)
    return closed / len(ear_feats)

    
def calculate_blink_stats(ear_feats, threshold=0.2, min_duration=2):
    blinks = []
    count = 0
    in_blink = False

    for val in ear_feats:
        if val < threshold:
            count += 1
            in_blink = True
        elif in_blink:
            blinks.append(count)
            count = 0
            in_blink = False

    if in_blink:
        blinks.append(count)

    blink_freq = len(blinks)
    blink_dur = np.mean(blinks) if blinks else 0.0
    return blink_freq, blink_dur


def calculate_yawn_stats(mar_feats, threshold=0.5, min_duration=3):
    yawns = []
    count = 0
    in_yawn = False

    for val in mar_feats:
        if val > threshold:
            count += 1
            in_yawn = True
        elif in_yawn:
            yawns.append(count)
            count = 0
            in_yawn = False

    if in_yawn:
        yawns.append(count)

    yawn_freq = len(yawns)
    yawn_dur = np.mean(yawns) if yawns else 0.0
    return yawn_freq, yawn_dur


def calculate_ear_deltas(ear_feats):
    return np.mean(np.abs(np.diff(ear_feats))) if len(ear_feats) > 1 else 0.0
    
def calculate_mar_deltas(mar_feats):
    return np.mean(np.abs(np.diff(mar_feats))) if len(mar_feats) > 1 else 0.0
    
def calculate_pose_deltas(pitch_feats, yaw_feats, roll_feats):
    d_pitch = np.mean(np.abs(np.diff(pitch_feats))) if len(pitch_feats) > 1 else 0.0
    d_yaw = np.mean(np.abs(np.diff(yaw_feats))) if len(yaw_feats) > 1 else 0.0
    d_roll = np.mean(np.abs(np.diff(roll_feats))) if len(roll_feats) > 1 else 0.0
    return d_pitch, d_yaw, d_roll

def interpolate_nan(arr):
    n = len(arr)
    idx = np.arange(n)
    valid = ~np.isnan(arr)
    if valid.sum() == 0 or valid.sum() == n:
        return arr
    interp = np.copy(arr)
    interp[np.isnan(interp)] = np.interp(idx[np.isnan(interp)], idx[valid], arr[valid])
    return interp

def fill_missing(arr, global_means):
    temp = arr.copy()
    temp = np.array(temp)
    nan = np.isnan(temp)
    temp[nan] = global_means[nan]
    return temp

def add_feature_deltas(window_features):
    remove_idx = [i for i in range(10, len(window_features[0]))]
    w_features = np.delete(window_features, remove_idx, axis=1)
    # get speed
    deltas = np.diff(w_features, axis=0)
    deltas = np.vstack(([np.zeros(w_features.shape[1])], deltas))
    # get acceleration
    delta2 = np.diff(deltas, axis=0)
    delta2 = np.vstack(([np.zeros(deltas.shape[1])], delta2))
    combined = np.hstack((window_features, deltas, delta2))
    return combined

In [3]:
def process_window_features(features):
    ear_feats = features[0]
    mar_feats = features[1]
    pitch_feats = features[2]
    yaw_feats = features[3]
    roll_feats = features[4]
    yolo_feats = features[5:]

    valid_ear  = float((~np.isnan(ear_feats)).sum())  / max(1, len(ear_feats))
    valid_mar  = float((~np.isnan(mar_feats)).sum())  / max(1, len(mar_feats))
    valid_hpe = float((~np.isnan(pitch_feats)).sum()) / max(1, len(pitch_feats))
    valid_yolo = float((~np.isnan(yolo_feats[0])).sum()) / max(1, len(yolo_feats[0]))
    confidence = (valid_ear + valid_mar + valid_hpe + valid_yolo) / 4

    ear_interp = interpolate_nan(ear_feats)
    mar_interp = interpolate_nan(mar_feats)
    pitch_interp = interpolate_nan(pitch_feats)
    yaw_interp = interpolate_nan(yaw_feats)
    roll_interp = interpolate_nan(roll_feats)
    yolo_interp = [interpolate_nan(yol) for yol in yolo_feats]
        

    if np.isnan(ear_interp).all():
        ear_mean, ear_std, blink_freq, blink_dur, perclos, d_ear = np.nan, np.nan, np.nan, np.nan, np.nan, np.nan
    else:
        ear_mean, ear_std = np.mean(ear_interp), np.std(ear_interp)
        blink_freq, blink_dur = calculate_blink_stats(ear_interp)
        perclos = calculate_perclos(ear_interp)
        d_ear = calculate_ear_deltas(ear_interp)

    if np.isnan(mar_interp).all():
        mar_mean, mar_std, yawn_freq, yawn_dur, d_mar = np.nan, np.nan, np.nan, np.nan, np.nan
    else:
        mar_mean, mar_std = np.mean(mar_interp), np.std(mar_interp)
        yawn_freq, yawn_dur = calculate_yawn_stats(mar_interp)
        d_mar = calculate_mar_deltas(mar_interp)

    if np.isnan(pitch_interp).all() or np.isnan(yaw_interp).all() or np.isnan(roll_interp).all():
        pitch_mean, pitch_std, yaw_mean, yaw_std, roll_mean, roll_std, d_pitch, d_yaw, d_roll = np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan
    else:
        pitch_mean, pitch_std = np.mean(pitch_interp), np.std(pitch_interp)
        yaw_mean, yaw_std = np.mean(yaw_interp), np.std(yaw_interp)
        roll_mean, roll_std = np.mean(roll_interp), np.std(roll_interp)
        d_pitch, d_yaw, d_roll = calculate_pose_deltas(pitch_interp, yaw_interp, roll_interp)

    yolo_features = []
    for yol in yolo_interp:
        if np.isnan(yol).all():
            yolo_features.append(np.nan)
        else:
            yolo_features.append(np.mean(yol))
            

    window_features = [
        ear_mean, ear_std, 
        mar_mean, mar_std, 
        pitch_mean, pitch_std, 
        yaw_mean, yaw_std, 
        roll_mean, roll_std, 
        blink_freq, blink_dur, 
        yawn_freq, yawn_dur, 
        perclos, 
        d_ear, d_mar, d_pitch, d_yaw, d_roll, 
        confidence]
    window_features.extend(yolo_features)
    return window_features

In [4]:
def get_window_features(features, window_size, stride):
    window_features = []
    for i in range(0, len(features) - window_size + 1, stride):
        temp = features[i:i+window_size]
        window_features.append(process_window_features(np.transpose(temp)))
    
    global_means = np.nanmean(window_features, axis=0)
    for l, feats in enumerate(window_features):
        if np.isnan(feats).any():
            window_features[l] = fill_missing(feats, global_means)

    return window_features

## 10 seconds window (300 frames, 150 stride)/3 => (100 frames, 50 stride)

In [8]:
for i in range(5):
    fold_features = []
    features_lengths = []
    data = np.load(f"/kaggle/input/uta-rldd-feature-vector-npz/fold_{i+1}.npz", allow_pickle=True)
    for idx in tqdm(range(len(data["features"])), desc=f"{i+1}/5"):
        temp_feats = np.array(data["features"][idx])
        temp_yolo_feats = []
        for j, feat in enumerate(data["yolo_features"][idx]):
            if np.isnan(feat).any():
                temp_yolo_feats.append(np.array(feat))
            else:
                temp_yolo_feats.append(np.array(feat).squeeze())
        temp_yolo_feats = np.array(temp_yolo_feats)
        feats = np.array([np.concatenate((temp_feats, temp_yolo_feats), axis=1)]).squeeze()
        feats = [f for k, f in enumerate(feats) if k%3==0] # every 3 frame

        feats = get_window_features(feats, 100, 50) # frame/3
        feats = add_feature_deltas(np.array(feats))
        fold_features.append(feats)
        features_lengths.append(len(feats))
    fold_features = np.array(fold_features, dtype="object")
    features_lengths = np.array(features_lengths, dtype="object")
    np.savez(f"/kaggle/working/10s_window/combined_fold_{i+1}.npz", features=fold_features, labels=data["labels"], lengths=features_lengths)

1/5: 100%|██████████| 36/36 [07:09<00:00, 11.93s/it]
2/5: 100%|██████████| 36/36 [07:03<00:00, 11.77s/it]
3/5: 100%|██████████| 36/36 [07:11<00:00, 11.98s/it]
4/5: 100%|██████████| 36/36 [06:59<00:00, 11.64s/it]
5/5: 100%|██████████| 36/36 [08:00<00:00, 13.36s/it]


## 5 seconds window (150 frames, 75 stride)/3 => (50 frames, 25 stride)

In [19]:
for i in range(5):
    fold_features = []
    features_lengths = []
    data = np.load(f"/kaggle/input/uta-rldd-feature-vector-npz/fold_{i+1}.npz", allow_pickle=True)
    for idx in tqdm(range(len(data["features"])), desc=f"{i+1}/5"):
        temp_feats = np.array(data["features"][idx])
        temp_yolo_feats = []
        for j, feat in enumerate(data["yolo_features"][idx]):
            if np.isnan(feat).any():
                temp_yolo_feats.append(np.array(feat))
            else:
                temp_yolo_feats.append(np.array(feat).squeeze())
        temp_yolo_feats = np.array(temp_yolo_feats)
        feats = np.array([np.concatenate((temp_feats, temp_yolo_feats), axis=1)]).squeeze()
        feats = [f for k, f in enumerate(feats) if k%3==0] # every 3 frame

        feats = get_window_features(feats, 50, 25) # frame/3
        feats = add_feature_deltas(np.array(feats))
        fold_features.append(feats)
        features_lengths.append(len(feats))
    fold_features = np.array(fold_features, dtype="object")
    features_lengths = np.array(features_lengths, dtype="object")
    np.savez(f"/kaggle/working/5s_window/combined_fold_{i+1}.npz", features=fold_features, labels=data["labels"], lengths=features_lengths)

1/5: 100%|██████████| 36/36 [07:21<00:00, 12.28s/it]
2/5: 100%|██████████| 36/36 [07:16<00:00, 12.12s/it]
3/5: 100%|██████████| 36/36 [07:20<00:00, 12.23s/it]
4/5: 100%|██████████| 36/36 [07:12<00:00, 12.02s/it]
5/5: 100%|██████████| 36/36 [08:15<00:00, 13.77s/it]


## 1 second window (30 frames, 15 stride)/3 => (10 frames, 5 stride)

In [20]:
for i in range(5):
    fold_features = []
    features_lengths = []
    data = np.load(f"/kaggle/input/uta-rldd-feature-vector-npz/fold_{i+1}.npz", allow_pickle=True)
    for idx in tqdm(range(len(data["features"])), desc=f"{i+1}/5"):
        temp_feats = np.array(data["features"][idx])
        temp_yolo_feats = []
        for j, feat in enumerate(data["yolo_features"][idx]):
            if np.isnan(feat).any():
                temp_yolo_feats.append(np.array(feat))
            else:
                temp_yolo_feats.append(np.array(feat).squeeze())
        temp_yolo_feats = np.array(temp_yolo_feats)
        feats = np.array([np.concatenate((temp_feats, temp_yolo_feats), axis=1)]).squeeze()
        feats = [f for k, f in enumerate(feats) if k%3==0] # every 3 frame

        feats = get_window_features(feats, 10, 5) # frame/3
        feats = add_feature_deltas(np.array(feats))
        fold_features.append(feats)
        features_lengths.append(len(feats))
    fold_features = np.array(fold_features, dtype="object")
    features_lengths = np.array(features_lengths, dtype="object")
    np.savez(f"/kaggle/working/1s_window/combined_fold_{i+1}.npz", features=fold_features, labels=data["labels"], lengths=features_lengths)

1/5: 100%|██████████| 36/36 [09:40<00:00, 16.13s/it]
2/5: 100%|██████████| 36/36 [09:27<00:00, 15.78s/it]
3/5: 100%|██████████| 36/36 [09:38<00:00, 16.06s/it]
4/5: 100%|██████████| 36/36 [09:21<00:00, 15.59s/it]
5/5: 100%|██████████| 36/36 [10:55<00:00, 18.20s/it]


## frames (frame_count/3)

In [21]:
for i in range(5):
    fold_features = []
    features_lengths = []
    data = np.load(f"/kaggle/input/uta-rldd-feature-vector-npz/fold_{i+1}.npz", allow_pickle=True)
    for idx in tqdm(range(len(data["features"])), desc=f"{i+1}/5"):
        temp_feats = np.array(data["features"][idx])
        temp_yolo_feats = []
        for j, feat in enumerate(data["yolo_features"][idx]):
            if np.isnan(feat).any():
                temp_yolo_feats.append(np.array(feat))
            else:
                temp_yolo_feats.append(np.array(feat).squeeze())
        temp_yolo_feats = np.array(temp_yolo_feats)
        feats = np.array([np.concatenate((temp_feats, temp_yolo_feats), axis=1)]).squeeze()
        feats = [f for k, f in enumerate(feats) if k%3==0] # every 3 frame

        global_means = np.nanmean(feats, axis=0)
        for l, feat in enumerate(feats):
            if np.isnan(feat).any():
                feats[l] = fill_missing(feat, global_means)
        
        fold_features.append(feats)
        features_lengths.append(len(feats))
    fold_features = np.array(fold_features, dtype="object")
    features_lengths = np.array(features_lengths, dtype="object")
    np.savez(f"/kaggle/working/frames/combined_fold_{i+1}.npz", features=fold_features, labels=data["labels"], lengths=features_lengths)

1/5: 100%|██████████| 36/36 [06:46<00:00, 11.29s/it]
2/5: 100%|██████████| 36/36 [06:33<00:00, 10.92s/it]
3/5: 100%|██████████| 36/36 [06:48<00:00, 11.36s/it]
4/5: 100%|██████████| 36/36 [07:28<00:00, 12.46s/it]
5/5: 100%|██████████| 36/36 [07:43<00:00, 12.88s/it]
