In [62]:
from functools import cache
from typing import List, Tuple, Set

import numpy as np
import pandas as pd
from sortedcontainers import SortedSet
from tqdm import tqdm

from drowsiness_detection.config import PREPROCESSED_DATA_PATH
from drowsiness_detection.data import create_eye_closure_karolinksa_dataset
from drowsiness_detection.data_models import BlinkEvent
from drowsiness_detection.data_models import PreComputedSlicer

np.random.seed(42)

### 2. Identify Blink Events

In [71]:
interval_in_min = 5
interval_in_frames = interval_in_min * (60 * 30)
global_mean, global_std = 0.25191899173446713, 0.12801327657273856

MIN_CLOSURE_FOR_BLINK = global_mean + 2 * global_std
MIN_BLINK_LENGTH = 2
MIN_CHANGE_OF_BLINK = .01
MIN_AMPLITUDE_HEIGHT = global_std
MAX_START_END_DELTA = global_std
REOPENING_THRESHOLD = .05

features_names = ["blink_duration", "blink_interval", "lid_opening_delay", "closing_speed", "max_closing_speed"]

In [114]:
def filter_points_above_threshold(threshold: float, data: np.ndarray) -> list:
    """
    """
    above_std_sets = []
    new_set = SortedSet()
    it = np.nditer(data, flags=["f_index"])
    for value in it:
        if value > threshold:
            new_set.add(it.index)
        else:
            if new_set:
                above_std_sets.append(new_set)
                new_set = SortedSet()
    return above_std_sets


def filter_blink_sets_by_length(min_len: int, sets: list):
    return [s for s in sets if len(s) > min_len]


def extend_start_stop(blink_sets: List[Set], data: np.ndarray):
    for index_set in blink_sets:
        start, stop = index_set[0], index_set[-1]
        while (data[start] - data[start - 1]) > MIN_CHANGE_OF_BLINK:
            index_set.add(start - 1)
            start -= 1
        while (data[stop] - data[stop + 1]) > MIN_CHANGE_OF_BLINK:
            index_set.add(stop + 1)
            stop += 1
    return blink_sets


def filter_blink_sets_by_amplitude(data: np.ndarray, blink_sets: List[Set], min_amplitude_heigth: float):
    new_blink_sets = []
    for blink_set in blink_sets:
        values = data[blink_set]
        height = max(values) - min(values)
        if height > min_amplitude_heigth:
            new_blink_sets.append(blink_set)
    return new_blink_sets


def filter_blink_sets_by_start_end_delta(data: np.ndarray, blink_sets: List[Set], max_start_end_delta: float):
    new_blink_sets = []
    for blink_set in blink_sets:
        start = data[blink_set[0]]
        end = data[blink_set[-1]]
        if abs(start - end) < max_start_end_delta:
            new_blink_sets.append(blink_set)
    return new_blink_sets


def set_blink_intervals(blink_events: List[BlinkEvent]):
    for i, be in enumerate(blink_events):
        if i == 0:
            be._blink_interval = 300  # mean of the first session
            continue
        be._blink_interval = abs(be.start_idx - blink_events[i - 1].end_idx)  # abs() because there were negative blinks
    # mean_blink_interal = np.nanmean([be.blink_interval for be in blink_events])
    # for be in blink_events:
    #     if np.isnan(be._blink_interval):
    #         be.blink_interval = mean_blink_interal
    return blink_events


@cache
def calculate_blink_event_statistics(events: Tuple[BlinkEvent]):
    feature_values = np.array([[be.__getattribute__(feature_name) for be in events] for feature_name in features_names], dtype=float)
    if feature_values.size == 0:
        return np.zeros(4 * feature_values.shape[0])
    means = np.nanmean(feature_values, axis=1)
    medians = np.nanmedian(feature_values, axis=1)
    stds = np.nanstd(feature_values, axis=1)
    if np.any((stds == 0)):
        stds[(stds == 0)] = float("inf")
    skews = 3 * (means - medians) / stds
    return np.concatenate([means, medians, stds, skews])


def create_slicer(eye_closure_data: pd.DataFrame, blink_events: List[BlinkEvent], interval: int) -> PreComputedSlicer:
    # take index of peak closure as identifier for each blink event
    blink_event_dict = {int(be.full_closure_idx): be for be in blink_events}
    blink_event_slicer = PreComputedSlicer(indices=[idx for idx in eye_closure_data.index], object_dict=blink_event_dict, index_interval=interval)
    return blink_event_slicer


def create_feature_array(eye_closure_data: pd.DataFrame, num_features: int, slicer: PreComputedSlicer):
    feature_array = np.zeros(shape=(len(eye_closure_data), num_features))
    indices = eye_closure_data.index[interval_in_frames:]
    for index in tqdm(indices):
        past_events = slicer[index]
        feature_array[index] = calculate_blink_event_statistics(events=tuple(past_events))
    return feature_array


def create_feature_df(feature_array: np.ndarray, feature_names: List[str]):
    feature_df = pd.DataFrame(feature_array, columns=[name + kind for kind in ["_mean", "_median", "_std", "_skew"] for name in feature_names], dtype="float")
    return feature_df


def binarize_targets(target_array: np.ndarray) -> np.ndarray:
    return (target_array > 6).astype(int)


def process_raw_data_to_features_and_targets(filename: str):
    all_features, all_targets = [], []
    for loaded_df in create_eye_closure_karolinksa_dataset():
        target_array = loaded_df["kss"].to_numpy()
        data_df = loaded_df["eye_closure"]
        data = data_df.to_numpy()

        blink_sets = filter_points_above_threshold(threshold=MIN_CLOSURE_FOR_BLINK, data=data)
        blink_sets = filter_blink_sets_by_length(min_len=MIN_BLINK_LENGTH, sets=blink_sets)
        blink_sets = extend_start_stop(blink_sets=blink_sets, data=data)
        blink_sets = filter_blink_sets_by_amplitude(data=data, blink_sets=blink_sets, min_amplitude_heigth=MIN_AMPLITUDE_HEIGHT)
        blink_sets = filter_blink_sets_by_start_end_delta(data=data, blink_sets=blink_sets, max_start_end_delta=MAX_START_END_DELTA)

        blink_events = [BlinkEvent(indices=index_set, data=data_df, reopening_threshold=REOPENING_THRESHOLD) for index_set in blink_sets]
        blink_events = set_blink_intervals(blink_events=blink_events)
        blink_event_slicer = create_slicer(eye_closure_data=data_df, blink_events=blink_events, interval=interval_in_frames)

        feature_array = create_feature_array(eye_closure_data=data_df, num_features=len(features_names) * 4, slicer=blink_event_slicer)

        target_array = binarize_targets(target_array=target_array)
        target_array = target_array[interval_in_frames:]
        feature_array = feature_array[interval_in_frames:]

        assert feature_array.shape[0] == target_array.shape[0]

        all_features.append(feature_array)
        all_targets.append(target_array)
        break

    abs_path = PREPROCESSED_DATA_PATH.joinpath(filename+".npy").absolute()
    features = np.concatenate(all_features)
    targets = np.concatenate(all_targets)
    np.save(file=abs_path, arr=np.c_[features,targets])
    return abs_path


In [115]:
path = process_raw_data_to_features_and_targets(filename="220130_features_targets")

Extracting file /home/tim/IM/data/potsdam_aeye_112020/001_1_a.json and response file: /home/tim/IM/data/sleep_alc_labels/001_1_a_karolinska.csv.


100%|██████████| 159847/159847 [00:00<00:00, 171997.09it/s]


In [116]:
d = np.load(path)

In [117]:
X,y = d[:,:-1], d[:,-1].astype(int)

In [118]:
pd.DataFrame(X).describe()

  diff_b_a = subtract(b, a)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
count,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0,159847.0
mean,5.072181,364.201565,2.668044,0.168963,0.0,4.210417,162.726823,0.716779,0.161706,0.0,4.168729,563.236911,5.591111,0.077971,inf,0.589715,1.045406,1.045156,0.277935,0.0
std,1.718202,141.9831,2.897849,0.028892,0.0,2.043624,133.507933,0.577889,0.034918,0.0,3.224864,270.833695,6.928764,0.017287,,0.650901,0.422359,0.592603,0.540993,0.0
min,2.931034,139.227273,0.266667,0.0696,0.0,3.0,21.0,0.0,0.034444,0.0,0.759203,116.070919,0.442217,0.039318,inf,-1.488417,-0.330503,-2.84605,-1.096013,0.0
25%,3.923077,265.878788,0.944444,0.155755,0.0,3.5,69.0,0.0,0.15043,0.0,1.274101,345.91523,1.587936,0.068338,,0.329293,0.769438,0.780197,-0.047074,0.0
50%,4.707317,336.5,1.685714,0.169675,0.0,4.0,147.0,1.0,0.170258,0.0,3.346234,529.889581,2.723037,0.077162,,0.667762,1.071886,1.08832,0.206726,0.0
75%,5.777778,442.695652,2.5,0.190679,0.0,4.0,218.0,1.0,0.18264,0.0,6.11045,739.021014,4.980503,0.083103,,0.973415,1.383879,1.364097,0.641167,0.0
max,12.615385,1020.111111,19.0,0.233708,0.0,14.0,1098.0,2.5,0.238665,0.0,12.190869,1422.333983,37.138255,0.134696,inf,2.422934,2.216638,2.84605,2.031025,0.0
