In [1]:
import json
from pathlib import Path
import pandas as pd
from functools import reduce
from itertools import repeat
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from scipy.signal import savgol_filter

from blink_features import add_blink_features_by_eye_state, add_perclos_features_to_df, add_blink_features
from response_features import add_karolinska_file_to_feature_df
from helpers import print_nan_intersections

### get files of data

In [2]:
# Substring that marks a label file as a Karolinska response file.
session_identifier = "karolinska"

# Every entry of the label directory, as plain string paths.
files = list(map(str, Path("sleep_alc_labels").iterdir()))

# Response files, sorted so that their positions line up with feature_files
# when the two lists are consumed pairwise further down.
session_files = sorted(name for name in files if session_identifier in name)

# One JSON feature file per recorded session, sorted the same way.
feature_files = sorted(map(str, Path("potsdam_aeye_112020").iterdir()))

In [6]:
def session_file_to_df(filepath: str, filepath_response: str) -> pd.DataFrame:
    """Build the per-frame feature table for one recorded session.

    The JSON file must contain a list of per-frame records, each holding an
    ``"eye_closure"`` and an ``"eye_state"`` mapping. Both mappings are turned
    into columns and joined side by side; columns with the same name in both
    get the suffixes ``_eye_closure`` and ``_eye_state``. Session metadata,
    blink features and the Karolinska response are then added.

    Args:
        filepath: Path to the session JSON file. Its stem must have the form
            ``<subject_id>_<session_id>_<session_type>`` (e.g. ``001_1_a``),
            where ``session_type`` is a single character.
        filepath_response: Path of the response file that is passed on to
            ``add_karolinska_file_to_feature_df``.

    Returns:
        The feature frame, indexed by a ``(filename, frame)`` MultiIndex where
        ``filename`` is the stem of ``filepath``.

    Raises:
        ValueError: If the file stem does not split into exactly three
            ``_``-separated parts.
    """
    print(
        f"Extracting file {filepath} and response file: {filepath_response}.")
    with open(filepath) as fp:
        data = json.load(fp)

    # join data
    # some closure values are negative and need to be set to zero
    df_eye_closure = pd.DataFrame([item["eye_closure"]
                                   for item in data]).clip(lower=0)
    df_eye_state = pd.DataFrame([item["eye_state"] for item in data])
    # Each suffix is given exactly once: repeating a keyword argument is a
    # SyntaxError.
    df_closure_and_state = df_eye_closure.join(df_eye_state,
                                               lsuffix="_eye_closure",
                                               rsuffix="_eye_state")

    # add meta data
    filename = Path(filepath).stem
    subject_id, session_id, session_type = filename.split("_")
    df_closure_and_state["subject_id"] = subject_id
    df_closure_and_state["session_id"] = session_id
    df_closure_and_state["session_type"] = session_type

    # add blink features by eye state
    df_closure_and_state = add_blink_features_by_eye_state(
        feature_df=df_closure_and_state, interval_in_sec=240)

    # add karolinska response
    df_closure_and_state = add_karolinska_file_to_feature_df(
        filepath=filepath_response, feature_df=df_closure_and_state)

    # assign dtypes
    # The session type letter is stored as its offset from "a" (a -> 0, b -> 1).
    df_closure_and_state["session_type"] = df_closure_and_state[
        "session_type"].apply(lambda x: (ord(x) - 97))
    # Going through float first lets strings such as "001" and missing values
    # be converted before the cast to the nullable Int8 dtype.
    # NOTE: Int8 only covers -128..127, so ids above 127 do not fit.
    for column in ("subject_id", "session_id", "session_type",
                   "combined_eye_state", "left_image_eye_state",
                   "right_image_eye_state"):
        df_closure_and_state[column] = df_closure_and_state[column].astype(
            "float").astype("Int8")

    # create multi-index
    multi_index = pd.MultiIndex.from_product(
        [[filename], df_closure_and_state.index], names=["filename", "frame"])
    df_closure_and_state.index = multi_index

    return df_closure_and_state


# Build one frame per (feature file, response file) pair and stack them.
# The pairing is purely positional, so it relies on both lists being sorted
# consistently; zip stops at the shorter list.
feature_df = pd.concat([
    session_file_to_df(feature_path, response_path)
    for feature_path, response_path in zip(feature_files, session_files)
])

Extracting file potsdam_aeye_112020/001_1_a.json and response file: sleep_alc_labels/001_1_a_karolinska.csv.


100%|██████████| 168847/168847 [00:14<00:00, 11751.55it/s]


Extracting file potsdam_aeye_112020/001_2_s.json and response file: sleep_alc_labels/001_2_s_karolinska.csv.


100%|██████████| 137537/137537 [00:11<00:00, 11982.50it/s]


Extracting file potsdam_aeye_112020/001_3_b.json and response file: sleep_alc_labels/001_3_b_karolinska.csv.


100%|██████████| 150047/150047 [00:18<00:00, 8052.12it/s] 


Extracting file potsdam_aeye_112020/002_1_b.json and response file: sleep_alc_labels/002_1_b_karolinska.csv.


100%|██████████| 175077/175077 [00:18<00:00, 9562.57it/s] 


Extracting file potsdam_aeye_112020/002_2_a.json and response file: sleep_alc_labels/002_2_a_karolinska.csv.


100%|██████████| 212082/212082 [00:33<00:00, 6386.03it/s]


Extracting file potsdam_aeye_112020/002_3_s.json and response file: sleep_alc_labels/002_3_s_karolinska.csv.


100%|██████████| 156257/156257 [00:16<00:00, 9333.60it/s] 


Extracting file potsdam_aeye_112020/003_1_b.json and response file: sleep_alc_labels/003_1_b_karolinska.csv.


100%|██████████| 165426/165426 [00:17<00:00, 9637.78it/s] 


Extracting file potsdam_aeye_112020/003_2_s.json and response file: sleep_alc_labels/003_2_s_karolinska.csv.


100%|██████████| 169153/169153 [00:19<00:00, 8678.87it/s] 


Extracting file potsdam_aeye_112020/003_3_a.json and response file: sleep_alc_labels/003_3_a_karolinska.csv.


100%|██████████| 185125/185125 [00:24<00:00, 7441.23it/s]


Extracting file potsdam_aeye_112020/004_1_s.json and response file: sleep_alc_labels/004_1_s_karolinska.csv.


100%|██████████| 169108/169108 [00:27<00:00, 6106.84it/s]


Extracting file potsdam_aeye_112020/004_2_a.json and response file: sleep_alc_labels/004_2_a_karolinska.csv.


100%|██████████| 170013/170013 [00:18<00:00, 8990.67it/s] 


Extracting file potsdam_aeye_112020/004_3_b.json and response file: sleep_alc_labels/004_3_b_karolinska.csv.


100%|██████████| 149516/149516 [00:16<00:00, 8908.74it/s] 


Extracting file potsdam_aeye_112020/005_1_s.json and response file: sleep_alc_labels/005_1_s_karolinska.csv.


100%|██████████| 160035/160035 [00:21<00:00, 7450.42it/s] 


Extracting file potsdam_aeye_112020/005_2_b.json and response file: sleep_alc_labels/005_2_b_karolinska.csv.


100%|██████████| 147373/147373 [00:16<00:00, 8702.58it/s] 


Extracting file potsdam_aeye_112020/005_3_a.json and response file: sleep_alc_labels/005_3_a_karolinska.csv.


100%|██████████| 190628/190628 [00:24<00:00, 7756.64it/s] 


Extracting file potsdam_aeye_112020/008_1_b.json and response file: sleep_alc_labels/008_1_b_karolinska.csv.


100%|██████████| 167951/167951 [00:18<00:00, 9022.50it/s] 


Extracting file potsdam_aeye_112020/008_2_a.json and response file: sleep_alc_labels/008_2_a_karolinska.csv.


100%|██████████| 181733/181733 [00:16<00:00, 10807.54it/s]


Extracting file potsdam_aeye_112020/008_3_s.json and response file: sleep_alc_labels/008_3_s_karolinska.csv.


100%|██████████| 158511/158511 [00:11<00:00, 13373.91it/s]


Extracting file potsdam_aeye_112020/009_1_b.json and response file: sleep_alc_labels/009_1_b_karolinska.csv.


100%|██████████| 161757/161757 [00:22<00:00, 7102.72it/s] 


Extracting file potsdam_aeye_112020/011_1_s.json and response file: sleep_alc_labels/011_1_s_karolinska.csv.


100%|██████████| 149883/149883 [00:19<00:00, 7501.72it/s] 


Extracting file potsdam_aeye_112020/011_2_b.json and response file: sleep_alc_labels/011_2_b_karolinska.csv.


100%|██████████| 145378/145378 [00:14<00:00, 10072.09it/s]


Extracting file potsdam_aeye_112020/011_3_a.json and response file: sleep_alc_labels/011_3_a_karolinska.csv.


100%|██████████| 162319/162319 [00:16<00:00, 9974.63it/s] 


Extracting file potsdam_aeye_112020/014_1_b.json and response file: sleep_alc_labels/014_1_b_karolinska.csv.


100%|██████████| 147355/147355 [00:23<00:00, 6217.99it/s]


#### save data of all subjects and sessions

In [4]:
# Print the column dtypes and memory footprint of the combined frame;
# memory_usage="deep" makes pandas inspect object columns for their real size.
feature_df.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 3781111 entries, ('001_1_a', 0) to ('014_1_b', 147354)
Data columns (total 15 columns):
 #   Column                                     Dtype  
---  ------                                     -----  
 0   combined_eye_closure                       float64
 1   left_image_eye_closure                     float64
 2   right_image_eye_closure                    float64
 3   combined_eye_state                         Int8   
 4   left_image_eye_state                       Int8   
 5   right_image_eye_state                      Int8   
 6   subject_id                                 Int8   
 7   session_id                                 Int8   
 8   session_type                               Int8   
 9   num_blinks                                 Int16  
 10  mean_blink_length                          float64
 11  mean_opening_velocity                      float64
 12  mean_closing_velocity                      float64
 13  karolinska_respon

In [5]:
# Persist the combined feature frame.
# NOTE(review): "240s" in the file name presumably mirrors the
# interval_in_sec=240 window used when the features were built — keep in sync.
feature_df.to_pickle("./all_features_240s.pkl")

In [6]:
# NOTE(review): a bare `break` at cell level is a SyntaxError, so this cell
# always fails. Presumably it is a deliberate stop so that "Run All" halts
# before the exploratory cells below — confirm, or replace it with an explicit guard.
break

SyntaxError: 'break' outside loop (<ipython-input-6-6aaf1f276005>, line 4)

In [None]:
# Show num_blinks cast to the nullable Int16 dtype. The result is only
# displayed; nothing is assigned, so feature_df itself is left unchanged.
feature_df["num_blinks"].astype("Int16")

### alternative way of calculating blink features without eye state (not working)

In [None]:
# Split df into maximal runs of consecutive frames that share the same
# direction flag (slope <= 0 or slope > 0). For every frame, record the mean
# slope and the length of the run it belongs to.
# NOTE(review): `df` is not created anywhere in this notebook; it is assumed to
# be a frame with a "slope" column and a default integer index — confirm.
opening = df.slope <= 0
# `~` is the logical NOT for a boolean Series (unary minus is rejected by
# NumPy for boolean arrays).
closing = ~opening

mean_velocity = []  # per frame: mean slope of the run the frame belongs to
blink_lengths = []  # per frame: number of frames in that run
indices_to_track = []  # index labels of the run currently being collected

all_indices = []  # one list of index labels per completed run
status = True  # direction flag of the run currently being collected
for index in df.index:
    if opening[index] == status:
        indices_to_track.append(index)
        continue

    # The direction flipped: close the current run (if any) ...
    if indices_to_track:
        all_indices.append(indices_to_track)

        mean_slope = np.mean(
            df["slope"][indices_to_track[0]:indices_to_track[-1] + 1])
        length = len(indices_to_track)
        mean_velocity.extend([mean_slope] * length)
        blink_lengths.extend([length] * length)

    # ... and start a new one with the current frame.
    indices_to_track = [index]
    status = opening[index]

# Close the last run. The guard keeps an empty df from raising IndexError.
if indices_to_track:
    mean_slope = np.mean(
        df["slope"][indices_to_track[0]:indices_to_track[-1] + 1])
    length = len(indices_to_track)

    mean_velocity.extend([mean_slope] * length)
    blink_lengths.extend([length] * length)
    all_indices.append(indices_to_track)

In [None]:
# Row count of df, for comparison with the per-frame lists built above.
print(len(df))

In [None]:
# Attach the per-frame results of the run segmentation to df (in place).
# The two lists must contain exactly one entry per row of df, otherwise pandas
# raises a ValueError on assignment.
df["mean_velocity"] = mean_velocity
df["opening"] = opening
df["blink_length"] = blink_lengths

In [None]:
# Display df to inspect the columns added above.
df

In [None]:
# Snapshot of df; the next cell restores df from it.
# NOTE(review): the name `copy` would shadow the stdlib `copy` module if that
# is ever imported in this notebook.
copy = df.copy()

In [None]:
# Reset df to the snapshot, discarding any columns added since it was taken.
df = copy.copy()

In [None]:
# mean opening velocity, mean closing velocity, mean opening time, mean closing time
open_close_tuples = []
for i, indices in enumerate(all_indices):
    opening = df["opening"][indices[0]]
    if not opening:
        is_blink = np.any(df["values"][indices] > .8)
        if is_blink:
            closing_indices = indices
            try:
                opening_indices = all_indices[i + 1]
            except IndexError:
                continue
            open_close_tuples.append((closing_indices, opening_indices))

In [None]:
# Create the per-blink result columns, filled with pd.NA until values are
# written for the frames where a closing or opening run starts.
for column in ("opening_velocity", "closing_velocity", "opening_time",
               "closing_time"):
    df[column] = pd.NA

In [None]:
# For every blink, copy the run statistics onto the first frame of its closing
# run and onto the first frame of its opening run.
for closing_run, opening_run in open_close_tuples:
    first_opening_frame = opening_run[0]
    first_closing_frame = closing_run[0]
    for frame, target, source in (
        (first_opening_frame, "opening_velocity", "mean_velocity"),
        (first_closing_frame, "closing_velocity", "mean_velocity"),
        (first_opening_frame, "opening_time", "blink_length"),
        (first_closing_frame, "closing_time", "blink_length"),
    ):
        df.loc[frame, target] = df[source][frame]

In [None]:
# count actual blinks
total_blinks = len(open_close_tuples)
total_time_min = len(df) / 30 / 60
blink_per_min = total_blinks / total_time_min
print(
    f"{total_blinks} blinks in {total_time_min} mins. Resulting in {blink_per_min} blinks/min."
)

In [None]:
# NOTE(review): `bins` is computed here but never passed on — the histogram
# below uses bins=50. Confirm which binning was intended.
bins = [x / 11 for x in range(12)]
# Histogram of the df["values"] column.
state_hist = (df["values"]).plot(kind="hist",
                                 title="eye value distribution",
                                 grid=True,
                                 bins=50)

use given eye state signal

In [None]:
# Combined eye-state column as nullable Int16 ("Int16" is the string alias of
# pd.Int16Dtype()).
eye_state = feature_df["combined_eye_state"].astype("Int16")

In [None]:
# Display the eye-state series.
eye_state

In [None]:
# Line plot of the eye state, with missing entries dropped first.
eye_state.dropna().plot()

In [None]:
# NOTE(review): this differences the *values* of the entries equal to 1, so
# every result after the first is 0. If the spacing between those entries is
# wanted, the frame numbers have to be differenced instead — confirm intent.
eye_state[eye_state == 1].diff()

In [None]:
plt.rcParams["figure.figsize"] = 7, 4

# Histogram of the number of frames between consecutive entries with state 1.
# eye_state carries the (filename, frame) MultiIndex of feature_df, so the
# index entries are tuples and cannot be differenced directly. Turn the index
# into columns and difference the frame numbers within each file, so that no
# gap is measured across two different recordings.
closed_frames = eye_state[eye_state == 1].index.to_frame(index=False)
closed_frames.groupby("filename")["frame"].diff().hist(
    bins=range(0, 1000, 100))

In [None]:
# Two-column view used by the exploratory cells below: the combined closure
# signal next to the combined eye state.
both_df = feature_df[["combined_eye_closure", "combined_eye_state"]]

In [None]:
# Distribution of the closure signal over the frames whose eye state is 5.
# NOTE(review): the meaning of state 5 is not defined in this notebook — confirm.
both_df.loc[both_df["combined_eye_state"] == 5,
            "combined_eye_closure"].hist(bins=20)

In [None]:
# Closure signal for rows 168485..168549, selected by position.
both_df["combined_eye_closure"].iloc[168485:168550].plot()

In [None]:
# Closure signal for rows 200..299, selected by position.
plt.plot(both_df["combined_eye_closure"].iloc[200:300])

In [None]:
# Rows 55470..55475 of both columns, selected by position.
both_df.iloc[55470:55476]

In [None]:
# Plot the closure signal around n randomly chosen frames with eye state 1.
n = 2
# both_df carries the (filename, frame) MultiIndex, so its index entries are
# tuples and cannot be shifted by an integer offset. Work on position-indexed
# copies instead, so that idx is a plain row position.
closure = both_df["combined_eye_closure"].reset_index(drop=True)
state = both_df["combined_eye_state"].reset_index(drop=True)
sub_df = state[state == 1].sample(n=n)
for idx in sub_df.index:
    # Clamp the start at 0: a negative start would count from the end of the
    # series. NOTE(review): a window may span two recordings when idx lies
    # close to a file boundary.
    plt.plot(closure.iloc[max(idx - 50, 0):idx + 70])
    plt.vlines(idx, ymin=0, ymax=1, colors="red", label=str(idx))
    plt.legend()
    plt.show()

In [None]:
#print(start_idx, closed_idx, end_idx)
# plt.plot(both_df["combined_eye_closure"][start_idx - 10:closed_idx + 10])
# plt.vlines(start_idx, ymin=0, ymax=1, colors="red", label="start/end of blink")
# plt.vlines(closed_idx, ymin=0, ymax=1, colors="green", label="max closed")
# plt.vlines(end_idx, ymin=0, ymax=1, colors="red")
# #plt.legend()
# plt.show()