In [None]:
%load_ext autoreload
%autoreload 2
%cd ..

from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [None]:
import pickle
from typing import Dict, Tuple
from collections import Counter

import matplotlib.pyplot as plt
from matplotlib.dates import date2num
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import scipy.signal
import scipy.fftpack

from util.paths import DATA_PATH
from util.datasets.sliding_window import SlidingWindowDataset
from util.filter import apply_butterworth_bandpass_filter, apply_butterworth_lowpass_filter
from util.mathutil import get_peaks, PeakType, cluster_1d, Cluster
from util.rule_based_apnea_detector import _detect_airflow_apnea_areas__new

# Some preparations to pretty-print tensors & ndarrays:
#  - edgeitems: show 10 leading/trailing items per dimension when an array is summarized
#  - linewidth: allow 400 characters per printed line before wrapping
# The line width is set through the public print-options API; writing to the
# private `np.core.arrayprint._line_width` attribute is not a supported way to
# configure it and has no effect on the printed width in current NumPy.
np.set_printoptions(edgeitems=10, linewidth=400)

In [None]:
# Load one recording of the training set and wrap it in a sliding-window view:
# signals are requested at 5 Hz, each window spans two minutes.
dataset_folder = DATA_PATH / "training" / "tr03-0005"
config = SlidingWindowDataset.Config(
    physionet_dataset_folder=dataset_folder,
    downsample_frequency_hz=5,
    time_window_size=pd.Timedelta(minutes=2),
)
sliding_window_dataset = SlidingWindowDataset(config=config, allow_caching=True)

# Short sanity summary of what has been loaded.
print(f"#Physionet dataset samples: {len(sliding_window_dataset.signals)}")
print(f"#Sliding window positions: {len(sliding_window_dataset)}")
center_points = sliding_window_dataset.valid_center_points
print(f"Timeframe of sliding window positions: {center_points[-1] - center_points[0]}")
print(f"Apnea events list present: {sliding_window_dataset.apnea_events is not None}")


### Filter events so that only _apnea events_ remain

In [None]:
# List all annotated apnea events of the loaded recording.
annotated_apnea_events = sliding_window_dataset.apnea_events
print(f"{len(annotated_apnea_events)} apnea events:")
print(" - " + "\n - ".join([f"#{i}: {evt}" for i, evt in enumerate(annotated_apnea_events)]))

# Enrich whole sliding window dataset by an events outline:
# a signal-aligned column that is 10.0 inside annotated events and 0.0 elsewhere.
signals_index = sliding_window_dataset.signals.index
annotated_events_outline_mat = np.zeros(shape=(len(signals_index),))
for apnea_event in annotated_apnea_events:
    # `Index.get_loc(..., method="nearest")` was removed in pandas 2.0;
    # `get_indexer` with `method="nearest"` performs the same nearest-sample lookup.
    start_idx, end_idx = signals_index.get_indexer([apnea_event.start, apnea_event.end], method="nearest")
    # The end position is exclusive, as with any Python slice.
    annotated_events_outline_mat[start_idx:end_idx] = 10.0
annotated_events_outline_series = pd.Series(data=annotated_events_outline_mat, index=signals_index)
# NOTE: this adds a column to the dataset's own `signals` frame; the cells below
# read it back via `window_data.signals["Annotated apnea events"]`.
sliding_window_dataset.signals["Annotated apnea events"] = annotated_events_outline_series

# Drop the temporaries, only the new column is needed from here on.
del annotated_events_outline_series, annotated_events_outline_mat, signals_index

In [None]:
# Plot every signal channel of the window centred on one annotated apnea event.
event_num = 8
event = annotated_apnea_events[event_num]

# Centre the window on the temporal midpoint of the event.
event_duration = event.end - event.start
window_center_point = event.start + event_duration / 2
window_data = sliding_window_dataset.get(center_point=window_center_point)

# One subplot per column; binding the result to `_` keeps the axes repr out of the cell output.
_ = window_data.signals.plot(subplots=True, figsize=(20, 10))

### Peak detection experiments

In [None]:
# Visualise the peaks that `get_peaks` finds in one signal around an annotated event.
event_num = 2
signal_name = "AIRFLOW"
event = annotated_apnea_events[event_num]

# Centre the window on the temporal midpoint of the event.
window_center_point = event.start + (event.end-event.start)/2
window_data = sliding_window_dataset.get(center_point=window_center_point)

#####

# Filter kernel width in samples: 0.7 s worth of samples at the dataset's sampling rate, truncated to int.
kernel_width = int(sliding_window_dataset.config.downsample_frequency_hz*0.7)
peaks = get_peaks(waveform=window_data.signals[signal_name].values, filter_kernel_width=kernel_width)
# Paint each peak as a plateau at its extreme value over its [start, end) sample range.
peaks_mat = np.zeros(shape=(window_data.signals.shape[0],))
for p in peaks:
    peaks_mat[p.start:p.end] = p.extreme_value
peaks_ser = pd.Series(peaks_mat, index = window_data.signals.index, name=f"{signal_name} peaks")

#####

# `.ffill()` replaces the deprecated `.fillna(method="pad")` (identical forward-fill semantics).
data = pd.concat([window_data.signals[signal_name], peaks_ser, window_data.signals["Annotated apnea events"]], axis=1).ffill()

data.plot(figsize=(20,7), subplots=False)

# Baseline = root-mean-square of all peak extreme values, drawn symmetrically around zero.
overall_baseline = np.sqrt(np.mean([np.square(p.extreme_value) for p in peaks]))
plt.axhline(y=overall_baseline, linestyle='--', color="pink")
# Trailing semicolon keeps the Line2D repr out of the cell output.
plt.axhline(y=-overall_baseline, linestyle='--', color="pink");

### AIRFLOW-based apnea detection development

In [None]:
# Run the rule-based AIRFLOW apnea detector on the window around one annotated
# event and plot its detections next to the annotation.
event_num = 2
signal_name = "AIRFLOW"
event = annotated_apnea_events[event_num]

# Centre the window on the temporal midpoint of the event.
window_center_point = event.start + (event.end-event.start)/2
window_data = sliding_window_dataset.get(center_point=window_center_point)

# --------------

# Filter kernel width in samples: 0.7 s worth of samples at the dataset's sampling rate, truncated to int.
kernel_width = int(sliding_window_dataset.config.downsample_frequency_hz*0.7)
peaks = get_peaks(waveform=window_data.signals[signal_name].values, filter_kernel_width=kernel_width)

# ---------------

# Baseline = root-mean-square of all peak extreme values.
overall_baseline = np.sqrt(np.mean([np.square(p.extreme_value) for p in peaks]))
# Peaks whose magnitude stays within the baseline. NOTE: `low_peaks_ser` is not
# part of the plot below; it is only built here and left in the namespace.
low_peaks = [p for p in peaks if abs(p.extreme_value)<=overall_baseline]
low_peaks_mat = np.zeros(shape=(window_data.signals.shape[0],))
for p in low_peaks:
    low_peaks_mat[p.start:p.end] = p.extreme_value
low_peaks_ser = pd.Series(low_peaks_mat, index = window_data.signals.index, name="AIRFLOW below-baseline peaks")

# ---------------

clusters, coarse_types = _detect_airflow_apnea_areas__new(window_data.signals["AIRFLOW"].values, sample_frequency_hz=sliding_window_dataset.config.downsample_frequency_hz)
# Outline of detections: every cluster adds 20.0 over its [start, end) sample range.
cluster_mat = np.zeros(shape=(window_data.signals.shape[0],))
for c in clusters:
    cluster_mat[c.start:c.end] += 20.0
clusters_ser = pd.Series(cluster_mat, index=window_data.signals.index, name="AIRFLOW: detected apneas")
print("Detected apneas:")
# Cluster length is divided by the sampling frequency to report a duration in seconds.
print(" - " + "\n - ".join([f"{t.name}: {c.length/sliding_window_dataset.config.downsample_frequency_hz:.1f}s" for c, t in zip(clusters, coarse_types)]))



# `.ffill()` replaces the deprecated `.fillna(method="pad")` (identical forward-fill semantics).
data = pd.concat([window_data.signals["AIRFLOW"], clusters_ser, window_data.signals["Annotated apnea events"]], axis=1).ffill()

data.plot(figsize=(20,10), subplots=False)
plt.axhline(y=overall_baseline, linestyle='--', color="pink")
plt.axhline(y=-overall_baseline, linestyle='--', color="pink")

# Optional debug aid (uses get_indexer, since Index.get_loc(method=...) no longer exists in pandas 2.x):
# print(f"Annotated event position: {window_data.signals.index.get_indexer([event.start], method='nearest')[0]}..{window_data.signals.index.get_indexer([event.end], method='nearest')[0]}")
# A bare `None` as the last expression keeps the cell from echoing the axhline return value.
None