In [1]:
import pandas as pd
import numpy as np
from pathlib import Path
import re
import os
from dataclasses import dataclass
import utils

  from .autonotebook import tqdm as notebook_tqdm


In [30]:
#TODO IMPLEMENT LOSO FEATURE
@dataclass
class SeizureDataLoader:
    """Assemble feature/label arrays from ``.npy`` recordings and seizure event tables.

    The loader walks ``npy_dataset_path`` (one sub-directory per patient, one
    ``<recording>.npy`` file per recording), looks up the start/stop events of
    every recording in the CSV tables stored under ``event_tables_path`` and
    hands the windowing over to ``utils.extract_training_data_and_labels``.

    Attributes:
        npy_dataset_path: Directory holding one folder of ``.npy`` files per patient.
        event_tables_path: Directory holding ``<patient>_start.csv`` and
            ``<patient>_stop.csv`` event tables.
        loso_patient: Not used yet (see the LOSO TODO); presumably the patient
            to hold out in a leave-one-subject-out split.
        sampling_f: Forwarded to the extraction helper as ``fs``.
        seizure_lookback: Forwarded unchanged to the extraction helper.
        sample_timestep: Forwarded unchanged to the extraction helper.
        overlap: Forwarded unchanged to the extraction helper.

    After ``_get_labels_and_features()`` has run, the results are available as
    ``_features``, ``_labels`` and ``_time_labels``.
    """
    npy_dataset_path :str
    event_tables_path : str
    loso_patient : str = None
    sampling_f : int = 256
    seizure_lookback: int = 600
    sample_timestep: int = 5
    overlap: int = 0

    def _get_event_tables(self,patient_name):
        """Load the start and stop event tables of one patient.

        The tables are identified by their ``_start.csv`` / ``_stop.csv``
        suffix instead of by their position in ``os.listdir`` (whose order is
        arbitrary), so start and stop can never be swapped.

        Args:
            patient_name: Patient identifier contained in the table file names.

        Returns:
            Tuple ``(start_events_dict, stop_events_dict)``.

        Raises:
            ValueError: If there is not exactly one start and one stop table
                matching ``patient_name``.
        """
        candidates = sorted(
            name for name in os.listdir(self.event_tables_path) if patient_name in name
        )
        start_tables = [name for name in candidates if name.endswith('_start.csv')]
        stop_tables = [name for name in candidates if name.endswith('_stop.csv')]
        if len(start_tables) != 1 or len(stop_tables) != 1:
            raise ValueError(
                "Expected exactly one '*_start.csv' and one '*_stop.csv' table for "
                f"patient {patient_name!r} in {str(self.event_tables_path)!r}, "
                f"found {candidates}"
            )
        start_events_dict = self._load_csv_table_events(
            os.path.join(self.event_tables_path, start_tables[0])
        )
        stop_events_dict = self._load_csv_table_events(
            os.path.join(self.event_tables_path, stop_tables[0])
        )
        return start_events_dict,stop_events_dict

    def _get_recording_events(self,events_dict,recording):
        """Return the non-NaN events of one recording as a list of ints.

        Args:
            events_dict: Table as returned by ``_load_csv_table_events``.
            recording: Recording id without extension; ``'.edf'`` is appended
                to build the lookup key.

        Raises:
            KeyError: If the recording has no row in ``events_dict``.
        """
        recording_row = events_dict[recording+'.edf']
        # Rows are padded with NaN up to the widest row of the table; drop the padding.
        return [int(x) for x in recording_row.values() if not np.isnan(x)]

    def _load_csv_table_events(self,table_path):
        """Read a CSV event table into ``{row label: {column: value}}`` form."""
        return pd.read_csv(table_path).to_dict('index')

    def _create_edges(self):
        """Placeholder for graph-edge construction; currently does nothing."""
        ## TODO implement creation of nodes based on PLV? Ask Rosmary about that
        pass

    def _get_labels_and_features(self):
        """Extract features, labels and time labels for every recording.

        Results are stored on ``self._features``, ``self._labels`` and
        ``self._time_labels``. The arrays are rebuilt from scratch on every
        call, so re-running the method does not duplicate samples. Patients
        and recordings are visited in sorted order to keep the sample order
        reproducible. Entries that are not patient directories, and files that
        are not ``.npy`` recordings, are skipped. When no recording is found
        the attributes are left untouched.

        Raises:
            ValueError: Propagated from ``np.concatenate`` when recordings
                yield arrays of incompatible shapes (previously this was
                swallowed and silently discarded the data gathered so far).
        """
        feature_chunks, label_chunks, time_label_chunks = [], [], []
        for patient in sorted(os.listdir(self.npy_dataset_path)):
            patient_path = os.path.join(self.npy_dataset_path,patient)
            if not os.path.isdir(patient_path):
                continue
            start_events_dict, stop_events_dict = self._get_event_tables(patient)
            for record in sorted(os.listdir(patient_path)):
                if not record.endswith('.npy'):
                    continue
                recording_path = os.path.join(patient_path,record)
                record_id = record.split('.npy')[0]
                start_event_tables = self._get_recording_events(start_events_dict,record_id)
                stop_event_tables = self._get_recording_events(stop_events_dict,record_id)
                data_array = np.load(recording_path)
                ##TODO add a gateway to reject seizure periods shorter than lookback
                features,labels,time_labels = utils.extract_training_data_and_labels(
                    data_array,
                    start_event_tables,
                    stop_event_tables,
                    fs = self.sampling_f,
                    seizure_lookback = self.seizure_lookback,
                    sample_timestep = self.sample_timestep,
                    overlap = self.overlap,
                )
                feature_chunks.append(features)
                label_chunks.append(labels)
                time_label_chunks.append(time_labels)

        if not feature_chunks:
            return

        # Concatenate once at the end: linear instead of quadratic copying, and
        # any shape error surfaces instead of being mistaken for "first iteration".
        self._features = np.concatenate(feature_chunks)
        self._labels = np.concatenate(label_chunks)
        self._time_labels = np.concatenate(time_label_chunks)
                
                

                

        

In [31]:
# Loader over the converted recordings and the per-patient seizure event tables.
dataloader = SeizureDataLoader(
    npy_dataset_path=Path('npy_data'),
    event_tables_path=Path('event_tables'),
)

In [34]:
dataloader._features.shape

(2597, 18, 1, 1280)

In [33]:
dataloader._get_labels_and_features()

Creating initial attributes


In [None]:
# Despite the name, `df` is the (start_events, stop_events) tuple of dicts
# returned by `_get_event_tables`, not a DataFrame.
df = dataloader._get_event_tables('chb16')

In [None]:
# Events of recording 'chb16_10' taken from the start-event table (df[0]).
list_to_process =dataloader._get_recording_events(df[0], 'chb16_10')

In [None]:
# Display the events extracted for the recording.
list_to_process

In [None]:
# Sanity check: `_get_recording_events` already drops NaN entries, so this
# should display the same values as `list_to_process`.
[event for event in list_to_process if not np.isnan(event)]

In [None]:
# `df` is a (start_events, stop_events) tuple, so `df.to_dict(...)` raises
# AttributeError. The tables are already in `to_dict('index')` form; show the
# start-event table directly.
df[0]

In [None]:
# Build the path from its parts: the previous backslash literal contained the
# invalid escape sequence `\c` and only resolved on Windows.
path_to_file = Path("raw_dataset") / "chb16" / "chb16-summary.txt"
# Read inside a context manager so the file handle is closed again.
with open(path_to_file,'r') as summary_file:
    summary_lines = summary_file.readlines()
summary_lines

In [None]:
# Sample recording file name used to try out the extension stripping.
string = 'chb10_27.edf'

In [None]:
# Everything before the first '.edf' is the recording id.
string.partition('.edf')[0]

In [None]:
# Directories handed to `save_timeseries_array` in the next cell.
ds_path = Path('preprocessed_data')
target_path = Path('npy_data')

In [None]:
# NOTE(review): `save_timeseries_array` is neither defined nor imported in the
# visible cells — confirm where it comes from (possibly `utils`), otherwise this
# cell raises NameError on a fresh kernel.
save_timeseries_array(ds_path,target_path)

In [None]:
def get_patient_annotations(path_to_file : Path, savedir : Path):
    """Convert one patient summary text file into start/stop seizure CSV tables.

    The summary is scanned for ``File Name`` lines and for
    ``Number of Seizures in File`` lines; the ``2 * n`` lines following a
    non-zero seizure count are read as alternating ``Start Time`` /
    ``End Time`` entries whose first integer after the colon is stored.

    Two files are written into ``savedir`` (created if missing, including
    parents): ``<patient>_start.csv`` and ``<patient>_stop.csv``. Each has one
    row per recording that contains seizures and columns ``Seizure 1`` ..
    ``Seizure N``; the patient id is the part of the last file name before the
    first underscore.

    Args:
        path_to_file: Path of the summary text file.
        savedir: Output directory for the two CSV tables.

    Raises:
        ValueError: If the summary contains no ``File Name`` line, so no
            patient id can be derived.
    """
    # Context manager so the handle is closed even if parsing fails.
    with open(path_to_file,'r') as raw_txt:
        raw_txt_lines = raw_txt.readlines()

    event_dict_start = dict()
    event_dict_stop = dict()
    time_pattern = re.compile(r'\d+')
    current_file_name = None
    for n,line in enumerate(raw_txt_lines):
        if "File Name" in line:
            # strip() instead of [:-1]: the last line may lack a trailing newline.
            current_file_name = line.split(': ')[1].strip()
        if "Number of Seizures in File" in line:
            # Parse everything after the colon; slicing the last two characters
            # turned counts of 10 or more into their final digit.
            num_of_seizures = int(line.rsplit(':', 1)[1])
            if num_of_seizures > 0:
                events_in_recording = raw_txt_lines[n+1:n+num_of_seizures*2+1]
                for event in events_in_recording:
                    if "Start Time" in event:
                        target_dict = event_dict_start
                    elif "End Time" in event:
                        target_dict = event_dict_stop
                    else:
                        continue
                    # Only look right of the first colon so a seizure index in
                    # the label ("Seizure 2 Start Time") is never picked up.
                    time_value = int(time_pattern.search(event.split(':', 1)[1]).group())
                    target_dict.setdefault(current_file_name, []).append(time_value)

    if current_file_name is None:
        raise ValueError(f"No 'File Name' entry found in {path_to_file}")

    # Both tables share the same columns, wide enough for the longest row of either.
    max_events = max(
        (len(times) for table in (event_dict_start, event_dict_stop) for times in table.values()),
        default=0,
    )
    col_list = [f'Seizure {n}' for n in range(1, max_events + 1)]

    def _events_to_frame(event_dict):
        # Build the frame, pad it to the shared width, then label the columns.
        frame = pd.DataFrame.from_dict(event_dict,orient='index')
        frame = frame.reindex(columns=range(max_events))
        frame.columns = col_list
        return frame

    df_start = _events_to_frame(event_dict_start)
    df_end = _events_to_frame(event_dict_stop)

    patient_id = current_file_name.split('_')[0]
    os.makedirs(savedir, exist_ok=True)
    dst_dir_start = os.path.join(savedir,f"{patient_id}_start.csv")
    dst_dir_stop = os.path.join(savedir,f"{patient_id}_stop.csv")
    # index_label=False omits the index header field; read_csv then uses the
    # first column (recording file name) as the index.
    df_start.to_csv(dst_dir_start,index_label=False)
    df_end.to_csv(dst_dir_stop,index_label=False)

In [None]:
def get_annotation_files(dataset_path, savedir=Path("event_tables")):
    """Generate event tables for every patient summary file under ``dataset_path``.

    Each sub-directory of ``dataset_path`` is searched for files whose name
    contains ``"summary"``; every match is passed to
    ``get_patient_annotations``. Entries that are not directories are skipped.
    Folders and files are visited in sorted order so the result is
    reproducible.

    Args:
        dataset_path: Directory containing one folder per patient.
        savedir: Directory the event tables are written to. Defaults to
            ``event_tables``, the location that used to be hard-coded.

    Returns:
        List of the summary file paths that were processed, in processing order.
    """
    annotation_paths = []
    for folder in sorted(os.listdir(dataset_path)):
        patient_folder_path = os.path.join(dataset_path,folder)
        if not os.path.isdir(patient_folder_path):
            continue
        for filename in sorted(os.listdir(patient_folder_path)):
            if "summary" in filename:
                annotation_path = os.path.join(patient_folder_path,filename)
                get_patient_annotations(annotation_path,savedir)
                annotation_paths.append(annotation_path)
    return annotation_paths


In [None]:
# Write the start/stop event tables for every patient folder under raw_dataset.
annotation_files = get_annotation_files(Path("raw_dataset"))
