In [None]:
# What version of Python do you have?
import sys
import os
import tensorflow.keras
import pandas as pd
import sklearn as sk
import tensorflow as tf
import numpy as np
import re
import mne
import pathlib
import openpyxl
from datetime import datetime
import pytz
import random
import os
from skimage.restoration import (denoise_wavelet, estimate_sigma)
from pathlib import Path
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers, models, Sequential
from matplotlib import pyplot as plt
import psutil
import gc
pd.io.parquet.get_engine('auto').__class__
%matplotlib inline

# Environment report: framework versions, a blank separator line, then the
# interpreter and data-stack versions (one item per output line).
print(
    f"Tensor Flow Version: {tf.__version__}",
    f"Keras Version: {tensorflow.keras.__version__}",
    "",
    f"Python {sys.version}",
    f"Pandas {pd.__version__}",
    f"Scikit-Learn {sk.__version__}",
    sep="\n",
)

# True when TensorFlow lists at least one physical GPU device.
gpu = bool(tf.config.list_physical_devices('GPU'))
print("GPU is", "available" if gpu else "NOT AVAILABLE")

Globals

In [None]:
# --- Signal parameters -------------------------------------------------------
# Presumably the EEG sampling rate in Hz (get_window's `frequency` default
# uses the same value) -- TODO confirm.
FREQ = 256

# --- Dataset locations (relative path strings) -------------------------------
database_path = "Dataset/CHB-MIT/chb-mit-scalp-eeg-database-1.0.0/"
filtered_database_path = "Dataset/CHB-MIT/Filtered-chb-mit/"
# (sic) variable name kept as-is because other cells may reference it.
filted_db_parquet_path = "Dataset/CHB-MIT/dataframe-parquet"
patient_one_path = "chb04/"

# --- File-name suffixes ------------------------------------------------------
edf_file_type = ".edf"
summary_txt_file_type = "-summary.txt"

# Working directory at the moment this cell runs; echoed as the cell output.
cwd = os.getcwd()
cwd

In [None]:
def remove_cols(dataframe, col_start = 0, col_end = 0):
    """Return the positional column slice ``[col_start, col_end)`` of a frame.

    Args:
        dataframe: Frame to slice; it is not modified.
        col_start: Position of the first column to keep.
        col_end: Position one past the last column to keep. When left at 0
            the slice stops one short of the frame's column count, i.e. the
            final column is dropped.

    Returns:
        The ``iloc`` column slice of ``dataframe`` with all rows kept.
    """
    stop = len(dataframe.columns) - 1 if col_end == 0 else col_end
    return dataframe.iloc[:, col_start:stop]

In [None]:
# Column labels that are dropped from every loaded frame when present.
matches = ['.-0','.-1', '.-2', '.-3', '.-4', 'STI 014']

def read_compressed_df(path):
    """Load one parquet file and split its rows by the ``class`` column.

    Args:
        path: Location of the parquet file, passed straight to
            ``pd.read_parquet``.

    Returns:
        A tuple ``(sz_df, prei_one_df, prei_two_df, inter_df, channels)``:
        the rows labelled "seizure", "Preictal I", "Preictal II" and
        "Interictal" respectively, each cut down by ``remove_cols`` with
        ``col_end=10``, followed by the column names of the seizure frame
        other than "class" and "timestamp".

    Raises:
        KeyError: If the file has no ``class`` column.
    """
    # errors="ignore" drops only the labels that actually exist, so no
    # blanket try/except is needed and unrelated failures are no longer hidden.
    df = pd.read_parquet(path).drop(columns=matches, errors="ignore")

    def _rows_labelled(label):
        # All rows whose class equals `label`, original row index preserved.
        return df.loc[df['class'] == label]

    # Only the seizure frame gets a fresh 0..n-1 index; the other three keep
    # the row labels they had in the full frame (same as before this rewrite).
    sz_df = remove_cols(_rows_labelled("seizure").reset_index(drop=True), col_end=10)
    prei_one_df = remove_cols(_rows_labelled("Preictal I"), col_end=10)
    prei_two_df = remove_cols(_rows_labelled("Preictal II"), col_end=10)
    inter_df = remove_cols(_rows_labelled("Interictal"), col_end=10)

    channels = [name for name in sz_df.columns if name not in ("class", "timestamp")]

    return (sz_df, prei_one_df, prei_two_df, inter_df, channels)

In [None]:
def get_window(channel, start_index, data, size = 4, overlap = 0, is_sezure = False, frequency = 256):
    """Cut one fixed-length window of a single channel out of ``data``.

    Args:
        channel: Column of ``data`` to read the samples from.
        start_index: Zero-based window number; consecutive windows are
            ``(size - overlap) * frequency`` rows apart.
        data: Frame holding the ``channel`` column and a ``timestamp`` column.
        size: Window length; multiplied by ``frequency`` to get the row count.
        overlap: Overlap between consecutive windows, in the same unit as
            ``size``. Ignored when ``is_sezure`` is true.
        is_sezure: When true the overlap is forced to 2.
        frequency: Rows per unit of ``size``.

    Returns:
        A two-item list ``[samples, clock]``: the window's values as a plain
        list, and the first row's timestamp (divided by 1000 and read as
        epoch seconds) formatted ``HH:MM:SS`` in the local time zone.

    Raises:
        IndexError: If the window starts at or beyond the end of ``data``.
    """
    # Seizure windows always use an overlap of 2, whatever was passed in.
    step = size - (2 if is_sezure else overlap)
    first = start_index * step * frequency
    last = first + (size * frequency)

    samples = data[channel][first:last].tolist()
    first_stamp = (data['timestamp'][first:first + 1] / 1000).tolist()[0]
    return [samples, datetime.fromtimestamp(first_stamp).strftime('%H:%M:%S')]

In [None]:
def get_max_window_iteration(dataframe, buffer, frequency = 256):
    """Return how many whole ``buffer``-long strides fit into ``dataframe``.

    Args:
        dataframe: Any sized object (``len`` is the only thing used).
        buffer: Stride length; multiplied by ``frequency`` to get rows.
        frequency: Rows per unit of ``buffer``. Defaults to 256, the value
            that used to be hard-coded here, so existing two-argument calls
            behave exactly as before.

    Returns:
        ``len(dataframe) / (buffer * frequency)`` truncated to an int.

    Raises:
        ZeroDivisionError: If ``buffer * frequency`` is zero.
    """
    return int(len(dataframe) / (buffer * frequency))


In [None]:
def spec_transform_save_to_folder(index, win, channel, patient_state, patient, plot_title = False, sampling_rate = 500):
    """Denoise one signal window, render its spectrogram and save it as a PNG.

    The output root is the notebook-level name ``external_hardisk_drive_path``,
    which must be defined before this function is called.

    Args:
        index: Window number; becomes part of the file name.
        win: Pair ``[samples, "HH:MM:SS"]`` in the shape ``get_window`` returns.
        channel: Channel label; used in the optional title and the file name.
        patient_state: "seizure", "interictal", "prei_one" or "prei_two";
            selects the output sub-folder. For any other value the
            spectrogram is drawn but nothing is written.
        patient: Identifier used as the file-name prefix.
        plot_title: When true, a title is added to the figure.
        sampling_rate: Value passed as ``Fs`` to ``plt.specgram``. Defaults to
            500, the value that used to be hard-coded.
            NOTE(review): the notebook's FREQ constant and get_window's
            default are 256 -- confirm which rate the saved images should use.

    Returns:
        None. If the samples cannot be converted to floats the problem is
        printed and the window is skipped without saving anything.

    Raises:
        ValueError: If ``win[1]`` is not in ``HH:MM:SS`` form.
    """
    # Sub-folder under <root>/windows/ for each recognised state.
    output_folders = {
        "seizure": "Seizure",
        "interictal": "Interictal",
        "prei_one": "Preictal_One",
        "prei_two": "Preictal_Two",
    }

    # Round-trip through strptime validates the clock string; ':' is replaced
    # because the value ends up inside a file name.
    time_of_observation = datetime.strptime(win[1], "%H:%M:%S").strftime("%H:%M:%S").replace(":", "-")

    try:
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
        series = np.array(win[0]).astype(np.float64)
    except Exception as e:
        print(f"error: {e}")
        print(f"patient_state: {patient_state} channel: {channel} index: {index} window: {win[0]}")
        # Unconvertible samples cannot be denoised; skip this window.
        return

    # `multichannel` is left at its default: the keyword is deprecated since
    # scikit-image 0.19 in favour of `channel_axis`, and both defaults mean
    # "single channel", so omitting it works on old and new releases alike.
    denoised_series = denoise_wavelet(series, method='BayesShrink', wavelet='db6', mode='soft', rescale_sigma=True, wavelet_levels=3)

    fig = plt.figure(figsize=(7,7))
    try:
        if plot_title:
            plt.title(f"{channel} : is_seizure = {patient_state} : {time_of_observation}")

        plt.specgram(denoised_series, Fs=sampling_rate, cmap='jet')

        folder = output_folders.get(patient_state)
        if folder is not None:
            plt.savefig(f'{external_hardisk_drive_path}/windows/{folder}/{patient}_{index}_{channel}_{time_of_observation}.png')
    finally:
        # Always release the figure, even when drawing or saving fails;
        # this function is called once per window, so leaks add up quickly.
        fig.clf()
        plt.close(fig)
        gc.collect()

In [None]:
count = 3

# Explicit opt-in switch. It replaces the former `--interupter--` line, which
# was a deliberate syntax error that kept this long-running export from
# executing during "Run All". Set it to True to generate the spectrograms.
RUN_SPECTROGRAM_EXPORT = False

if RUN_SPECTROGRAM_EXPORT:
    current_file = None  # remembered so a failure can be attributed to a file
    try:
        for filename in files[7:15]:
            current_file = filename
            print("started file: " + str(filename) + " index: " + str(count))
            sz, prei_one, prei_two, inter, selected_channels = read_compressed_df(filename)
            patient = re.search('/Volumes/LaCie/Database/(.*).parquet.gzip', filename).group(1)

            # (state label, source frame, stride passed to
            #  get_max_window_iteration, use seizure windowing?)
            # Each state is windowed from its OWN frame; the preictal states
            # used to be cut from `inter` by mistake, which mislabelled
            # interictal data as preictal.
            segments = (
                ("interictal", inter, 4, False),
                ("seizure", sz, 2, True),
                ("prei_one", prei_one, 4, False),
                ("prei_two", prei_two, 4, False),
            )

            for channel in selected_channels:
                for patient_state, frame, buffer, seizure_windowing in segments:
                    if frame.empty:
                        continue
                    # Windows are produced and consumed one at a time so the
                    # whole list never has to sit in memory.
                    for index in range(get_max_window_iteration(frame, buffer)):
                        window = get_window(channel=channel, start_index=index, data=frame, is_sezure=seizure_windowing)
                        spec_transform_save_to_folder(index=index, win=window, channel=channel, patient_state=patient_state, patient=patient)

            count += 1
            memory = psutil.virtual_memory()
            print(f"memory usage = {memory.percent} : available memory = {memory.available * 100 / memory.total}")
            print(f"filename: {filename} = done : count = {count} : files left = {len(files) - count} : time of creation = {datetime.now()}")
            del sz, prei_one, prei_two, inter, segments
    except Exception as e:
        # The run stops at the first failure (as before); report which file
        # and which kind of error so the cause can be tracked down.
        print(f"export aborted on file {current_file}: {type(e).__name__}: {e}")