In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from imblearn.over_sampling import RandomOverSampler

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix, roc_curve

import glob

from matplotlib.dates import DateFormatter
# Matplotlib date formatter built from the strftime pattern
# hours:minutes:seconds:microseconds.
dateformat = DateFormatter(fmt = '%H:%M:%S:%f')

In [46]:
def import_data(patient_num):
    """Load, label and clean the raw recording of one original-cohort patient.

    Steps, in order:
      1. Collect event timestamps from the centered labels file and from the
         reviewed file (only rows whose stripped ``Review`` value is 'Yes').
      2. Read the raw readings and outer-merge the event timestamps onto them,
         producing a boolean ``sgb`` column (True where a timestamp matched).
      3. Drop every reading that lies inside a 'Meal' event
         (start <= Time <= start + Duration).
      4. Divide the six channel columns by 10000.
      5. Apply the patient-specific repairs for '008', '009' and '010'.

    Parameters
    ----------
    patient_num : str
        Patient identifier as used in the file names (e.g. '001'). The
        patient-specific repairs compare against the strings '008', '009'
        and '010', so an integer id would skip them.

    Returns
    -------
    pandas.DataFrame
        The raw columns plus ``CapturedTime`` (time of day) and ``sgb``
        (bool). The index is the row position in the merged frame, so it has
        gaps where meal readings were removed.

    Notes
    -----
    The original implementation used ``.fillna('False')`` on the whole merged
    frame, which stored the *string* 'False' (a truthy value) in ``sgb`` and
    in every other NaN cell. Only ``sgb`` is filled now, with real booleans;
    NaNs in other columns are left as NaN.
    """
    channel_cols = ['Channel_' + str(j) for j in range(1, 7)]

    # First, get the labels
    labels = pd.read_csv(f'../data/centered/original/centered_{patient_num}.csv')

    others = pd.read_csv(f'../data/centered/reviewed/dc_{patient_num}.csv')
    others = others[others['Review'].str.strip() == 'Yes']

    # NOTE(review): timestamps present in both files are not de-duplicated, so
    # they would duplicate the matching reading row in the merge below. This is
    # kept as-is because the hard-coded index cut-offs further down depend on
    # the merged row numbering -- confirm before changing.
    sgb_times = pd.concat([labels[['Time']], others[['Time']]])
    sgb_times['Time'] = pd.to_datetime(sgb_times['Time'])
    sgb_times['sgb'] = True

    # Then the raw data
    readings = pd.read_csv(f'../data/Completed file set Oct13/{patient_num} RAW 00.csv')
    readings['Time'] = pd.to_datetime(readings['Time'])
    readings['CapturedTime'] = readings['Time'].dt.time

    # Combine them to label the event times. The merge joins on the shared
    # column(s) -- presumably only 'Time'; verify against the raw file schema.
    readings = pd.merge(left=readings,
                        right=sgb_times[['Time', 'sgb']],
                        how='outer')
    # Unmatched rows come back as NaN; turn the column into a genuine boolean
    # flag instead of filling the whole frame with the string 'False'.
    readings['sgb'] = readings['sgb'].eq(True)

    # Then remove the meal times
    events = pd.read_csv(f'../data/Completed file set Oct13/{patient_num} EVT 00.csv')
    meals = events[events['EventName'] == 'Meal'].copy()
    meals['Time'] = pd.to_datetime(meals['Time'])
    meals['Duration'] = pd.to_timedelta(meals['Duration'])
    meals['End'] = meals['Time'] + meals['Duration']

    # Accumulate one keep-mask over all meals, then filter once. Each term is
    # the same condition the per-meal filter used, so the result is identical.
    keep = pd.Series(True, index=readings.index)
    for meal_start, meal_end in zip(meals['Time'], meals['End']):
        keep &= (readings['Time'] < meal_start) | (readings['Time'] > meal_end)
    readings = readings.loc[keep].copy()

    # Scale the channels by 1/10000 (intended to bring them into [0, 1];
    # assumes the raw values are at most 10000 -- TODO confirm).
    for channel in channel_cols:
        readings[channel] = readings[channel] / 10000

    # There is a section for patient 008 at the end which is all maxed out
    if patient_num == '008':
        readings = readings.loc[:837833]

    # There is a missing segment of time for 009, so we need to repair the index
    if patient_num == '009':
        readings.index = [x + 4 if x >= 276 else x for x in readings.index]

    # There is a section for patient 010 at the end which is all maxed out
    if patient_num == '010':
        readings = readings.loc[:731110]

    return readings

In [47]:
def import_data_new(patient_num):
    """Load, label and clean the raw recording of one patient of the new cohort.

    Steps, in order:
      1. Collect event timestamps from the centered labels file, the
         double-check file and, when it exists, the patient's
         ``additional/fp_<patient_num>.csv`` file (rows with ``keep == True``,
         whose ``timestamp`` column is used as the event time).
      2. Read the raw readings and outer-merge the event timestamps onto them,
         producing a boolean ``sgb`` column (True where a timestamp matched).
      3. Drop every reading that lies inside a 'Meal' event
         (start <= Time <= start + Duration).
      4. Divide the six channel columns by 10000.

    Parameters
    ----------
    patient_num : str
        Patient identifier as used in the file names (e.g. '02').

    Returns
    -------
    pandas.DataFrame
        The raw columns plus ``CapturedTime`` (time of day) and ``sgb``
        (bool). The index is the row position in the merged frame, so it has
        gaps where meal readings were removed.

    Notes
    -----
    * The original implementation used ``.fillna('False')`` on the whole
      merged frame, which stored the *string* 'False' (a truthy value) in
      ``sgb`` and in every other NaN cell. Only ``sgb`` is filled now, with
      real booleans.
    * The optional file used to be detected by looking for a forward-slash
      path inside ``glob.glob`` results; glob joins file names with the OS
      separator, so that comparison can never match on Windows. The file is
      now checked directly.
    """
    import os  # local import: the notebook's import cell does not provide it

    channel_cols = ['Channel_' + str(j) for j in range(1, 7)]

    # First, get the labels
    labels = pd.read_csv(f'../data/new_2024/centered/centered_{patient_num}.csv')
    others = pd.read_csv(f'../data/new_2024/double_check/dc_{patient_num}.csv')
    time_frames = [labels[['Time']], others[['Time']]]

    # Optional extra labels; only some patients have this file.
    additional_path = f'../data/new_2024/additional/fp_{patient_num}.csv'
    if os.path.isfile(additional_path):
        additional = pd.read_csv(additional_path)
        additional = additional[additional['keep'] == True]
        additional = additional.rename(columns={'timestamp': 'Time'})
        time_frames.append(additional[['Time']])

    sgb_times = pd.concat(time_frames)
    sgb_times['Time'] = pd.to_datetime(sgb_times['Time'])
    sgb_times['sgb'] = True

    # Then the raw data
    readings = pd.read_csv(f'../data/new_data/eval_data/{patient_num} RAW 00.csv')
    readings['Time'] = pd.to_datetime(readings['Time'])
    readings['CapturedTime'] = readings['Time'].dt.time

    # Combine them to label the event times. The merge joins on the shared
    # column(s) -- presumably only 'Time'; verify against the raw file schema.
    readings = pd.merge(left=readings,
                        right=sgb_times[['Time', 'sgb']],
                        how='outer')
    # Unmatched rows come back as NaN; turn the column into a genuine boolean
    # flag instead of filling the whole frame with the string 'False'.
    readings['sgb'] = readings['sgb'].eq(True)

    # Then remove the meal times
    events = pd.read_csv(f'../data/new_data/eval_data/{patient_num} EVT 00.csv')
    meals = events[events['EventName'] == 'Meal'].copy()
    meals['Time'] = pd.to_datetime(meals['Time'])
    meals['Duration'] = pd.to_timedelta(meals['Duration'])
    meals['End'] = meals['Time'] + meals['Duration']

    # Accumulate one keep-mask over all meals, then filter once. Each term is
    # the same condition the per-meal filter used, so the result is identical.
    keep = pd.Series(True, index=readings.index)
    for meal_start, meal_end in zip(meals['Time'], meals['End']):
        keep &= (readings['Time'] < meal_start) | (readings['Time'] > meal_end)
    readings = readings.loc[keep].copy()

    # Scale the channels by 1/10000 (intended to bring them into [0, 1];
    # assumes the raw values are at most 10000 -- TODO confirm).
    for channel in channel_cols:
        readings[channel] = readings[channel] / 10000

    return readings

In [48]:
def prepare_training(patient_num, new=False, duration=20, stride=10,
                     exclusion_seconds=2, min_activity=1, sgb_repeats=10,
                     offset=0):
    """Cut one patient's recording into labelled windows for training.

    The windows are appended to the module-level lists ``X_train``,
    ``y_train`` and ``train_indices``, which must exist before this function
    is called; nothing is returned.

    Negative windows (label 0) start at index labels that
      * are multiples of ``stride``,
      * are not less than ``exclusion_seconds`` away from the previous or the
        next labelled event, and
      * have a total absolute sample-to-sample change, summed over all
        channels, greater than ``min_activity``.
    Positive windows (label 1) start at every row flagged ``sgb`` and are
    appended ``sgb_repeats`` times each as a simple form of oversampling.
    In both cases a window is kept only if all ``duration`` consecutive index
    labels are present (windows crossing a removed meal segment or the end of
    the recording come back short and are skipped).

    Parameters
    ----------
    patient_num : str
        Patient identifier passed on to the loader.
    new : bool, default False
        Use ``import_data_new`` instead of ``import_data``.
    duration : int, default 20
        Window length in rows.
    stride : int, default 10
        Only index labels divisible by this value start a negative window.
    exclusion_seconds : float, default 2
        Guard interval around labelled events for negative windows.
    min_activity : float, default 1
        Threshold on the summed absolute first difference of a negative
        window; flatter windows are dropped.
    sgb_repeats : int, default 10
        How many times each positive window is appended.
    offset : int, default 0
        Number of rows subtracted from each start label before the window
        is cut.

    Side effects
    ------------
    Prints the number of negative windows kept and shows two tqdm progress
    bars (negatives, then positives).
    """
    readings = import_data_new(patient_num) if new else import_data(patient_num)

    channel_cols = ['Channel_' + str(j) for j in range(1, 7)]
    expected_shape = (duration, len(channel_cols))
    # Select the channel columns once instead of once per window.
    channels = readings[channel_cols]

    is_sgb = readings['sgb'] == True

    # Timestamp of the event on event rows, NaT elsewhere. Going through
    # Series.where keeps a datetime dtype even when the patient has no events
    # at all, which assigning into a brand-new column via .loc does not
    # guarantee.
    event_time = readings['Time'].where(is_sgb)
    seconds_to_next = (event_time.bfill() - readings['Time']).dt.total_seconds()
    seconds_since_last = (readings['Time'] - event_time.ffill()).dt.total_seconds()

    # "not (x < limit)" rather than "x >= limit" on purpose: rows with no
    # previous/next event have NaN distances and must stay eligible.
    far_from_events = (~(seconds_to_next < exclusion_seconds)
                       & ~(seconds_since_last < exclusion_seconds))
    potential_training_index = readings.index[far_from_events.to_numpy()]

    nonsgb = [x for x in potential_training_index if x % stride == 0]
    sgb = readings.index[is_sgb.to_numpy()].tolist()

    keeps = 0

    for idx in tqdm(nonsgb):
        idx = idx - offset

        # Label-based slice: inclusive on both ends, short if labels are missing.
        readings_sub = channels.loc[idx:idx + duration - 1].values
        if (readings_sub.shape == expected_shape
                and np.abs(np.diff(readings_sub, axis=0)).sum() > min_activity):
            X_train.append(readings_sub)
            y_train.append(0)
            train_indices.append(idx)
            keeps += 1

    print(keeps)

    for idx in tqdm(sgb * sgb_repeats):
        idx = idx - offset

        readings_sub = channels.loc[idx:idx + duration - 1].values
        if readings_sub.shape == expected_shape:
            X_train.append(readings_sub)
            y_train.append(1)
            train_indices.append(idx)

In [49]:
# Start from empty accumulators so re-running this cell does not stack a
# second copy of the data; prepare_training() appends to these lists.
X_train, y_train, train_indices = [], [], []

# Original cohort: patient id -> sampling strategy.
# The sampling strategy is carried along but not used by this loop.
for patient_num, sampling_strategy in {
    '001': 1/10,
    '002': 1/50,
    '003': 1/10,
    '004': 1/10,
    '005': 1/10,
    '006': 1/10,
    '007': 1/10,
    '008': 1/10,
    '009': 1/10,
    '010': 1/50,
    '011': 1/10,
    '012': 1/10,
    '013': 1/10,
}.items():
    prepare_training(patient_num)

# New cohort, loaded through import_data_new().
# Currently left out: '69', '75', '89', '90', '93'
for patient_num in (
    '02', '15', '16', '18', '34',
    '40', '42', '55', '58', '68',
):
    prepare_training(patient_num, new=True)

  0%|          | 0/77053 [00:00<?, ?it/s]

10489


0it [00:00, ?it/s]

  0%|          | 0/68301 [00:00<?, ?it/s]

8048


  0%|          | 0/4030 [00:00<?, ?it/s]

  0%|          | 0/79790 [00:00<?, ?it/s]

2887


  0%|          | 0/160 [00:00<?, ?it/s]

  0%|          | 0/72927 [00:00<?, ?it/s]

1550


  0%|          | 0/290 [00:00<?, ?it/s]

  0%|          | 0/74816 [00:00<?, ?it/s]

5200


  0%|          | 0/140 [00:00<?, ?it/s]

  0%|          | 0/71162 [00:00<?, ?it/s]

2738


  0%|          | 0/180 [00:00<?, ?it/s]

  0%|          | 0/76775 [00:00<?, ?it/s]

2139


  0%|          | 0/110 [00:00<?, ?it/s]

  0%|          | 0/78569 [00:00<?, ?it/s]

9529


  0%|          | 0/90 [00:00<?, ?it/s]

  0%|          | 0/76244 [00:00<?, ?it/s]

6390


  0%|          | 0/500 [00:00<?, ?it/s]

  0%|          | 0/66589 [00:00<?, ?it/s]

7734


  0%|          | 0/1290 [00:00<?, ?it/s]

  0%|          | 0/78031 [00:00<?, ?it/s]

6417


  0%|          | 0/80 [00:00<?, ?it/s]

  0%|          | 0/78105 [00:00<?, ?it/s]

6531


  0%|          | 0/190 [00:00<?, ?it/s]

  0%|          | 0/73879 [00:00<?, ?it/s]

9328


  0%|          | 0/1400 [00:00<?, ?it/s]

  0%|          | 0/79128 [00:00<?, ?it/s]

20084


  0%|          | 0/1440 [00:00<?, ?it/s]

  0%|          | 0/91381 [00:00<?, ?it/s]

14322


  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/90310 [00:00<?, ?it/s]

14883


  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/73847 [00:00<?, ?it/s]

7716


  0%|          | 0/440 [00:00<?, ?it/s]

  0%|          | 0/81132 [00:00<?, ?it/s]

1645


0it [00:00, ?it/s]

  0%|          | 0/82881 [00:00<?, ?it/s]

17670


  0%|          | 0/480 [00:00<?, ?it/s]

  0%|          | 0/73682 [00:00<?, ?it/s]

3750


  0%|          | 0/320 [00:00<?, ?it/s]

  0%|          | 0/83021 [00:00<?, ?it/s]

16939


  0%|          | 0/60 [00:00<?, ?it/s]

  0%|          | 0/84069 [00:00<?, ?it/s]

28264


  0%|          | 0/370 [00:00<?, ?it/s]

  0%|          | 0/73387 [00:00<?, ?it/s]

11587


  0%|          | 0/290 [00:00<?, ?it/s]

In [50]:
# Small CNN over (20 rows, 6 channels, 1) windows: three Conv2D + MaxPooling2D
# stages (pooling along the row axis only), then two Dense layers and a
# single sigmoid output.
model = models.Sequential([
    layers.Conv2D(
        16,
        (3, 6),
        activation='relu',
        input_shape=(20, 6, 1),
        padding='same',
        data_format='channels_last',
    ),
    ### This next layer wasn't in there before
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(16, (3, 6), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Conv2D(16, (3, 6), activation='relu', padding='same'),
    layers.MaxPooling2D(pool_size=(2, 1)),
    layers.Flatten(),
    layers.Dense(16, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1, activation='sigmoid'),
])

# Binary classification set-up: cross-entropy loss, Adam, accuracy metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [51]:
# Stack the collected windows into one array and add a trailing axis of size 1
# to match the model's (rows, 6, 1) input; then train for 30 epochs on
# everything collected (no validation data is passed).
history = model.fit(
    x=np.asarray(X_train).reshape(-1, X_train[0].shape[0], 6, 1),
    y=np.asarray(y_train),
    epochs=30,
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [52]:
# Persist the trained model under the dated model path.
model.save(filepath='../models/2024_12_01_model_01')