In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tqdm.notebook import tqdm
from imblearn.over_sampling import RandomOverSampler

import tensorflow as tf
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score, confusion_matrix, roc_curve

from matplotlib.dates import DateFormatter
dateformat = DateFormatter(fmt = '%H:%M:%S:%f')

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
model = tf.keras.models.load_model("../models/2024_11_28_model")

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [3]:
def import_training_data(patient_num):
    
    # First, get the labels
    labels = pd.read_csv(f'../data/new_2024/formatted/labels_{patient_num}.csv')
    sgb = labels['labels'].tolist()
        
    # Then the raw data
    readings = pd.read_csv(f'../data/new_data/eval_data/{patient_num} RAW 00.csv')
    readings['Time'] = pd.to_datetime(readings['Time'])
    readings['CapturedTime'] = readings['Time'].dt.time
    
    # Then remove the meal times
    events = pd.read_csv(f'../data/new_data/eval_data/{patient_num} EVT 00.csv')
    events = events[events['EventName'] == 'Meal']
    events['Time'] = pd.to_datetime(events['Time'])
    events['Duration'] = pd.to_timedelta(events['Duration'])
    events = events.assign(End = events['Time'] + events['Duration'])
    for idx, row in events.iterrows():
        readings = readings[(readings['Time'] < row['Time']) | (readings['Time'] > row['End'])]
    
    return sgb, readings

In [224]:
def import_data(patient_num):

    # First, get the labels
    labels = pd.read_csv(f'../data/new_2024/centered/centered_{patient_num}.csv')

    #others = pd.read_csv(f'../data/centering/double_check/reviewed/dc_{patient_num}.csv')
    #others['Review'] = others['Review'].str.strip()
    #others = others[others['Review'] == 'Yes']

    #sgb_times = pd.concat([labels[['Time']], others[['Time']]])
    sgb_times = labels[['Time']]
    sgb_times['Time'] = pd.to_datetime(sgb_times['Time'])
    sgb_times['sgb'] = True

    # Then the raw data
    readings = pd.read_csv(f'../data/new_data/eval_data/{patient_num} RAW 00.csv')
    readings['Time'] = pd.to_datetime(readings['Time'])
    readings['CapturedTime'] = readings['Time'].dt.time

    # Combine them to label the events times
    readings = pd.merge(left = readings,
             right = sgb_times[['Time', 'sgb']],
            how = 'outer').fillna('False')
    
    # Then remove the meal times
    events = pd.read_csv(f'../data/new_data/eval_data/{patient_num} EVT 00.csv')
    events = events[events['EventName'] == 'Meal']
    events['Time'] = pd.to_datetime(events['Time'])
    events['Duration'] = pd.to_timedelta(events['Duration'])
    events = events.assign(End = events['Time'] + events['Duration'])
    for idx, row in events.iterrows():
        readings = readings[(readings['Time'] < row['Time']) | (readings['Time'] > row['End'])]
        
    # Normalize the readings to be between 0 and 1
    for channel in ['Channel_1', 'Channel_2', 'Channel_3',
           'Channel_4', 'Channel_5', 'Channel_6']:
        readings[channel] = readings[channel] / 10000    
    return readings

In [225]:
patient_num = '93'
readings = import_data(patient_num)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if sys.path[0] == '':


In [226]:
X_test = []
test_indices = []

for idx in tqdm(readings.index.tolist()):
    offset = 0
    duration = 20

    idx = idx - offset

    readings_sub = readings.loc[idx:idx + duration - 1][['Channel_' + str(j) for j in range(1,7)]].values
    if readings_sub.shape == (duration, 6) and np.abs(np.diff(readings_sub, axis = 0)).sum() > 1:
        X_test.append(readings_sub)
        test_indices.append(idx)

  0%|          | 0/772241 [00:00<?, ?it/s]

In [227]:
predictions = model.predict(
    np.array(X_test).reshape(
        len(X_test), 
        X_test[0].shape[0], 
        6, 
        1
    )
)

In [228]:
def plot_event(idx, duration = 50, shift = 15, filepath = None):
    readings_sub = (
        readings
        .loc[idx - shift:idx - shift + duration - 1]
        [['Channel_' + str(i) for i in range(1,7)]]
        .values
    ) * 10000

    fig, ax = plt.subplots(figsize = (12,6))

    times = readings.loc[idx - shift:idx - shift + duration - 1, 'Time']

    for i in range(6):
        plt.plot(times, readings_sub[:, i], label = 'Channel_' + str(i + 1))

    plt.ylim(-100, 10300)
        
    ax.xaxis.set_major_formatter(dateformat)

    plt.legend(bbox_to_anchor = (1, 0.5), loc = 'center left')
    
    if filepath:
        plt.tight_layout()
        plt.savefig(filepath, 
                    dpi = 150, 
                    transparent = False, 
                    facecolor = 'white')
        
        plt.close();

In [229]:
prediction_indices = [test_indices[x] for x in np.where(predictions.flatten() > 0.5)[0]]

true_indices = readings[readings['sgb'] == True].index.tolist()

true_positives = []
false_positives = []
for idx in tqdm(prediction_indices):
    fp = True
    for i in range(15):
        if idx - i in true_indices:
            true_positives.append(idx - i)
            fp = False
            continue
        if idx + i in true_indices:
            true_positives.append(idx + i)
            fp = False
            continue
    if fp:
        false_positives.append(idx)
        
true_positives = set(true_positives)

  0%|          | 0/98 [00:00<?, ?it/s]

In [230]:
len(true_positives)

1

In [231]:
len(true_indices)

1

In [232]:
len(prediction_indices)

98

In [233]:
len(false_positives)

92

In [234]:
false_positives = pd.DataFrame({'fp_index': false_positives})

false_positives['diff'] = false_positives['fp_index'].diff()

false_positives = false_positives[~(false_positives['diff'].isin([1, 2, 3, 4, 5]))]

In [235]:
len(false_positives)

32

In [236]:
for i, fp in enumerate(false_positives['fp_index'].to_list()):
    plot_event(fp, filepath=f'../eval_results/images_{patient_num}/fp/fp_{str(i).zfill(3)}.png')

In [237]:
fp_df = false_positives[['fp_index']]
fp_df['patient_num'] = patient_num
fp_df = fp_df.reset_index(drop = True)
fp_df = fp_df.reset_index()
fp_df['image_number'] = 'fp_' + fp_df['index'].astype('string').str.zfill(3)
fp_df['timestamp'] = readings.loc[fp_df['fp_index'].tolist(), 'Time'].tolist()
fp_df = fp_df[['patient_num', 'image_number', 'timestamp']]
fp_df.to_csv(f'../eval_results/images_{patient_num}/fp.csv', index = False)

In [238]:
false_negatives = [x for x in true_indices if x not in true_positives]

In [239]:
for i, fp in enumerate(false_negatives):
    plot_event(fp, filepath=f'../eval_results/images_{patient_num}/fn/fn_{str(i).zfill(3)}.png')

In [240]:
fn_df = pd.DataFrame({'fp_index': false_negatives})
fn_df['patient_num'] = patient_num
fn_df = fn_df.reset_index(drop = True)
fn_df = fn_df.reset_index()
fn_df['image_number'] = 'fn_' + fn_df['index'].astype('string').str.zfill(3)
fn_df['timestamp'] = readings.loc[fn_df['fp_index'].tolist(), 'Time'].tolist()
fn_df = fn_df[['patient_num', 'image_number', 'timestamp']]
fn_df.to_csv(f'../eval_results/images_{patient_num}/fn.csv', index = False)

In [241]:
# readings.loc[false_positives['fp_index'], ['Time']].to_csv(f'../data/new_data/images/images_{patient_num}/fp_{patient_num}.csv', index = False)
# readings.loc[false_negatives, ['Time']].to_csv(f'../data/new_data/images/images_{patient_num}/fn_{patient_num}.csv', index = False)

In [242]:
# readings.loc[false_negatives]