In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import random
import sys

sys.path.append('../tools')

import os

import torch

import pandas as pd
from sklearn.model_selection import train_test_split
import pytorch_lightning as pl
import pytorch_lightning.loggers as pl_loggers
import pytorch_lightning.callbacks as pl_callbacks
import data_utility, annotation_utility
from models.rns_dataloader import *
from active_learning_utility import get_strategy
from active_learning_data import Data
from active_learning_net import Net
import interrater_annotations.tools
from copy import deepcopy
from models.SwaV import SwaV
from models.LSTMDownStream import SupervisedDownstream
import warnings
import pickle
warnings.filterwarnings("ignore")


In [3]:
# Seed every random number generator used by the notebook so runs are repeatable.
random_seed = 42
random.seed(random_seed)
torch.manual_seed(random_seed)
np.random.seed(random_seed)

if torch.cuda.is_available():
    torch.cuda.manual_seed(random_seed)
    # True restricts cuDNN to deterministic algorithms
    torch.backends.cudnn.deterministic = True
    # torch.set_deterministic(True)
    # False disables the cuDNN auto-tuner so the same algorithm is selected on every run
    torch.backends.cudnn.benchmark = False

# `seed_everything` is exported from the package root (`pl` is bound in the import cell);
# the `pytorch_lightning.utilities.seed` location used previously is a deprecated alias.
# Kept as the last expression so the cell still displays the seed that was applied.
pl.seed_everything(seed=random_seed, workers=True)

Global seed set to 42


42

In [4]:
# Root folders used throughout the notebook; all are relative to the working directory.
data_dir = "../../../user_data/"  # raw data and the cached per-patient .npy files
log_folder_root = '../../../user_data/logs/'  # metrics and selected-index pickles
ckpt_folder_root = '../../../user_data/checkpoints/'  # model checkpoints

In [5]:
# Collect the distinct patient IDs that appear in the raw inter-rater annotation table.
raw_annotations = pd.read_csv('interrater_annotations/full_updated_anns (1).csv')
ids = list(raw_annotations['HUP_ID'].unique())
# Display only: shows every ID except the last one; `ids` itself is not modified.
ids[:-1]

['HUP096',
 'HUP137',
 'HUP153',
 'HUP108',
 'HUP136',
 'HUP127',
 'HUP128',
 'HUP143',
 'HUP059',
 'HUP047',
 'HUP159',
 'HUP121',
 'HUP147',
 'HUP131',
 'HUP084',
 'RNS021',
 'HUP156',
 'HUP109',
 'HUP101',
 'RNS022',
 'HUP182',
 'HUP205',
 'RNS026',
 'HUP197']

In [6]:
# NOTE(review): this rebinds `data_utility`, which the import cell already binds via
# `import data_utility, annotation_utility`; from here on the name refers to this module.
from interrater_annotations.tools import data_utility
# Load the recordings for every patient ID found in the annotation table.
data_import = data_utility.read_files(path=data_dir + 'rns_data', patientIDs=ids,
                                      verbose=True)  # Import data with annotation

100%|██████████| 25/25 [00:51<00:00,  2.05s/it]


In [7]:
# NOTE(review): this rebinds `annotation_utility`, which the import cell already binds.
from interrater_annotations.tools import annotation_utility
# Parse the annotation CSV against the catalog and the imported data; the result exposes
# `annotation_dict`, which later cells index by dataset name.
annotations = annotation_utility.read_annotation(annotation_path = 'interrater_annotations/all_updated_anns.csv', annotation_catalog_path='interrater_annotations/test_datasets.csv', data = data_import)

42
42
42
43
43
43
41
41
41
41
41
41
41
41
41
41
41
41
41
41
41
48
48
48
41
41
41
42
42
42
41
41
41
41
41
41
42
42
42
41
41
41


In [8]:
annotations.annotation_dict['RNS_Test_Dataset_ErinConrad']

Unnamed: 0,Dataset,Annotation_Catalog_Index,Patient_ID,Alias_ID,Episode_Start_Timestamp,Episode_End_Timestamp,Episode_Start_UTC_Time,Episode_End_UTC_Time,Episode_Index,Episode_Start_Index,Episode_End_Index,Annotation_Start_Timestamp,Annotation_End_Timestamp,Annotation_Start_UTC_Time,Annotation_End_UTC_Time,Annotation_Start_Index,Annotation_End_Index,Type_Description,Class_Code,Annotation_Channel,Channel_Code,Binary_Channel_Code
59,RNS_Test_Dataset_ErinConrad,59,HUP096,RNS_Example_1_EC,1427742903476000,1427742993628000,2015-03-30 19:15:03.476,2015-03-30 19:16:33.628,10,250781,273318,[],[],[],[],[],[],No,0,[],[],[]
60,RNS_Test_Dataset_ErinConrad,60,HUP096,RNS_Example_1_EC,1442176442532000,1442176532616000,2015-09-13 20:34:02.532,2015-09-13 20:35:32.616,508,11181530,11204050,[1442176484602143],[1442176503681280],[2015-09-13 20:34:44.602143],[2015-09-13 20:35:03.681280],[11192047],[11196817],Yes,1,"[1,2,3,4]",[1111],[1111]
61,RNS_Test_Dataset_ErinConrad,61,HUP096,RNS_Example_1_EC,1446212334040000,1446212424128000,2015-10-30 13:38:54.040,2015-10-30 13:40:24.128,629,13870413,13892934,[1446212375851151],[1446212394966259],[2015-10-30 13:39:35.851151],[2015-10-30 13:39:54.966259],[13880865],[13885644],Yes,1,"[1,2,3,4]",[1111],[1111]
62,RNS_Test_Dataset_ErinConrad,62,HUP096,RNS_Example_1_EC,1449366435524000,1449366525608000,2015-12-06 01:47:15.524,2015-12-06 01:48:45.608,721,15909503,15932023,[1449366477065366],[1449366499097740],[2015-12-06 01:47:57.065366],[2015-12-06 01:48:19.097740],[15919888],[15925396],Yes,1,"[1,2,3,4]",[1111],[1111]
63,RNS_Test_Dataset_ErinConrad,63,HUP096,RNS_Example_1_EC,1449934755536000,1449934845628000,2015-12-12 15:39:15.536,2015-12-12 15:40:45.628,743,16390505,16413027,[1449934797422690],[1449934815379524],[2015-12-12 15:39:57.422690],[2015-12-12 15:40:15.379524],[16400976],[16405465],Yes,1,"[1,2,3,4]",[1111],[1111]
64,RNS_Test_Dataset_ErinConrad,64,HUP096,RNS_Example_1_EC,1455641459024000,1455641549116000,2016-02-16 16:50:59.024,2016-02-16 16:52:29.116,916,20226279,20248801,[1455641500632209],[1455641524046388],[2016-02-16 16:51:40.632209],[2016-02-16 16:52:04.046388],[20236681],[20242534],Yes,1,"[1,2,3,4]",[1111],[1111]
65,RNS_Test_Dataset_ErinConrad,65,HUP096,RNS_Example_1_EC,1455731984528000,1455732074628000,2016-02-17 17:59:44.528,2016-02-17 18:01:14.628,921,20338919,20361443,[1455732026202876],[1455732043242286],[2016-02-17 18:00:26.202876],[2016-02-17 18:00:43.242286],[20349337],[20353597],Yes,1,"[1,2,3,4]",[1111],[1111]
66,RNS_Test_Dataset_ErinConrad,66,HUP096,RNS_Example_1_EC,1461799060532000,1461799150616000,2016-04-27 23:17:40.532,2016-04-27 23:19:10.616,1105,24450741,24473261,[1461799101866975],[1461799126694561],[2016-04-27 23:18:21.866975],[2016-04-27 23:18:46.694561],[24461074],[24467281],Yes,1,"[1,2,3,4]",[1111],[1111]
67,RNS_Test_Dataset_ErinConrad,67,HUP096,RNS_Example_1_EC,1467873560512000,1467873650608000,2016-07-07 06:39:20.512,2016-07-07 06:40:50.608,1290,28581909,28604432,[],[],[],[],[],[],No,0,[],[],[]
68,RNS_Test_Dataset_ErinConrad,68,HUP096,RNS_Example_1_EC,1469814899464000,1469814989632000,2016-07-29 17:54:59.464,2016-07-29 17:56:29.632,1350,29921013,29943554,[],[],[],[],[],[],No,0,[],[],[]


In [9]:
# Cached per-patient files to load from `rns_pred_cache`; edit this list to use another subset.
data_list = ['HUP096.npy', 'HUP137.npy', 'HUP101.npy']

# With split=1 the recorded run placed every episode in the train arrays and left the
# test arrays empty.
train_data, train_label, test_data, test_label, train_index, test_index = get_data_by_episode(
    data_list, file_path='rns_pred_cache', split=1)

print(train_data.shape)
print(train_label.shape)
print(test_data.shape)
print(test_label.shape)

3it [00:00, 14.56it/s]

(39,)
(39,)
(0,)
(0,)





In [10]:
with open(log_folder_root + 'rns_active_selected/' + 'EntropySampling' + '/' + 'selected_indices.pkl', 'rb') as f:
    # Load the content of the file into a Python object
    selected_inds = pickle.load(f)

In [11]:
# One index array per entry of `selected_inds`, in the dict's iteration order.
selected_ind_list = [np.array(picked) for picked in selected_inds.values()]

In [12]:
selected_ind_list

np.concatenate(selected_ind_list[:-1])

array([ 5164,  5165,  5166, ..., 83200, 83201, 83202], dtype=int64)

In [15]:
# Rows of the concatenated training index picked by all but the last entry of
# `selected_ind_list`, narrowed to patient HUP131.
# NOTE(review): the recorded run raised IndexError here (index 5164 vs. 3411 rows), i.e. the
# pickled indices do not address the episodes currently loaded in `train_index`.
selected_rows = np.concatenate(train_index)[np.concatenate(selected_ind_list[:-1])]
selected_rows[selected_rows['patient_index'] == b'HUP131']

IndexError: index 5164 is out of bounds for axis 0 with size 3411

In [14]:
np.concatenate(train_index)[np.concatenate(selected_ind_list[:-1])]

IndexError: index 5164 is out of bounds for axis 0 with size 3411

In [None]:
train_list = np.array([ti[0] for ti in train_index])

In [None]:
# For every annotated episode, look up its position in `train_list` by patient ID and
# episode start index.
brianlitt_annot = annotations.annotation_dict['RNS_Test_Dataset_BrianLitt']
# Size the result from the table itself so a different number of rows cannot overflow it.
matching_array = [None] * len(brianlitt_annot)
for i, (_, row) in enumerate(brianlitt_annot.iterrows()):
    # `patient_index` is compared against bytes, so the ID string is encoded first.
    pt = row['Patient_ID'].encode('utf-8')
    si = row['Episode_Start_Index']
    filtered_1 = train_list[train_list['patient_index'] == pt]
    filtered_2 = filtered_1[filtered_1['start_index'] == si]
    # NOTE(review): assumes exactly one record matches; verify for duplicated episodes.
    matching_array[i] = np.where(train_list == filtered_2)[0]




In [None]:
annot = annotations.annotation_dict['RNS_Test_Dataset_ErinConrad']
annot_nonseizure = annot[annot['Class_Code'] == 0]
annot_seizure = annot[annot['Class_Code'] == 1]
patient_list = ['HUP047',
       'HUP059',
       'HUP084',
       'HUP096',
       'HUP101',
       'HUP108',
       'HUP109',
       'HUP121',
       'HUP127',
       'HUP128',
       'HUP129',
       'HUP131',
       'HUP136',
       'HUP137',
       'HUP143',
       'HUP147',
       'HUP153',
       'HUP156',
       'HUP159',
       'HUP182',
       'HUP192',
       'HUP197',
       'HUP199',
       'HUP205',
       'RNS021',
       'RNS022',
       'RNS026',
       'RNS029']

# Episodes must be longer than this many samples to be kept.
MIN_EPISODE_SAMPLES = 500

# Maps patient ID -> object array of shape (4, n_valid_episodes) with rows:
#   0: episode start index, 1: episode end index, 2: annotation-table row index,
#   3: per-sample 0/1 label array for the episode.
clip_dict = {}
for p in patient_list:
    # Filter the annotation table once per patient.
    patient_rows = annot[annot['Patient_ID'] == p]
    start_index = patient_rows['Episode_Start_Index']
    end_index = patient_rows['Episode_End_Index']
    annot_start_list = patient_rows['Annotation_Start_Index']
    annot_end_list = patient_rows['Annotation_End_Index']

    # Build one per-sample label array per episode (1 inside annotated spans, 0 elsewhere).
    annotation_list = []
    for i in range(len(start_index)):
        episode_start = start_index.iloc[i]
        episode_len = end_index.iloc[i] - episode_start
        if episode_len > 0:
            initial_arr = np.zeros(episode_len)
            if len(annot_start_list.iloc[i]) > 0:
                # Process annotation spans in order of their start index.
                sl_order = np.argsort(annot_start_list.iloc[i])
                sl = np.array(annot_start_list.iloc[i])[sl_order]
                el = np.array(annot_end_list.iloc[i])[sl_order]

                for si, ei in zip(sl, el):
                    initial_arr[si - episode_start:ei - episode_start] = 1
        else:
            # Placeholder for empty/invalid episodes; removed by the length filter below.
            initial_arr = np.zeros(1)

        annotation_list.append(initial_arr)

    ind_arr = np.vstack(
        [start_index,
         end_index,
         start_index.index]).astype(int)

    # Plain integer positions (not the tuple returned by np.where) keep the column axis
    # intact, so a patient with exactly one valid episode still yields a (3, 1) slice.
    valid = np.where((ind_arr[1] - ind_arr[0]) > MIN_EPISODE_SAMPLES)[0]
    if len(valid) == 0:
        continue

    # Fill an object array element by element: np.array(list, dtype=object) would build a
    # 2-D array whenever all label arrays happen to share one length.
    labels_obj = np.empty(len(annotation_list), dtype=object)
    for k, labels in enumerate(annotation_list):
        labels_obj[k] = labels

    combined_clip = np.vstack((ind_arr[:, valid], labels_obj[valid]))
    clip_dict[p] = combined_clip

In [None]:
from scipy.stats import mode

# Build windowed, labelled data for every patient in `clip_dict` and cache it under
# `rns_pred_cache/<patient>.npy`.
window_len = 1
stride = 1
concat_n = 4
# NOTE(review): `id` shadows the Python builtin of the same name inside this cell.
for id in clip_dict.keys():
    data_import[id].set_window_parameter(window_length=window_len, window_displacement=stride)
    data_import[id].set_concatenation_parameter(concatenate_window_n=concat_n)
    # Rows 0 and 1 of the clip array hold the episode start/end indices.
    window_indices, _ = data_import[id].get_windowed_data(clip_dict[id][0], clip_dict[id][1])
    import_indices = []
    import_label = []
    import_clip_indices = []
    import_start_indicies = []
    import_patient_ID = []
    for i, ind in enumerate(window_indices):
        # Shift the window indices by the episode start (row 0) so they address the
        # per-episode label array; the +1 offset presumably matches the indexing convention
        # of `get_windowed_data` — TODO confirm.
        indices = window_indices[i]+1-clip_dict[id][0][i]
        # 249 consecutive sample offsets are read for each window.
        offsets = np.arange(249)
        # One row of sample positions per window, starting at the window's first column.
        full_indices = indices[:,0][:, np.newaxis] + offsets
        # Gather the per-sample labels (row 3) for all windows at once.
        slices_no_loop = clip_dict[id][3][i][full_indices]
        # The window label is the most frequent per-sample label within the window.
        mode_result = mode(slices_no_loop, axis=1)
        mode_values = mode_result.mode

        # print(mode_values)
        import_label.append(mode_values)
        import_indices.append(np.repeat(clip_dict[id][2][i], len(ind)))
        import_clip_indices.append(np.arange(len(ind)))
        import_start_indicies.append(np.repeat(clip_dict[id][0][i], len(ind)))
        import_patient_ID.append(np.repeat(id, len(ind)))

    # Flatten the per-episode lists into one array per field.
    import_label = np.hstack(import_label)
    import_indices = np.hstack(import_indices)
    import_clip_indices = np.hstack(import_clip_indices)
    import_start_indicies = np.hstack(import_start_indicies)
    import_patient_ID = np.hstack(import_patient_ID)

    data_import[id].normalize_windowed_data()
    _, concatenated_data = data_import[id].get_concatenated_data(data_import[id].windowed_data, arrange='channel_stack')

    # Every window must have exactly one label.
    assert np.hstack(import_label).shape[0] == concatenated_data.shape[0]

    # The payload is a dict, so np.save stores it as a pickled object; loading it back
    # requires allow_pickle=True.
    np.save(data_dir+'rns_pred_cache/' + id + '.npy', {'data': concatenated_data, 'label': import_label, 'patientID': import_patient_ID, 'indices': np.vstack([import_indices,import_clip_indices,import_start_indicies]).T})

In [None]:
# selected_episode = np.array(matching_array[:25])

In [None]:
# Flatten only the selected episodes into window-level arrays.
# NOTE(review): `selected_episode` is assigned only in a commented-out line of the preceding
# cell, so this cell relies on a value defined outside the visible notebook flow.
X_train = np.concatenate(train_data[selected_episode])
y_train = np.concatenate(train_label[selected_episode])
# X_test = np.concatenate(test_data)
# y_test = np.concatenate(test_label)
index_train = np.concatenate(train_index[selected_episode])
# index_test = np.concatenate(test_index)
# Number of windows in each selected episode.
seq_len_train = np.array([y.shape[0] for y in train_label[selected_episode]])
# seq_len_test = np.array([y.shape[0] for y in test_label])
# seq_len_test = np.array([y.shape[0] for y in test_label])

In [None]:
# Flatten every training episode into window-level arrays; this overwrites the values
# assigned by the selected-episode cell above.
X_train = np.concatenate(train_data)
y_train = np.concatenate(train_label)
# X_test = np.concatenate(test_data)
# y_test = np.concatenate(test_label)
index_train = np.concatenate(train_index)
# index_test = np.concatenate(test_index)
# Number of windows in each episode.
seq_len_train = np.array([y.shape[0] for y in train_label])

In [None]:
# DataLoader keyword arguments shared by the training and evaluation loaders.
_loader_args = {'batch_size': 2, 'num_workers': 4, 'collate_fn': collate_fn,
                'drop_last': True, 'persistent_workers': True}

# Task configuration; each loader entry receives its own copy of the shared arguments.
args_task = {
    'n_epoch': 40,
    'transform_train': True,
    'strategy_name': 'EntropySampling',
    'transform': False,
    'loader_tr_args': dict(_loader_args),
    'loader_te_args': dict(_loader_args),
}

In [None]:
# `load_from_checkpoint` is a classmethod that builds and returns the restored module, so it
# is called on the class; no throwaway default-constructed `SwaV()` instance is needed.
swav = SwaV.load_from_checkpoint(
    ckpt_folder_root + 'rns_swav_50_12/rns_swav-epoch=82-swav_loss=2.58204.ckpt')
# Downstream classifier built on top of the pretrained backbone.
model = SupervisedDownstream(swav.backbone)
# initialize model and save the model state
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
from models.rns_dataloader import RNS_Active_by_episode_LSTM, collate_fn

trainer = pl.Trainer(accelerator='gpu', devices=1, precision=16)
train_dataset = RNS_Active_by_episode_LSTM(train_data, train_label, transform=False, astensor=True)

# This loader is only used with `trainer.predict`, whose outputs are later matched
# one-to-one against `train_label` and the annotation rows. It therefore keeps the original
# order (shuffle=False) and must not drop the final partial batch (drop_last=False);
# otherwise the last episodes would silently receive no prediction.
train_dataloader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=4,
    collate_fn=collate_fn,
    shuffle=False,
    drop_last=False,
)

In [None]:
strategy_name = 'RandomSampling'
# Pickle written by the active-learning run for this strategy.
selected_path = log_folder_root + 'rns_active_selected/' + strategy_name + '/' + 'selected_indices.pkl'
with open(selected_path, 'rb') as f:
    selected_inds = pickle.load(f)

# Show the entry stored under key 1.
selected_inds[1]

In [None]:
# NOTE(review): `NUM_ROUND`, `NUM_QUERY`, `strategy` and `modelstate` are not defined in any
# cell visible here; they must already exist in the kernel namespace.
for rd in range(1, NUM_ROUND + 1):
    print('round ' + str(rd))

    # Find the step with the best validation accuracy in the previous round's log.
    log_file_name = log_folder_root + 'rns_active/active_logs_' + strategy_name + '/logger_round_' + str(
        rd - 1) + '/version_0/metrics.csv'
    logs = pd.read_csv(log_file_name)
    max_ind = logs['val_acc'].argmax()
    max_row = logs.iloc[max_ind]

    # Locate the checkpoint saved for that round and step.
    ckpt_directory = ckpt_folder_root + 'rns_active/active_checkpoints_' + strategy_name
    ckpt_files = os.listdir(ckpt_directory)
    load_file_name = strategy_name + '_round_' + str(rd - 1) + '-step=' + str(int(max_row['step']+1))
    print(load_file_name)

    ind = next((i for i, s in enumerate(ckpt_files) if load_file_name in s), None)
    if ind is None:
        # Fail with a clear message instead of a TypeError from `ckpt_files[None]`.
        raise FileNotFoundError(
            'no checkpoint matching ' + load_file_name + ' in ' + ckpt_directory)
    print(ind, ckpt_files[ind])

    # `load_from_checkpoint` returns a NEW module and leaves the object it is called on
    # untouched, so the restored weights are copied into the strategy's network explicitly.
    best_model = type(strategy.net.net).load_from_checkpoint(
        ckpt_directory + '/' + ckpt_files[ind], backbone=swav.backbone)
    strategy.net.net.load_state_dict(best_model.state_dict())
    del best_model

    # Query new samples with the best model of the previous round.
    q_idxs = strategy.query(NUM_QUERY * 90)

    # Record the queried indices for this round in the shared pickle (read-modify-write).
    selected_path = log_folder_root + 'rns_active_selected/' + strategy_name + '/' + 'selected_indices.pkl'
    with open(selected_path, 'rb') as f:
        selected_inds = pickle.load(f)
    selected_inds[rd] = q_idxs
    with open(selected_path, 'wb') as f:
        pickle.dump(selected_inds, f)

    # Add the new labels, reset the network to `modelstate` and retrain.
    strategy.update(q_idxs)
    strategy.net.round = rd
    strategy.net.net.load_state_dict(modelstate)
    torch.cuda.empty_cache()
    strategy.train()
    torch.cuda.empty_cache()

In [None]:
import os
from pathlib import Path

import sklearn.metrics


# Evaluate the checkpoints of one strategy and keep track of the best window-level and
# episode-level F1 scores.
# NOTE(review): `tqdm`, `trainer`, `train_dataloader` and `combine_window_to_episode` must
# already exist in the kernel namespace; the latter is defined in a later cell of this file.
strategy_name = 'MarginSamplingDropout'
ckpt_directory = ckpt_folder_root + 'rns_active/active_checkpoints_' + strategy_name
# ckpt_files = os.listdir(ckpt_directory)
# Checkpoints ordered by modification time; the 50 oldest are skipped.
ckpt_files = sorted(Path(ckpt_directory).iterdir(), key=os.path.getmtime)[50:]
high_f1 = 0
high_class_f1 = 0
file_name = []
file_name_class_f1 = []
# Episode-level ground truth (1 if any window is positive) does not depend on the checkpoint.
episode_truth = [np.sign(tl.sum()) for tl in train_label]
for cf in tqdm(ckpt_files):
    # Classmethod call on the class: returns a freshly restored module.
    model = type(model).load_from_checkpoint(ckpt_directory + '/' + cf.name, backbone=swav.backbone)
    predictions = trainer.predict(model, train_dataloader)
    output_list = []
    target_list = []
    emb_list = []
    seq_len_list = []
    for pred, y, emb, emb2, seq_len in predictions:
        output_list.append(pred)
        target_list.append(y)
        emb_list.append(emb)
        seq_len_list.append(seq_len)
    pred_raw = torch.vstack(output_list)
    target = torch.concat(target_list)
    emb = torch.concat(emb_list)
    out = torch.argmax(pred_raw, dim=1)
    seq_len_arr = torch.tensor([item for sublist in seq_len_list for item in sublist])
    pred_episode = combine_window_to_episode(out, seq_len_arr)
    # scikit-learn metrics take (y_true, y_pred) in that order.
    class_f1 = sklearn.metrics.f1_score(episode_truth, [np.sign(tl.sum()) for tl in pred_episode])
    f1 = sklearn.metrics.f1_score(target, out)
    if f1>high_f1:
        high_f1 = f1
        file_name.append(cf)
        print('high_f1', f1,cf)
    # Ties are recorded as well for the episode-level score.
    if class_f1>=high_class_f1:
        high_class_f1 = class_f1
        file_name_class_f1.append((class_f1, cf))
        print('high_class_f1', class_f1,cf)


In [None]:
 file_name_class_f1

In [None]:
file_name

In [None]:
# `load_from_checkpoint` is a classmethod; call it on the model's class rather than on the
# instance that is about to be replaced.
model = type(model).load_from_checkpoint(ckpt_folder_root + 'rns_active/active_checkpoints_' + strategy_name + '/' + 'MarginSamplingDropout_round_5-step=750-train_loss=0.05351.ckpt', backbone=swav.backbone)

In [None]:
predictions = trainer.predict(model,train_dataloader)

In [None]:
import torch.nn as nn

# Softmax over the class dimension; `m` is not used again in the cells visible here.
m = nn.Softmax(dim=1)

# Per-batch outputs of `trainer.predict`, gathered field by field (the fourth field is unused).
output_list, target_list, emb_list, seq_len_list = [], [], [], []
for pred, y, emb, emb2, seq_len in predictions:
    seq_len_list.append(seq_len)
    emb_list.append(emb)
    target_list.append(y)
    output_list.append(pred)

In [None]:
pred_raw = torch.vstack(output_list)
target = torch.concat(target_list)
emb = torch.concat(emb_list)
out = torch.argmax(pred_raw, dim=1)
seq_len_arr = torch.tensor([item for sublist in seq_len_list for item in sublist])

In [None]:
import sklearn.metrics

# Window-level report. scikit-learn expects (y_true, y_pred); with the arguments reversed the
# precision and recall columns swap and `support` counts predictions instead of labels.
clf_report = sklearn.metrics.classification_report(target, torch.argmax(pred_raw, dim=1), digits=6)

print(f"Classification Report : \n{clf_report}")

In [None]:
pred_episode = combine_window_to_episode(torch.argmax(pred_raw, dim=1),seq_len_arr)

In [None]:

import matplotlib.pyplot as plt
plt.figure(figsize=(10,6))
plt.plot(target)
plt.plot(torch.argmax(pred_raw, dim=1))
plt.show()

In [None]:
# Plot four channels of one episode, stacked with vertical offsets, and shade the predicted
# (blue) and annotated (yellow) spans.
i = 31
plt.figure()
plt.plot(train_data[i][:,:,4].flatten()+2,color = 'k')
plt.plot(train_data[i][:,:,13].flatten()+1,color = 'k')
plt.plot(train_data[i][:,:,22].flatten(),color = 'k')
plt.plot(train_data[i][:,:,31].flatten()-1,color = 'k')
label_start, label_end, pred_start, pred_end = plot_high_light(train_label[i],pred_episode[i])
# Span boundaries are in windows; each window is drawn as 249 samples on the x axis.
# The loops use their own variable names so the episode index `i` is not overwritten.
for span_start, span_end in zip(pred_start, pred_end):
    plt.axvspan(span_start*249, span_end*249, color="blue", alpha=0.3)
for span_start, span_end in zip(label_start, label_end):
    plt.axvspan(span_start*249, span_end*249, color="yellow", alpha=0.3)
plt.show()

In [None]:
pred_start

In [None]:
def plot_high_light(train_label, pred_episode):
    """Locate the positive spans in a label sequence and in a prediction sequence.

    :param train_label: per-window 0/1 ground-truth labels of one episode
    :param pred_episode: per-window 0/1 predictions of the same episode
    :return: ``(label_start, label_end, pred_start, pred_end)`` as window boundaries: a span
        covers windows ``start`` (inclusive) to ``end`` (exclusive)
    """
    # np.diff(x)[k] == x[k + 1] - x[k], so a change found at position k means the new value
    # begins at window k + 1; the +1 turns the diff position into that boundary.
    label_start = np.where(np.diff(train_label) == 1)[0] + 1
    label_end = np.where(np.diff(train_label) == -1)[0] + 1
    pred_start = np.where(np.diff(pred_episode) == 1)[0] + 1
    pred_end = np.where(np.diff(pred_episode) == -1)[0] + 1
    # Pair up spans that touch the beginning or the end of the episode.
    label_start, label_end = check_consistent(label_start, label_end, len(train_label))
    pred_start, pred_end = check_consistent(pred_start, pred_end, len(pred_episode))

    return label_start, label_end, pred_start, pred_end


def check_consistent(start, end, total_len):
    """Pair up span starts and ends when a span touches an edge of the sequence.

    :param start: sorted positions where spans begin
    :param end: sorted positions where spans finish
    :param total_len: length of the sequence the positions refer to
    :return: ``(start, end)`` as int64 arrays of equal length; ``0`` is prepended to
        ``start`` when the sequence opens inside a span and ``total_len`` is appended to
        ``end`` when it closes inside one. Boundaries that were already present are kept.
    """
    start = np.asarray(start, dtype=np.int64)
    end = np.asarray(end, dtype=np.int64)
    # The sequence opens inside a span when the first end has no start before it.
    if len(end) > 0 and (len(start) == 0 or end[0] < start[0]):
        start = np.concatenate(([0], start)).astype(np.int64)
    # The sequence closes inside a span when the last start has no end after it.
    if len(start) > 0 and (len(end) == 0 or start[-1] > end[-1]):
        end = np.concatenate((end, [total_len])).astype(np.int64)
    return start, end


In [None]:
i = 31
plot_high_light(train_label[i],pred_episode[i])

In [None]:
train_data[i]

In [None]:
torch.tensor([item for sublist in seq_len_list for item in sublist])

In [None]:
def combine_window_to_episode(data, seq_len, index=None):
    """Split a flat per-window sequence back into per-episode segments.

    :param data: sliceable sequence with one entry per window, episodes laid end to end
    :param seq_len: number of windows in each episode, in order
    :param index: optional selector aligned with ``data``; when given, every episode segment
        is indexed with its slice of ``index`` (presumably a boolean mask — confirm with callers)
    :return: 1-D object array holding one segment per episode; episodes whose segment is
        empty are left out
    """
    cum_sum_index = np.cumsum(seq_len)
    cum_sum_index = np.insert(cum_sum_index, 0, 0)

    assert len(data) == cum_sum_index[-1], "sum(seq_len) must equal len(data)"

    segments = []
    for start_index, end_index in zip(cum_sum_index[:-1], cum_sum_index[1:]):
        out = data[start_index:end_index]
        if index is not None:
            out = out[index[start_index:end_index]]
        if len(out) > 0:
            segments.append(out)

    # Fill the object array element by element: np.array(segments, dtype=object) would turn
    # equally long segments into a 2-D array instead of a 1-D array of segments.
    data_out = np.empty(len(segments), dtype=object)
    for position, segment in enumerate(segments):
        data_out[position] = segment

    return data_out

In [None]:
[np.sign(tl.sum()) for tl in train_label]

In [None]:
[np.sign(tl.sum()) for tl in pred_episode]

In [None]:
# Episode-level report: an episode counts as positive when any of its windows is positive.
# scikit-learn expects (y_true, y_pred); with the arguments reversed the precision and
# recall columns swap and `support` counts predictions instead of labels.
clf_report = sklearn.metrics.classification_report([np.sign(tl.sum()) for tl in train_label], [np.sign(tl.sum()) for tl in pred_episode], digits=6)

print(f"Classification Report : \n{clf_report}")

In [None]:
sklearn.metrics.f1_score([np.sign(tl.sum()) for tl in pred_episode], [np.sign(tl.sum()) for tl in train_label])

In [None]:
np.where(np.diff(train_label[i]) == -1)[0]

In [None]:
machine_annot = annotations.annotation_dict['RNS_Test_Dataset_ErinConrad'].copy()

In [None]:
def get_start_stop(pred_episode, window_size=249):
    """Convert per-window 0/1 predictions of one episode into sample-index spans.

    :param pred_episode: per-window 0/1 predictions of one episode
    :param window_size: number of samples per window (default 249, the value used by the
        rest of this notebook)
    :return: ``(pred_start, pred_end)`` int64 arrays of sample offsets relative to the episode
        start; each span runs from ``pred_start[k]`` (inclusive) to ``pred_end[k]`` (exclusive)
    """
    # np.diff(x)[k] == x[k + 1] - x[k], so a change found at position k means the new value
    # begins at window k + 1; the +1 turns the diff position into that boundary.
    pred_start = np.where(np.diff(pred_episode) == 1)[0] + 1
    pred_end = np.where(np.diff(pred_episode) == -1)[0] + 1
    pred_start, pred_end = check_consistent(pred_start, pred_end, len(pred_episode))
    # Scale without `*=`: an in-place multiply would repeat a plain list instead of scaling it.
    pred_start = np.asarray(pred_start, dtype=np.int64) * window_size
    pred_end = np.asarray(pred_end, dtype=np.int64) * window_size
    return pred_start, pred_end


def check_consistent(start, end, total_len):
    """Pair up span starts and ends when a span touches an edge of the sequence.

    :param start: sorted positions where spans begin
    :param end: sorted positions where spans finish
    :param total_len: length of the sequence the positions refer to
    :return: ``(start, end)`` as int64 arrays of equal length; ``0`` is prepended to
        ``start`` when the sequence opens inside a span and ``total_len`` is appended to
        ``end`` when it closes inside one. Boundaries that were already present are kept.
    """
    start = np.asarray(start, dtype=np.int64)
    end = np.asarray(end, dtype=np.int64)
    # The sequence opens inside a span when the first end has no start before it.
    if len(end) > 0 and (len(start) == 0 or end[0] < start[0]):
        start = np.concatenate(([0], start)).astype(np.int64)
    # The sequence closes inside a span when the last start has no end after it.
    if len(start) > 0 and (len(end) == 0 or start[-1] > end[-1]):
        end = np.concatenate((end, [total_len])).astype(np.int64)
    return start, end

In [None]:
from datetime import datetime
def interpolate_time(index, start_index, end_index, start_timestamp, end_timestamp):
    """Linearly map a sample index inside an episode to a timestamp.

    :param index: sample index to convert
    :param start_index: sample index of the episode start
    :param end_index: sample index of the episode end
    :param start_timestamp: timestamp of the episode start
    :param end_timestamp: timestamp of the episode end
    :return: int - interpolated timestamp, truncated towards zero
    """
    span = end_index - start_index
    if span == 0:
        # A zero-length episode has a single instant; avoid dividing by zero.
        return int(start_timestamp)
    return int((index - start_index) / span * (end_timestamp - start_timestamp) + start_timestamp)

def timestamp_to_utctime(ts):
    """
    :param ts: int - timestamp in microseconds since the Unix epoch
    :return: datetime - naive datetime expressed in UTC
    """
    from datetime import datetime, timedelta

    # Integer microsecond arithmetic is exact, whereas scaling by 1e-6 goes through a float
    # that can round the microsecond digit; it also avoids the deprecated
    # `datetime.utcfromtimestamp`.
    return datetime(1970, 1, 1) + timedelta(microseconds=int(ts))


In [None]:
# Re-number the rows from 0 so they can be matched positionally with `pred_episode`.
machine_annot = machine_annot.reset_index(drop=True)
# Re-label the copied table as machine-generated annotations.
machine_annot['Dataset'] = 'RNS_Test_Dataset_DeepLearning'
machine_annot['Alias_ID'] = 'RNS_Example_DL'
# Remove the rater-specific columns.
machine_annot = machine_annot.drop(
    columns=['Type_Description', 'Annotation_Channel', 'Channel_Code', 'Binary_Channel_Code',
             'Annotation_Catalog_Index'])

In [None]:
machine_annot

In [None]:
# Replace the human annotations of every episode with the spans predicted by the model.
# NOTE(review): assumes row k of `machine_annot` corresponds to `pred_episode[k]` — verify.
for index, row in machine_annot.iterrows():
    start_ind, end_ind = get_start_stop(pred_episode[index])

    # Absolute sample indices of the predicted spans.
    abs_start = (row.Episode_Start_Index + start_ind).tolist()
    abs_end = (row.Episode_Start_Index + end_ind).tolist()

    # Timestamps interpolated between the episode's start and end timestamps.
    start_ts = [
        interpolate_time(st_ind, row.Episode_Start_Index, row.Episode_End_Index,
                         row.Episode_Start_Timestamp, row.Episode_End_Timestamp)
        for st_ind in abs_start]
    end_ts = [
        interpolate_time(ed_ind, row.Episode_Start_Index, row.Episode_End_Index,
                         row.Episode_Start_Timestamp, row.Episode_End_Timestamp)
        for ed_ind in abs_end]

    machine_annot.at[index, 'Annotation_Start_Index'] = abs_start
    machine_annot.at[index, 'Annotation_End_Index'] = abs_end
    # An episode is positive when at least one window is predicted positive.
    machine_annot.at[index, 'Class_Code'] = np.sign(pred_episode[index].sum()).item()
    machine_annot.at[index, 'Annotation_Start_Timestamp'] = start_ts
    machine_annot.at[index, 'Annotation_End_Timestamp'] = end_ts
    machine_annot.at[index, 'Annotation_Start_UTC_Time'] = [timestamp_to_utctime(ts) for ts in start_ts]
    machine_annot.at[index, 'Annotation_End_UTC_Time'] = [timestamp_to_utctime(ts) for ts in end_ts]


In [None]:
1442176484602143-interpolate_time(11192047, 11181530, 11204050 ,1442176442532000, 1442176532616000)

In [None]:
annotations.annotation_dict['RNS_Test_Dataset_ErinConrad']

In [None]:
# Linear interpolation used by interpolate_time:
#   (ind - start_ind) / (end_ind - start_ind) = (ind_ts - st_ts) / (end_ts - st_ts)

In [None]:
machine_annot.to_csv('machineprediction.csv', index=True)