In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

## Config
# Data selection / loading switches.
dataset = "training_AB"             # dataset tag; also the stem of the cached ../npy/*.npy files
path = "../{}/".format(dataset)     # folder listed for raw per-patient files when numpy_load is False
numpy_load = True                   # True: load cached arrays from ../npy/ instead of parsing `path`
nanfill = False                     # True: "_nanfill" is appended to `dataset` before the feature file is chosen
mm = False                          # True: load "<dataset>_mm.npy" as the feature matrix

# Switches that are set here but not read in the visible cells.
std = False
nan_to_neg = True
biased_regress = True
normal_equations = True
kfold_split = 10

## ESN parameters
N_def = 100           # Neurons
scale_def = 0.50      # scaling
mem_def = 0.50        # memory
exponent_def = 1.0    # sigmoid exponent

# Script name struct for report
script_name = 'ESNtrainCV'
name_struct_meta = "_N_scale_mem"
# One "_"-separated field per entry of name_struct_meta: neurons, scale, memory.
name_struct = "_".join((
    "",
    "{:03d}".format(N_def),
    "{:1.3f}".format(scale_def),
    "{:1.3f}".format(mem_def),
))

## Imports
import numpy as np
import os
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GroupKFold
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedKFold
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from sklearn.metrics import f1_score
#import matplotlib.pyplot as plt
import ESNtools

# Fix boundary nans (replicate head/tail vals)
def nan_bounds(feats):
    """Fill leading and trailing NaN runs of a 1-D float array, in place.

    Leading NaNs are replaced by the first non-NaN value and trailing NaNs by
    the last non-NaN value. NaNs in the interior are left untouched.

    Parameters
    ----------
    feats : 1-D numpy float array
        Modified in place.

    Returns
    -------
    None

    Notes
    -----
    An array with no NaNs, or with nothing but NaNs, is left unchanged
    (there is no value to replicate in the latter case).
    """
    valid_idx = np.flatnonzero(~np.isnan(feats))
    if valid_idx.size == 0:
        # All-NaN (or empty) input: nothing to replicate.
        return

    first_valid = valid_idx[0]
    last_valid = valid_idx[-1]

    # Scalar broadcast replaces the removed `np.float`-typed helper arrays.
    feats[:first_valid] = feats[first_valid]
    feats[last_valid + 1:] = feats[last_valid]
        
# nan interpolation
def nan_interpolate(feats):
    """Fill every run of consecutive NaNs in a 1-D float array, in place.

    Each interior run is replaced by a single constant: the average of the
    value just before the run and the value just after it. A run touching the
    start of the array is filled with the value that follows it, and a run
    touching the end with the value that precedes it (previously these cases
    wrapped around to the last element or raised IndexError).

    Parameters
    ----------
    feats : 1-D numpy float array
        Modified in place.

    Returns
    -------
    None

    Notes
    -----
    An array with no NaNs, or with nothing but NaNs, is left unchanged.
    """
    nan_mask = np.isnan(feats)
    if not nan_mask.any() or nan_mask.all():
        return

    # Pad with False on both sides so that runs touching either end of the
    # array still produce a rising and a falling edge.
    padded = np.concatenate(([False], nan_mask, [False])).astype(np.int8)
    edges = np.diff(padded)
    run_starts = np.flatnonzero(edges == 1)    # first NaN of each run
    run_stops = np.flatnonzero(edges == -1)    # one past the last NaN of each run

    n_vals = len(feats)
    for start, stop in zip(run_starts, run_stops):
        has_left = start > 0
        has_right = stop < n_vals
        if has_left and has_right:
            fill = 0.5 * (feats[start - 1] + feats[stop])
        elif has_left:
            fill = feats[start - 1]
        else:
            fill = feats[stop]
        feats[start:stop] = fill

## Read data functions
def read_challenge_data(input_file, return_header = False):
    """Read a '|'-delimited file with one header line and return its feature columns.

    Parameters
    ----------
    input_file : str
        Path of the file to read.
    return_header : bool, optional
        Accepted but not used by this function; only the data array is
        returned regardless of its value.

    Returns
    -------
    numpy.ndarray
        2-D array of the numeric rows. If the last header field is
        'SepsisLabel', that last column is dropped.
    """
    with open(input_file, 'r') as f:
        header = f.readline().strip()
        column_names = header.split('|')
        # ndmin=2 keeps a single-row file 2-D so the column slice below works.
        data = np.loadtxt(f, delimiter='|', ndmin=2)

    # ignore SepsisLabel column if present
    if column_names[-1] == 'SepsisLabel':
        data = data[:, :-1]
    return data

def read_challenge_data_label(input_file, return_header = False):
    """Read a '|'-delimited file and split it into features and the SepsisLabel column.

    Parameters
    ----------
    input_file : str
        Path of the file to read. Its first line is the header.
    return_header : bool, optional
        When True, the feature column names are returned as a third element.

    Returns
    -------
    (data, sep_lab) or (data, sep_lab, column_names)
        `data` is the 2-D array without the last column, `sep_lab` is that
        last column, and `column_names` is the header without 'SepsisLabel'.

    Raises
    ------
    ValueError
        If the last header field is not 'SepsisLabel' (previously this failed
        later with an unbound-variable error).
    """
    with open(input_file, 'r') as f:
        header = f.readline().strip()
        column_names = header.split('|')
        # ndmin=2 keeps a single-row file 2-D so the column slices below work.
        data = np.loadtxt(f, delimiter='|', ndmin=2)

    if column_names[-1] != 'SepsisLabel':
        raise ValueError(
            "expected 'SepsisLabel' as the last column of {!r}".format(input_file)
        )

    # Split the label column off the feature matrix.
    sep_lab = data[:, -1]
    column_names = column_names[:-1]
    data = data[:, :-1]

    if return_header:
        return (data, sep_lab, column_names)
    return (data, sep_lab)

## Get sepsis patients
def get_sepsis_patients(sepsis_label, patient):
    """Flag every row by whether its patient has any positive sepsis label.

    Parameters
    ----------
    sepsis_label : numpy array
        Per-row label; a patient counts as septic when the sum of its labels
        is greater than zero.
    patient : 1-D numpy array
        Per-row patient id, aligned with `sepsis_label`.

    Returns
    -------
    patient_sep : numpy int array, one entry per row
        1 on every row of a septic patient, 0 otherwise.
    patient_sep_idx : numpy int array
        Row indices where `patient_sep` is non-zero.
    patient_healthy_idx : numpy int array
        Row indices where `patient_sep` is zero.
    """
    # The builtin `int` replaces the removed `np.int` alias (same dtype).
    patient_sep = np.zeros(len(sepsis_label), dtype=int)

    # Iterate over the ids actually present instead of relying on a global
    # patient count `n` being defined and the ids being exactly 0..n-1.
    for pid in np.unique(patient):
        rows = np.where(patient == pid)[0]
        patient_sep[rows] = int(np.sum(sepsis_label[rows]) > 0)

    patient_sep_idx = np.where(patient_sep != 0)[0]
    patient_healthy_idx = np.where(patient_sep == 0)[0]
    return patient_sep, patient_sep_idx, patient_healthy_idx


## Random seed
np.random.seed(seed=0)

## Create the feature matrix
# Accumulators for the raw-file branch; in the .npy branch `patient` and
# `sepsis_label` are replaced by the loaded arrays.
features = []
patient = []
sepsis_label = []
dataloaded = False


## Read data 
if not numpy_load:
    ## Folder and files
    # One file per patient; the sorted position of a file is its patient id.
    fnames = os.listdir(path)  
    fnames.sort()
    if 'README.md' in fnames:
        fnames.remove('README.md')
    print('last file: ', fnames[-1])
    
    n = len(fnames)
    print(n, ' files present')
    
    ## read data
    for i in range(n):
        input_file = os.path.join(path, fnames[i])
        if i ==0:
            # Column names are only taken from the first file.
            data, sep_lab, columns = read_challenge_data_label(input_file, return_header=True)
        else: 
            data, sep_lab = read_challenge_data_label(input_file)
        features.append(data)
        sepsis_label.append(sep_lab)
        # Per-row patient id; builtin `int` replaces the removed `np.int` alias.
        pat = i * np.ones((sep_lab.shape), dtype=int)
        patient.append(pat)

    # Stack all patients into one row-aligned set of arrays.
    feature_matrix = np.concatenate(features)
    del(features)
    sepsis_label = np.concatenate(sepsis_label)
    patient = np.concatenate(patient)
    dataloaded = True
    
else:

    # Load the cached arrays stored under ../npy/.
    npyfilename = "../npy/" + dataset + "_patient.npy"
    patient = np.load(npyfilename)
    print(npyfilename, " loaded")
    npyfilename = "../npy/" + dataset + "_Y.npy"
    sepsis_label = np.load(npyfilename)
    print(npyfilename, " loaded")

#ADD nanfill tag
    # Only the feature file name carries the tag; the patient and label
    # files above were loaded with the untagged name.
    if nanfill:
        dataset = dataset + "_nanfill"    
    
    if mm:
        npyfilename = "../npy/" + dataset + "_mm.npy"
        # The flag is cleared once the "_mm" file has been selected.
        mm = False
        print(npyfilename, '(mm) to be loaded')

    else:
        npyfilename = "../npy/" + dataset + ".npy"
        print(npyfilename, '(not mm) to be loaded')

    # Patient count = number of distinct ids in the loaded patient array.
    n = len(np.unique(patient))
    print(n, ' files present')
    
    dataloaded = True
    feature_matrix = np.load(npyfilename)

##Flatten patient
patient = patient.flatten()

## Separate pointers
# Basic slices of feature_matrix: everything but the last 6 columns, and the last 6.
feature_phys = feature_matrix[:,:-6]    ## Physiology
feature_demog = feature_matrix[:,-6:]   ## Demographics



In [None]:
# Sanity check: dimensions of the loaded feature matrix.
np.shape(feature_matrix)

In [None]:
# Per-row septic flag plus the row indices of the septic and healthy groups.
patient_sep, patient_sep_idx, patient_healthy_idx = get_sepsis_patients(sepsis_label, patient)

In [None]:
# Number of per-row flags (one per entry of sepsis_label).
len(patient_sep)

In [None]:
# Number of rows in the feature matrix, for comparison with the flag count.
len(feature_matrix)

In [None]:
# Number of rows that belong to patients flagged as septic.
len(patient_sep_idx)

In [None]:
# Row indices with a non-zero flag (the same expression get_sepsis_patients uses for patient_sep_idx).
np.where(patient_sep!=0)[0]

In [None]:
# Sum of the labels over the rows flagged healthy; a positive sum would contradict the flagging rule.
np.sum(sepsis_label[patient_healthy_idx])

In [None]:
# Row count of the septic group.
len(patient_sep_idx)

In [None]:
# Row count of the healthy group.
len(patient_healthy_idx)

In [None]:
# The two index sets split the rows on flag != 0 / == 0, so this should equal len(patient_sep).
len(patient_sep_idx)+len(patient_healthy_idx)

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline


In [None]:
# Labels of the rows flagged healthy, plotted in row order.
plt.plot(sepsis_label[patient_healthy_idx])

In [None]:
# Labels of the rows flagged septic, plotted in row order.
plt.plot(sepsis_label[patient_sep_idx])

In [None]:
# Peek at the first six per-row patient ids.
patient[:6]

In [None]:
## Get sepsis patients
def get_sepsis_patients(sepsis_label, patient):
    """Flag every row by whether its patient has any positive sepsis label.

    This cell re-defines a function that is already defined earlier in the
    notebook; running it shadows the earlier definition.

    Parameters
    ----------
    sepsis_label : numpy array
        Per-row label; a patient counts as septic when the sum of its labels
        is greater than zero.
    patient : 1-D numpy array
        Per-row patient id, aligned with `sepsis_label`.

    Returns
    -------
    patient_sep : numpy int array, one entry per row
        1 on every row of a septic patient, 0 otherwise.
    patient_sep_idx : numpy int array
        Row indices where `patient_sep` is non-zero.
    patient_healthy_idx : numpy int array
        Row indices where `patient_sep` is zero.
    """
    # The builtin `int` replaces the removed `np.int` alias (same dtype).
    patient_sep = np.zeros(len(sepsis_label), dtype=int)

    # Iterate over the ids actually present instead of relying on a global
    # patient count `n` being defined and the ids being exactly 0..n-1.
    for pid in np.unique(patient):
        rows = np.where(patient == pid)[0]
        patient_sep[rows] = int(np.sum(sepsis_label[rows]) > 0)

    patient_sep_idx = np.where(patient_sep != 0)[0]
    patient_healthy_idx = np.where(patient_sep == 0)[0]
    return patient_sep, patient_sep_idx, patient_healthy_idx



In [None]:
# Patient id of every row whose flag is 0.
patient[np.where(patient_sep==0)[0]]

In [None]:
# Row indices whose flag is 0 (the same expression get_sepsis_patients uses for patient_healthy_idx).
np.where(patient_sep==0)[0]

In [None]:
# Labels of the rows with a non-zero flag, plotted in row order.
plt.plot(sepsis_label[np.where(patient_sep!=0)[0]])

In [None]:
# Distinct (sorted) patient ids among the rows flagged healthy.
healthy_patient_list =  np.unique(patient[patient_healthy_idx])

In [None]:
# Distinct (sorted) patient ids among the rows flagged septic.
sep_patient_list =  np.unique(patient[patient_sep_idx])

In [None]:
# Display the septic patient ids.
sep_patient_list

In [None]:
# Row indices of the first healthy patient id; i_pat is reused by the following cells.
i_pat = np.where(patient==healthy_patient_list[0])[0]
i_pat

In [None]:

# NaN-ignoring column means over the rows in i_pat, across all columns.
(np.nanmean(feature_matrix[i_pat,:], axis=0))

In [None]:
# One template row: NaN-ignoring means of the physiology columns, followed by
# the demographic columns taken from the last row in i_pat.
np.hstack((np.nanmean(feature_phys[i_pat,:], axis = 0), feature_demog[i_pat[-1],:]))

In [None]:
# Healthy mean of means: one row per healthy patient, holding the NaN-ignoring
# mean of its physiology columns followed by the demographics of its last row.
H = []
# Iterate over the healthy patient ids themselves. The previous
# range(len(...)) loop used the counter as the id, so it averaged patients
# 0..k-1 whether or not they were healthy.
for i in healthy_patient_list:
    i_pat = np.where(patient==i)[0]
    H.append(np.hstack((np.nanmean(feature_phys[i_pat,:], axis = 0), feature_demog[i_pat[-1],:])))

In [None]:
# NaN-ignoring mean over the per-patient rows in H, shown as a single-row 2-D array.
np.nanmean(H, axis=0).reshape(1,-1)

In [None]:
# Average the per-patient rows in H into one template row (shape 1 x n_columns),
# then split it with the same "last 6 columns" rule used for feature_matrix.
HM = np.nanmean(H, axis=0)[np.newaxis, :]
HM_phys, HM_demog = HM[:, :-6], HM[:, -6:]

In [None]:
# Stack three copies of the first 34 template columns as rows.
# NOTE(review): 34 is hard-coded here; confirm it matches the column split intended (HM_phys uses [:-6]).
np.repeat(HM[:,:34], 3, axis = 0)

In [None]:
# Display the full template row.
HM

In [None]:
# np.savetxt(dataset + '_healthytemp.txt', HM, delimiter=', ', fmt='%1.17f')

## All-NaN slices

In [None]:
# Recompute the per-row septic flags and the id lists of both patient groups
# (inputs for the template computation; nothing is written to disk here).
patient_sep, patient_sep_idx, patient_healthy_idx = get_sepsis_patients(
    sepsis_label, patient
)
sep_patient_list = np.unique(patient[patient_sep_idx])
healthy_patient_list = np.unique(patient[patient_healthy_idx])

In [None]:
# Debug version of the template loop, restricted to patient id 286 (see the
# "All-NaN slices" heading above). The commented-out `for` lines are the
# wider ranges this cell was switched between.
# The loop variables `i` and `i_pat` are left in the namespace and are read
# by the cells that follow.
htemplist = []
# for i in range(len (healthy_patient_list)):
# for i in range(10000):
for i in [286]:
    print (i)
    i_pat = np.where(patient==i)[0]
    # Per-patient row: NaN-ignoring physiology means + demographics of the last row.
    htemplist.append(np.hstack((np.nanmean(feature_phys[i_pat,:], axis = 0), feature_demog[i_pat[-1],:])))
# Mean over the collected rows, as a single-row 2-D array, then split on the last 6 columns.
htemp = np.nanmean(htemplist, axis=0).reshape(1,-1)
htemp_phys = htemp[:,:-6]
htemp_demog = htemp[:,-6:] 

# stemplist = []
# for i in range(len (sep_patient_list)):
#     i_pat = np.where(patient==i)[0]
#     stemplist.append(np.hstack((np.nanmean(feature_phys[i_pat,:], axis = 0), feature_demog[i_pat[-1],:])))
# stemp = np.nanmean(stemplist, axis=0).reshape(1,-1)
# stemp_phys = stemp[:,:-6]
# stemp_demog = stemp[:,-6:] 


In [None]:
# Rows of patient `i`, where `i` is whatever value the most recently run loop left behind.
i_pat = np.where(patient==i)[0]
i_pat

In [None]:
# NaN-ignoring physiology column means for the rows in i_pat; a column that is
# NaN on every one of those rows yields NaN and a NumPy RuntimeWarning.
np.nanmean(feature_phys[i_pat,:], axis = 0)

## Get the templates

In [None]:
## Get sepsis patients
def get_sepsis_patients(sepsis_label, patient):
    """Flag every row by whether its patient has any positive sepsis label.

    This cell re-defines a function that is already defined earlier in the
    notebook; running it shadows the earlier definitions.

    Parameters
    ----------
    sepsis_label : numpy array
        Per-row label; a patient counts as septic when the sum of its labels
        is greater than zero.
    patient : 1-D numpy array
        Per-row patient id, aligned with `sepsis_label`.

    Returns
    -------
    patient_sep : numpy int array, one entry per row
        1 on every row of a septic patient, 0 otherwise.
    patient_sep_idx : numpy int array
        Row indices where `patient_sep` is non-zero.
    patient_healthy_idx : numpy int array
        Row indices where `patient_sep` is zero.
    """
    # The builtin `int` replaces the removed `np.int` alias (same dtype).
    patient_sep = np.zeros(len(sepsis_label), dtype=int)

    # Iterate over the ids actually present instead of relying on a global
    # patient count `n` being defined and the ids being exactly 0..n-1.
    for pid in np.unique(patient):
        rows = np.where(patient == pid)[0]
        patient_sep[rows] = int(np.sum(sepsis_label[rows]) > 0)

    patient_sep_idx = np.where(patient_sep != 0)[0]
    patient_healthy_idx = np.where(patient_sep == 0)[0]
    return patient_sep, patient_sep_idx, patient_healthy_idx

In [None]:
# Recompute the per-row septic flags and the id lists of both patient groups
# (inputs for the template computation; nothing is written to disk here).
patient_sep, patient_sep_idx, patient_healthy_idx = get_sepsis_patients(
    sepsis_label, patient
)
sep_patient_list = np.unique(patient[patient_sep_idx])
healthy_patient_list = np.unique(patient[patient_healthy_idx])

In [None]:
# Healthy template: mean over healthy patients of each patient's own summary
# row (NaN-ignoring physiology means + demographics of the patient's last row).
htemplist = []
# Iterate over the healthy patient ids themselves. The previous
# range(len(...)) loop used the counter as the id, so it averaged patients
# 0..k-1 regardless of group.
for i in healthy_patient_list:
    i_pat = np.where(patient==i)[0]
    htemplist.append(np.hstack((np.nanmean(feature_phys[i_pat,:], axis = 0), feature_demog[i_pat[-1],:])))
htemp = np.nanmean(htemplist, axis=0).reshape(1,-1)
# Same "last 6 columns" split used for feature_matrix.
htemp_phys = htemp[:,:-6]
htemp_demog = htemp[:,-6:] 

# Septic template: same computation over the septic patient ids.
stemplist = []
for i in sep_patient_list:
    i_pat = np.where(patient==i)[0]
    stemplist.append(np.hstack((np.nanmean(feature_phys[i_pat,:], axis = 0), feature_demog[i_pat[-1],:])))
stemp = np.nanmean(stemplist, axis=0).reshape(1,-1)
stemp_phys = stemp[:,:-6]
stemp_demog = stemp[:,-6:] 


In [None]:
# np.savetxt(dataset + '_healthytemp.txt', htemp, delimiter=', ', fmt='%1.17f')
# np.savetxt(dataset + '_septemp.txt', stemp, delimiter=', ', fmt='%1.17f')