In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

## Config
dataset = "training"
path = f"../{dataset}/"       # folder the input files are listed from

# Processing switches
nan_to_neg = True             # replace NaN entries by -1 before the ESN feed
biased_regress = True
normal_equations = True
mm = True                     # per-patient min-max normalisation
std = False                   # standard scaling (only considered when mm is off)
numpy_load = False            # load cached .npy arrays instead of the raw files

## ESN parameters
N_def = 100          # Neurons
scale_def = 0.001    # scaling
mem_def = 0.200      # memory
exponent_def = 1     # sigmoid exponent

# Script name struct for report
script_name = 'ESNtrain'
dl_ = '_'
name_struct_meta = "_N_scale_mem"
# Zero-padded neuron count followed by scale and memory with three decimals
name_struct = f'_{N_def:03d}_{scale_def:1.3f}_{mem_def:1.3f}'


In [2]:
## Imports
import os

import numpy as np
import scipy.linalg as linalg
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler

import ESNtools

In [3]:
## Random seed
# Fixed seed so the random masks drawn later are the same on every run
np.random.seed(0)

## Create the feature matrix
# Empty accumulators for the per-file arrays, plus the "data is loaded" flag
features, patient, sepsis_label = [], [], []
dataloaded = False




In [4]:
## Read data
# Builds three row-aligned arrays:
#   feature_matrix : all per-file data arrays stacked along axis 0
#   sepsis_label   : the labels returned for every row
#   patient        : for every row, the index of the file it came from
# Either the raw files are read through ESNtools, or cached .npy arrays are loaded.
if not numpy_load:
    ## Folder and files
    fnames = sorted(os.listdir(path))
    if 'README.md' in fnames:
        fnames.remove('README.md')
                              ## !!!! test version
    fnames = fnames[:100]     ## !!!! test version
    if not fnames:
        # Fail with a clear message instead of an IndexError on fnames[-1]
        raise FileNotFoundError('no data files found in ' + path)
    print('last file: ', fnames[-1])

    n = len(fnames)
    print(n, ' files present')

    ## read data
    # Cell-local accumulators: the cell can be re-run without depending on
    # module-level lists that a previous run already consumed.
    feature_chunks, label_chunks, patient_chunks = [], [], []
    for i, fname in enumerate(fnames):
        input_file = os.path.join(path, fname)
        if i == 0:
            # The header (column names) is only requested for the first file
            data, sep_lab, columns = ESNtools.read_challenge_data_label(input_file, return_header=True)
        else:
            data, sep_lab = ESNtools.read_challenge_data_label(input_file)
        feature_chunks.append(data)
        label_chunks.append(sep_lab)
        # Builtin int replaces the np.int alias, which no longer exists in NumPy >= 1.24
        patient_chunks.append(i * np.ones(sep_lab.shape, dtype=int))

    feature_matrix = np.concatenate(feature_chunks)
    del feature_chunks
    sepsis_label = np.concatenate(label_chunks)
    patient = np.concatenate(patient_chunks)
    dataloaded = True

else:
    if mm:
        npyfilename = "../../npy/" + dataset + "_mm.npy"
        # Clearing mm makes the later `if mm:` normalisation step a no-op
        # (presumably the _mm file is already min-max scaled -- TODO confirm)
        mm = False
        print(npyfilename, '(mm) to be loaded')

    else:
        npyfilename = "../../npy/" + dataset + ".npy"
        print(npyfilename, 'to be loaded')

    feature_matrix = np.load(npyfilename)
    npyfilename = "../../npy/" + dataset + "_patient.npy"
    patient = np.load(npyfilename)
    npyfilename = "../../npy/" + dataset + "_Y.npy"
    sepsis_label = np.load(npyfilename)

    n = len(np.unique(patient))
    print(n, ' files present')

    dataloaded = True


last file:  p000100.psv
100  files present


In [5]:
## Separate pointers
# Both names are basic slices (views) of feature_matrix, not copies
feature_phys = feature_matrix[:, :-6]    ## Physiology
feature_demog = feature_matrix[:, -6:]   ## Demographics

## Get sepsis patients
# patient_sep[r] is 1 when the labels of the patient owning row r sum to more
# than zero, else 0. Builtin int replaces the np.int alias, which no longer
# exists in NumPy >= 1.24.
patient_sep = np.zeros(len(sepsis_label), dtype=int)
for i in range(n):
    i_pat = np.where(patient == i)[0]
    # A scalar broadcasts over all rows of this patient
    patient_sep[i_pat] = int(np.sum(sepsis_label[i_pat]) > 0)

# NOTE: these hold the *patient number* of every flagged / unflagged row
# (one entry per row), not row positions into feature_matrix.
patient_sep_idx = patient[np.where(patient_sep != 0)]
patient_healthy_idx = patient[np.where(patient_sep == 0)]

In [6]:
## Normalize mm(all) or std (sepsis, phys) vals, feature-based
if mm:
    # Min-max scale every column separately for each patient, in place.
    # NOTE(review): the output recorded under this cell suggests the scaler
    # emits NumPy warnings for some patients (likely all-NaN columns) -- confirm.
    scaler = MinMaxScaler()
    for i in range(n):
        i_pat = np.where(patient == i)[0]
        if i_pat.size == 0:
            # No rows for this patient number: nothing to scale, and fit() would raise
            continue
        scaler.fit(feature_matrix[i_pat, :])
        feature_matrix[i_pat, :] = scaler.transform(feature_matrix[i_pat, :])

elif std:
    # Fit on the physiology columns of rows whose patient is not flagged as
    # sepsis, then apply to every row. A boolean row mask is used because
    # patient_healthy_idx holds patient numbers, not row positions.
    scaler = StandardScaler()
    healthy_rows = patient_sep == 0
    scaler.fit(feature_phys[healthy_rows, :])
    # feature_phys is a view of feature_matrix, so this writes through to it
    feature_phys[:, :] = scaler.transform(feature_phys[:, :])


  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = np.nanmax(X, axis=0)
  data_min = np.nanmin(X, axis=0)
  data_max = n

In [7]:
## nan to negative
# Overwrite every NaN entry of the feature matrix with -1, in place
if nan_to_neg:
    missing = np.isnan(feature_matrix)
    feature_matrix[missing] = -1
    del missing
    print("Changed nan to -1")
    

Changed nan to -1


In [8]:
## ESN Generation
# Working copies of the configured defaults: neurons, memory, scaling factor
N, mem, scale = N_def, mem_def, scale_def


In [9]:
## Nonlinear mapping function
# Function and exponent argument handed to the ESN feed
func = ESNtools.sigmoid
sigmoid_exponent = exponent_def


In [10]:
## Mask parameters
# Uniform random values in [-1, 1):
#   M  : one row per feature column, one column per neuron
#   Mb : a single row with one bias value per neuron
M = 2.0 * np.random.rand(feature_matrix.shape[1], N) - 1.0
Mb = 2.0 * np.random.rand(1, N) - 1.0


In [11]:
## Perform ESN feed
# Arguments are passed by keyword, following the feedESN signature
ESN = ESNtools.feedESN(
    features=feature_matrix,
    neurons=N,
    mask=M,
    mask_bias=Mb,
    scale=scale,
    mem=mem,
    func=func,
    f_arg=sigmoid_exponent,
)
# Drop this name for the feature matrix; it is not used after the feed
del feature_matrix

(3704, 101)
-11.699789993394877 11.468743713776908


In [12]:
# Display the function object to check the feedESN signature
ESNtools.feedESN

<function ESNtools.feedESN(features, neurons, mask, mask_bias, scale, mem, func, f_arg)>

In [13]:
# help(ESNtools.feedESN)

In [14]:
# Training target for the weight solvers: the sepsis labels
Y = sepsis_label

In [15]:
# Q, R = linalg.qr((np.matmul(np.transpose(ESN),ESN))) # QR decomposition with qr function (RtR)w = RtY
# y = np.dot(Q.T, np.matmul(np.transpose(ESN),Y)) # Let y=Q'.B using matrix multiplication
# x = linalg.solve(R, y) # Solve Rx=y

In [16]:
# Compute the ESN training weights with three solver variants from ESNtools so
# the results can be compared (each weight vector is displayed in a later cell).
# w0: get_weights_biasedNE; w1/w2: the qr / lu variants (presumably QR- and
# LU-based solves of the same system -- confirm in ESNtools).
w0 = ESNtools.get_weights_biasedNE(ESN, Y)
w1 = ESNtools.get_weights_qr_biasedNE(ESN, Y)
w2 = ESNtools.get_weights_lu_biasedNE(ESN, Y)

In [30]:
# def get_weights_biasedNE(ESN, target):
#     """Computes ESN training weights solving (pinv) the NE linear system w/ bias.
#     Parameters
#     ----------
#     ESN : (np.array) Echo State Network state
    
#     target : (np.array) target labels to train with
    
#     """
#     Y_aux = np.matmul(np.transpose(ESN),target)
#     ESNinv = np.linalg.pinv(np.matmul(np.transpose(ESN),ESN))
#     w = np.matmul(ESNinv, Y_aux)
#     return w


def wrapper(func, *args, **kwargs):
    """Bind ``func`` to fixed arguments and return a zero-argument callable.

    Parameters
    ----------
    func : callable to invoke
    *args, **kwargs : positional and keyword arguments forwarded to ``func``

    Returns
    -------
    A callable taking no arguments; every call runs ``func(*args, **kwargs)``
    again and returns its result.
    """
    def _thunk():
        result = func(*args, **kwargs)
        return result

    return _thunk

# Select the solver to time: exactly one of the lines below is left
# uncommented, binding that solver to (ESN, Y) as a zero-argument callable.
# wrapped = wrapper(ESNtools.get_weights_biasedNE, ESN, Y)
# wrapped = wrapper(ESNtools.get_weights_qr_biasedNE, ESN, Y)
wrapped = wrapper(ESNtools.get_weights_lu_biasedNE, ESN, Y)

In [31]:
import timeit
# s = """\
# ... ESNtools.get_weights_biasedNE(ESN, Y)
# ... """
# Total elapsed seconds for 1000 calls of the selected solver
timeit.Timer(wrapped).timeit(number=1000)

0.7097452870002599

In [19]:
# Display the weights returned by get_weights_qr_biasedNE
w1

array([ 2.69733974e+04, -1.99680653e+03, -1.48445525e+04, -5.01689912e+04,
       -1.89086170e+04, -8.56532115e+03, -1.42420273e+04, -1.04194127e+04,
        1.68006819e+04, -5.35373673e+02,  3.79553032e+04,  5.74552853e+03,
       -2.41657880e+04, -7.48602179e+03, -2.73265752e+04, -2.32031045e+04,
        2.28139668e+04, -4.21500951e+04, -8.06462538e+03,  3.19945787e+04,
       -1.09397943e+04, -8.84986481e+03,  2.45236478e+04,  2.91163518e+04,
        1.65159120e+03,  1.96626034e+04, -2.67813887e+04,  9.90189403e+03,
        2.70038703e+04, -1.76419239e+04, -7.13594595e+03,  1.55157832e+04,
        2.86676917e+04, -1.01125481e+04, -1.01092611e+04, -8.53234675e+03,
        3.82835730e+04,  1.65240743e+04, -3.68834180e+04,  2.15447490e+04,
        2.52399917e+04,  2.68368668e+04,  1.94217313e+04, -1.80289516e+04,
        9.36991987e+03,  1.50448905e+04,  1.46627360e+04, -3.72251564e+03,
        1.30071223e+04,  3.00321739e+04,  6.77810111e+04,  5.70632776e+03,
       -1.31864959e+04,  

In [20]:
# Display the weights returned by get_weights_biasedNE
w0

array([ 2.69067292e+04, -2.02250942e+03, -1.48315272e+04, -5.01382855e+04,
       -1.89163543e+04, -8.52326826e+03, -1.42351470e+04, -1.03902018e+04,
        1.67909282e+04, -5.15971291e+02,  3.79357783e+04,  5.71200598e+03,
       -2.41345140e+04, -7.49254166e+03, -2.73248201e+04, -2.31848132e+04,
        2.28123363e+04, -4.21360961e+04, -8.03498622e+03,  3.19395615e+04,
       -1.09438160e+04, -8.83794775e+03,  2.44983330e+04,  2.91061478e+04,
        1.67967253e+03,  1.96400785e+04, -2.67719042e+04,  9.91398992e+03,
        2.69682001e+04, -1.76428907e+04, -7.13269669e+03,  1.55354931e+04,
        2.86386528e+04, -1.01231060e+04, -1.01281362e+04, -8.53294171e+03,
        3.82316342e+04,  1.64724004e+04, -3.68357825e+04,  2.15018692e+04,
        2.52371651e+04,  2.68221739e+04,  1.94152872e+04, -1.80021378e+04,
        9.39853109e+03,  1.50722152e+04,  1.46605640e+04, -3.71290746e+03,
        1.30169749e+04,  3.00243320e+04,  6.76981159e+04,  5.67756052e+03,
       -1.31739040e+04,  

In [21]:
# Display the weights returned by get_weights_lu_biasedNE
w2

array([ 2.69731682e+04, -1.99638225e+03, -1.48445203e+04, -5.01686788e+04,
       -1.89082282e+04, -8.56511079e+03, -1.42420434e+04, -1.04191684e+04,
        1.68004408e+04, -5.35329629e+02,  3.79551234e+04,  5.74550814e+03,
       -2.41656415e+04, -7.48593572e+03, -2.73265691e+04, -2.32028230e+04,
        2.28139603e+04, -4.21502302e+04, -8.06443408e+03,  3.19944384e+04,
       -1.09396722e+04, -8.84973760e+03,  2.45233456e+04,  2.91162457e+04,
        1.65136077e+03,  1.96624865e+04, -2.67813668e+04,  9.90186326e+03,
        2.70037302e+04, -1.76419919e+04, -7.13588215e+03,  1.55157743e+04,
        2.86674756e+04, -1.01125993e+04, -1.01091571e+04, -8.53228790e+03,
        3.82833326e+04,  1.65238452e+04, -3.68831512e+04,  2.15445034e+04,
        2.52398908e+04,  2.68368197e+04,  1.94217394e+04, -1.80287899e+04,
        9.36991024e+03,  1.50446478e+04,  1.46625981e+04, -3.72247435e+03,
        1.30070161e+04,  3.00317718e+04,  6.77804851e+04,  5.70617503e+03,
       -1.31863211e+04,  