In [1]:
import pandas as pd
import numpy as np
import math
from sklearn import svm
from sklearn import preprocessing
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score
from sklearn import linear_model
from sklearn.kernel_ridge import KernelRidge

## Load the training features into X.
X = pd.read_csv("train_features.csv")

## Load the training labels into y.
y = pd.read_csv("train_labels.csv")

## Load the test features into X_test (unlabelled data to predict on).
X_test = pd.read_csv("test_features.csv")

##rearrange every 12 consecutive samples into one row, because it's a time series
def reara12(arr, n_steps=12):
    """Flatten each run of ``n_steps`` consecutive rows into a single row.

    Row ``k`` of the result is rows ``k * n_steps`` .. ``(k + 1) * n_steps - 1``
    of ``arr`` laid end to end, in their original order.

    Parameters
    ----------
    arr : 2-D array-like, shape (rows, cols)
        Samples stacked so that each group of ``n_steps`` consecutive rows
        belongs together.
    n_steps : int, default 12
        Number of consecutive rows merged into one output row.

    Returns
    -------
    numpy.ndarray of float, shape (rows // n_steps, cols * n_steps)
        A new array; ``arr`` is never modified or aliased.

    Raises
    ------
    ValueError
        If ``arr`` is not 2-D, ``n_steps`` is not positive, or the number of
        rows is not a multiple of ``n_steps`` (a partial group cannot be
        flattened).
    """
    # np.array always copies, so the caller's data is never aliased.
    data = np.array(arr, dtype=float)
    if data.ndim != 2:
        raise ValueError("reara12 expects a 2-D array, got %d dimension(s)" % data.ndim)
    if n_steps < 1:
        raise ValueError("n_steps must be a positive integer, got %r" % (n_steps,))

    rows, cols = data.shape
    if rows % n_steps != 0:
        raise ValueError(
            "number of rows (%d) is not a multiple of n_steps (%d)" % (rows, n_steps)
        )

    # A row-major reshape concatenates consecutive rows, which is exactly the
    # grouping wanted, without an append loop.
    return data.reshape(rows // n_steps, cols * n_steps)

##numerically stable logistic function (does not overflow for large |x|)
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Parameters
    ----------
    x : scalar or array-like of numbers

    Returns
    -------
    numpy float for scalar input, numpy.ndarray of float (same shape as
    ``x``) for array input.
    """
    values = np.asarray(x, dtype=float)
    # exp() of a non-positive number lies in [0, 1], so it can never overflow.
    decay = np.exp(-np.abs(values))
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
    # e^x / (1 + e^x). Both are written in terms of the safe `decay`.
    result = np.where(values >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as is.
    return result[()]

# otypes is given explicitly so that empty inputs work as well.
sigmoid_v = np.vectorize(sigmoid, otypes=[float])

##maps each requested column label to its position in df.columns
def label_to_ind(df, label_list):
    """Look up the column position of every label in ``label_list``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose ``columns`` are searched.
    label_list : iterable of column labels

    Returns
    -------
    list
        One ``df.columns.get_loc(label)`` result per label, in the order given.
    """
    columns = df.columns
    return [columns.get_loc(label) for label in label_list]

##random oversampling function, assumption more 0 than 1. X: numpy array, y: pandas dataframe with labels
def ros(X, y, label_name):
    """Randomly oversample the rows labelled 1 until both classes are balanced.

    Rows of ``X`` whose label is 1 are drawn with replacement (using NumPy's
    global random state) and appended to ``X`` until there are as many 1s as
    0s. If the 1s are already at least as numerous as the 0s, nothing is added.

    Parameters
    ----------
    X : numpy.ndarray, shape (n_samples, n_features)
        Feature rows; row ``i`` corresponds to the ``i``-th row of ``y``.
    y : pandas.DataFrame
        Label frame containing the column ``label_name`` with values 0 / 1.
    label_name : str
        Column of ``y`` to balance.

    Returns
    -------
    (X_val_os, y_val_os) : tuple of numpy.ndarray
        ``X`` followed by the duplicated rows, and the matching label vector
        (original labels followed by 1.0 for every duplicated row).

    Raises
    ------
    ValueError
        If oversampling is required but no row is labelled 1.
    """
    X = np.asarray(X)
    labels = y[label_name].values

    # Use positional row numbers rather than y.index labels, so that X is
    # indexed correctly whatever index the label frame carries.
    neg_rows = np.flatnonzero(labels == 0)
    pos_rows = np.flatnonzero(labels == 1)

    n_extra = max(len(neg_rows) - len(pos_rows), 0)
    if n_extra and len(pos_rows) == 0:
        raise ValueError("cannot oversample %r: no rows with label 1" % (label_name,))

    if n_extra:
        copy_ind = np.random.choice(pos_rows, n_extra, replace=True)
    else:
        # Already balanced (or more 1s than 0s): nothing to duplicate.
        copy_ind = np.empty(0, dtype=int)

    # One fancy-indexing step instead of concatenating row by row.
    os_samples = X[copy_ind]

    X_val_os = np.concatenate((X, os_samples), axis=0)
    y_val_os = np.concatenate((labels, np.ones(len(os_samples), dtype=float)), axis=0)
    return X_val_os, y_val_os

def test_roc_auc(model, X, y, label_target, label_list, imputer, scaler):
    """Compute the ROC AUC of ``model`` on the samples in ``X``.

    ``X`` is imputed and scaled with the given transformers, reduced to the
    columns in ``label_list``, reshaped with ``reara12`` and fed to
    ``model.decision_function``; the sigmoid of the decision values is
    compared against ``y[label_target]``.

    Parameters
    ----------
    model : fitted estimator exposing ``decision_function``
    X : pandas.DataFrame
        Feature frame; must have the columns ``imputer`` and ``scaler`` were
        fitted on, because column positions are looked up in this frame.
    y : pandas.DataFrame
        Label frame containing the column ``label_target``.
    label_target : str
        Name of the label column used as ground truth.
    label_list : list
        Feature column names of ``X`` to keep.
    imputer, scaler : fitted transformers exposing ``transform``

    Returns
    -------
    float
        The value returned by ``roc_auc_score``.
    """
    # Use the imputer that was passed in, not the notebook-global `imp`.
    features = imputer.transform(X)
    features = scaler.transform(features)
    features = features[:, label_to_ind(X, label_list)]
    features = reara12(features)

    y_dec = model.decision_function(features)
    y_dec_prob = sigmoid_v(y_dec)
    y_true = y[label_target]

    return roc_auc_score(y_true, y_dec_prob)

def write_out(model, label_name, label_list, imputer, scaler, features=None):
    """Predict ``label_name`` for the test features and write ``<label_name>.csv``.

    The features are imputed and scaled with the given transformers, reduced
    to the columns in ``label_list``, reshaped with ``reara12`` and fed to
    ``model.decision_function``; the sigmoid of the decision values is written
    to disk.

    Parameters
    ----------
    model : fitted estimator exposing ``decision_function``
    label_name : str
        Name of the output column; also the stem of the CSV file name.
    label_list : list
        Feature column names to keep.
    imputer, scaler : fitted transformers exposing ``transform``
    features : pandas.DataFrame, optional
        Feature frame to predict on. Defaults to the notebook-global
        ``X_test``. It must have the columns the transformers were fitted on.

    Returns
    -------
    None
    """
    # The original assigned to a local named X_test before reading it, which
    # raises UnboundLocalError; read the global explicitly under another name.
    if features is None:
        features = X_test

    # Use the imputer that was passed in, not the notebook-global `imp`.
    transformed = imputer.transform(features)
    transformed = scaler.transform(transformed)
    # Column positions must come from the frame that was actually transformed.
    transformed = transformed[:, label_to_ind(features, label_list)]
    transformed = reara12(transformed)

    y_dec_prob = sigmoid_v(model.decision_function(transformed))

    # NOTE(review): the output frame is built from the training *feature*
    # columns (global X), while a later cell builds it from y.columns -
    # confirm which layout is wanted.
    df = pd.DataFrame(columns=X.columns.values)
    df[label_name] = y_dec_prob
    df.to_csv(label_name + ".csv")
    




In [2]:
## Flag every column of X in which more than 80% of the values are missing.
## (Despite its name, mv_row_indices holds one boolean per column.)
mv_row_indices = (X.isnull().sum() / X.shape[0] * 100).gt(80)

## Positions of the flagged columns; reused later to drop the same columns from X_test.
drop_ind = [pos for pos, too_sparse in enumerate(mv_row_indices.values) if too_sparse]

## Training features without the sparse columns.
X_val = X.drop(columns=X.columns[drop_ind])
X_val

Unnamed: 0,pid,Time,Age,Temp,RRate,Glucose,ABPm,ABPd,SpO2,Heartrate,ABPs
0,1,3,34.0,36.0,16.0,,84.0,61.0,100.0,94.0,142.0
1,1,4,34.0,36.0,16.0,,81.0,62.5,100.0,99.0,125.0
2,1,5,34.0,36.0,18.0,,74.0,59.0,100.0,92.0,110.0
3,1,6,34.0,37.0,18.0,,66.0,49.5,100.0,88.0,104.0
4,1,7,34.0,,18.0,,63.0,48.0,100.0,81.0,100.0
...,...,...,...,...,...,...,...,...,...,...,...
227935,9999,8,85.0,,17.0,,78.0,,,80.0,110.0
227936,9999,9,85.0,,15.0,,88.0,,,83.0,123.0
227937,9999,10,85.0,36.0,22.0,,97.0,,98.0,80.0,138.0
227938,9999,11,85.0,,22.0,,84.0,,98.0,75.0,125.0


In [3]:
## Fit the mean imputer and the standard scaler on the training features,
## keeping both fitted objects (imp, scaler) for reuse on the test features.
imp = SimpleImputer(missing_values=np.nan, strategy='mean')
scaler = preprocessing.StandardScaler()
X_val = scaler.fit_transform(imp.fit_transform(X_val))

## Drop the first column (presumably the patient id - verify against the frame
## shown above) and merge each group of 12 rows into one sample.
X_val = reara12(X_val[:, 1:])
X_val

array([[-8.51212785e-01, -1.70642596e+00, -1.63225346e+00, ...,
         9.06244608e-01, -9.38927981e-01, -9.54503395e-01],
       [-1.27529265e+00,  5.42565645e-01,  2.72206792e-14, ...,
         1.30533115e-01, -5.75995797e-01,  6.38688101e-01],
       [-1.06325272e+00,  3.60214974e-01, -3.54773847e+00, ...,
        -2.57322632e-01,  1.72257470e+00, -2.98483367e-01],
       ...,
       [-3.05306244e-03, -5.51538380e-01,  2.83231550e-01, ...,
         9.06244608e-01,  6.33778148e-01, -1.28251341e+00],
       [-1.27529265e+00,  1.63666967e+00,  2.72206792e-14, ...,
         9.06244608e-01,  9.36221634e-01,  1.57585957e+00],
       [-1.27529265e+00,  1.39353544e+00,  2.72206792e-14, ...,
        -1.10235233e-14, -3.34041008e-01,  2.63819514e-01]])

In [4]:
## Apply the training-time preprocessing to X_test: drop the same sparse
## columns, then impute and scale with the already fitted imp / scaler.
X_test_val = X_test.drop(columns=X.columns[drop_ind])
X_test_val = scaler.transform(imp.transform(X_test_val))

## Drop the first column and merge each group of 12 rows into one sample.
X_test_val = reara12(X_test_val[:, 1:])

In [5]:
## Baseline: linear ridge regression on LABEL_RRate.
## The score below is computed on the same data the model was fitted on.
m = linear_model.Ridge(alpha=0.5)
m.fit(X_val, y["LABEL_RRate"].values)
m.score(X_val, y["LABEL_RRate"].values)

0.43528243650281784

In [6]:
## Polynomial (degree 5) kernel ridge regression on LABEL_RRate.
## The score below is computed on the same data the model was fitted on.
label_name = "LABEL_RRate"
m = KernelRidge(alpha=2.0, kernel="poly", degree=5)
m.fit(X_val, y[label_name].values)
m.score(X_val, y[label_name].values)

0.8874524181349948

In [19]:
## Polynomial (degree 6) kernel ridge regression on LABEL_ABPm.
## The score below is computed on the same data the model was fitted on.
label_name = "LABEL_ABPm"
m = KernelRidge(alpha=1.4, kernel="poly", degree=6)
m.fit(X_val, y[label_name].values)
m.score(X_val, y[label_name].values)

  overwrite_a=False)


0.9615552096602582

In [21]:
## Build a frame with the label columns of y, fill only the column of the
## current label_name with the model's test predictions, and save it as
## "<label_name>.csv". All other columns stay empty.
df = pd.DataFrame(columns=list(y.columns))
df[label_name] = m.predict(X_test_val)
df.to_csv(f"{label_name}.csv")

In [17]:
## Display the prediction frame df.
## NOTE(review): this cell's execution count (17) is lower than that of the
## cell that builds df (21), so the stored output may come from an earlier run.
df

Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,,,,,,,,,,,,,,86.284593,,
1,,,,,,,,,,,,,,90.677904,,
2,,,,,,,,,,,,,,78.526634,,
3,,,,,,,,,,,,,,77.649211,,
4,,,,,,,,,,,,,,77.127554,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12659,,,,,,,,,,,,,,73.906821,,
12660,,,,,,,,,,,,,,83.523914,,
12661,,,,,,,,,,,,,,67.398512,,
12662,,,,,,,,,,,,,,105.937314,,
