In [10]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression, Ridge
from sklearn.metrics import roc_auc_score, f1_score
from sklearn.model_selection import train_test_split, GridSearchCV
import tensorflow as tf
from tensorflow import keras
from tqdm import tqdm
from tensorflow.keras import layers
import sklearn

In [2]:
X_train = pd.read_csv('task2_k49am2lqi/train_features.csv')
y_train = pd.read_csv('task2_k49am2lqi/train_labels.csv')

In [3]:
X_test = pd.read_csv('task2_k49am2lqi/test_features.csv')

In [4]:
X_test.head(14)

Unnamed: 0,pid,Time,Age,EtCO2,PTT,BUN,Lactate,Temp,Hgb,HCO3,...,Alkalinephos,SpO2,Bilirubin_direct,Chloride,Hct,Heartrate,Bilirubin_total,TroponinI,ABPs,pH
0,0,1,39.0,,,,,,,,...,,,,,,,,,,
1,0,2,39.0,,44.2,17.0,,36.0,10.2,13.0,...,119.0,100.0,,98.0,31.0,82.0,21.8,,119.0,
2,0,3,39.0,,,,,,,,...,,100.0,,,,78.0,,,125.0,7.34
3,0,4,39.0,,,,,,,,...,,100.0,,,,80.0,,,136.0,
4,0,5,39.0,,,,,,,,...,,100.0,,,,83.0,,,135.0,
5,0,6,39.0,,,,,36.0,,,...,,100.0,,,,88.0,,,144.0,
6,0,7,39.0,,38.5,20.0,,,9.1,16.0,...,109.0,100.0,,102.0,25.9,,26.4,,,
7,0,8,39.0,,,,,36.0,,,...,,100.0,,,,90.0,,,129.0,7.4
8,0,9,39.0,,,,,36.0,,,...,,100.0,,,,90.0,,,121.0,
9,0,10,39.0,,,,,36.0,,,...,,100.0,,,,85.0,,,120.0,


In [5]:
def process_data(X):
    X = X.fillna(0)
    X_new = pd.DataFrame(columns=X.columns).drop('Time', axis=1)
    length = X.shape[1] - 3

    for i in range(0, X.shape[0], 12):
        new_f = np.zeros(length + 2)
        for j in range(length):
            temp = 0
            counts = 0
            for k in range(12):
                t = k*X.iloc[i + k ,3 + j]
                temp += t
                if t != 0:
                    counts += k
        
            if counts == 0:
                new_f[j + 2] = -1 #No valid measurement
            else:
                new_f[j + 2] = temp/counts
    
        new_f[0] = X.iloc[i, 0]
        new_f[1] = X.iloc[i, 2]
    
        X_new.loc[i] = new_f
    
    return X_new

In [6]:
X_train = process_data(X_train)

In [7]:
X_test = process_data(X_test)

In [8]:
pid = X_test.values[:, 0]

In [11]:
scaler = sklearn.preprocessing.StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
X_train

array([[-1.7250923 , -1.70642596, -0.26397773, ..., -0.11519864,
        -0.27932138,  1.18638363],
       [-1.7241089 ,  0.54256564, -0.26397773, ..., -0.01013139,
         0.4492829 , -0.84423485],
       [-1.71427485,  0.36021497, -0.26397773, ..., -0.11519864,
        -0.27034466, -0.84423485],
       ...,
       [-0.63296652, -0.55153838, -0.26397773, ..., -0.11519864,
        -0.86416777,  1.19286211],
       [-0.63274798,  1.63666967, -0.26397773, ...,  0.11618708,
         0.64500149, -0.84423485],
       [-0.63263872,  1.39353544, -0.26397773, ..., -0.11519864,
         0.17215924, -0.84423485]])

In [13]:
X_test

array([[-1.72520157, -1.40250818, -0.26397773, ..., -0.11519864,
         0.3206691 ,  1.18624449],
       [-0.63242018, -0.00448637, -0.26397773, ..., -0.11519864,
        -0.20402138, -0.84423485],
       [-0.63220165,  0.6033492 , -0.26397773, ..., -0.11519864,
         0.229316  ,  1.20109378],
       ...,
       [-0.63340359, -0.67310549, -0.26397773, ..., -0.11519864,
        -0.72445126,  1.19098577],
       [-0.63318505, -0.61232194, -0.26397773, ..., -0.11519864,
         0.02897869,  1.21527622],
       [-0.63285725, -0.30840415, -0.26397773, ..., -0.11519864,
        -0.17251331,  1.17857774]])

In [14]:
header = ['pid', 'LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total', 'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2', 'LABEL_Bilirubin_direct', 'LABEL_EtCO2','LABEL_Sepsis','LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']

In [15]:
#Task1
t1_labels = ['LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total', 'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2', 'LABEL_Bilirubin_direct', 'LABEL_EtCO2']
t1_train = np.array(y_train[t1_labels])
#Task2
t2_labels = ['LABEL_Sepsis']
t2_train = np.array(y_train[t2_labels])
#Task3
t3_labels = ['LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']
t3_train = np.array(y_train[t3_labels])

In [16]:
X_train1, X_test1, y_train1, y_test1 = train_test_split(X_train, t1_train, test_size=0.2)
X_train2, X_test2, y_train2, y_test2 = train_test_split(X_train, t2_train, test_size=0.2)
X_train3, X_test3, y_train3, y_test3 = train_test_split(X_train, t3_train, test_size=0.2)

In [24]:
y_train33 = [y_train3[:,0], y_train3[:,1], y_train3[:,2], y_train3[:,3]]
y_test33 = [y_test3[:,0], y_test3[:,1], y_test3[:,2], y_test3[:,3]]

In [18]:
model1 = keras.Sequential(
    [
        keras.Input(shape=(X_train[0].size,)),
        layers.Dense(50, activation="relu", name="layer1"),
        layers.Dense(30, activation="relu", name="layer2"),
        layers.Dense(25, activation="relu", name="layer4"),
        layers.Dense(t1_train.shape[1], activation='sigmoid', name="layer5")
    ]
)

model2 = keras.Sequential(
    [
        keras.Input(shape=(X_train[0].size,)),
        layers.Dense(50, activation="relu", name="layer1"),
        layers.Dense(30, activation="relu", name="layer2"),
        layers.Dense(25, activation="relu", name="layer4"),
        layers.Dense(1, activation='sigmoid', name="layer5")
    ]
)


input3 = keras.Input(shape=(X_train[0].size,))
x1 = layers.Dense(50, activation="relu")(input3)
x2 = layers.Dense(50, activation="relu")(input3)
x3 = layers.Dense(50, activation="relu")(input3)
x4 = layers.Dense(50, activation="relu")(input3)
        
y1 = layers.Dense(30, activation="relu")(x1)
y2 = layers.Dense(30, activation="relu")(x2)
y3 = layers.Dense(30, activation="relu")(x3)
y4 = layers.Dense(30, activation="relu")(x4)

z1 = layers.Dense(10, activation="relu")(y1)
z2 = layers.Dense(10, activation="relu")(y2)
z3 = layers.Dense(10, activation="relu")(y3)
z4 = layers.Dense(10, activation="relu")(y4)

out1 = layers.Dense(1, activation="linear")(y1)
out2 = layers.Dense(1, activation="linear")(y2)
out3 = layers.Dense(1, activation="linear")(y3)
out4 = layers.Dense(1, activation="linear")(y4)

model3 = keras.Model(inputs=input3, outputs=[out1,out2,out3,out4])

In [19]:
def coeff_determination(y_true, y_pred):
    from tensorflow.keras import backend as K
    SS_res =  K.sum(K.square( y_true-y_pred ))
    SS_tot = K.sum(K.square( y_true - K.mean(y_true) ) )
    return ( 1 - SS_res/(SS_tot + K.epsilon()) )

In [20]:
model1.compile(loss=tf.keras.losses.MeanSquaredError(),optimizer='adam',metrics=[tf.keras.metrics.AUC(curve = 'ROC')])
model2.compile(loss='binary_crossentropy',optimizer='adam',metrics=[tf.keras.metrics.AUC(curve = 'ROC')])
model3.compile(loss=tf.keras.losses.MeanSquaredError(), optimizer='adam',metrics=[coeff_determination])

In [21]:
history1 = model1.fit(
    X_train1,
    y_train1,
    batch_size=64,
    epochs=5,
    validation_data=(X_test1, y_test1),
)
model1.save('NN/model1')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: NN/model1/assets


In [22]:
history2 = model2.fit(
    X_train2,
    y_train2,
    batch_size=64,
    epochs=5,
    validation_data=(X_test2, y_test2),
)
model2.save('NN/model2')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: NN/model2/assets


In [25]:
history3 = model3.fit(
    X_train3,
    y_train33,
    batch_size=64,
    epochs=5,
    validation_data=(X_test3, y_test33),
)
model3.save('NN/model3')

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
INFO:tensorflow:Assets written to: NN/model3/assets


In [26]:
result1 = model1.predict(X_test)
result2 = model2.predict(X_test)
result3 = model3.predict(X_test)

In [27]:
upload = np.zeros((X_test.shape[0], len(header)))
upload[:,0] = pid
upload[:,1:11] = result1
upload[:,11] = result2.ravel()
upload[:,12] = result3[0].ravel()
upload[:,13] = result3[1].ravel()
upload[:,14] = result3[2].ravel()
upload[:,15] = result3[3].ravel()

In [28]:
df = pd.DataFrame(data=upload, columns=header)

In [29]:
df.to_csv('submission_NN.zip', index=False, float_format='%.3f', compression = 'zip')

In [30]:
df.head()

Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0.0,0.26581,0.491567,0.993333,0.997281,0.989154,0.285337,0.003007,0.146806,0.08023,0.000538,0.034907,15.742373,96.04171,113.454903,93.057205
1,10001.0,0.037342,0.051367,0.259923,0.232308,0.247498,0.057214,0.082473,0.077523,0.021445,0.016459,0.048245,18.177958,86.130508,95.157013,100.844078
2,10003.0,0.085885,0.055274,0.19933,0.186426,0.172321,0.242847,0.05895,0.300332,0.011529,0.06307,0.042426,17.160162,76.417061,97.444443,84.896194
3,10004.0,0.021565,0.061153,0.327598,0.354521,0.291046,0.046008,0.103588,0.07268,0.024463,0.019998,0.029717,16.473656,75.027458,93.063416,83.850525
4,10005.0,0.071278,0.021842,0.095166,0.082266,0.092101,0.061106,0.01645,0.084345,0.00271,0.002344,0.019375,19.878077,75.398003,99.937836,72.366646
