In [1]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression, LassoCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.multioutput import MultiOutputClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

VITALS = ['LABEL_RRate', 'LABEL_ABPm', 'LABEL_SpO2', 'LABEL_Heartrate']
TESTS = ['LABEL_BaseExcess', 'LABEL_Fibrinogen', 'LABEL_AST', 'LABEL_Alkalinephos', 'LABEL_Bilirubin_total',
         'LABEL_Lactate', 'LABEL_TroponinI', 'LABEL_SaO2',
         'LABEL_Bilirubin_direct', 'LABEL_EtCO2']

In [8]:
train_features = pd.read_csv('train_features.csv')
test_features = pd.read_csv('test_features.csv')

train_labels = pd.read_csv('train_labels.csv').sort_values(by='pid')

pids = test_features['pid'].drop_duplicates().sort_values().reset_index(drop=True)

train_features.columns

Index(['pid', 'Time', 'Age', 'EtCO2', 'PTT', 'BUN', 'Lactate', 'Temp', 'Hgb',
       'HCO3', 'BaseExcess', 'RRate', 'Fibrinogen', 'Phosphate', 'WBC',
       'Creatinine', 'PaCO2', 'AST', 'FiO2', 'Platelets', 'SaO2', 'Glucose',
       'ABPm', 'Magnesium', 'Potassium', 'ABPd', 'Calcium', 'Alkalinephos',
       'SpO2', 'Bilirubin_direct', 'Chloride', 'Hct', 'Heartrate',
       'Bilirubin_total', 'TroponinI', 'ABPs', 'pH'],
      dtype='object')

In [3]:
def feature_engineer(data):
    data = (data.groupby('pid').mean()).fillna(data.median())
    data = data.drop('Time', axis=1).sort_values(by='pid')

    scaler = StandardScaler()
    return scaler.fit_transform(data)


In [10]:
X = feature_engineer(train_features)

X_submission = feature_engineer(test_features)

TypeError: 'numpy.int64' object is not callable

In [12]:
y = train_labels[TESTS]
model = MultiOutputClassifier(SVC(kernel='linear', probability=True)).fit(X, y)

y_pred = model.predict_proba(X)

[0.8652395157045095,
 0.9406913493595368,
 0.7882084576241446,
 0.7950517634672749,
 0.7910159677136339,
 0.8362870679066503,
 0.9280575539568345,
 0.8311984558694507,
 0.9687664502544306,
 0.9582382874188454,
 0.943323390068433]

In [7]:
y_pred

[array([-1.03852868, -1.07151806, -0.55395272, ..., -1.13191134,
        -1.09950827,  2.25331592]),
 array([-1.03660963, -0.99984996, -1.14804001, ..., -1.00035048,
        -1.01085038, -1.0475097 ]),
 array([-1.07093593, -1.1006274 , -0.6147735 , ..., -0.95168275,
        -0.94553485, -1.31751577]),
 array([-1.07393636, -1.08074635, -0.60056261, ..., -0.93485329,
        -0.94020295, -1.31207019]),
 array([-1.06742096, -1.0857644 , -0.58036547, ..., -0.94757973,
        -0.9380218 , -1.27911068]),
 array([-1.06920706, -1.14116867, -1.12920917, ..., -1.09375562,
        -1.06374028,  0.91514772]),
 array([-1.03609388, -0.99976901, -1.00026155, ..., -0.93327067,
        -0.99469113, -1.19544631]),
 array([-1.191059  , -1.11668952, -1.02692942, ..., -1.06006594,
        -1.13923201, -0.2720695 ]),
 array([-1.05394391, -1.04471008, -0.77021658, ..., -1.00092157,
        -1.00966428, -1.03875948]),
 array([-1.06078934, -1.02060016, -1.11117509, ..., -0.34566652,
        -0.96675277, -1.23

In [48]:
labels_tests = pd.DataFrame(np.transpose(y_pred), columns=(TESTS + ['LABEL_Sepsis']))
labels_tests.head(20)

Unnamed: 0,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis
0,0.430861,0.526732,0.999519,0.999433,0.99949,0.477557,0.0088,0.281171,0.127607,0.211423,0.134063
1,0.120834,0.055385,0.308823,0.309664,0.316681,0.132354,0.183899,0.196462,0.029559,0.037826,0.030964
2,0.252097,0.04774,0.174448,0.171533,0.177333,0.157479,0.064857,0.209998,0.023352,0.038265,0.05124
3,0.915501,0.536227,0.990402,0.989842,0.991532,0.517301,0.009557,0.90063,0.150142,1.4e-05,0.051194
4,0.211334,0.027014,0.358793,0.357392,0.367929,0.130878,0.150482,0.164371,0.024251,0.024981,0.080673
5,0.074344,0.021954,0.197548,0.194029,0.192804,0.069889,0.169441,0.091993,0.018711,0.043749,0.031822
6,0.03149,0.052753,0.345306,0.339252,0.341091,0.348342,0.244687,0.442916,0.058988,0.233205,0.059942
7,0.633419,0.156491,0.042263,0.039758,0.037145,0.375436,0.046826,0.725814,0.025754,0.015169,0.046069
8,0.222891,0.050431,0.055883,0.051884,0.045562,0.157093,0.097805,0.220345,0.02186,0.040236,0.047755
9,0.483607,0.030227,0.419012,0.416288,0.45564,0.13872,0.142036,0.18282,0.033683,0.010799,0.15385


In [49]:
scores = []
y_pred = []

for vital in VITALS:
    y = train_labels[vital]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    model = LassoCV(random_state=32).fit(X_train, y_train)
    scores.append(model.score(X_test, y_test))
    y_pred.append(model.predict(X_submission))

scores

[0.39493444559915114,
 0.6010213826254593,
 0.3619866585828676,
 0.6120118970537956]

In [50]:
y_pred

[array([14.76442732, 17.93430219, 18.78633528, ..., 18.01603238,
        19.56639414, 18.13346382]),
 array([81.52201788, 85.73393602, 73.08058334, ..., 77.34402299,
        94.08561361, 84.35816898]),
 array([98.79399897, 96.50970159, 95.85093817, ..., 98.25844989,
        97.49659173, 98.6043507 ]),
 array([ 86.15570832,  96.80844278,  70.03263898, ...,  86.72350595,
        118.35054716, 107.8817294 ])]

In [51]:
labels_vitals = pd.DataFrame(np.transpose(y_pred), columns=VITALS)
labels_vitals.head()

Unnamed: 0,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,14.764427,81.522018,98.793999,86.155708
1,17.934302,85.733936,96.509702,96.808443
2,18.786335,73.080583,95.850938,70.032639
3,18.224721,85.094112,98.097519,93.821351
4,19.669912,88.520382,96.345225,89.402206


In [52]:
result = pd.concat([pids, labels_tests, labels_vitals], axis=1)

In [53]:
result.to_csv('prediction.zip', index=False, float_format='%.3f', compression='zip')

In [55]:
pred = pd.read_csv('sample.zip')
pred

Unnamed: 0,pid,LABEL_BaseExcess,LABEL_Fibrinogen,LABEL_AST,LABEL_Alkalinephos,LABEL_Bilirubin_total,LABEL_Lactate,LABEL_TroponinI,LABEL_SaO2,LABEL_Bilirubin_direct,LABEL_EtCO2,LABEL_Sepsis,LABEL_RRate,LABEL_ABPm,LABEL_SpO2,LABEL_Heartrate
0,0,0.940,0.341,0.597,0.651,0.557,0.745,0.224,0.363,0.506,0.643,0.162,18.796,82.511,96.947,84.12
1,10001,0.773,0.320,0.451,0.152,0.001,0.525,0.276,0.327,0.316,0.656,0.486,18.796,82.511,96.947,84.12
2,10003,0.741,0.211,0.348,0.153,0.859,0.446,0.406,0.607,0.757,0.290,0.451,18.796,82.511,96.947,84.12
3,10004,0.147,0.312,0.733,0.129,0.356,0.367,0.931,0.715,0.434,0.005,0.785,18.796,82.511,96.947,84.12
4,10005,0.255,0.746,0.587,0.743,0.248,0.330,0.071,0.291,0.399,0.217,0.040,18.796,82.511,96.947,84.12
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12659,9989,0.943,0.541,0.373,0.944,0.562,0.594,0.838,0.938,0.401,0.195,0.647,18.796,82.511,96.947,84.12
12660,9991,0.561,0.040,0.095,0.667,0.918,0.323,0.784,0.343,0.552,0.047,0.916,18.796,82.511,96.947,84.12
12661,9992,0.112,0.962,0.967,0.564,0.064,0.545,0.210,0.853,0.429,0.829,0.093,18.796,82.511,96.947,84.12
12662,9994,0.892,0.540,0.868,0.201,0.259,0.632,0.282,0.810,0.724,0.074,0.936,18.796,82.511,96.947,84.12
