In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.impute import KNNImputer
import tensorflow as tf
# Print the installed TensorFlow version so it is recorded in the cell output.
print("TensorFlow version:", tf.__version__)

TensorFlow version: 2.8.4


In [2]:
class Preprocessor:
    """Drop sparse columns, impute missing values, scale features and
    optionally oversample the minority class.

    Pipeline applied by ``transform``:

    1. remove the columns whose missing-value fraction exceeded
       ``missing_threshold`` when ``fit`` was called;
    2. fill the remaining gaps with a distance-weighted ``KNNImputer``;
    3. rescale every feature with a ``MinMaxScaler``;
    4. (optional) balance the classes with ``RandomOverSampler``.

    Neither ``fit`` nor ``transform`` modifies the DataFrame passed in.

    Parameters
    ----------
    missing_threshold : float, default 0.9
        Columns whose fraction of missing values is strictly greater than
        this value are dropped.
    random_state : int or None, default None
        Seed forwarded to ``RandomOverSampler`` so the resampling can be
        made reproducible. ``None`` keeps the unseeded behaviour.
    """

    def __init__(self, missing_threshold=0.9, random_state=None):
        self.missing_threshold = missing_threshold

        # Learned / helper state; the trailing-underscore attributes are
        # populated by ``fit``.
        self.columns_to_drop_ = None
        self.columns_with_NaN = None
        self.imputer_ = None
        self.scaler = MinMaxScaler()
        self.oversampler = RandomOverSampler(random_state=random_state)

    def _drop_sparse_columns(self, X):
        """Return a copy of ``X`` without the columns selected during ``fit``."""
        # Drop each learned column that is present; columns already missing
        # from ``X`` are simply skipped instead of disabling the whole drop.
        present = [col for col in self.columns_to_drop_ if col in X.columns]
        return X.drop(columns=present)

    def fit(self, X):
        """Learn the columns to drop and fit the imputer and the scaler.

        Parameters
        ----------
        X : pandas.DataFrame
            Training features. Left unmodified.

        Returns
        -------
        Preprocessor
            ``self``, to allow call chaining.
        """
        missing_fraction = X.isnull().mean()
        self.columns_to_drop_ = X.columns[
            missing_fraction > self.missing_threshold
        ].to_list()
        X_kept = self._drop_sparse_columns(X)

        self.imputer_ = KNNImputer(n_neighbors=5, weights='distance')
        self.imputer_.fit(X_kept)
        self.scaler.fit(X_kept)

        return self

    def transform(self, X, y=None, oversampler=False):
        """Apply the fitted preprocessing to ``X``.

        Parameters
        ----------
        X : pandas.DataFrame
            Features to transform. Left unmodified.
        y : array-like or None, default None
            Target values; only required when ``oversampler`` is true.
        oversampler : bool, default False
            When true, resample ``(X, y)`` with the random oversampler after
            scaling.

        Returns
        -------
        tuple
            ``(X_transformed, y)`` where ``X_transformed`` is a DataFrame
            that keeps the row index of ``X`` (before any resampling) and
            ``y`` is the possibly resampled target, or ``None``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``oversampler`` is true but ``y`` is ``None``.
        """
        if self.columns_to_drop_ is None or self.imputer_ is None:
            raise RuntimeError("Preprocessor must be fitted before calling transform().")

        X_kept = self._drop_sparse_columns(X)

        # Keep the original index so the result stays aligned with ``y``.
        X_imputed = pd.DataFrame(
            self.imputer_.transform(X_kept),
            columns=X_kept.columns,
            index=X_kept.index,
        )
        X_out = pd.DataFrame(
            self.scaler.transform(X_imputed),
            columns=X_kept.columns,
            index=X_kept.index,
        )

        if oversampler:
            if y is None:
                raise ValueError("y is required when oversampler=True.")
            X_out, y = self.oversampler.fit_resample(X_out, y)

        return X_out, y

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return the transformed training data.

        The training data is oversampled whenever ``y`` is provided; without
        ``y`` there is nothing to balance, so only imputation and scaling are
        applied.
        """
        self.fit(X)
        return self.transform(X, y, oversampler=y is not None)




In [3]:

# Load the training table and split it into features and the target column.
df = pd.read_csv('hospital_deaths_train_08_rt42.csv')
target_variable = 'In-hospital_death'

# 'recordid' is dropped together with the target so it is not used as a feature.
X = df.drop(['recordid', target_variable], axis=1)
y = df[target_variable]

preprocessor = Preprocessor()

# fit_transform drops sparse columns, imputes, scales and oversamples.
X_train_filled, y_train_filled = preprocessor.fit_transform(X, y)

# Hand TensorFlow plain float32 arrays so the tensor dtypes are explicit.
train_dataset = tf.data.Dataset.from_tensor_slices(
    (
        X_train_filled.to_numpy(dtype='float32'),
        y_train_filled.to_numpy(dtype='float32'),
    )
)

# Use a shuffle buffer that covers the whole dataset. The rows added by the
# oversampler appear to sit at the end of the resampled data, so a buffer
# smaller than the dataset would leave the late batches almost entirely
# positive-class.
train_dataset = train_dataset.shuffle(buffer_size=len(X_train_filled)).batch(32)

In [4]:
# Preview the first rows of the preprocessed feature matrix returned by
# Preprocessor.fit_transform.
X_train_filled.head()

Unnamed: 0,Age,Gender,Height,Weight,CCU,CSRU,SICU,DiasABP_first,GCS_first,Glucose_first,...,Platelets_last,SysABP_last,TroponinT_last,WBC_last,Weight_last,pH_last,MechVentStartTime,MechVentDuration,MechVentLast8Hour,UrineOutputSum
0,0.52,1.0,0.014564,0.155328,0.0,1.0,0.0,0.40146,1.0,0.147465,...,0.104097,0.474308,0.20769,0.134087,0.181148,0.539683,0.049318,0.031707,0.0,0.081081
1,0.626667,1.0,0.011763,0.265984,0.0,0.0,0.0,0.321168,0.416667,0.081106,...,0.148394,0.521739,0.121164,0.12601,0.265984,0.619048,0.047569,0.937282,1.0,0.351351
2,0.426667,1.0,0.007947,0.196311,0.0,0.0,1.0,0.335766,0.416667,0.127189,...,0.062016,0.351779,0.00094,0.059774,0.18335,0.666667,0.006296,0.982578,1.0,0.432432
3,0.706667,1.0,0.014564,0.663934,0.0,0.0,0.0,0.368843,0.0,0.16682,...,0.213732,0.487216,0.005671,0.211632,0.663934,0.460317,0.038125,0.95122,1.0,0.405405
4,0.84,1.0,0.012843,0.260246,0.0,0.0,1.0,0.357261,1.0,0.138249,...,0.142857,0.47944,0.147351,0.11147,0.260246,0.622583,0.071918,0.685234,0.627353,0.272125


In [5]:
# Show the last labels of the resampled target returned by
# Preprocessor.fit_transform.
y_train_filled.tail()

4445    1
4446    1
4447    1
4448    1
4449    1
Name: In-hospital_death, dtype: int64

In [10]:
# Fully connected binary classifier: a stack of ReLU Dense layers, each
# (except the last hidden one) followed by batch normalization, ending in a
# single sigmoid unit that outputs a probability.
# NOTE(review): the 516-unit widths look like they may have been meant to be
# 512 — left unchanged here; confirm with the author.
model = tf.keras.models.Sequential([
    # input_shape must be a shape tuple (without the batch dimension).
    tf.keras.layers.InputLayer(input_shape=(X_train_filled.shape[1],)),
    tf.keras.layers.Dense(1024, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(516, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(516, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(516, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

In [11]:
# The network ends in a single sigmoid unit and the labels are 0/1, so the
# matching loss is binary cross-entropy. Categorical cross-entropy expects a
# multi-class probability vector and degenerates with a single output unit,
# giving no useful training signal.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [12]:
# train_dataset is a tf.data.Dataset that is already shuffled and batched, so
# batch_size and shuffle are not passed here: Keras documents that batch_size
# must not be specified for dataset inputs and that shuffle is ignored for them.
# The History object is kept so the training curves can be inspected later.
history = model.fit(train_dataset, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x204919750a0>

In [13]:
# Print the layer-by-layer summary of the model (output shapes and parameter counts).
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 1024)              113664    
                                                                 
 batch_normalization (BatchN  (None, 1024)             4096      
 ormalization)                                                   
                                                                 
 dense_8 (Dense)             (None, 516)               528900    
                                                                 
 batch_normalization_1 (Batc  (None, 516)              2064      
 hNormalization)                                                 
                                                                 
 dense_9 (Dense)             (None, 516)               266772    
                                                                 
 batch_normalization_2 (Batc  (None, 516)             