In [49]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout,BatchNormalization
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras.optimizers import Adam
from keras.utils.vis_utils import plot_model
from sklearn.preprocessing import scale,minmax_scale
from tensorflow.keras import initializers, optimizers
import io
import requests
import re
import warnings
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif
from keras.regularizers import l2
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier

In [50]:
# Labelled training rows; `df` is the working frame that later cells transform.
df = pd.read_csv("train.csv")
# Independent backup of the raw training rows. A plain `df2 = df` would only be a
# second name for the same object and would change whenever `df` is mutated.
df2 = df.copy()
# Unlabelled rows to predict on, plus a working copy that later cells transform.
test_data = pd.read_csv("test.csv")
df_test=test_data.copy()

In [51]:
def preprocess(df):
    """Turn a raw passenger frame into the numeric feature frame fed to the model.

    The input frame is copied first, so the caller's object is never modified.

    Steps, in order (the order fixes the output column order):
      1. Extract the honorific from ``Name`` into ``Title`` and fold uncommon
         honorifics into ``"Rare"``.
      2. Fill missing ``Age`` with the median age of passengers sharing the
         same title; rows still missing fall back to the overall median age.
      3. Encode ``Title`` as an integer code (titles outside the map become 1).
      4. Drop ``Cabin``, ``Name`` and ``Ticket``.
      5. Encode ``Sex`` (male=1, female=0) and ``Embarked`` (S=0, C=1, Q=2;
         missing ports are treated as "S").
      6. Add ``FamilySize`` (= SibSp + Parch + 1) and the ``isalone`` flag.
      7. Fill missing ``Fare`` with the mean fare, replace it by an ordinal
         bin 0-7 and add ``fare*pclass`` (= Pclass * binned fare).
      8. Replace ``Age`` by an ordinal bin 0-4.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Name``, ``Age``, ``Cabin``, ``Ticket``, ``Sex``,
        ``Embarked``, ``SibSp``, ``Parch``, ``Fare`` and ``Pclass``. Any other
        column (e.g. ``PassengerId``, ``Survived``) is passed through.

    Returns
    -------
    pandas.DataFrame
        A new frame: the surviving input columns in their original order,
        followed by ``Title``, ``FamilySize``, ``isalone`` and ``fare*pclass``.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    rare_titles = ['Lady', "Ms", 'Countess', 'Capt', 'Col',
                   'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona']
    title_codes = {"Mr": 1, "Miss": 2, "Mrs": 3, "Master": 4, "Rare": 5}
    sex_codes = {"male": 1, "female": 0}
    port_codes = {"S": 0, "C": 1, "Q": 2}
    # Upper (inclusive) edges of the bins; anything above the last edge lands
    # in one extra, final bin.
    fare_edges = [7.91, 14, 25, 31, 69, 99, 250]
    age_edges = [16, 32, 48, 64]

    def ordinal_bin(values, edges):
        """Return the 0-based index of the right-closed bin each value falls in.

        Missing values stay missing. Binning in one pass avoids rewriting the
        column in place, where an already-assigned bin code could be picked up
        again by a later threshold.
        """
        bounds = [float("-inf")] + list(edges) + [float("inf")]
        return pd.cut(values, bins=bounds, labels=False).astype(float)

    # Work on a private copy so the caller's frame keeps its raw content.
    df = df.copy()

    # --- title -------------------------------------------------------------
    # Raw string: "\." is a regex escape, not a Python string escape.
    df["Title"] = df["Name"].str.extract(r' ([A-Za-z]+)\.', expand=False)
    df["Title"] = df["Title"].replace(rare_titles, "Rare")

    # --- missing ages --------------------------------------------------------
    overall_median = df["Age"].median()
    title_medians = df.groupby("Title")["Age"].median()
    df["Age"] = df["Age"].fillna(df["Title"].map(title_medians))
    # Fallback for rows whose title is missing or has no known age at all, so
    # that no NaN reaches the network.
    if pd.notna(overall_median):
        df["Age"] = df["Age"].fillna(overall_median)

    # NOTE(review): titles outside `title_codes` (e.g. "Mlle", "Mme") are
    # encoded as 1, the same code as "Mr" - kept as is so the feature encoding
    # stays compatible with already-trained models.
    df["Title"] = df["Title"].map(title_codes).fillna(1)

    # --- columns that are not used as features -------------------------------
    # Cabin has many missing values; names and ticket numbers are not needed.
    df = df.drop(["Cabin", "Name", "Ticket"], axis=1)

    # --- categorical encodings -----------------------------------------------
    df["Sex"] = df["Sex"].map(sex_codes)
    df["Embarked"] = df["Embarked"].fillna("S").map(port_codes)

    # --- family features -----------------------------------------------------
    df["FamilySize"] = df["SibSp"] + df["Parch"] + 1
    df["isalone"] = (df["FamilySize"] == 1).astype(int)

    # --- fare ----------------------------------------------------------------
    df["Fare"] = df["Fare"].fillna(df["Fare"].mean())
    df["Fare"] = ordinal_bin(df["Fare"], fare_edges)
    df["fare*pclass"] = df["Pclass"] * df["Fare"]

    # --- age -----------------------------------------------------------------
    df["Age"] = ordinal_bin(df["Age"], age_edges)
    return df

# Run the feature pipeline on the training frame and rebind `df` to the result.
df = preprocess(df)
# Snapshot of the processed but not yet standardised training features
# (taken before scaler_fun is applied to `df` further down).
df_prescaled = df.copy()

def scaler_fun(df):
    """Standardise every column except ``Survived``.

    All non-label columns are passed through ``scale`` and rebuilt as a frame
    with the original column names and index; the untouched ``Survived``
    column is appended as the last column.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame that contains a ``Survived`` column.

    Returns
    -------
    pandas.DataFrame
        Scaled features followed by ``Survived``.
    """
    features = df.drop("Survived", axis=1)
    feature_names = features.columns.tolist()
    #alternative that was tried: minmax_scale(features)
    standardised = pd.DataFrame(scale(features), columns=feature_names, index=df.index)
    with_label = pd.concat([standardised, df["Survived"]], axis=1)
    return with_label.copy()

# Keep the unscaled passenger ids before the frame is standardised.
pass_id_train = df["PassengerId"].copy()
df = scaler_fun(df)

# Features: everything except the label and the passenger id. Label: Survived.
X = df.drop(columns=["Survived", "PassengerId"])
Y = df["Survived"]

# Hold out 10% of the rows for validation; fixed seed for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.1, random_state=42
)


In [52]:
# Fully connected binary classifier: 64 -> 32 -> 16 -> 16 -> 1 (sigmoid).
# The first three hidden layers are each followed by dropout (decreasing
# rate) and batch normalisation.
model = Sequential([
    Dense(64, activation="relu", input_dim=X.shape[1], kernel_initializer="normal"),
    Dropout(0.5),
    BatchNormalization(),
    Dense(32, activation="relu", kernel_initializer="normal"),
    Dropout(0.3),
    BatchNormalization(),
    Dense(16, activation="relu", kernel_initializer="normal"),
    Dropout(0.1),
    BatchNormalization(),
    Dense(16, activation="relu", kernel_initializer="normal"),
    Dense(1, activation="sigmoid"),
])

adam = Adam(learning_rate=2E-4)
model.compile(loss="binary_crossentropy", optimizer=adam, metrics=["accuracy"])

# Stop once the training loss has not improved for 25 consecutive epochs.
# NOTE(review): this watches the training loss although validation data is
# passed to fit(); consider monitor="val_loss" if the goal is to stop before
# over-fitting - left unchanged here.
callback = keras.callbacks.EarlyStopping(monitor='loss', patience=25)

# Full-batch training: one gradient step per epoch over all training rows.
# The batch size is taken from the data instead of a hard-coded row count, so
# it stays correct if the split size changes.
history = model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=1000,
    batch_size=len(x_train),
    callbacks=[callback],
)

Train on 801 samples, validate on 90 samples
Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000


Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
Epoch 73/1000
Epoch 74/1000
Epoch 75/1000
Epoch 76/1000
Epoch 77/1000
Epoch 78/1000
Epoch 79/1000
Epoch 80/1000
Epoch 81/1000
Epoch 82/1000
Epoch 83/1000
Epoch 84/1000
Epoch 85/1000
Epoch 86/1000
Epoch 87/1000
Epoch 88/1000
Epoch 89/1000
Epoch 90/1000
Epoch 91/1000
Epoch 92/1000
Epoch 93/1000
Epoch 94/1000
Epoch 95/1000
Epoch 96/1000
Epoch 97/1000
Epoch 98/1000
Epoch 99/1000
Epoch 100/1000
Epoch 101/1000
Epoch 102/1000
Epoch 103/1000
Epoch 104/1000
Epoch 105/1000
Epoch 106/1000
Epoch 107/1000
Epoch 108/1000
Epoch 109/1000
Epoch 110/1000
Epoch 111/1000
Epoch 112/1000
Epoch 113/1000


Epoch 114/1000
Epoch 115/1000
Epoch 116/1000
Epoch 117/1000
Epoch 118/1000
Epoch 119/1000
Epoch 120/1000
Epoch 121/1000
Epoch 122/1000
Epoch 123/1000
Epoch 124/1000
Epoch 125/1000
Epoch 126/1000
Epoch 127/1000
Epoch 128/1000
Epoch 129/1000
Epoch 130/1000
Epoch 131/1000
Epoch 132/1000
Epoch 133/1000
Epoch 134/1000
Epoch 135/1000
Epoch 136/1000
Epoch 137/1000
Epoch 138/1000
Epoch 139/1000
Epoch 140/1000
Epoch 141/1000
Epoch 142/1000
Epoch 143/1000
Epoch 144/1000
Epoch 145/1000
Epoch 146/1000
Epoch 147/1000
Epoch 148/1000
Epoch 149/1000
Epoch 150/1000
Epoch 151/1000
Epoch 152/1000
Epoch 153/1000
Epoch 154/1000
Epoch 155/1000
Epoch 156/1000
Epoch 157/1000
Epoch 158/1000
Epoch 159/1000
Epoch 160/1000
Epoch 161/1000
Epoch 162/1000
Epoch 163/1000
Epoch 164/1000
Epoch 165/1000
Epoch 166/1000
Epoch 167/1000
Epoch 168/1000


Epoch 169/1000
Epoch 170/1000
Epoch 171/1000
Epoch 172/1000
Epoch 173/1000
Epoch 174/1000
Epoch 175/1000
Epoch 176/1000
Epoch 177/1000
Epoch 178/1000
Epoch 179/1000
Epoch 180/1000
Epoch 181/1000
Epoch 182/1000
Epoch 183/1000
Epoch 184/1000
Epoch 185/1000
Epoch 186/1000
Epoch 187/1000
Epoch 188/1000
Epoch 189/1000
Epoch 190/1000
Epoch 191/1000
Epoch 192/1000
Epoch 193/1000
Epoch 194/1000
Epoch 195/1000
Epoch 196/1000
Epoch 197/1000
Epoch 198/1000
Epoch 199/1000
Epoch 200/1000
Epoch 201/1000
Epoch 202/1000
Epoch 203/1000
Epoch 204/1000
Epoch 205/1000
Epoch 206/1000
Epoch 207/1000
Epoch 208/1000
Epoch 209/1000
Epoch 210/1000
Epoch 211/1000
Epoch 212/1000
Epoch 213/1000
Epoch 214/1000
Epoch 215/1000
Epoch 216/1000
Epoch 217/1000
Epoch 218/1000
Epoch 219/1000
Epoch 220/1000
Epoch 221/1000
Epoch 222/1000
Epoch 223/1000
Epoch 224/1000


Epoch 225/1000
Epoch 226/1000
Epoch 227/1000
Epoch 228/1000
Epoch 229/1000
Epoch 230/1000
Epoch 231/1000
Epoch 232/1000
Epoch 233/1000
Epoch 234/1000
Epoch 235/1000
Epoch 236/1000
Epoch 237/1000
Epoch 238/1000
Epoch 239/1000
Epoch 240/1000
Epoch 241/1000
Epoch 242/1000
Epoch 243/1000
Epoch 244/1000
Epoch 245/1000
Epoch 246/1000
Epoch 247/1000
Epoch 248/1000
Epoch 249/1000
Epoch 250/1000
Epoch 251/1000
Epoch 252/1000
Epoch 253/1000
Epoch 254/1000
Epoch 255/1000
Epoch 256/1000
Epoch 257/1000
Epoch 258/1000
Epoch 259/1000
Epoch 260/1000
Epoch 261/1000
Epoch 262/1000
Epoch 263/1000
Epoch 264/1000
Epoch 265/1000
Epoch 266/1000
Epoch 267/1000
Epoch 268/1000
Epoch 269/1000
Epoch 270/1000
Epoch 271/1000
Epoch 272/1000
Epoch 273/1000
Epoch 274/1000
Epoch 275/1000
Epoch 276/1000
Epoch 277/1000
Epoch 278/1000
Epoch 279/1000
Epoch 280/1000


Epoch 281/1000
Epoch 282/1000
Epoch 283/1000
Epoch 284/1000
Epoch 285/1000
Epoch 286/1000
Epoch 287/1000
Epoch 288/1000
Epoch 289/1000
Epoch 290/1000
Epoch 291/1000
Epoch 292/1000
Epoch 293/1000
Epoch 294/1000
Epoch 295/1000
Epoch 296/1000
Epoch 297/1000
Epoch 298/1000
Epoch 299/1000
Epoch 300/1000
Epoch 301/1000
Epoch 302/1000
Epoch 303/1000
Epoch 304/1000
Epoch 305/1000
Epoch 306/1000
Epoch 307/1000
Epoch 308/1000
Epoch 309/1000
Epoch 310/1000
Epoch 311/1000
Epoch 312/1000
Epoch 313/1000
Epoch 314/1000
Epoch 315/1000
Epoch 316/1000
Epoch 317/1000
Epoch 318/1000
Epoch 319/1000
Epoch 320/1000
Epoch 321/1000
Epoch 322/1000
Epoch 323/1000
Epoch 324/1000
Epoch 325/1000
Epoch 326/1000
Epoch 327/1000
Epoch 328/1000
Epoch 329/1000
Epoch 330/1000
Epoch 331/1000
Epoch 332/1000
Epoch 333/1000
Epoch 334/1000
Epoch 335/1000
Epoch 336/1000


Epoch 337/1000
Epoch 338/1000
Epoch 339/1000
Epoch 340/1000
Epoch 341/1000
Epoch 342/1000
Epoch 343/1000
Epoch 344/1000
Epoch 345/1000
Epoch 346/1000
Epoch 347/1000
Epoch 348/1000
Epoch 349/1000
Epoch 350/1000
Epoch 351/1000
Epoch 352/1000
Epoch 353/1000
Epoch 354/1000
Epoch 355/1000
Epoch 356/1000


Below is the hyperparameter search function. I tried it, but found that tuning the parameters by hand worked better.

In [None]:
# Module-level optimizer (rebinds `adam`); kept for code that refers to it.
adam = Adam(learning_rate = 3E-4)
def create_model(dropout=0.3,init="glorot_normal",optimizer=None):
    """Build and compile the classifier used by the hyperparameter search.

    Same layout as the hand-tuned network (64 -> 32 -> 16 -> 16 -> 1), but
    with a single dropout rate and a configurable weight initialiser and
    optimizer.

    Parameters
    ----------
    dropout : float
        Dropout rate applied after each of the first three hidden layers.
    init : str
        Name of the kernel initialiser used for every hidden Dense layer.
    optimizer : str or optimizer instance, optional
        Passed to ``model.compile``. When omitted, a new
        ``Adam(learning_rate=3E-4)`` is created for this model.

    Returns
    -------
    The compiled model (binary cross-entropy loss, accuracy metric).
    """
    if optimizer is None:
        # A fresh optimizer per model. A default bound to the module-level
        # `adam` instance would be shared by every model built without an
        # explicit optimizer and would be fixed at definition time.
        optimizer = Adam(learning_rate = 3E-4)

    model = Sequential()
    model.add(Dense(64,activation = "relu",input_dim= X.shape[1],kernel_initializer=init))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    model.add(Dense(32,activation = "relu",kernel_initializer=init))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    model.add(Dense(16,activation = "relu",kernel_initializer=init))
    model.add(Dropout(dropout))
    model.add(BatchNormalization())
    model.add(Dense(16,activation = "relu",kernel_initializer=init))
    model.add(Dense(1,activation = "sigmoid"))
    model.compile(loss="binary_crossentropy", optimizer = optimizer,metrics = ["accuracy"])
    return model
#standard grid-search setup used to look for good hyperparameters
def gridsearch_fun():
    """Run a 3-fold grid search over the ``create_model`` settings and print the results.

    The grid covers batch size, number of epochs, kernel initialiser, dropout
    rate and optimizer. It is fitted on the module-level ``X`` / ``Y`` using
    all available cores. The best score and parameters are printed first,
    then one line per parameter combination. Nothing is returned.
    """
    search_space = {
        "batch_size": [400,550,800],
        "epochs": [300,500,600],
        "init": ['glorot_uniform', 'normal'],
        "dropout": [0.1,0.2,0.3,0.4],
        "optimizer": ["Adagrad","Adam"],
    }
    classifier = KerasClassifier(build_fn=create_model, verbose=0)
    search = GridSearchCV(estimator=classifier, param_grid=search_space, n_jobs=-1, cv=3)
    outcome = search.fit(X,Y)

    # summary: best combination first, then every combination that was tried
    print("Best: %f using %s" % (outcome.best_score_, outcome.best_params_))
    report = outcome.cv_results_
    rows = zip(report['mean_test_score'], report['std_test_score'], report['params'])
    for mean, stdev, param in rows:
        print("%f (%f) with: %r" % (mean, stdev, param))
gridsearch_fun()


In [None]:
# Start from the raw test rows every time, so this cell can be re-run:
# preprocessing an already-processed frame would fail on the dropped columns.
df_before = test_data.copy()
df_test = preprocess(test_data.copy())
# Unscaled passenger ids, needed later to build the submission.
passenger_id = df_test["PassengerId"].copy()
def scaler_fun2(df, reference=None):
    """Standardise the columns of an unlabelled feature frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame to standardise.
    reference : pandas.DataFrame, optional
        Frame whose per-column mean and standard deviation are used for the
        transformation; it must contain every column of ``df`` (extra columns
        are ignored). When omitted, ``df`` is standardised with its own
        statistics via ``scale``.

    Returns
    -------
    pandas.DataFrame
        Standardised frame with the same columns and index as ``df``.

    Raises
    ------
    KeyError
        If ``reference`` lacks a column of ``df``.
    """
    names = df.columns.tolist()
    if reference is None:
        #alternative that was tried: minmax_scale(df)
        return pd.DataFrame(scale(df), columns=names, index=df.index)

    stats_source = reference[names]
    center = stats_source.mean()
    # Population standard deviation (ddof=0); constant columns are divided by
    # 1 instead of 0 so they become 0 rather than NaN/inf.
    spread = stats_source.std(ddof=0).replace(0, 1.0)
    return (df - center) / spread
# Apply the *training* statistics to the test rows: the network was fitted on
# features standardised with the training mean/std, so the test features must
# go through the same transformation instead of being re-centred on their own.
df_test = scaler_fun2(df_test, reference=df_prescaled)
df_test_X = df_test.loc[:,(df_test.columns!="PassengerId")]

In [48]:
# Load a previously saved model from disk; this rebinds `model`, replacing any
# network held under that name from an earlier cell.
# NOTE(review): no cell visible in this notebook writes "model_super_79.4.h5" -
# confirm where the file comes from before relying on Restart & Run All.
model = load_model("model_super_79.4.h5")
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential_210"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1046 (Dense)           (None, 64)                768       
_________________________________________________________________
dropout_627 (Dropout)        (None, 64)                0         
_________________________________________________________________
batch_normalization_621 (Bat (None, 64)                256       
_________________________________________________________________
dense_1047 (Dense)           (None, 32)                2080      
_________________________________________________________________
dropout_628 (Dropout)        (None, 32)                0         
_________________________________________________________________
batch_normalization_622 (Bat (None, 32)                128       
_________________________________________________________________
dense_1048 (Dense)           (None, 16)             

In [None]:
# Sigmoid outputs, one per test row.
y_pred = model.predict(df_test_X)
if pd.isna(y_pred).any():
    # A NaN output means a NaN slipped into the features; fail loudly instead
    # of silently labelling those passengers.
    raise ValueError("model produced NaN predictions; check the test features for missing values")
# Threshold at 0.5 and pair the labels with the passenger ids by position.
# Building the frame from plain arrays avoids whole-row `.loc[mask] = value`
# assignments and does not depend on two separate indexes lining up.
y_pred = pd.DataFrame({
    "PassengerId": passenger_id.values,
    "Survived": (y_pred.ravel() >= 0.5).astype(int),
})
y_pred = y_pred.astype(int)
#y_pred.to_csv("predictionss.csv",index = False)