In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from keras.layers import Activation, Dropout, Dense, LSTM
from keras.models import Sequential
from keras.optimizers import Adam
from keras import regularizers
from keras.callbacks import EarlyStopping
from keras.activations import softmax

Using TensorFlow backend.
  return f(*args, **kwds)


In [2]:
# Load the pre-cleaned train and test tables from the working directory.
train, test = (pd.read_csv(csv_path)
               for csv_path in ('train_clean.csv', 'test_clean.csv'))

In [3]:
# Label-encode the categorical 'Sex' column as integer category codes.
# The category -> code mapping is derived from the training data only and
# then reused for the test data, so a given value always receives the same
# code in both frames (encoding each frame independently would assign
# different codes if the two frames did not contain the same set of values).
train_sex = train['Sex'].astype('category')
sex_categories = train_sex.cat.categories
train['Sex'] = train_sex.cat.codes
# Values in test that never occur in train (and missing values) get code -1.
test['Sex'] = pd.Categorical(test['Sex'], categories=sex_categories).codes

In [4]:
# Drop the columns that are not used as model features, in place, from
# both the train and the test frame.
unused_columns = ['Cabin', 'Name', 'Ticket', 'PassengerId']
for frame in (train, test):
    frame.drop(unused_columns, axis=1, inplace=True)

In [58]:
# One-hot encode the nominal features. Each original column is replaced by
# its dummy columns, appended at the end of the frame.
categorical = ['Embarked', 'Title']

for var in categorical:
    train_dummies = pd.get_dummies(train[var], prefix=var)
    # Align the test dummies with the columns produced for train: a level
    # missing from test becomes an all-zero column, a level that only occurs
    # in test is dropped, and the column order matches train. Without this,
    # train and test can end up with different feature sets and the fitted
    # model cannot be applied to the test data.
    test_dummies = pd.get_dummies(test[var], prefix=var).reindex(
        columns=train_dummies.columns, fill_value=0)
    train = pd.concat([train.drop(var, axis=1), train_dummies], axis=1)
    test = pd.concat([test.drop(var, axis=1), test_dummies], axis=1)

In [59]:
# Standardise the continuous variables (zero mean, unit variance per column).
continuous = ['Age', 'Fare', 'Parch', 'Pclass', 'SibSp', 'Family_Size']

train_values = train[continuous].astype('float64').values
test_values = test[continuous].astype('float64').values

scaler = StandardScaler()
# Learn the per-column mean / standard deviation from the training data only.
# StandardScaler scales every column independently, so this gives the same
# train values as fitting one column at a time.
train[continuous] = scaler.fit_transform(train_values)
# Apply the *training* statistics to the test data. Re-fitting the scaler on
# the test set would put the test features on a different scale from the one
# the model is trained on.
test[continuous] = scaler.transform(test_values)


In [60]:
# Rows of `train` with a known 'Survived' value form the labelled data;
# rows of `test` with a missing 'Survived' value are the ones to predict.
has_label = train['Survived'].notnull()
needs_prediction = test['Survived'].isnull()

X = train.loc[has_label].drop('Survived', axis=1)
y = train.loc[has_label, 'Survived']
X_test = test.loc[needs_prediction].drop('Survived', axis=1)

In [61]:
# Hold out 20% of the labelled rows for validation; the fixed random_state
# makes the split repeatable.
xTrain, xVal, yTrain, yVal = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [62]:
# Sanity check: shapes of the training features and training labels.
print(*(part.shape for part in (xTrain, yTrain)))

(712, 16) (712,)


In [89]:
def create_model(dropout_rate=0.0, reg=0.0):
    """Build and compile the binary classifier.

    The network is: a 12-unit Dense layer with an L2 kernel penalty,
    a Dropout layer, and a single sigmoid output unit. The input width is
    taken from the notebook-level feature matrix ``X``.

    Parameters
    ----------
    dropout_rate : float
        Rate passed to the Dropout layer.
    reg : float
        L2 regularisation factor applied to the hidden layer's kernel.

    Returns
    -------
    keras.models.Sequential
        Model compiled with binary cross-entropy loss, the 'adam' optimizer
        and accuracy as the reported metric.
    """
    n_features = X.shape[1]
    # NOTE(review): the hidden layer uses a linear activation, so it adds no
    # non-linearity before the sigmoid output -- confirm this is intended.
    hidden = Dense(
        12,
        input_shape=(n_features,),
        kernel_regularizer=regularizers.l2(reg),
        activation='linear',
    )
    output = Dense(1, kernel_initializer='uniform', activation='sigmoid')

    net = Sequential()
    for layer in (hidden, Dropout(dropout_rate), output):
        net.add(layer)

    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net

In [90]:
def f_optimize(params):
    """Objective function for hyperopt.

    Builds a model from ``params`` (forwarded as keyword arguments to
    ``create_model``), trains it for 10 epochs on ``xTrain``/``yTrain`` and
    scores it on ``xVal``/``yVal``.

    Returns
    -------
    dict
        ``{'loss': <validation loss>, 'status': STATUS_OK}``.
    """
    candidate = create_model(**params)
    candidate.fit(xTrain, yTrain, verbose=2, epochs=10)
    # evaluate() returns the loss followed by the compiled metric; only the
    # loss is reported back to the optimiser.
    val_loss, _val_metric = candidate.evaluate(xVal, yVal)
    return dict(loss=val_loss, status=STATUS_OK)

In [91]:
# Search space: one entry per keyword argument of create_model.
space = dict(
    dropout_rate=hp.quniform('dropout_rate', 0.1, 0.5, 0.01),
    reg=hp.uniform('reg', 0.00001, 0.9),
)

# Number of hyperparameter configurations to evaluate.
num_trials = 10
# Trials object that fmin fills with the record of each evaluation.
trials = Trials()
best = fmin(
    fn=f_optimize,
    space=space,
    algo=tpe.suggest,
    max_evals=num_trials,
    trials=trials,
)

Epoch 1/1
 - 2s - loss: 11.4930 - acc: 0.5267
Epoch 1/1
 - 2s - loss: 2.2761 - acc: 0.5955
Epoch 1/1
 - 2s - loss: 12.3058 - acc: 0.5253
Epoch 1/1
 - 2s - loss: 1.2958 - acc: 0.5660
Epoch 1/1
 - 2s - loss: 8.3115 - acc: 0.6587
Epoch 1/1
 - 2s - loss: 1.2276 - acc: 0.5941
Epoch 1/1
 - 2s - loss: 0.7921 - acc: 0.6629
Epoch 1/1
 - 2s - loss: 6.6713 - acc: 0.5801
Epoch 1/1
 - 2s - loss: 6.0444 - acc: 0.3778
Epoch 1/1
 - 2s - loss: 5.4643 - acc: 0.5885


In [92]:
# Display the hyperparameter values returned by fmin.
best

{'dropout_rate': 0.3, 'reg': 0.008898512366528503}

In [94]:
# Rebuild the network with the hyperparameters selected by the search and
# train it on the training split.
model = create_model(dropout_rate=best['dropout_rate'], reg=best['reg'])
model.fit(x=xTrain, y=yTrain, epochs=10)

Epoch 1/1


<keras.callbacks.History at 0x14b780cc0>

In [95]:
# Score the unlabelled test rows with the fitted model.
prediction = model.predict(x=X_test)

In [96]:
# Display the model outputs for X_test.
# NOTE(review): this shows the entire array; a slice such as prediction[:10]
# would keep the notebook output short.
prediction

array([[0.4751991 ],
       [0.46702346],
       [0.46818447],
       [0.47364777],
       [0.48152685],
       [0.4803409 ],
       [0.49576455],
       [0.4808486 ],
       [0.4759658 ],
       [0.47185415],
       [0.47206327],
       [0.4789177 ],
       [0.49807885],
       [0.46037015],
       [0.48440075],
       [0.4782236 ],
       [0.48217663],
       [0.4677883 ],
       [0.4889258 ],
       [0.46216443],
       [0.46470472],
       [0.49987653],
       [0.49132663],
       [0.48653698],
       [0.49217725],
       [0.45934832],
       [0.50434893],
       [0.4670218 ],
       [0.48177075],
       [0.45827764],
       [0.46700576],
       [0.47772998],
       [0.47779715],
       [0.47670293],
       [0.4773832 ],
       [0.4690663 ],
       [0.49443915],
       [0.4949919 ],
       [0.47472548],
       [0.47520566],
       [0.4603631 ],
       [0.48715407],
       [0.4664371 ],
       [0.48598656],
       [0.48486766],
       [0.47462362],
       [0.47082594],
       [0.477