## Introduction 
This kernel is based on my more general Titanic kernel, which can be found [here][1], and explores the use of neural networks (NNs) with the Keras library for Python. Data exploration is performed in that other kernel. 

## Preprocessing of training data
Explanations for the preprocessing steps can be found [here][1].
  [1]: https://www.kaggle.com/steffanj/titanic/titanic-preprocessing-eda-and-ml-in-python

In [None]:
# First, let's load the training data to see what we're dealing with.
# We will import the file to a pandas DataFrame:
import pandas as pd
train_data = pd.read_csv('../input/train.csv')

# Let's set the index of our DataFrame to the PassengerId column,
# since that column is assigned arbitrarily (but uniquely) and thus should
# not be regarded a real 'feature' of the data:
train_data.set_index(['PassengerId'], inplace=True)

# Use regex to extract the titles (Mr., Miss. etc.) of the passengers in
# order to use as a categorical/nominal feature:
import re
import numpy as np
# 1 whitespace character followed by several non-whitespace characters
# followed by a dot. A raw string is used so that '\s', '\S' and '\.' reach
# the regex engine unchanged instead of being treated as (invalid) string
# escape sequences.
patt = re.compile(r'\s(\S+\.)')
titles = np.array([re.search(patt, i)[1] for i in train_data['Name'].values])

# Include the titles as a new feature 'Title' in the DataFrame,
# and drop the 'Name' feature:
train_data = train_data.assign(Title=titles)
train_data = train_data.drop('Name', axis=1)

# Regroup infrequently occurring titles. All source titles are distinct from
# all target titles, so one mapping gives the same result as applying the
# replacements one after another.
title_regrouping = {
    'Mlle.': 'Miss.',
    'Ms.': 'Miss.',
    'Mme.': 'Mrs.',
    'Capt.': 'Army.',
    'Col.': 'Army.',
    'Major.': 'Army.',
    'Countess.': 'Noble.',
    'Don.': 'Noble.',
    'Jonkheer.': 'Noble.',
    'Lady.': 'Noble.',
    'Sir.': 'Noble.',
}
train_data['Title'] = train_data['Title'].replace(title_regrouping)

# Drop the 'Ticket' column:
train_data = train_data.drop('Ticket', axis=1)

# Extract letters from cabin codes. Passengers without a cabin will 
# have entries of 'None':
def getCabinCat(cabin_code):
    """Return the cabin category for a single cabin code.

    A missing cabin code (anything ``pd.isnull`` reports as null) maps to the
    string 'None', so that "no cabin" is a category of its own; otherwise
    the first character of the code is returned.
    """
    return 'None' if pd.isnull(cabin_code) else cabin_code[0]

# Build the 'Cabin_cat' feature from the cabin codes and swap it in for the
# raw 'Cabin' column:
cabin_cats = np.array([getCabinCat(code) for code in train_data['Cabin'].values])
train_data = train_data.assign(Cabin_cat=cabin_cats).drop('Cabin', axis=1)

# The categorical features 'Sex', 'Title' and 'Cabin_cat' are turned into
# numbers in two steps: LabelEncoder maps each category to an integer, and
# OneHotEncoder expands those integers into one indicator column per
# category. ('Embarked' is handled later, because it contains missing
# values that have to be imputed first.)
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Sex feature: integer labels, then one-hot columns
le_sex = LabelEncoder()
sex_numerical = le_sex.fit_transform(train_data['Sex'])
sex_numerical_classes = le_sex.classes_
enc_sex = OneHotEncoder(sparse=False)
sex_onehot = enc_sex.fit_transform(sex_numerical.reshape(-1, 1))

# Title feature: integer labels, then one-hot columns
le_title = LabelEncoder()
title_numerical = le_title.fit_transform(train_data['Title'])
title_numerical_classes = le_title.classes_
enc_title = OneHotEncoder(sparse=False)
title_onehot = enc_title.fit_transform(title_numerical.reshape(-1, 1))

# Cabin_cat feature: integer labels, then one-hot columns
le_cabin_cat = LabelEncoder()
cabin_cat_numerical = le_cabin_cat.fit_transform(train_data['Cabin_cat'])
cabin_cat_numerical_classes = le_cabin_cat.classes_
enc_cabin_cat = OneHotEncoder(sparse=False)
cabin_cat_onehot = enc_cabin_cat.fit_transform(cabin_cat_numerical.reshape(-1, 1))

def pdAssignWithOHLabel(df, column, onehot_labeled, class_labels):
    """Return a copy of `df` extended with one column per one-hot class.

    Column ``i`` of the 2-D array `onehot_labeled` is added under the name
    ``column + '_' + class_labels[i]``. The input DataFrame is not modified;
    `DataFrame.assign` returns a new object.
    """
    new_columns = {
        column + '_' + label: onehot_labeled[:, position]
        for position, label in enumerate(class_labels)
    }
    return df.assign(**new_columns)

# For each encoded feature, add its one-hot columns to the DataFrame and
# then remove the original textual column. The features are processed in
# the order Sex, Title, Cabin_cat.
for feature_name, onehot_values, onehot_classes in (
        ('Sex', sex_onehot, sex_numerical_classes),
        ('Title', title_onehot, title_numerical_classes),
        ('Cabin_cat', cabin_cat_onehot, cabin_cat_numerical_classes)):
    train_data = pdAssignWithOHLabel(train_data, feature_name,
                                     onehot_values, onehot_classes)
    train_data = train_data.drop(feature_name, axis=1)

# Set outliers in the 'Fare' column to less extreme values: every fare more
# than 5 standard deviations above the mean is capped at that threshold.
# `.loc` with a boolean mask is used instead of DataFrame.set_value, which
# only accepts a single label and no longer exists in current pandas.
mu = train_data['Fare'].mean()
sd = train_data['Fare'].std()
row_mask = train_data['Fare'] > mu + 5 * sd
train_data.loc[row_mask, 'Fare'] = mu + 5 * sd

# Standard scaling of all features except the 'Embarked' and 'Age' feature,
# because both need to be imputed ('Survived' is left unscaled as well):
from sklearn.preprocessing import StandardScaler
sc_tmp = StandardScaler()
tmp_scaled = train_data.drop(['Embarked', 'Age', 'Survived'], axis=1)  # drop() returns a new DataFrame
tmp_scaled = pd.DataFrame(sc_tmp.fit_transform(tmp_scaled),
                          columns=tmp_scaled.columns, index=tmp_scaled.index)

# Add the non-scaled features to this temporary DataFrame
tmp_scaled = tmp_scaled.assign(Survived=train_data['Survived'])
tmp_scaled = tmp_scaled.assign(Embarked=train_data['Embarked'])
tmp_scaled = tmp_scaled.assign(Age=train_data['Age'])

# Look up the nearest neighbors of the passengers without an 'Embarked'
# value. The rows are selected with a boolean mask, because `tmp` is a
# plain numpy array (0-based positions) while the DataFrame index holds
# PassengerId labels; using the labels as positions would pick the wrong rows.
from sklearn.neighbors import KDTree
tmp = tmp_scaled.drop(['Survived', 'Age', 'Embarked'], axis=1).values
row_idx = pd.isnull(train_data['Embarked'])
tree = KDTree(tmp)
if row_idx.any():
    # The k nearest neighbors include the passenger itself,
    # so we specify k=6 to get the 5 nearest neighbors
    dist, ind = tree.query(tmp[row_idx.values], k=6)

# Impute missing values in the 'Embarked' feature with an 'S':
train_data.loc[row_idx, 'Embarked'] = 'S'

# Turn the (now complete) 'Embarked' feature into integer labels ...
le_embarked = LabelEncoder()
embarked_numerical = le_embarked.fit_transform(train_data['Embarked'])
embarked_numerical_classes = le_embarked.classes_

# ... and expand those labels into one-hot columns
enc_embarked = OneHotEncoder(sparse=False)
embarked_onehot = enc_embarked.fit_transform(embarked_numerical.reshape(-1, 1))

# Add the one-hot columns to both DataFrames and remove the textual column
train_data = pdAssignWithOHLabel(
    train_data, 'Embarked', embarked_onehot, embarked_numerical_classes
).drop('Embarked', axis=1)
tmp_scaled = pdAssignWithOHLabel(
    tmp_scaled, 'Embarked', embarked_onehot, embarked_numerical_classes
).drop('Embarked', axis=1)

# In tmp_scaled (the scaled working copy) the new columns still have to be
# standard-scaled; train_data keeps the unscaled one-hot values.
embarked_columns = ['Embarked_C', 'Embarked_Q', 'Embarked_S']
sc_tmp = StandardScaler()
tmp = tmp_scaled[embarked_columns].copy()
tmp = pd.DataFrame(sc_tmp.fit_transform(tmp), columns=tmp.columns, index=tmp.index)

# Replace the unscaled columns in tmp_scaled with their scaled versions
tmp_scaled = tmp_scaled.drop(embarked_columns, axis=1)
for embarked_column in embarked_columns:
    tmp_scaled = tmp_scaled.assign(**{embarked_column: tmp[embarked_column]})

# Impute 'Age' feature:
def knnImpute(ori_arr, tmp_imp_arr, feature, k=6): # improved one
    """Impute missing values of `feature` in `ori_arr` from nearest neighbors.

    For every row of `ori_arr` whose `feature` value is null, the `k`
    nearest rows in `tmp_imp_arr` are looked up with a KDTree. The row
    itself is removed from that neighbor list, and the imputed value is
    ``floor(mean of the neighbors' non-null feature values) + 0.5``.

    Parameters
    ----------
    ori_arr : pandas.DataFrame
        Data containing `feature`. It is modified in place and returned.
    tmp_imp_arr : array-like, 2-D
        Numeric data used for the neighbor search, without the feature
        being imputed. Row ``i`` must describe the same sample as row ``i``
        of `ori_arr`.
    feature : str
        Name of the column to impute.
    k : int
        Number of neighbors to query, including the sample itself.

    Returns
    -------
    pandas.DataFrame
        `ori_arr`, with the imputed values filled in.

    Notes
    -----
    All row addressing is positional. The index of `ori_arr` holds labels
    (PassengerId values in this notebook) that do not coincide with the
    0-based row numbers of `tmp_imp_arr` or with the positions returned by
    the KDTree, so labels and positions must not be mixed.
    Values imputed earlier in the loop can act as neighbor values for rows
    imputed later. If none of the neighbors has a value, the mean is NaN and
    the entry stays missing.
    """
    from sklearn.neighbors import KDTree
    search_space = np.asarray(tmp_imp_arr)
    missing_positions = np.flatnonzero(pd.isnull(ori_arr[feature]).values)
    feature_position = ori_arr.columns.get_loc(feature)
    tree = KDTree(search_space)
    for pos in missing_positions:
        dist, ind = tree.query(search_space[pos, :].reshape(1, -1), k)
        # Drop the sample itself explicitly: with duplicated rows it is not
        # guaranteed to be the first hit. Keep at most k-1 neighbors.
        neighbors = [i for i in ind[0] if i != pos][:k - 1]
        nn_vals = ori_arr[feature].iloc[neighbors]
        imp_val = np.floor(np.nanmean(nn_vals)) + 0.5
        ori_arr.iloc[pos, feature_position] = imp_val
    return ori_arr

# Impute 'Age' using every column of the scaled working copy except 'Age'
# itself as the neighbor search space
tmp_imp = tmp_scaled.drop('Age', axis=1).values
train_data = knnImpute(train_data, tmp_imp, 'Age', 8)

# Feature columns (everything except 'Survived'), in the order in which
# they are handed to the classifier
columns = [
    'Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Sex_female', 'Sex_male',
    'Title_Army.', 'Title_Dr.', 'Title_Master.', 'Title_Miss.', 'Title_Mr.',
    'Title_Mrs.', 'Title_Noble.', 'Title_Rev.', 'Cabin_cat_A',
    'Cabin_cat_B', 'Cabin_cat_C', 'Cabin_cat_D', 'Cabin_cat_E',
    'Cabin_cat_F', 'Cabin_cat_G', 'Cabin_cat_None', 'Cabin_cat_T',
    'Embarked_C', 'Embarked_Q', 'Embarked_S',
]

# Class labels as a flat array
train_labels = train_data['Survived'].values.ravel()
# Keep the full DataFrame under its own name for later usage; from here on
# `train_data` is the plain feature matrix
train_data_df = train_data
train_data = train_data[columns].values
# Scaler fitted on the unscaled training feature matrix
sc_training = StandardScaler().fit(train_data)

In [None]:
# General import statements
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
from sklearn.pipeline import Pipeline

## Keras 
We'll work on implementing a keras classifier here. Let's start by making the necessary imports:

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.callbacks import EarlyStopping
from sklearn.preprocessing import StandardScaler

Now, let's define a function that will return a keras classifier built according to some parameters; we need this function to make a wrapper that will work in an sklearn pipeline:

In [None]:
def create_ann_model(layers, dropout_rate=None, loss='binary_crossentropy', optimizer='adam'):
    """Build and compile a Keras Sequential model of Dense layers.

    Parameters
    ----------
    layers : sequence of tuples
        One tuple per Dense layer: ``(units, activation)`` or
        ``(units, activation, input_shape)``. A third element, when present,
        is passed to the layer as ``input_shape``.
    dropout_rate : float or None
        If truthy, a ``Dropout(dropout_rate)`` layer is inserted after every
        Dense layer except the first and the last one.
    loss, optimizer
        Passed on to ``model.compile``.

    Returns
    -------
    The compiled model, with "accuracy" as its metric.
    """
    model = Sequential()
    last_layer_idx = len(layers) - 1
    for i, layer in enumerate(layers):
        if len(layer) > 2:
            model.add(Dense(layer[0], activation=layer[1], input_shape=layer[2]))
        else:
            model.add(Dense(layer[0], activation=layer[1]))

        # No dropout after the first layer, and none after the output layer:
        # randomly zeroing the network's prediction during training would
        # only corrupt the loss. (The previous bound `i < len(layers)` was
        # always true, so the output layer received dropout as well.)
        if dropout_rate and 0 < i < last_layer_idx:
            model.add(Dropout(dropout_rate))
    model.compile(loss=loss, optimizer=optimizer, metrics=["accuracy"])
    return model

I have initially trained models using an exhaustive grid search with the following parameters: 

    layers = [
        [(27,'relu',(train_data.shape[1],)), (5, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (15, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (27, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (15, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (5, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (10, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (10, 'relu'), (10, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (10, 'relu'), (5, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (5, 'relu'), (5, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (14, 'relu'), (7, 'relu'), (4, 'relu'), (1, 'relu')]
            ]
    dropout_rate = [None, 0.1, 0.15, 0.2, 0.25, 0.3, 0.5]
    optimizer = ['adam','sgd','adamax']
    epochs = [10, 30, 50]

The top performing classifiers using these parameters used 3 hidden layers, low or zero values for drop-out, and 30 or 50 epochs. A new gridsearch was performed using 3 - 5 hidden layers, various low values for drop-out, and with more epochs:

    layers = [
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (10, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (10, 'relu'), (10, 'relu'), (10, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (10, 'relu'), (5, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (5, 'relu'), (5, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (14, 'relu'), (7, 'relu'), (4, 'relu'), (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (10, 'relu'), (10, 'relu'), 
         (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (10, 'relu'), (10, 'relu'), (10, 'relu'), 
         (1, 'relu')],    
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (20, 'relu'), (20, 'relu'), 
         (1, 'relu')],    
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (15, 'relu'), (10, 'relu'), (5, 'relu'), 
         (1, 'relu')],
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (10, 'relu'), (10, 'relu'), 
         (10, 'relu'), (1, 'relu')],    
        [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (20, 'relu'), (20, 'relu'), 
         (20, 'relu'), (1, 'relu')],        
        [(27,'relu',(train_data.shape[1],)), (22, 'relu'), (16, 'relu'), (12, 'relu'), (8, 'relu'), 
         (4, 'relu'), (1, 'relu')],        
            ]
    dropout_rate = [None, 0.01, 0.03, 0.05, 0.07, 0.1, 0.12, 0.14, 0.16]
    optimizer = ['adam','sgd','adamax']
    epochs = [50, 100, 150]

Training all those models (in a cross-validated manner) takes a lot of time, which is why the code above was put in a text field. It is provided as reference. 

Some of the parameters that turned out to perform best are defined below and used in the computation of the final predictions.

## Training models

In [None]:
# Model creation, compile and fit parameters.
# The single output unit uses a sigmoid activation: the binary cross-entropy
# loss expects a probability between 0 and 1, which an unbounded 'relu'
# output does not provide.
layers = [
    [(27,'relu',(train_data.shape[1],)), (20, 'relu'), (20, 'relu'), (10, 'relu'),
     (10, 'relu'), (1, 'sigmoid')],
        ]
dropout_rate = [0.07]
optimizer = ['adamax']
epochs = [200]

# Create a parameter grid of the above parameters
# Even though only one parameter configuration will be checked here,
# you could use the code below in conjunction with the more elaborate
# grid parameters introduced above
param_grid = dict(clf__layers=layers, clf__dropout_rate=dropout_rate,
                  clf__optimizer=optimizer, clf__epochs=epochs)

# Create pipeline of standardscaler and classifier. Because the scaler is
# part of the pipeline, the pipeline has to be fed UNSCALED feature values,
# both for fitting and for predicting.
clf = KerasClassifier(build_fn=create_ann_model, verbose=0)
sc = StandardScaler()
pipeline = Pipeline([('sc', sc),('clf', clf)])

# Initiate and fit GridSearchCV
gs = GridSearchCV(pipeline, param_grid, cv=None, n_jobs=-1, verbose=0)
gs.fit(train_data, train_labels)

# Save results of all tested classifiers
from time import localtime, strftime
timestr = strftime("%Y-%m-%d-%H.%M", localtime())
cvstr = 'cv_scores_'+timestr+'.csv'
cv = pd.DataFrame(gs.cv_results_)
cv.to_csv(cvstr)

# Best classifier, trained on all available training data. GridSearchCV
# refits the best parameter combination on the full data by default
# (refit=True), so fitting it a second time would only repeat the training.
best_clf_retrained = gs.best_estimator_

In [None]:
# Tabulate the mean test score next to the parameters that produced it,
# best-scoring configuration first
result_columns = ['mean_test_score', 'param_clf__layers', 'param_clf__dropout_rate',
                  'param_clf__epochs', 'param_clf__optimizer']
cv[result_columns].sort_values('mean_test_score', ascending=False)

The best classifier from the results above will be used to make predictions on the test set.

## Preprocessing test samples
We will assume for a moment that test set data only needs imputation on the 'Age' feature. Many operations from above need to be applied to the test set as well; loading the data, creating a DataFrame, One-Hot encoding the data, scaling the data:

In [None]:
# Load test data
test_data = pd.read_csv('../input/test.csv')

# Set DataFrame index
test_data.set_index(['PassengerId'], inplace=True)

# Extract passenger titles
# 1 whitespace character followed by several non-whitespace characters
# followed by a dot (raw string, so the backslashes reach the regex engine
# unchanged)
patt = re.compile(r'\s(\S+\.)')
titles_test = np.array([re.search(patt, i)[1] for i in test_data['Name'].values])
# In the test data provided with the project (which will not be the test data on which
# my models will be evaluated), the title of 'Dona.' was present, which was not encountered
# in the training data. Passengers with the title of 'Dona.' will be merged with 'Noble.'.
# New features that are encountered in the real test data but which are not accounted
# for individually in this preprocessing stage will be assigned the most occurring value
# for this feature (Mr.)
test_data = test_data.assign(Title=titles_test)
test_data = test_data.drop('Name', axis=1)
# Regroup some Title values
test_data['Title'] = test_data['Title'].replace('Mlle.','Miss.')
test_data['Title'] = test_data['Title'].replace('Ms.','Miss.')
test_data['Title'] = test_data['Title'].replace('Mme.','Mrs.')
test_data['Title'] = test_data['Title'].replace(['Capt.','Col.','Major.'],'Army.')
test_data['Title'] = test_data['Title'].replace(['Countess.','Don.', 'Dona.', 'Jonkheer.','Lady.','Sir.'],'Noble.')
# Set unknown values for Title feature to Mr.
# "Known" means: one of the classes the title LabelEncoder was fitted on,
# since those are the only values le_title.transform() accepts below.
# Only the masked rows are overwritten; all other titles stay as they are.
b_mask = ~test_data['Title'].isin(le_title.classes_)
test_data.loc[b_mask, 'Title'] = 'Mr.'

# Drop Ticket feature
test_data = test_data.drop('Ticket', axis=1)

# Generate Cabin_cat feature
cabin_cats = np.array([getCabinCat(cc) for cc in test_data['Cabin'].values])
test_data = test_data.assign(Cabin_cat=cabin_cats)
test_data = test_data.drop('Cabin', axis=1)

# Converting textual categorical features to numbers, using the encoders
# that were fitted on the training data
sex_numerical = le_sex.transform(test_data['Sex'])
title_numerical = le_title.transform(test_data['Title'])
cabin_cat_numerical = le_cabin_cat.transform(test_data['Cabin_cat'])
embarked_numerical = le_embarked.transform(test_data['Embarked'])

# One-Hot encoding
sex_onehot = enc_sex.transform(sex_numerical.reshape(-1,1))
title_onehot = enc_title.transform(title_numerical.reshape(-1,1))
cabin_cat_onehot = enc_cabin_cat.transform(cabin_cat_numerical.reshape(-1,1))
embarked_onehot = enc_embarked.transform(embarked_numerical.reshape(-1,1))

# Add One-Hot labels to DataFrame
test_data = pdAssignWithOHLabel(test_data, 'Sex',
                                 sex_onehot, sex_numerical_classes)
test_data = test_data.drop('Sex',axis=1)
test_data = pdAssignWithOHLabel(test_data, 'Title',
                                 title_onehot, title_numerical_classes)
test_data = test_data.drop('Title',axis=1)
test_data = pdAssignWithOHLabel(test_data, 'Cabin_cat',
                            cabin_cat_onehot, cabin_cat_numerical_classes)
test_data = test_data.drop('Cabin_cat',axis=1)
test_data = pdAssignWithOHLabel(test_data, 'Embarked',
                            embarked_onehot, embarked_numerical_classes)
test_data = test_data.drop('Embarked',axis=1)

# Impute missing data in all features
# Add training and test data together, to more accurately find nearest neighbors.
# pd.concat stacks the rows in the same way as DataFrame.append did and is
# available in all pandas versions.
all_data = pd.concat([train_data_df.drop('Survived',axis=1), test_data])

# Define updated knnImpute function:
def knnImputeTest(test_arr, all_arr, tmp_imp_arr, feature, k=6): # improved one
    """Impute missing `feature` values of the test set from nearest neighbors.

    For every row of `test_arr` whose `feature` value is null, the `k`
    nearest rows in `tmp_imp_arr` are looked up with a KDTree. The row
    itself is removed from that neighbor list, and the imputed value is
    ``floor(mean of the neighbors' non-null feature values) + 0.5``. It is
    written to both `test_arr` and `all_arr`.

    Parameters
    ----------
    test_arr : pandas.DataFrame
        Test data containing `feature`; modified in place.
    all_arr : pandas.DataFrame
        Combined data containing `feature`, from which the neighbors' values
        are read; modified in place.
    tmp_imp_arr : pandas.DataFrame
        Scaled data without the null-containing feature, used for the
        neighbor search. Row ``i`` must describe the same sample as row
        ``i`` of `all_arr`, and its index must contain the labels of
        `test_arr`. The index is assumed to hold unique labels.
    feature : str
        Name of the column to impute.
    k : int
        Number of neighbors to query, including the sample itself.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        `test_arr` and `all_arr` with the imputed values filled in.

    Notes
    -----
    The KDTree returns row *positions*, so the neighbors' values are read
    with `.iloc`. The index labels do not coincide with 0-based positions,
    so a label-based lookup of those positions would read the wrong rows.
    The ``+ 0.5`` convention follows the data set's documentation for
    estimated 'Age' values (of the form x.5); it is applied to whichever
    feature is passed in.
    """
    from sklearn.neighbors import KDTree
    row_idx = test_arr.index[pd.isnull(test_arr[feature])].tolist()
    search_space = tmp_imp_arr.values
    tree = KDTree(search_space)
    for nan_v in row_idx:
        pos = tmp_imp_arr.index.get_loc(nan_v)  # label -> row position
        dist, ind = tree.query(search_space[pos].reshape(1, -1), k)
        # Drop the sample itself explicitly: with duplicated rows it is not
        # guaranteed to be the first hit. Keep at most k-1 neighbors.
        neighbors = [i for i in ind[0] if i != pos][:k - 1]
        nn_vals = all_arr[feature].iloc[neighbors]
        imp_val = np.floor(np.nanmean(nn_vals)) + 0.5
        test_arr.loc[nan_v, feature] = imp_val
        all_arr.loc[nan_v, feature] = imp_val
    return test_arr, all_arr

# Collect the features that contain missing values, together with the number
# of missing entries. The counts are stored as integers (object array), so
# that the sort below is numeric. Putting them into a string array together
# with the feature names would sort them lexicographically ('100' < '86').
feats = list(all_data.columns)
null_counts = pd.isnull(all_data).sum()
need_imp = np.array(
    [[feat, int(null_counts[feat])] for feat in feats if null_counts[feat] > 0],
    dtype=object).reshape(-1, 2)

# Sort features by number of imputations that need to be performed
sort_idx = need_imp[:,1].astype(int).argsort()
need_imp = need_imp[sort_idx]

# Impute one feature at a time, starting with the one that has the fewest
# missing values. Features that are still incomplete (the current one and
# all later ones) are left out of the neighbor search space.
for ii in range(len(need_imp)):
    all_data = pd.concat([train_data_df.drop('Survived',axis=1), test_data])
    tmp = all_data.drop(list(need_imp[ii:,0]), axis=1)
    sc = StandardScaler()
    tmp_scaled = pd.DataFrame(sc.fit_transform(tmp), columns=tmp.columns, index=tmp.index)
    test_data, all_data = knnImputeTest(test_data, all_data, tmp_scaled, need_imp[ii,0], 11)


class ImputeError(ValueError):
    """Raised when the test data still contains NaNs after imputation."""


# Check if imputation was done correctly:
if pd.isnull(test_data).sum().sum() > 0:
    raise ImputeError('{} NaNs in the data, so data was not imputed correctly'.format(
        pd.isnull(test_data).sum().sum()))

# Make sure column order is the same as in the training data, so that scaling can be performed
columns = ['Age', 'Pclass', 'SibSp', 'Parch', 'Fare', 'Sex_female', 'Sex_male',
       'Title_Army.', 'Title_Dr.', 'Title_Master.', 'Title_Miss.', 'Title_Mr.',
       'Title_Mrs.', 'Title_Noble.', 'Title_Rev.', 'Cabin_cat_A',
       'Cabin_cat_B', 'Cabin_cat_C', 'Cabin_cat_D', 'Cabin_cat_E',
       'Cabin_cat_F', 'Cabin_cat_G', 'Cabin_cat_None', 'Cabin_cat_T',
       'Embarked_C', 'Embarked_Q', 'Embarked_S']
test_data = test_data[columns]

# Scale the test data using the StandardScaler that was fit on the original training data,
# 'sc_training'.
# NOTE: this scaled copy is only appropriate for a model that expects
# pre-scaled inputs; an estimator that contains its own scaling step has to
# be given the unscaled `test_data`.
tmp = test_data.copy()
tmp = pd.DataFrame(sc_training.transform(tmp),columns=test_data.columns, index=test_data.index)
test_data_scaled = tmp

## Predicting test samples

In [None]:
# Classify the test data with best classifier.
# `best_clf_retrained` is the fitted pipeline, whose first step is a
# StandardScaler fitted on the unscaled training matrix. It therefore has to
# receive the UNSCALED test features; passing the already scaled copy would
# scale the data twice. A plain array is passed because the pipeline was
# fitted on one.
preds = best_clf_retrained.predict(test_data.values)
# Flatten to one integer label per passenger, whatever shape the classifier
# returns
preds = np.round(preds).astype('int').ravel()

# Write predictions to csv file
preds_csv = pd.DataFrame(test_data.index.values, columns=['PassengerId'])
preds_csv = preds_csv.assign(Survived=preds)
csv_str = 'preds_'+timestr+'.csv'
preds_csv.to_csv(csv_str, index=False)

preds_csv