In [1]:
# Required Packages
#!pip install -q pandas_profiling
import numpy as np
import pandas as pd
import pandas_profiling

from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold


import time
from datetime import timedelta

Using TensorFlow backend.


In [2]:
# Notebook-wide configuration flags and variables
verbose = 1  # verbosity level intended for the Keras classifier

# Locations of the input CSV files
train_url = 'https://raw.githubusercontent.com/muranjan/datarepo/master/titanic/train.csv'
test_url = 'https://raw.githubusercontent.com/muranjan/datarepo/master/titanic/test.csv'

# Define the random seed for reproducibility (seeds NumPy's global generator)
seed = 123
np.random.seed(seed)

# Load the training data, using PassengerId as the row index
train = pd.read_csv(train_url, index_col='PassengerId')

# Show the frame's dimensions followed by its column names
print(train.shape, train.columns, sep='\n')


(891, 11)
Index(['Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket',
       'Fare', 'Cabin', 'Embarked'],
      dtype='object')


In [3]:
# Preview the first ten rows of the training data.
# pandas_profiling.ProfileReport(train) is left disabled because it failed with:
#   TypeError: concat() got an unexpected keyword argument 'join_axes'
train.head(10)


Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [4]:
# Count the missing values in each column of the training data.
# (Alternative inspection: train['Embarked'].value_counts())
train.isna().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      2
dtype: int64

In [0]:
def FeatureEngineering(df, embarked_categories=('C', 'Q', 'S')):
    """Turn a raw passenger frame into an all-numeric feature frame.

    Steps, in order:
      1. Drop the 'Name', 'Ticket' and 'Cabin' columns.
      2. Fill missing 'Age' and 'Fare' with the column mean of ``df`` itself,
         and missing 'Embarked' with the most frequent value in ``df``.
      3. Encode 'Sex' as 1 (female) / 0 (male).
      4. Replace 'Embarked' with one indicator column per entry of
         ``embarked_categories`` ('Embarked_C', 'Embarked_Q', 'Embarked_S'
         by default).

    The indicator columns are generated from a fixed category list, so every
    frame passed through this function (e.g. train and test) ends up with the
    same columns in the same order, even when a category does not occur in it.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Name', 'Ticket', 'Cabin', 'Age', 'Fare', 'Embarked' and
        'Sex'. The input frame is not modified.
    embarked_categories : sequence of str, optional
        Embarked values to one-hot encode, in output column order. Values not
        listed here (including still-missing ones) get 0 in every indicator.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index as ``df``.

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Drop unwanted features; drop() returns a new frame, so the caller's df is untouched
    df = df.drop(['Name', 'Ticket', 'Cabin'], axis=1)

    # Fill missing data: Age and Fare with the mean, Embarked with most frequent value
    df['Age'] = df['Age'].fillna(df['Age'].mean())
    df['Fare'] = df['Fare'].fillna(df['Fare'].mean())
    embarked_counts = df['Embarked'].value_counts()
    if not embarked_counts.empty:
        # idxmax() raises on an empty Series, so only fill when a mode exists
        df['Embarked'] = df['Embarked'].fillna(embarked_counts.idxmax())

    # Convert categorical features into numeric
    df['Sex'] = df['Sex'].map({'female': 1, 'male': 0}).astype(int)

    # Convert Embarked to one-hot. Casting to a fixed categorical dtype first
    # makes get_dummies emit a column for every listed category, present or not.
    embarked_dtype = pd.CategoricalDtype(categories=list(embarked_categories))
    embarked_one_hot = pd.get_dummies(df['Embarked'].astype(embarked_dtype), prefix='Embarked')
    df = df.drop('Embarked', axis=1)
    df = df.join(embarked_one_hot)

    return df

In [6]:
# Replace the raw training frame with its engineered version, then display the
# per-column count of missing values that remain.
# Note: this rebinds `train`; re-running the cell on the already-engineered
# frame fails because the columns FeatureEngineering drops are gone.
train = FeatureEngineering(train)
train.isna().sum()

Survived      0
Pclass        0
Sex           0
Age           0
SibSp         0
Parch         0
Fare          0
Embarked_C    0
Embarked_Q    0
Embarked_S    0
dtype: int64

In [0]:
# Split the training frame into the feature matrix X and the target vector Y,
# where Y holds the target variable, i.e. Survived.
X = train.drop(columns='Survived').to_numpy().astype(float)
Y = train['Survived'].to_numpy()

# StandardScaler standardizes each column to mean 0 and standard deviation 1.
# The fitted scaler is kept in `scale` so it can be reused on other data.
scale = StandardScaler()
X = scale.fit_transform(X)



In [0]:
def create_model():
    """Build and compile the feed-forward binary classifier.

    The network stacks Dense layers of 16, 8 and 4 ReLU units followed by a
    single sigmoid output unit. The input width is read from the module-level
    feature matrix ``X`` (its number of columns), so ``X`` must exist before
    this function is called.

    Returns:
        A compiled Keras ``Sequential`` model using binary cross-entropy loss,
        the Adam optimizer and accuracy as the reported metric.
    """
    # Assemble the layer stack in one go instead of repeated add() calls
    network = Sequential([
        Dense(16, input_dim=X.shape[1], activation='relu'),
        Dense(8, activation='relu'),
        Dense(4, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])

    # Configure training
    network.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return network

In [9]:

# Create the final classifier with fixed hyper-parameters (no parameter search
# is run in this notebook). Verbosity follows the notebook-level `verbose`
# flag instead of a hard-coded value.
modelFinal = KerasClassifier(build_fn=create_model, epochs=100, batch_size=5, verbose=verbose)

# Fit on the full, scaled training set
modelFinal.fit(X, Y)

# Read test data
test = pd.read_csv(test_url, index_col='PassengerId')

# Use the same FeatureEngineering function that was applied to the train data
test = FeatureEngineering(test)

# Align the test columns with the training feature columns. The scaler and the
# model consume features by position, so the column set and order must match
# what X was built from. A column absent from the test frame is filled with 0,
# which is the correct value for a one-hot indicator whose category does not
# occur in the test data.
feature_columns = train.drop(['Survived'], axis=1).columns
test = test.reindex(columns=feature_columns, fill_value=0)

# Create X_test
X_test = test.values.astype(float)

# Scaling: reuse the scaler fitted on the training data (transform only, no refit)
X_test = scale.transform(X_test)

# Predict 'Survived'
prediction = modelFinal.predict(X_test)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [0]:
# Assemble the submission table (one row per test passenger), ordered by
# PassengerId, and write it to CSV without the DataFrame's own row index.
submission = (
    pd.DataFrame({'PassengerId': test.index, 'Survived': prediction[:, 0]})
    .sort_values('PassengerId')
)
submission.to_csv('Titanic-test_results.csv', index=False)