In [28]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics
import warnings 
from sklearn.exceptions import ConvergenceWarning
warnings.filterwarnings('ignore', category=ConvergenceWarning)

In [2]:
raw_test_df = pd.read_csv('test.csv')
raw_train_df = pd.read_csv('train.csv')

train_df = raw_train_df.copy()
test_df = raw_test_df.copy()
test_df

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.0,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.5,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,,0,0,359309,8.0500,,S


In [3]:
# one hot encoding
def encode_categories(df):
    # One hot encode Embarked
    df = df.join(pd.get_dummies(df.pop('Embarked')))
    
    return df

In [4]:
features = ['Pclass','Sex','Age','SibSp','Parch','Fare','Cabin','C','Q','S']

# Drop Nan Values from the Age and Fare Columns
train_df = train_df.fillna(train_df.mean())


test_df = test_df.fillna(test_df.mean())

test_df

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.50000,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.00000,1,0,363272,7.0000,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.00000,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.00000,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.00000,1,1,3101298,12.2875,,S
...,...,...,...,...,...,...,...,...,...,...,...
413,1305,3,"Spector, Mr. Woolf",male,30.27259,0,0,A.5. 3236,8.0500,,S
414,1306,1,"Oliva y Ocana, Dona. Fermina",female,39.00000,0,0,PC 17758,108.9000,C105,C
415,1307,3,"Saether, Mr. Simon Sivertsen",male,38.50000,0,0,SOTON/O.Q. 3101262,7.2500,,S
416,1308,3,"Ware, Mr. Frederick",male,30.27259,0,0,359309,8.0500,,S


In [5]:
# We need to encode the sex, cabin, and embarket columns

train_df['Sex'] = train_df['Sex'].map(lambda x: 1 if x == 'male' else 0)
train_df['Cabin'] = train_df['Cabin'].map(lambda x: 0 if pd.isna(x) else 1)
train_df = encode_categories(train_df)


test_df['Sex'] = test_df['Sex'].map(lambda x: 1 if x == 'male' else 0)
test_df['Cabin'] = test_df['Cabin'].map(lambda x: 0 if pd.isna(x) else 1)
test_df = encode_categories(test_df)

In [6]:
# Survived is our target column
train_df_target = train_df['Survived']
# Features are all columns except for the target column, name, and IDs
train_df_features = train_df.drop(['Survived','Name','PassengerId','Ticket'],axis = 1)


In [21]:
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler, MinMaxScaler
import sklearn.neural_network as NN



model = NN.MLPClassifier()
model.fit(train_df_features,train_df_target)



MLPClassifier()

In [22]:
model.get_params().keys()

dict_keys(['activation', 'alpha', 'batch_size', 'beta_1', 'beta_2', 'early_stopping', 'epsilon', 'hidden_layer_sizes', 'learning_rate', 'learning_rate_init', 'max_fun', 'max_iter', 'momentum', 'n_iter_no_change', 'nesterovs_momentum', 'power_t', 'random_state', 'shuffle', 'solver', 'tol', 'validation_fraction', 'verbose', 'warm_start'])

In [23]:
# Activation Function 
mlpclassifier__activation = ['relu','identity','logistic','tanh']
# Number of hidden layers and nodes
hidden_layer_sizes = [(50,50,50), (40,60,60,40), (100,)]
# Solver Used
solver = ['sgd','adam']
# Regularization Parameter
alpha = [0.0001,0.05]
# weight updates
learning_rate = ['constant','adaptive']


In [29]:
from sklearn.model_selection import GridSearchCV


parameters = {'activation': mlpclassifier__activation,'hidden_layer_sizes': hidden_layer_sizes,'solver': solver,'alpha': alpha,'learning_rate':learning_rate}

clf = GridSearchCV(model, param_grid = parameters, cv = 5, n_jobs = -1)
clf.fit(train_df_features,train_df_target)
clf.best_params_


{'activation': 'logistic',
 'alpha': 0.05,
 'hidden_layer_sizes': (40, 60, 60, 40),
 'learning_rate': 'constant',
 'solver': 'adam'}

In [30]:
model = model.set_params(**clf.best_params_)
model.fit(train_df_features,train_df_target)

MLPClassifier(activation='logistic', alpha=0.05,
              hidden_layer_sizes=(40, 60, 60, 40))

In [31]:
x_test = pd.get_dummies(test_df[features])
predictions = model.predict(x_test)

output = pd.DataFrame({'PassengerId': test_df.PassengerId, 'Survived': predictions})
output.to_csv('NNsubmission.csv',index = False)