In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from scikeras.wrappers import KerasClassifier

In [None]:
# Load the training CSV, using its PassengerId column as the row index.
input_data = pd.read_csv('train.csv', index_col='PassengerId')

def set_age(name):
    """Return a stand-in age chosen from the honorific found in ``name``.

    Titles are checked in a fixed order and the first one found wins:
    'Mr.' -> 30, 'Mrs.' -> 45, 'Ms.' -> 30, 'Miss.' -> 20.
    A name containing none of these titles gets 45.
    """
    # Order matters: it mirrors the precedence of the title checks.
    title_ages = (
        ('Mr.', 30),    # male - married or not
        ('Mrs.', 45),   # female - married
        ('Ms.', 30),    # female - married or not
        ('Miss.', 20),  # female - not married
    )
    for title, age in title_ages:
        if title in name:
            return age
    return 45  # any other title (or no title at all)

def clean_dataset(dataset):
    """Return a cleaned, model-ready copy of a passenger DataFrame.

    The caller's frame is not modified; every change is made on a copy, so
    the function can be called repeatedly on the same input.

    Steps performed:
      * fill missing ``Embarked`` values with the most frequent port
        (the chosen port is printed);
      * add ``Companions`` = ``SibSp`` + ``Parch``;
      * encode ``Sex`` as 1 for 'female' and 0 for 'male';
      * one-hot encode ``Embarked`` into ``Embarked_<port>`` columns;
      * fill missing ``Age`` values using ``set_age`` on the row's ``Name``;
      * drop ``Cabin``, ``Ticket``, ``Name``, ``SibSp``, ``Parch``, ``Embarked``.

    Args:
        dataset: DataFrame containing at least the columns ``Embarked``,
            ``SibSp``, ``Parch``, ``Sex``, ``Name``, ``Age``, ``Cabin`` and
            ``Ticket``.

    Returns:
        A new DataFrame with the transformations above applied.

    Raises:
        ValueError: if ``Sex`` holds a value other than 'female'/'male'
            (it maps to NaN, which cannot be cast to int).
    """
    # Work on a copy so the caller's frame keeps its original columns/values.
    cleaned = dataset.copy()

    popular_port = cleaned['Embarked'].dropna().mode()[0]
    print("This port is most popular and replace all 'NA' to " + popular_port)
    cleaned['Embarked'] = cleaned['Embarked'].fillna(popular_port)

    # Number of people travelling with the passenger.
    cleaned['Companions'] = cleaned['SibSp'] + cleaned['Parch']

    # Map sex to 0-1.
    cleaned['Sex'] = cleaned['Sex'].map({'female': 1, 'male': 0}).astype(int)

    # Embarkation port as one-hot columns.
    embarked_one_hot = pd.get_dummies(cleaned['Embarked'], prefix='Embarked')
    cleaned = cleaned.join(embarked_one_hot)

    # Only rows with a missing age are looked up by title; known ages are kept.
    # .to_numpy() makes the assignment positional rather than index-aligned.
    missing_age = cleaned['Age'].isna()
    if missing_age.any():
        cleaned.loc[missing_age, 'Age'] = (
            cleaned.loc[missing_age, 'Name'].map(set_age).to_numpy()
        )

    # Drop the columns that are not used as model features.
    return cleaned.drop(
        columns=['Cabin', 'Ticket', 'Name', 'SibSp', 'Parch', 'Embarked'])
    

# Run the cleaning pipeline on the raw training frame.
train_data = clean_dataset(input_data)

# Persist the prepared frame; index=False leaves the row index out of the file.
train_data.to_csv('train_data_results.csv', index=False)

print("Prepared data:")
train_data  # bare last expression -> rendered as the cell's output

In [None]:
 # Feature matrix X: every column except the 'Survived' label, cast to float.
 X = train_data.drop(columns=['Survived']).values.astype(float)

 # Label vector Y: the values of the 'Survived' column.
 Y = train_data['Survived'].values

 def create_model(optimizer='adam', init='uniform'):
     """Build and compile the feed-forward binary classifier.

     The network is Dense(16) -> Dense(8) -> Dense(4) with ReLU activations,
     followed by a single sigmoid output unit. The input width is read from
     the notebook-level feature matrix ``X`` (``X.shape[1]``), so ``X`` must
     be defined before this function is called.

     Args:
         optimizer: optimizer passed to ``model.compile``. Previously this
             argument was ignored and 'adam' was always used.
         init: kernel initializer used for every Dense layer.

     Returns:
         The compiled ``keras.Sequential`` model (binary cross-entropy loss,
         accuracy metric).
     """
     # NOTE(review): whether a wrapper forwards its own `optimizer` setting to
     # this function depends on that wrapper's parameter-routing rules - confirm.

     #create model
     model = keras.Sequential()
     model.add( keras.layers.Dense(16, input_dim=X.shape[1], kernel_initializer=init, activation='relu') )
     model.add( keras.layers.Dense(8, kernel_initializer=init, activation='relu') )
     model.add( keras.layers.Dense(4, kernel_initializer=init, activation='relu') )
     model.add( keras.layers.Dense(1, kernel_initializer=init, activation='sigmoid') )

     #compile model with the optimizer requested by the caller
     model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
     print("Model created")
     return model

In [None]:
# Training hyperparameters.
epochs, batch_size = 200, 5
init = 'glorot_uniform'
optimizer = 'rmsprop'

# Create the classifier: a scikit-learn style wrapper around create_model.
# NOTE(review): confirm that `optimizer` actually reaches create_model's
# compile step - verify against the scikeras parameter-routing rules.
model_predictions = KerasClassifier(
    model=create_model,
    epochs=epochs,
    batch_size=batch_size,
    optimizer=optimizer,
    init=init,
)

# Train on the full prepared training set.
model_predictions.fit(X, Y)

In [None]:
# Prediction for one hand-written passenger.
# The values are handed to the model positionally (.values), so the key order
# below must match the feature column order used for training.
d = {
    'Pclass': [3],
    'Sex': [1],
    'Age': [22.0],
    'Fare': [5.222],
    'Companions': [2],
    'Embarked_C': [1],
    'Embarked_Q': [0],
    'Embarked_S': [0],
}
df = pd.DataFrame(d)

X_data = df.values.astype(float)

prediction = model_predictions.predict(X_data)
# Truthiness of the one-element prediction array picks the message.
print("You survived!" if prediction else "Better luck next time...")
print(prediction)

In [None]:
# Predictions for the test dataset from Kaggle.
test_data = pd.read_csv('test.csv', index_col='PassengerId')

# Bring the test frame into the same format as the training data.
test_data = clean_dataset(test_data)

# Every value still missing after cleaning is replaced with 10.0
# (original author's note: one row has a NaN Fare).
test_data = test_data.fillna(10.000)

X_test = test_data.values.astype(float)
test_predictions = model_predictions.predict(X_test)

# Build the submission file used to see the score on Kaggle.
submission = pd.DataFrame(
    {'PassengerId': test_data.index, 'Survived': test_predictions}
).sort_values('PassengerId')
submission.to_csv('submission.csv', index=False)

# Attach the predictions so the saved test frame also carries a 'Survived'
# column (kept for charting purposes).
test_data['Survived'] = test_predictions
test_data.to_csv('test_data_results.csv', index=False)

test_data  # bare last expression -> rendered as the cell's output