In [None]:
# https://code.visualstudio.com/docs/python/data-science-tutorial
import pandas as pd
import numpy as np
# Load the Titanic dataset from data.csv (path is relative to the working directory)
data = pd.read_csv(filepath_or_buffer='data.csv')



In [None]:
# Turn every '?' entry into NaN, then cast the age and fare columns to float64
column_types = {"age": np.float64, "fare": np.float64}
data = data.replace(to_replace='?', value=np.nan).astype(column_types)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Generate plots: one panel per (plot function, x column, y column), all split by sex.
# Driving the panels from a table keeps the calls consistent and lets the
# subplot count follow the number of entries.
panels = [
    (sns.violinplot, "survived", "age"),
    (sns.pointplot, "sibsp", "survived"),
    (sns.pointplot, "parch", "survived"),
    (sns.pointplot, "pclass", "survived"),
    (sns.violinplot, "survived", "fare"),
]
fig, axs = plt.subplots(ncols=len(panels), figsize=(30, 5))
for ax, (plot_fn, x_col, y_col) in zip(axs, panels):
    plot_fn(x=x_col, y=y_col, hue="sex", data=data, ax=ax)
    # Title each panel so it can be read on its own
    ax.set_title(f"{y_col} vs. {x_col} (by sex)")
# The cell ends on a statement rather than an expression, so no Axes repr is
# echoed underneath the figure.

In [None]:
# Correlate the relationship between all variables and survival
# Encode sex as a number (male=1, female=0). Only the 'sex' column is touched,
# and pd.to_numeric pins a numeric dtype regardless of how the installed pandas
# version infers the result of replace(). Re-running the cell is a no-op.
sex_codes = {'male': 1, 'female': 0}
data['sex'] = pd.to_numeric(data['sex'].replace(sex_codes))
# Restrict to numeric columns before correlating: since pandas 2.0,
# DataFrame.corr() raises on string columns instead of silently skipping them.
data.select_dtypes(include='number').corr().abs()[['survived']]

In [None]:
# Create new column relatives: 1 when sibsp + parch is greater than 0, else 0.
# Computed column-wise rather than with a per-row apply(); the comparison yields
# booleans, which are stored as 64-bit integers.
data['relatives'] = ((data['sibsp'] + data['parch']) > 0).astype('int64')
# Restrict to numeric columns before correlating: since pandas 2.0,
# DataFrame.corr() raises on string columns instead of silently skipping them.
data.select_dtypes(include='number').corr().abs()[['survived']]

In [None]:
# Keep only the columns retained for modelling (dropping the low-correlation
# ones), then discard every row that still has a missing value
kept_columns = ['sex', 'pclass', 'age', 'relatives', 'fare', 'survived']
data = data.loc[:, kept_columns].dropna()
data.corr().abs().loc[:, ['survived']]

In [None]:
# Split into training and validation sets: 20% of the rows are held out, and
# random_state is fixed so the split is the same on every run
from sklearn.model_selection import train_test_split

feature_columns = ['sex', 'pclass', 'age', 'relatives', 'fare']
x_train, x_test, y_train, y_test = train_test_split(
    data[feature_columns],
    data['survived'],
    test_size=0.2,
    random_state=0,
)

In [None]:
# Normalize data: the scaler is fitted on the training split only, and the
# same fitted scaler is then applied to both splits
from sklearn.preprocessing import StandardScaler

sc = StandardScaler().fit(x_train)
X_train = sc.transform(x_train)
X_test = sc.transform(x_test)

In [None]:
# Fit a Gaussian Naive Bayes classifier on the scaled training features
from sklearn.naive_bayes import GaussianNB

model = GaussianNB()
model.fit(X=X_train, y=y_train)


In [None]:
# Score the fitted model on the held-out test split and print its accuracy
from sklearn import metrics

predict_test = model.predict(X_test)
test_accuracy = metrics.accuracy_score(y_true=y_test, y_pred=predict_test)
print(test_accuracy)

In [None]:
# Build a small feed-forward network with Keras
from keras.models import Sequential
from keras.layers import Dense

# Two 5-unit ReLU layers followed by a single sigmoid output unit; the first
# layer declares the 5 input features. Note this rebinds `model`.
model = Sequential([
    Dense(5, kernel_initializer='uniform', activation='relu', input_dim=5),
    Dense(5, kernel_initializer='uniform', activation='relu'),
    Dense(1, kernel_initializer='uniform', activation='sigmoid'),
])
model.summary()

In [None]:
# Configure training (Adam optimizer, binary cross-entropy loss, accuracy
# metric) and fit on the scaled training split
training_options = dict(batch_size=32, epochs=50)
model.compile(loss='binary_crossentropy', optimizer="adam", metrics=['accuracy'])
model.fit(X_train, y_train, **training_options)

In [None]:
# Try against test data
# Sequential.predict_classes() no longer exists in current Keras/TensorFlow
# releases. For a single sigmoid output, thresholding the predicted probability
# at 0.5 gives the same class labels. ravel() flattens the (n, 1) output to a
# 1-D label vector for accuracy_score.
y_pred = (model.predict(X_test) > 0.5).astype("int32").ravel()
print(metrics.accuracy_score(y_test, y_pred))