In [None]:
import pandas as pd
import numpy as np
import warnings

# NOTE(review): this silences every warning category for the rest of the session,
# including pandas' own diagnostics; narrow the filter if warnings need auditing.
warnings.filterwarnings('ignore')

# Raw Titanic splits as shipped in the Kaggle input directory.
train_dataset = pd.read_csv('../input/titanic/train.csv')
test_dataset = pd.read_csv('../input/titanic/test.csv')

# Stack the training features (label column removed) on top of the test rows so
# that every preprocessing step below is applied to both splits at once.
# ignore_index=True gives the stacked frame a unique 0..n-1 index; without it the
# row labels of the two files repeat, which makes label-based selection ambiguous.
train_data = train_dataset.drop(columns="Survived")
whole_dataset = pd.concat([train_data, test_dataset], axis=0, ignore_index=True)

In [None]:
# Preview the first rows of the combined train+test feature frame.
whole_dataset.head()

In [None]:
# Column dtypes and non-null counts, to spot which columns have missing values.
whole_dataset.info()

In [None]:
# Summary statistics for the numeric columns.
whole_dataset.describe()

In [None]:
# Summary restricted to the object-dtype columns (include=['O']).
whole_dataset.describe(include=['O'])

In [None]:
# Ticket and Cabin are not used as features below; remove both columns in place.
unused_columns = ["Ticket", "Cabin"]
whole_dataset.drop(columns=unused_columns, inplace=True)

In [None]:
# Fill the gaps in Embarked with the constant "S" and in Fare with the median
# fare of the combined frame.
fill_values = {"Embarked": "S", "Fare": whole_dataset["Fare"].median()}
for column, value in fill_values.items():
    whole_dataset[column] = whole_dataset[column].fillna(value)

In [None]:
# Extract the honorific from Name: the run of letters that follows a space and is
# terminated by a period. A raw string is used so that `\.` reaches the regex
# engine as written instead of being parsed as an (invalid) Python string escape.
title_pattern = r' ([A-Za-z]+)\.'

# The same feature is needed on the combined frame (model input) and on the
# labelled training frame (used later to compute per-title survival rates).
# Name is dropped from both once the title has been pulled out.
for frame in (whole_dataset, train_dataset):
    frame['Title'] = frame['Name'].str.extract(title_pattern, expand=False)
    frame.drop(columns="Name", inplace=True)

In [None]:
# Collapse the extracted honorifics into four groups: the three common titles
# are kept as they are, every other listed title becomes "Other".
# Titles missing from this mapping turn into NaN when mapped.
kept_titles = ["Mr", "Miss", "Mrs"]
other_titles = [
    "Master", "Dr", "Rev", "Col", "Major", "Mlle", "Countess", "Ms", "Lady",
    "Jonkheer", "Don", "Dona", "Mme", "Capt", "Sir",
]
title_mapping = {title: title for title in kept_titles}
title_mapping.update(dict.fromkeys(other_titles, "Other"))

for frame in (whole_dataset, train_dataset):
    frame['Title'] = frame['Title'].map(title_mapping)

In [None]:
# Family size = siblings/spouses + parents/children + the passenger themselves.
whole_dataset["FamilySize"] = whole_dataset["SibSp"].add(whole_dataset["Parch"]).add(1)

In [None]:
# Re-check dtypes and non-null counts after the drops, fills and new Title/FamilySize columns.
whole_dataset.info()

In [None]:
# Preview the combined frame after feature engineering.
whole_dataset.head()

In [None]:
# Mean survival per passenger class, computed on the labelled training rows.
pclass_band = (
    train_dataset[['Pclass', 'Survived']]
    .groupby(['Pclass'], as_index=False)
    .mean()
    .sort_values(by='Pclass', ascending=True)
)

# Replace each class value in the combined frame with that class's survival rate.
pclass_mapping = {key: rate for key, rate in pclass_band.to_numpy()}
whole_dataset["Pclass"] = whole_dataset["Pclass"].map(pclass_mapping)
pclass_band

In [None]:
# Mean survival per sex, computed on the labelled training rows.
sex_band = (
    train_dataset[['Sex', 'Survived']]
    .groupby(['Sex'], as_index=False)
    .mean()
    .sort_values(by='Sex', ascending=True)
)

# Replace each Sex value in the combined frame with that group's survival rate.
sex_mapping = {key: rate for key, rate in sex_band.to_numpy()}
whole_dataset["Sex"] = whole_dataset["Sex"].map(sex_mapping)
sex_band

In [None]:
# Mean survival per embarkation port, computed on the labelled training rows.
embarked_band = (
    train_dataset[['Embarked', 'Survived']]
    .groupby(['Embarked'], as_index=False)
    .mean()
    .sort_values(by='Embarked', ascending=True)
)

# Replace each port code in the combined frame with that port's survival rate.
embarked_mapping = {key: rate for key, rate in embarked_band.to_numpy()}
whole_dataset["Embarked"] = whole_dataset["Embarked"].map(embarked_mapping)
embarked_band

In [None]:
# Mean survival per title group, computed on the labelled training rows.
title_band = (
    train_dataset[['Title', 'Survived']]
    .groupby(['Title'], as_index=False)
    .mean()
    .sort_values(by='Title', ascending=True)
)

# Replace each title group in the combined frame with that group's survival rate.
# Note: this rebinds `title_mapping` to the title -> survival-rate lookup.
title_mapping = {key: rate for key, rate in title_band.to_numpy()}
whole_dataset["Title"] = whole_dataset["Title"].map(title_mapping)
title_band

In [None]:
# Build the regression problem used to impute Age:
#   - rows with a known Age are the training set (features + Age as the target),
#   - rows with a missing Age are the rows to predict.
# PassengerId is excluded from the predictors.
fill_data = whole_dataset.drop(columns="PassengerId")
age_known = fill_data["Age"].notnull()

# Each subset is produced by a non-inplace drop, so it is an independent frame;
# dropping in place on a boolean-filtered slice raises SettingWithCopyWarning.
fill_train_label = fill_data.loc[age_known, "Age"]
fill_train_data = fill_data[age_known].drop(columns="Age")
fill_test_data = fill_data[~age_known].drop(columns="Age")

In [None]:
from sklearn.linear_model import LinearRegression

# Fit a linear regression of Age on the other features using the rows where Age
# is known, then predict Age for the rows where it is missing.
lr = LinearRegression()
lr.fit(fill_train_data, fill_train_label)

# A linear model can predict below zero; clamp those predictions to 0.
fill_test_age = np.clip(lr.predict(fill_test_data), 0, None)

# Write the predictions back with a single .loc assignment. The previous chained
# form (`df["Age"][mask] = ...`) assigns into an intermediate Series and is not
# guaranteed to update `whole_dataset`. The mask is passed as a plain boolean
# array so the selection is purely positional and matches the row order of
# `fill_test_data`.
age_missing = whole_dataset["Age"].isnull().to_numpy()
whole_dataset.loc[age_missing, "Age"] = fill_test_age

In [None]:
# Check non-null counts again now that Age has been imputed.
whole_dataset.info()

In [None]:
# Preview the encoded and imputed feature frame before scaling.
whole_dataset.head()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every remaining feature column; PassengerId is an identifier and
# is removed first. After this cell `whole_dataset` holds the array returned by
# the scaler rather than a DataFrame.
standard_scaler = StandardScaler()
feature_frame = whole_dataset.drop(columns='PassengerId')
whole_dataset = standard_scaler.fit_transform(feature_frame)

In [None]:
# The training rows were stacked first, so the leading len(train_dataset) rows
# are the labelled ones and the remainder are the test rows to predict.
n_labeled = len(train_dataset)
labeled_data, unlabeled_data = whole_dataset[:n_labeled], whole_dataset[n_labeled:]
labels = train_dataset["Survived"]

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split

# Hold out 20% of the labelled rows for validation, stratified on the label so
# both parts keep the same class balance; random_state fixes the shuffle.
training_data, validation_data, training_label, validation_label = train_test_split(
    labeled_data,
    labels,
    test_size=0.20,
    stratify=labels,
    random_state=42,
)
print("train_shape: ", training_data.shape)
# This is the validation split, not the test set, so it is labelled accordingly.
print("validation_shape: ", validation_data.shape)

In [None]:
# Input width is taken from the data instead of being hard-coded, so the network
# keeps working if features are added or removed upstream.
n_features = training_data.shape[1]

# Fully connected binary classifier: five 32-unit ReLU layers with dropout and
# one batch-normalisation layer in between, ending in a single sigmoid unit that
# outputs the survival probability.
model = Sequential([
    Dense(units=32, activation='relu', input_shape=(n_features,)),
    Dense(units=32, activation='relu'),
    Dropout(0.3),
    Dense(units=32, activation='relu'),
    BatchNormalization(),
    Dense(units=32, activation='relu'),
    Dropout(0.3),
    Dense(units=32, activation='relu'),
    Dense(units=1, activation='sigmoid'),
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 5 epochs and roll back to the
# best weights seen so far.
callback1 = EarlyStopping(restore_best_weights=True, monitor='val_loss', patience=5, verbose=1)
history = model.fit(
    training_data,
    training_label,
    validation_data=(validation_data, validation_label),
    callbacks=[callback1],
    epochs=30,
    verbose=2,
)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history.history["accuracy"], label='train_acc')
acc_ax.plot(history.history["val_accuracy"], label='val_acc')
acc_ax.set_title('Learning curve for model')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
plt.show()

In [None]:
# Training vs. validation loss per epoch.
loss_fig, loss_ax = plt.subplots()
for history_key, curve_label in (("loss", 'train_loss'), ("val_loss", 'val_loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set(title='Learning curve for model', ylabel='Loss')
loss_ax.legend()
plt.show()

In [None]:
# `Sequential.predict_classes` no longer exists in current TensorFlow releases.
# For a single sigmoid output the equivalent is to threshold the predicted
# probability at 0.5 and cast to integer class labels.
survival_probability = model.predict(unlabeled_data)
prediction = (survival_probability > 0.5).astype("int32").flatten()

# Pair each test passenger id with its predicted label and write the submission
# file; the file is then read back into `submission`.
test_id = test_dataset["PassengerId"]
submission = pd.DataFrame({"PassengerId": test_id,"Survived": prediction})
submission.to_csv('submission.csv', index=False)
submission = pd.read_csv('submission.csv')