## 1. Titanic ML Models

1. Random Forest
2. LGBM
3. XGB
4. LinearSVC
5. GaussianNB

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import seaborn as sb

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file found in it.
for folder, _, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        print(os.path.join(folder, file_name))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the Titanic train and test CSVs (train first, then test) into DataFrames,
# then preview the first rows of the training data.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('/kaggle/input/titanic/train.csv', '/kaggle/input/titanic/test.csv')
)
train_df.head()

In [None]:
# Preview the first five rows of the test data.
test_df.head(n=5)

### Passenger class clearly matters for survival, as the graph below shows

In [None]:
# Survival (y) aggregated per passenger class (x), drawn in a single colour.
sb.barplot(data=train_df, x="Pclass", y="Survived", color="b")

### Gender also matters

In [None]:
# Survival (y) aggregated per sex (x), drawn in a single colour.
sb.barplot(data=train_df, x="Sex", y="Survived", color="b")

In [None]:
# Survival (y) aggregated per value of the SibSp column (x).
sb.barplot(data=train_df, x="SibSp", y="Survived")

In [None]:
# Survival (y) aggregated per value of the Parch column (x).
sb.barplot(data=train_df, x="Parch", y="Survived")

In [None]:
# Note the axes are flipped relative to the other plots: fare (y) aggregated per survival outcome (x).
sb.barplot(data=train_df, x="Survived", y="Fare")

In [None]:
# Survival (y) aggregated per value of the Embarked column (x).
sb.barplot(data=train_df, x="Embarked", y="Survived")

### Dropping the columns we will not use, then checking for null values

In [None]:
# Remove the columns that this notebook does not use as model features.
train_df = train_df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"])

In [None]:
# Keep the passenger ids aside (they are needed for the submission file) before
# removing them, together with the other unused columns, from the test features.
test_passenger_Id = test_df["PassengerId"]
test_df = test_df.drop(columns=["PassengerId", "Name", "Ticket", "Cabin"])

In [None]:
# Preview the first five rows of the training data after the column drops.
train_df.head(n=5)

### Now we need to check each column's data type (and its number of null values), as we can only feed ints/floats to our model

In [None]:
# For every training column, print the Python type of its value at index label 0,
# the column name, and how many null entries the column contains.
for column_name, column_values in train_df.items():
    first_value_type = type(column_values[0])
    null_count = column_values.isnull().sum()
    print("col : {} -- {}= {}".format(first_value_type, column_name, null_count))

In [None]:
# For every test column, print the Python type of its value at index label 0,
# the column name, and how many null entries the column contains.
for column_name, column_values in test_df.items():
    first_value_type = type(column_values[0])
    null_count = column_values.isnull().sum()
    print("col : {} -- {}= {}".format(first_value_type, column_name, null_count))

### Filling in the missing values 

In [None]:
# Fill missing values column by column.
#
# DataFrame.fillna(<scalar>) replaces every NaN in *every* column, so the previous
# `fillna(train_df['Age'].mean())` also wrote the mean age into the missing
# 'Embarked' cells (and, for the test set, into the missing 'Fare' cells), leaving
# nothing for the second fillna call to do. Passing a {column: value} dict restricts
# each fill value to its own column.
train_df = train_df.fillna({
    'Age': train_df['Age'].mean(),
    # mode() returns a Series of the most frequent value(s); take the first one.
    'Embarked': train_df['Embarked'].mode().iloc[0],
})

# As before, the test set is filled using its own column means.
test_df = test_df.fillna({
    'Age': test_df['Age'].mean(),
    'Fare': test_df['Fare'].mean(),
})

### LabelEncoding the categorical values to change them to int

In [None]:
from sklearn.preprocessing import LabelEncoder

# train_df['Sex'] = train_df['Sex'].astype('category')
# train_df['Embarked'] = train_df['Embarked'].astype('category')
# train_df = pd.get_dummies(train_df)
# train_df=train_df.drop('Embarked_29.69911764705882')

# test_df['Sex'] = test_df['Sex'].astype('category')
# test_df['Embarked'] = test_df['Embarked'].astype('category')
# test_df = pd.get_dummies(test_df)
# Encode the text categories as integers.
#
# LabelEncoder numbers the sorted distinct values it was fitted on. Calling
# fit_transform separately on train and test (as before) therefore gives the same
# category different integers whenever the two frames do not contain exactly the
# same set of values. Fitting once per column on the values of both frames together
# guarantees a single, shared mapping and no unseen-label errors on transform.
labelencoder = LabelEncoder()
for column in ('Sex', 'Embarked'):
    train_values = train_df[column].astype(str)
    test_values = test_df[column].astype(str)
    labelencoder.fit(pd.concat([train_values, test_values], ignore_index=True))
    train_df[column] = labelencoder.transform(train_values)
    test_df[column] = labelencoder.transform(test_values)

test_df.head()

## Importing ML Libraries

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import MultinomialNB
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Hold out 20% of the labelled rows for validation; the fixed random_state keeps the split repeatable.
split_features = train_df.drop(columns='Survived')
split_target = train_df["Survived"]
X_train, X_val, Y_train, Y_val = train_test_split(
    split_features,
    split_target,
    test_size=0.2,
    random_state=42,
)

### Across all of my submissions, RandomForest gave the best results.

In [None]:
# Train a random forest on the training split and report its accuracy on the
# validation split. `model` is also what the submission cell uses for prediction.
model = RandomForestClassifier(
    criterion='entropy',
    n_estimators=200,
    max_depth=9,
    random_state=7,
    class_weight='balanced',
)
model.fit(X_train, Y_train)
Y_pred = model.predict(X_val)
# accuracy_score's signature is (y_true, y_pred); the arguments used to be passed
# the other way round. Accuracy itself is symmetric, so the printed number is the
# same, but the correct order matters as soon as another metric is substituted.
print(accuracy_score(Y_val, Y_pred))

### Checking out other models. Uncomment and Run

In [None]:
# model = LGBMClassifier(random_state=10,max_depth= 50,learning_rate=0.05)
# model.fit(X_train, Y_train)
# Y_pred = model.predict(X_val)
# print(accuracy_score(Y_pred,Y_val))

In [None]:
# model = XGBClassifier()
# model.fit(X_train, Y_train)
# Y_pred = model.predict(X_val)
# print(accuracy_score(Y_pred,Y_val))

In [None]:
# model = LinearSVC(random_state=0, tol=1e-5)
# model.fit(X_train, Y_train)
# Y_pred = model.predict(X_val)
# print(accuracy_score(Y_pred,Y_val))

In [None]:
# Gaussian naive Bayes alternative, left disabled like the other optional models.
# NOTE(review): the visible import cell brings in MultinomialNB only, not GaussianNB,
# so uncomment the import on the next line together with the model lines below.
# from sklearn.naive_bayes import GaussianNB
# model = GaussianNB()
# model.fit(X_train, Y_train)
# Y_pred = model.predict(X_val)
# print(accuracy_score(Y_pred,Y_val))

## Saving the Submission File

In [None]:
# Predict on the test features with the most recently fitted `model`, pair each
# prediction with its saved passenger id, and write the result as the submission CSV.
predictions = model.predict(test_df)
submission_columns = {'PassengerId': test_passenger_Id, 'Survived': predictions}
output = pd.DataFrame(submission_columns)
output.to_csv('my_submission.csv', index=False)
print("Your submission was successfully saved!")

## 2. Titanic Deep Learning Model

In [None]:
from keras.models import Sequential, load_model
from keras.layers import Dense, Dropout, Flatten, Activation,InputLayer
from keras.optimizers import Adam, RMSprop
from keras.callbacks import TensorBoard, EarlyStopping, ModelCheckpoint

### This step converts our dataframes into a form that can be passed to the Neural Network

In [None]:
# Run both frames through pandas' dummy (one-hot) encoding.
# NOTE(review): 'Sex' and 'Embarked' were already label-encoded to integers above, so
# this presumably leaves the frames unchanged — confirm no text columns remain.
train_df, test_df = pd.get_dummies(train_df), pd.get_dummies(test_df)

train_df.head()

### A Simple Neural Network. You can try your hand at changing the architecture

In [None]:
# Build and compile a fully connected binary classifier: six ReLU hidden layers of
# decreasing width followed by a single sigmoid output unit.

# The input width is taken from the data (every column except the 'Survived' target)
# instead of being hard-coded to 7, so the network keeps matching the frame that is
# passed to fit() if the feature columns ever change.
n_features = train_df.drop('Survived', axis=1).shape[1]

t_model = Sequential()
t_model.add(InputLayer(input_shape=(n_features,)))
for hidden_units in (1024, 512, 256, 128, 64, 32):
    t_model.add(Dense(hidden_units, activation='relu'))
t_model.add(Dense(1, activation='sigmoid'))

t_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the network on all labelled rows (no validation split) for 100 epochs in batches of 10.
nn_features = train_df.drop(columns='Survived')
nn_target = train_df["Survived"]
t_model.fit(nn_features, nn_target, epochs=100, batch_size=10, verbose=1)

In [None]:
# preds= t_model.predict(test_df)
# predictions= [0 if pred < 0.5 else 1 for pred in preds]
# output = pd.DataFrame({'PassengerId': test_passenger_Id, 'Survived': predictions})
# output.to_csv('my_submission.csv', index=False)
# print("Your submission was successfully saved!")