**1 Gather the Data**

Load the Titanic dataset into the program.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of every file found.
for folder, _subfolders, names in os.walk('/kaggle/input'):
    for name in names:
        file_path = os.path.join(folder, name)
        print(file_path)

# Any results you write to the current directory are saved as output.

**2 Data Preparation**

* Split our data into training data and test data.
* Split both the training data and the test data into feature data and label data.
* Handle the missing data (i.e. deal with the problems caused by NaN).

**2.1 Prepare the training data**

In [None]:
# Read the raw training table from the Kaggle input directory.
train = pd.read_csv(filepath_or_buffer='/kaggle/input/titanic/train.csv')

In [None]:
# Preview the first five rows of the raw training table.
train.head(n=5)

In my opinion, 'Name', 'Cabin' and 'Embarked' have nothing to do with a passenger's chance of survival, so I build 'df1' from the remaining features of the passengers.

In [None]:
# Keep only the identifier, the label and the columns used as candidate features.
selected_train_columns = [
    'PassengerId', 'Survived', 'Pclass', 'Sex',
    'Age', 'SibSp', 'Parch', 'Fare',
]
df1 = train[selected_train_columns]

In [None]:
# Preview the first five rows of the reduced training table.
df1.iloc[:5]

In [None]:
# (number of rows, number of columns) of the reduced training table.
(len(df1), df1.columns.size)

Count the number of 'NaN'

In [None]:
# Count missing cells per column first, then add the per-column counts together.
missing_per_column = df1.isna().sum()
p = missing_per_column.sum()
print(p)

In my opinion, filling the 'NaN' with other data which are fake or calculated artificially will decrease the accuracy of our prediction model in this case. So I choose to delete the rows which contain the missing data.

In [None]:
# Drop every row that has at least one missing value.
df1_dl = df1.dropna(axis='index', how='any')

Make sure there is no missing data.

In [None]:
# After dropping incomplete rows the total number of missing cells should be 0.
remaining_missing_per_column = df1_dl.isna().sum()
q = remaining_missing_per_column.sum()
print(q)

Try to use '1' or '0' to describe 'Sex' instead of strings.
* 1 -> male
* 0 -> female

* Set up a new column named 'Sex_value' 
* Set the 'Sex_value' of every passenger according to 'Sex'

In [None]:
# Add 'Sex_value' as a copy of 'Sex'. .assign() builds a new frame instead of
# writing a column into a frame derived from a slice of `train`, which is what
# makes pandas emit SettingWithCopyWarning; it also keeps this cell idempotent.
df1_dl = df1_dl.assign(Sex_value=df1_dl['Sex'])

In [None]:
# Preview only the first rows; dumping the whole frame adds noise to the notebook.
df1_dl.head()

In [None]:
# Encode 'Sex' numerically: 1 -> male, 0 -> female.
# Mapping straight from 'Sex' yields a real integer column. Overwriting a copied
# string column cell by cell leaves it with object dtype, which numeric
# consumers such as Keras cannot convert to a tensor.
# astype('int64') fails loudly if 'Sex' holds anything other than the two
# expected labels (those would become NaN in the mapping).
df1_dl = df1_dl.assign(
    Sex_value=df1_dl['Sex'].map({'male': 1, 'female': 0}).astype('int64')
)

In [None]:
# Preview the first rows to check that 'Sex_value' now holds the 1/0 encoding.
df1_dl.head()

Set up 'df1_train' without column 'Sex'

In [None]:
# Label first, then the features; the string 'Sex' column is replaced by 'Sex_value'.
training_columns = ['Survived', 'Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_value']
df1_train = df1_dl[training_columns]

In [None]:
# Preview only the first rows of the final training table.
df1_train.head()

Split features and labels

In [None]:
# The first column of df1_train is the label; every remaining column is a feature.
train_label_column = df1_train.columns[0]
train_feature_columns = df1_train.columns[1:]
x_train = df1_train[train_feature_columns]
y_train = df1_train[train_label_column]

In [None]:
# First five rows of the training features.
x_train.head(n=5)

In [None]:
# First five training labels.
y_train.iloc[:5]

**2.2 Prepare the test data**
* Almost the same as preparing the training data

In [None]:
# Read the raw test table from the Kaggle input directory.
test = pd.read_csv(filepath_or_buffer='/kaggle/input/titanic/test.csv')

In [None]:
# Preview only the first rows of the raw test table.
test.head()

In [None]:
# Same column subset as the training data, minus the label (test.csv has no 'Survived').
selected_test_columns = ['PassengerId', 'Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare']
df2 = test[selected_test_columns]

# Table that supplies a 'Survived' value for each test passenger.
G_S = pd.read_csv(filepath_or_buffer='/kaggle/input/titanic/gender_submission.csv')

In [None]:
# Preview only the first rows of the reduced test table.
df2.head()

In [None]:
# Preview only the first rows of the 'Survived' lookup table.
G_S.head()

Add the 'Survived' column of the dataframe 'G_S' to the dataframe 'df2', so that rows containing missing data can be dropped from the features and the labels together.

In [None]:
# Attach 'Survived' from G_S to the test features, matching rows on
# 'PassengerId' instead of relying on both frames sharing the same row order.
# .assign() returns a new frame, so nothing is written into a slice of `test`
# (the cause of pandas' SettingWithCopyWarning) and re-running the cell is safe.
# NOTE(review): gender_submission.csv appears to be Kaggle's example submission
# rather than ground-truth labels -- confirm before treating scores computed
# against it as real test accuracy.
survived_by_passenger = G_S.set_index('PassengerId')['Survived']
df2 = df2.assign(Survived=df2['PassengerId'].map(survived_by_passenger))

In [None]:
# Preview the first rows to check that 'Survived' was attached.
df2.head()

In [None]:
# Total number of missing cells in the test table (per-column counts added together).
test_missing_per_column = df2.isna().sum()
m = test_missing_per_column.sum()
print(m)

In [None]:
# Drop every test row that has at least one missing value.
df2_dl = df2.dropna(axis='index', how='any')

In [None]:
# After dropping incomplete rows the total number of missing cells should be 0.
test_remaining_missing = df2_dl.isna().sum()
n = test_remaining_missing.sum()
print(n)

In [None]:
# Preview only the first rows of the cleaned test table.
df2_dl.head()

In [None]:
# Encode 'Sex' numerically, as for the training data: 1 -> male, 0 -> female.
# Mapping straight from 'Sex' gives an integer column rather than an
# object-dtype column of mixed origin, and .assign() avoids writing into a
# frame derived from a slice (SettingWithCopyWarning).
# astype('int64') fails loudly if 'Sex' holds an unexpected label.
df2_dl = df2_dl.assign(
    Sex_value=df2_dl['Sex'].map({'male': 1, 'female': 0}).astype('int64')
)

In [None]:
# Preview the first rows to check that 'Sex_value' now holds the 1/0 encoding.
df2_dl.head()

In [None]:
# Features first, label ('Survived') last; the string 'Sex' column is replaced by 'Sex_value'.
test_columns = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare', 'Sex_value', 'Survived']
df2_test = df2_dl[test_columns]

In [None]:
# Preview only the first rows of the final test table.
df2_test.head()

In [None]:
# df2_test has six feature columns followed by the label as its last (seventh) column.
x_test = df2_test.iloc[:, :-1]
y_test = df2_test.iloc[:, -1]

In [None]:
# Preview only the first rows of the test features.
x_test.head()

In [None]:
# Preview only the first test labels.
y_test.head()

Check the shape of dataframe 

In [None]:
# Shapes in the order: train features, train labels, test features, test labels.
print(*(part.shape for part in (x_train, y_train, x_test, y_test)))

Make sure there is no NaN in our data frame

In [None]:
# Total missing-value count for each of the four splits; all four should print as 0.
x1, y1, x2, y2 = (
    part.isnull().sum().sum()
    for part in (x_train, y_train, x_test, y_test)
)
print(x1, y1, x2, y2)

**3 Train and Evaluate the Model**

Let's see how LogisticRegression performs in this case.

In [None]:
from sklearn.linear_model import LogisticRegression

# The features are not scaled, so the default iteration budget (max_iter=100)
# can run out before the solver converges; give it more room.
clf = LogisticRegression(max_iter=1000)
clf.fit(x_train,y_train)

In [None]:
# Mean accuracy of the fitted classifier on the prepared test split.
clf.score(X=x_test, y=y_test)

**4 Predict the Particular Data**

In [None]:
# Predict for five test rows, selected by position (rows 301..305).
clf.predict(x_test.iloc[301:306])

In [None]:
# Labels for the same five positional rows, for comparison with the predictions.
y_test.iloc[301:306]

**5 Other Models**

* 5.1 SVM
* 5.2 RandomForestClassifier

>>5.1 SVM

In [None]:
from sklearn import svm

# Support-vector classifier with default settings, fitted on the training split.
clf = svm.SVC()
clf.fit(X=x_train, y=y_train)

In [None]:
# Mean accuracy of the fitted classifier on the prepared test split.
clf.score(X=x_test, y=y_test)

>>5.2 RandomForestClassifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

# A random forest is stochastic; fixing random_state makes the fitted model and
# its score reproducible across Restart & Run All.
clf = RandomForestClassifier(random_state=42)
clf.fit(x_train,y_train)

In [None]:
# Mean accuracy of the fitted classifier on the prepared test split.
clf.score(X=x_test, y=y_test)

In [None]:
import tensorflow as tf

# Fully connected binary classifier: three ReLU hidden layers with dropout and a
# single sigmoid output unit, so the network emits a survival probability in [0, 1].
# The target 'Survived' is 0/1, so the model is trained with binary cross-entropy
# and reports accuracy, which is directly comparable with the scikit-learn
# `score` values above. A linear output with MSE/MAE treats the task as
# regression and gives no accuracy figure.
model = tf.keras.models.Sequential([
  tf.keras.layers.Dense(256, activation='relu', input_shape = [x_train.shape[1]]),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(64, activation='relu'),
  tf.keras.layers.Dropout(0.2),
  tf.keras.layers.Dense(1, activation='sigmoid')
])

optimizer = tf.keras.optimizers.RMSprop(0.001)

model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [None]:
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only adds a stray "None" line to the output.
model.summary()

In [None]:
# Render the network architecture to 'model.png', top-to-bottom, with layer
# names and tensor shapes shown.
plot_options = dict(
    to_file='model.png',
    show_shapes=True,
    show_layer_names=True,
    rankdir='TB',
)
tf.keras.utils.plot_model(model, **plot_options)

In [None]:
class PrintDot(tf.keras.callbacks.Callback):
    """Minimal progress indicator: one dot per epoch, a new line every 100 epochs."""

    def on_epoch_end(self, epoch, logs=None):
        # `logs` defaults to None to match the Keras Callback signature; it is unused.
        if epoch % 100 == 0: print('')
        print('.', end='')

EPOCHS = 2000

# Keras needs purely numeric inputs. Casting to float32 guarantees that even if
# a feature column ended up with object dtype (e.g. an encoded 'Sex_value'
# column that started life as strings), and it raises immediately if a column
# still contains non-numeric values.
history = model.fit(
    x_train.astype('float32'), y_train.astype('float32'),
    epochs=EPOCHS,
    validation_data=(x_test.astype('float32'), y_test.astype('float32')),
    verbose=0,
    callbacks=[PrintDot()],
)

In [None]:
# One row per epoch with the recorded metrics, plus the epoch number as the last column.
metrics_by_epoch = pd.DataFrame(history.history)
hist = metrics_by_epoch.assign(epoch=history.epoch)
hist.tail()