# Implemenation Deep Learning for Dataset Titanic 

In [0]:
#Some librarys
import numpy as np
import pandas as pd

### Mount My Drive

In [20]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


###  Colleting Data

In [21]:
X_train = pd.read_csv(r'/content/drive/My Drive/Titanic/train.csv')
X_test_truth = pd.read_csv(r'/content/drive/My Drive/Titanic/test.csv')
X_train.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Data Wrabling

In [22]:
# Delete some columns that is not concerned
def drop_not_concerned_columns(data, columns):
    return data.drop(columns, axis=1)

columns = ['PassengerId', 'Name', 'Ticket', 'Fare', 'Cabin', 'Embarked']
X_train = drop_not_concerned_columns(X_train, columns)
X_test_truth = drop_not_concerned_columns(X_test_truth, columns)
X_train.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch
0,0,3,male,22.0,1,0
1,1,1,female,38.0,1,0
2,1,3,female,26.0,0,0
3,1,1,female,35.0,1,0
4,0,3,male,35.0,0,0


In [23]:
nan_columns = ['Age', 'SibSp', 'Parch']
print(len(X_train))
X_train = X_train.dropna()
X_test_truth = X_test_truth.dropna()
print(len(X_train))

891
714


In [24]:
#normalize 
def dummy_data(data, columns):
    for column in columns:
        data = pd.concat([data, pd.get_dummies(data[column], prefix=column)], axis=1)
        data = data.drop(column, axis=1)
        
    return data

dum_columns = ['Pclass']
X_train = dummy_data(X_train, dum_columns)
X_test_truth = dummy_data(X_test_truth, dum_columns)
X_train.head(5)

Unnamed: 0,Survived,Sex,Age,SibSp,Parch,Pclass_1,Pclass_2,Pclass_3
0,0,male,22.0,1,0,0,0,1
1,1,female,38.0,1,0,1,0,0
2,1,female,26.0,0,0,0,0,1
3,1,female,35.0,1,0,1,0,0
4,0,male,35.0,0,0,0,0,1


### Pre-processing

In [25]:
# Transforming Sex to int and Normalize Age
from sklearn.preprocessing import LabelEncoder, StandardScaler

def sex_int(data):
    le = LabelEncoder()
    le.fit(['male', 'female'])
    data['Sex'] = le.transform(data['Sex'])
    return data

def normalize_age(data):
    ss = StandardScaler()
    data['Age'] = ss.fit_transform(data['Age'].values.reshape(-1, 1))
    return data

X_train = sex_int(X_train)
X_train = normalize_age(X_train)
X_test_truth = sex_int(X_test_truth)
X_test_truth = normalize_age(X_test_truth)

X_train.head(5)

Unnamed: 0,Survived,Sex,Age,SibSp,Parch,Pclass_1,Pclass_2,Pclass_3
0,0,1,-0.530377,1,0,0,0,1
1,1,0,0.571831,1,0,1,0,0
2,1,0,-0.254825,0,0,0,0,1
3,1,0,0.365167,1,0,1,0,0
4,0,1,0.365167,0,0,0,0,1


In [26]:
def split_valid_test_data(data, fraction=0.8):
    data_y = data["Survived"]
    data_x = data.drop(["Survived"], axis=1)

    train_valid_split_idx = int(len(data_x) * fraction)
    train_x = data_x[:train_valid_split_idx]
    train_y = data_y[:train_valid_split_idx]

    valid_test_split_idx = (len(data_x) - train_valid_split_idx) // 2
    test_x = data_x[train_valid_split_idx + valid_test_split_idx:]
    test_y = data_y[train_valid_split_idx + valid_test_split_idx:]

    return train_x.values, train_y.values.reshape(-1, 1), test_x.values, test_y.values.reshape(-1, 1)

X_train, y_train, X_test, y_test = split_valid_test_data(X_train)

from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train)

print("X_train:{}".format(X_train.shape))
print("train_y:{}".format(y_train.shape))

print("X_val:{}".format(X_val.shape))
print("y_val:{}".format(y_val.shape))

print("X_test:{}".format(X_test.shape))
print("y_test:{}".format(y_test.shape))

X_train:(428, 7)
train_y:(428, 1)
X_val:(143, 7)
y_val:(143, 1)
X_test:(72, 7)
y_test:(72, 1)


### Train & Test and Accuracy Check

In [50]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import RMSprop, Adam, SGD

model = Sequential()


model.add(Dense(512, input_dim=X_train.shape[1]))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(256, input_dim=512))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128, input_dim=256))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(64, input_dim=128))
model.add(Activation('relu'))
model.add(Dropout(0.25))

model.add(Dense(32, input_dim=32))
model.add(Activation('relu'))
model.add(Dropout(0.15))

model.add((Dense(1, input_dim=32)))
model.add(Activation('sigmoid'))

sgd = RMSprop(lr=0.0001)
model.compile(optimizer = sgd,
              loss = 'mean_squared_error',
              metrics = ['accuracy'])

model.fit(x=X_train, y=y_train, batch_size=512, epochs=500, verbose=1, 
          validation_data=(X_val, y_val))


Train on 428 samples, validate on 143 samples
Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
E

<keras.callbacks.History at 0x7f5645ea7748>

In [51]:
score = model.evaluate(X_test, y_test)
print("")
print("Test loss:{0}".format(score[0]))
print("Test accuracy:{0}".format(score[1]))


Test loss:0.10805530101060867
Test accuracy:0.8472222222222222
