# Implemenation Deep Learning for Dataset Titanic 

In [0]:
#Some librarys
import numpy as np
import pandas as pd

### Mount My Drive

In [148]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


###  Colleting Data

In [149]:
X_train = pd.read_csv(r'/content/drive/My Drive/Titanic/train.csv')
X_test_truth = pd.read_csv(r'/content/drive/My Drive/Titanic/test.csv')
X_train.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


### Data Wrabling

In [150]:
# Delete some columns that is not concerned
def drop_not_concerned_columns(data, columns):
    return data.drop(columns, axis=1)

columns = ['PassengerId', 'Name', 'Ticket', 'Cabin', 'Embarked']
X_train = drop_not_concerned_columns(X_train, columns)
X_test_truth = drop_not_concerned_columns(X_test_truth, columns)
X_train.head(5)

Unnamed: 0,Survived,Pclass,Sex,Age,SibSp,Parch,Fare
0,0,3,male,22.0,1,0,7.25
1,1,1,female,38.0,1,0,71.2833
2,1,3,female,26.0,0,0,7.925
3,1,1,female,35.0,1,0,53.1
4,0,3,male,35.0,0,0,8.05


In [151]:
nan_columns = ['Age', 'SibSp', 'Parch']
print(len(X_train))
X_train = X_train.dropna()
X_test_truth = X_test_truth.dropna()
print(len(X_train))

891
714


In [152]:
#normalize 
def dummy_data(data, columns):
    for column in columns:
        data = pd.concat([data, pd.get_dummies(data[column], prefix=column)], axis=1)
        data = data.drop(column, axis=1)
        
    return data

dum_columns = ['Pclass']
X_train = dummy_data(X_train, dum_columns)
X_test_truth = dummy_data(X_test_truth, dum_columns)
X_train.head(5)

Unnamed: 0,Survived,Sex,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3
0,0,male,22.0,1,0,7.25,0,0,1
1,1,female,38.0,1,0,71.2833,1,0,0
2,1,female,26.0,0,0,7.925,0,0,1
3,1,female,35.0,1,0,53.1,1,0,0
4,0,male,35.0,0,0,8.05,0,0,1


### Pre-processing

In [153]:
# Transforming Sex to int and Normalize Age
from sklearn.preprocessing import LabelEncoder, StandardScaler

def sex_int(data):
    le = LabelEncoder()
    le.fit(['male', 'female'])
    data['Sex'] = le.transform(data['Sex'])
    return data

def normalize_age(data):
    ss = StandardScaler()
    data['Age'] = ss.fit_transform(data['Age'].values.reshape(-1, 1))
    return data

X_train = sex_int(X_train)
X_train = normalize_age(X_train)
X_test_truth = sex_int(X_test_truth)
X_test_truth = normalize_age(X_test_truth)

X_train.head(5)

Unnamed: 0,Survived,Sex,Age,SibSp,Parch,Fare,Pclass_1,Pclass_2,Pclass_3
0,0,1,-0.530377,1,0,7.25,0,0,1
1,1,0,0.571831,1,0,71.2833,1,0,0
2,1,0,-0.254825,0,0,7.925,0,0,1
3,1,0,0.365167,1,0,53.1,1,0,0
4,0,1,0.365167,0,0,8.05,0,0,1


In [154]:
def split_valid_test_data(data, fraction=0.8):
    data_y = data["Survived"]
    data_x = data.drop(["Survived"], axis=1)

    train_valid_split_idx = int(len(data_x) * fraction)
    train_x = data_x[:train_valid_split_idx]
    train_y = data_y[:train_valid_split_idx]

    valid_test_split_idx = (len(data_x) - train_valid_split_idx) // 2
    test_x = data_x[train_valid_split_idx + valid_test_split_idx:]
    test_y = data_y[train_valid_split_idx + valid_test_split_idx:]

    return train_x.values, train_y.values.reshape(-1, 1), test_x.values, test_y.values.reshape(-1, 1)

X_train, y_train, X_test, y_test = split_valid_test_data(X_train)

from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train)

print("X_train:{}".format(X_train.shape))
print("train_y:{}".format(y_train.shape))

print("X_val:{}".format(X_val.shape))
print("y_val:{}".format(y_val.shape))

print("X_test:{}".format(X_test.shape))
print("y_test:{}".format(y_test.shape))

X_train:(428, 8)
train_y:(428, 1)
X_val:(143, 8)
y_val:(143, 1)
X_test:(72, 8)
y_test:(72, 1)


### Train & Test and Accuracy Check

In [0]:
# from keras.utils import np_utils

# print(y_train.shape)
# y_train = np_utils.to_categorical(y_train)
# y_val = np_utils.to_categorical(y_val)
# y_test = np_utils.to_categorical(y_test)

In [157]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import RMSprop, Adam, SGD

model = Sequential()


model.add(Dense(512, input_dim=X_train.shape[1]))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128, input_dim=512))
model.add(Activation('relu'))
model.add(Dropout(0.5))

model.add(Dense(128, input_dim=128))
model.add(Activation('relu'))
model.add(Dropout(0.50))

model.add(Dense(64, input_dim=128))
model.add(Dropout(0.5))

# model.add(Dense(2, activation='softmax'))

model.add((Dense(1, input_dim=64)))
model.add(Activation('sigmoid'))

sgd = Adam(lr=0.01, beta_1=0.9)
model.compile(optimizer = sgd,
              loss = 'binary_crossentropy',
              metrics = ['accuracy'])

model.fit(x=X_train, y=y_train, batch_size=512, epochs=200, verbose=1, 
          validation_data=(X_val, y_val))


Train on 428 samples, validate on 143 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
E

<keras.callbacks.History at 0x7f56404bdbe0>

In [158]:

score = model.evaluate(X_test, y_test)
print("")
print("Test loss:{0}".format(score[0]))
print("Test accuracy:{0}".format(score[1]))


Test loss:0.3719721304045783
Test accuracy:0.8611111111111112


In [159]:
test_data = pd.read_csv(r'/content/drive/My Drive/Titanic/test.csv')
test_data = test_data.drop(['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Ticket', 'Cabin', 'Embarked', 'Fare', 'Name'], axis=1)
test_data.head()

Unnamed: 0,PassengerId
0,892
1,893
2,894
3,895
4,896


In [201]:
predicted = model.predict(X_test_truth, verbose=1)
print("Y_predicted: {}".format(predicted[1]))
result = list()
for predict in predicted:
    if predict > 0.5:
        result.append(1)
    else:
        result.append(0)
print(len(result))
print(len(predicted))
result = pd.DataFrame({"Survived":result})
result.head(5)

Y_predicted: [0.32393408]
331
331


Unnamed: 0,Survived
0,0
1,0
2,0
3,0
4,0


In [0]:
result.to_csv('/content/drive/My Drive/Titanic/test-output.csv')