In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.font_manager as fm
import math
# Read the three competition CSVs in one pass: the training rows, the test
# rows, and the sample submission that is filled with predictions at the end.
train, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/titanic/train.csv",
        "../input/titanic/test.csv",
        "../input/titanic/gender_submission.csv",
    )
)

# Custom typeface handed to every text element of the figures below.
font = fm.FontProperties(fname='../input/acmeregular/Acme-Regular.ttf')

train.head(3)

# Data Analysis

### 1. PassengerId

I think 'PassengerId' has no meaning, so I dropped it.

In [None]:
# Remove the PassengerId column from both frames, then preview the result.
train, test = (frame.drop(columns='PassengerId') for frame in (train, test))
train.head(3)

### 2. Pclass

Pclass is an important feature. Look at these charts.

The 1st class's survival rate is 63%, the 2nd class's survival rate is 47.3%, and the 3rd class's survival rate is 24.2%.

In [None]:
# Passenger counts and survivor counts keyed by ticket class (1, 2, 3).
total = dict(train['Pclass'].value_counts())
survived = dict(train.groupby('Pclass')['Survived'].sum())
passenger_count = sum(total.values())

explode = (0, 0.15)
bg_color = "#363336"
light, dark = '#F2F2F2', '#706B70'

fig, ax = plt.subplots(figsize=(25,10), facecolor=bg_color)
ax.patch.set_facecolor(bg_color)

# Upper rows: one pie per class. Bottom row: the class-proportion bar.
spec = gridspec.GridSpec(ncols=12, nrows=8, figure=fig)
ax1 = fig.add_subplot(spec[:5, :4], facecolor=bg_color)
ax2 = fig.add_subplot(spec[:5, 4:8], facecolor=bg_color)
ax3 = fig.add_subplot(spec[:5, 8:], facecolor=bg_color)
ax4 = fig.add_subplot(spec[7:, :], facecolor=bg_color)

# One row per pie:
# (axes, class, caption, caption x, "Survived" y, "Survived" size, percent x, percent y, percent size)
pie_layout = [
    (ax1, 1, "1st Class", 0,   0.2, 35, -0.42, -0.2, 55),
    (ax2, 2, "2nd Class", 0,   0.2, 30, -0.46, -0.1, 45),
    (ax3, 3, "3rd Class", 0.1, 0.5, 20, -0.46,  0.3, 35),
]
for pie_ax, pclass, caption, caption_x, label_y, label_size, pct_x, pct_y, pct_size in pie_layout:
    alive = survived[pclass]
    # Two wedges: survivors (light) and everyone else (dark, pulled out of the pie).
    pie_ax.pie([alive, total[pclass]-alive], explode=explode, shadow=True, startangle=90, colors=[light, dark])
    pie_ax.text(s="Survived", x=-0.48, y=label_y, font=font, fontsize=label_size, va='center', ha='center', color=dark)
    pie_ax.text(s=f"{round(alive/total[pclass] * 100,1)}%", x=pct_x, y=pct_y, font=font, fontsize=pct_size, va='center', ha='center', color=dark)
    pie_ax.text(s=caption, x=caption_x, y=-1.3, font=font, fontsize=40, va='center', ha='center', color=light)

# Three bars drawn on top of each other on the same row; the differing alphas
# make the 1st / 2nd / 3rd class segments distinguishable.
for bar_width, bar_alpha in ((passenger_count, 0.4), (total[1], 0.8), (total[1] + total[2], 0.7)):
    ax4.barh(y=0, width=bar_width, color=light, alpha=bar_alpha)

for pclass, caption, text_x in ((3, "3rd Class", 550), (2, "2nd Class", 230), (1, "1st Class", 40)):
    ax4.text(s=f"{caption} : {round(total[pclass]/passenger_count * 100,2)}%", x=text_x, y=0, font=font, fontsize=30, va='center')

ax4.text(s="The proportion of Class", x=0, y=1, font=font, fontsize=30, color='white', va='top')

# The outer axes only carries the figure title; hide its frame and ticks.
ax.axis("off")
ax4.axis("off")
ax4.set_xlim(0, passenger_count)
ax.text(s="The influence of class", x=0, y=1.05, font=font, color='#F5E9F5', fontsize=50)
plt.show()

### 3. Name / Sex
If you reduce each name to its title, you can turn it into meaningful data.

In [None]:
def change_name(name):
    """Reduce a full passenger name to a title category.

    Parameters
    ----------
    name : str
        Full name as stored in the ``Name`` column, e.g.
        ``"Braund, Mr. Owen Harris"``.

    Returns
    -------
    str
        ``"Mr"``, ``"Mrs"``, ``"Miss"``, ``"Master"`` or ``"Ms"`` when the
        matching ``"<title>."`` substring occurs in ``name`` (checked in that
        order), otherwise ``"No"``.

    Notes
    -----
    The previous version tested ``elif "Ms.":`` -- a non-empty string literal,
    which is always true -- so every remaining title was labelled ``"Ms"`` and
    the ``"No"`` branch could never run. With the membership test restored,
    other titles fall into ``"No"``; one-hot encoding this column therefore
    produces one more category than before, and anything that hard-codes the
    number of features must account for it.
    """
    # Order is kept from the original chain of checks.
    for title in ("Mr", "Mrs", "Miss", "Master", "Ms"):
        if f"{title}." in name:
            return title
    return "No"
# Overwrite the Name column of both frames with the title category.
for frame in (train, test):
    frame['Name'] = frame['Name'].map(change_name)

First, I extract the title from each name, such as "Mr", "Mrs", ...

In [None]:
# Mean survival per title and per sex, as (label, rate) pairs ordered from the
# highest rate to the lowest.
name = train.groupby('Name')['Survived'].mean()
name = sorted(name.items(), key=lambda item: item[1], reverse=True)

sex = train.groupby('Sex')['Survived'].mean()
sex = sorted(sex.items(), key=lambda item: item[1], reverse=True)

# (The per-class `total` / `survived` dictionaries that used to be recomputed
# here were never read by this cell, so they are no longer rebuilt.)

fig, ax = plt.subplots(figsize=(25,10), facecolor="#363336")
ax.patch.set_facecolor('#363336')

# Left 7/10 of the figure: titles. Right 3/10: sex.
spec = gridspec.GridSpec(ncols=10, nrows=1, figure=fig)
ax1 = fig.add_subplot(spec[:, :7], facecolor="#363336")
ax2 = fig.add_subplot(spec[:, 7:], facecolor="#363336")

# --- survival rate per title ---
ax1.bar(x=[label for label, _ in name], height=[rate for _, rate in name], width=0.6, color="#F2F2F2", alpha=0.7)
for i, (label, rate) in enumerate(name):
    # Category label under the bar, percentage on top of it.
    ax1.text(s=label, x=i, y=0, va='top', ha='center', font=font, color='#F2F2F2', fontsize=30)
    ax1.text(s=f"{round(rate*100,1)}%", x=i, y=rate, va='bottom', ha='center', font=font, color='#F2F2F2', fontsize=30)

ax1.axis("off")
ax2.axis("off")

# --- survival rate per sex ---
# Colours follow bar position (highest rate first), not a fixed sex.
x_pos = [0, 0.5]
color = ["#F05B5B", '#5B79F0']
ax2.bar(x=x_pos, height=[rate for _, rate in sex], width=0.4, color=color, alpha=0.7)

for pos, bar_color, (label, rate) in zip(x_pos, color, sex):
    ax2.text(s=label, x=pos, y=0, va='top', ha='center', font=font, color=bar_color, fontsize=30)
    ax2.text(s=f"{round(rate*100,1)}%", x=pos, y=rate, va='bottom', ha='center', font=font, color=bar_color, fontsize=30)

ax.text(s="The influence of Name/Sex ", x=0, y=1.05, font=font, color='#F5E9F5', fontsize=50)
ax.axis('off')

plt.show()

Then I compare them. Mrs has a survival rate of 79.2%, Miss 69.8%, Master 57.5%, Ms 44.4%, and Mr only 15.7%.

Similarly, the female survival rate is 74.2%, but the male survival rate is only 18.9%.

### 4. Age

In [None]:
# Passenger counts per 3-year age bin (bin label = lower edge), for everyone
# and for survivors only. value_counts() skips missing ages.
age_bins = (train['Age'] // 3 * 3).value_counts().sort_index()
age_bins_survived = (train.loc[train['Survived']==1, 'Age'] // 3 * 3).value_counts().sort_index()

fig, ax = plt.subplots(figsize=(25,10), facecolor="#363336")
ax.patch.set_facecolor('#363336')

spec = gridspec.GridSpec(ncols=10, nrows=1, figure=fig)
ax1 = fig.add_subplot(spec[:, :], facecolor="#363336")

# Outline plus translucent fill: all passengers first, survivors second.
for bins, fill_color in ((age_bins, '#85C0EA'), (age_bins_survived, '#EA8881')):
    bin_edges, bin_counts = list(bins.index), list(bins.values)
    ax1.plot(bin_edges, bin_counts)
    ax1.fill_between(bin_edges, 0, bin_counts, color=fill_color, alpha=0.3)

ax1.set_xlim(left=0)
ax1.set_ylim(bottom=0)

# Hand-placed legend in the upper-right corner.
ax1.text(s="Total", x=80, y=70, font=font, fontsize=30, color='#85C0EA', va='center', ha='right')
ax1.text(s="Survived", x=80, y=65, font=font, fontsize=30, color='#EA8881', va='center', ha='right')

for side in ('right', 'top', 'left'):
    ax1.spines[side].set_visible(False)
ax.axis("off")

# Built-in ticks are removed and replaced by text labels in the custom font.
ax1.set_xticks([])
ax1.set_yticks([])
for age_tick in range(0, 81, 10):
    ax1.text(s=age_tick, x=age_tick, y=0, font=font, fontsize=25, color="#F2F2F2", va='top', ha='center', alpha=0.8)
for count_tick in range(10, 80, 10):
    ax1.text(s=count_tick, x=0, y=count_tick, font=font, fontsize=25, color="#F2F2F2", va='center', ha='right', alpha=0.8)

ax1.text(s="The influence of Age", x=1, y=75, font=font, fontsize=50, color="#F2F2F2", va='center', ha='left')

plt.show()

The chart suggests that the younger generation's survival rate is lower than that of the others.

In [None]:
# How many ages are missing in each frame?
train_age_missing = train['Age'].isna().sum()
test_age_missing = test['Age'].isna().sum()
print("train 'Age' colunms's NaN : ", train_age_missing)
print("test 'Age' colunms's NaN : ", test_age_missing)

# Titles of the training passengers whose age is missing.
train.loc[train['Age'].isna(), 'Name'].value_counts()

In [None]:
# Median age per title category, learned from the training rows only and
# reused for the test rows.
median_value = dict(train.groupby('Name')['Age'].median())
# Fallback for a title that has no usable median in the training data; the
# previous row-wise lookup raised KeyError in that case.
overall_median_age = train['Age'].median()

# Vectorised replacement for the row-wise apply: fill missing ages with the
# title median, then truncate every age to an integer (as int() did before).
for frame in (train, test):
    title_median = frame['Name'].map(median_value).fillna(overall_median_age)
    frame['Age'] = frame['Age'].fillna(title_median).astype(int)

### 5. SibSp / Parch

In [None]:
# Passenger counts per SibSp / Parch value, for everyone and for survivors,
# ordered by the value itself.
survivors = train[train['Survived'] == 1]
sib_counts = train['SibSp'].value_counts().sort_index()
sib_counts_survived = survivors['SibSp'].value_counts().sort_index()
par_counts = train['Parch'].value_counts().sort_index()
par_counts_survived = survivors['Parch'].value_counts().sort_index()

fig, ax = plt.subplots(figsize=(25,10), facecolor="#363336")
ax.patch.set_facecolor('#363336')
ax.axis("off")

# Two equally wide panels: SibSp on the left, Parch on the right.
spec = gridspec.GridSpec(ncols=10, nrows=1, figure=fig)
ax1 = fig.add_subplot(spec[:, :5], facecolor="#363336")
ax2 = fig.add_subplot(spec[:, 5:], facecolor="#363336")

# One row per panel:
# (axes, all counts, survivor counts, legend x, "Total" y, "Survived" y,
#  x-label stop, y-label stop, title, title y)
panels = [
    (ax1, sib_counts, sib_counts_survived, 8, 500, 470, 9, 600, "The influence of SibSp", 600),
    (ax2, par_counts, par_counts_survived, 6, 560, 520, 6, 700, "The influence of Parch", 660),
]
for panel, everyone, alive, legend_x, total_y, survived_y, x_stop, y_stop, title, title_y in panels:
    # Outline plus translucent fill: all passengers first, survivors second.
    for counts, fill_color in ((everyone, '#85C0EA'), (alive, '#EA8881')):
        xs, ys = list(counts.index), list(counts.values)
        panel.plot(xs, ys)
        panel.fill_between(xs, 0, ys, color=fill_color, alpha=0.3)

    panel.set_xlim(left=0)
    panel.set_ylim(bottom=0)

    # Hand-placed legend.
    panel.text(s="Total", x=legend_x, y=total_y, font=font, fontsize=30, color='#85C0EA', va='center', ha='right')
    panel.text(s="Survived", x=legend_x, y=survived_y, font=font, fontsize=30, color='#EA8881', va='center', ha='right')

    for side in ('right', 'top', 'left'):
        panel.spines[side].set_visible(False)

    # Built-in ticks are removed and replaced by text labels in the custom font.
    panel.set_xticks([])
    panel.set_yticks([])
    for x_tick in range(0, x_stop):
        panel.text(s=x_tick, x=x_tick, y=0, font=font, fontsize=25, color="#F2F2F2", va='top', ha='center', alpha=0.8)
    for y_tick in range(100, y_stop, 100):
        panel.text(s=y_tick, x=0, y=y_tick, font=font, fontsize=25, color="#F2F2F2", va='center', ha='right', alpha=0.8)

    panel.text(s=title, x=1, y=title_y, font=font, fontsize=50, color="#F2F2F2", va='center', ha='left')

plt.show()

Family size has some influence. I can't explain it in detail, but if the family size is bigger, the survival rate is higher.

### 6. Ticket

I can't find any meaning in Ticket, so I drop it.

In [None]:
# Remove the Ticket column from both frames.
train, test = (frame.drop(columns='Ticket') for frame in (train, test))

### 7. Fare

In [None]:
# Upper bound applied to every fare; larger fares are replaced by this value.
FARE_CAP = 100

# A missing fare used to pass through this cell untouched. If it then reaches
# the network as NaN (NOTE(review): assumes the scaler passes NaN through),
# the `x >= 0.5` threshold at prediction time silently maps it to class 0.
# Impute with the training median instead; this is a no-op when no fare is
# missing.
fare_fill = train['Fare'].median()

train['Fare'] = train['Fare'].fillna(fare_fill).clip(upper=FARE_CAP)
test['Fare'] = test['Fare'].fillna(fare_fill).clip(upper=FARE_CAP)

In [None]:
# Passenger counts per 5-unit fare bin (bin label = lower edge), for everyone
# and for survivors only.
fare_bins = (train['Fare'] // 5 * 5).value_counts().sort_index()
fare_bins_survived = (train.loc[train['Survived']==1, 'Fare'] // 5 * 5).value_counts().sort_index()

fig, ax = plt.subplots(figsize=(25,10), facecolor="#363336")
ax.patch.set_facecolor('#363336')

spec = gridspec.GridSpec(ncols=10, nrows=1, figure=fig)
ax1 = fig.add_subplot(spec[:, :], facecolor="#363336")

# Outline plus translucent fill: all passengers first, survivors second.
for bins, fill_color in ((fare_bins, '#85C0EA'), (fare_bins_survived, '#EA8881')):
    bin_edges, bin_counts = list(bins.index), list(bins.values)
    ax1.plot(bin_edges, bin_counts)
    ax1.fill_between(bin_edges, 0, bin_counts, color=fill_color, alpha=0.3)

ax1.set_xlim(left=0)
ax1.set_ylim(bottom=0)

# Hand-placed legend in the upper-right corner.
ax1.text(s="Total", x=100, y=280, font=font, fontsize=30, color='#85C0EA', va='center', ha='right')
ax1.text(s="Survived", x=100, y=255, font=font, fontsize=30, color='#EA8881', va='center', ha='right')

for side in ('right', 'top', 'left'):
    ax1.spines[side].set_visible(False)
ax.axis("off")

# Built-in ticks are removed and replaced by text labels in the custom font.
ax1.set_xticks([])
ax1.set_yticks([])
for fare_tick in range(0, 101, 10):
    ax1.text(s=fare_tick, x=fare_tick, y=0, font=font, fontsize=25, color="#F2F2F2", va='top', ha='center', alpha=0.8)
for count_tick in range(50, 350, 50):
    ax1.text(s=count_tick, x=0, y=count_tick, font=font, fontsize=25, color="#F2F2F2", va='center', ha='right', alpha=0.8)

ax1.text(s="The influence of Fare", x=6, y=320, font=font, fontsize=50, color="#F2F2F2", va='center', ha='left')

plt.show()

Fare shows a big influence. I think it is related to class: the higher the fare, the higher the survival rate.

### 8. Cabin
Cabin has many NaN values, which makes it hard to handle, so I dropped it.

In [None]:
# Report how much of the Cabin column is missing in the training data ...
missing_cabins = train['Cabin'].isna().sum()
print("Cabin's NaN value : ", missing_cabins)
print("Cabin's NaN percent : ", round(missing_cabins/len(train['Cabin'])*100,2), "%")

# ... then remove the column from both frames.
train, test = (frame.drop(columns="Cabin") for frame in (train, test))

### 9. Embarked

In [None]:
# Passenger counts and survivor counts keyed by embarkation code ('S', 'C', 'Q').
total = dict(train['Embarked'].value_counts())
survived = dict(train.groupby('Embarked')['Survived'].sum())
passenger_count = sum(total.values())

explode = (0, 0.15)
bg_color = "#363336"
light, dark = '#F2F2F2', '#706B70'

fig, ax = plt.subplots(figsize=(25,10), facecolor=bg_color)
ax.patch.set_facecolor(bg_color)

# Upper rows: one pie per port. Bottom row: the port-proportion bar.
spec = gridspec.GridSpec(ncols=12, nrows=8, figure=fig)
ax1 = fig.add_subplot(spec[:5, :4], facecolor=bg_color)
ax2 = fig.add_subplot(spec[:5, 4:8], facecolor=bg_color)
ax3 = fig.add_subplot(spec[:5, 8:], facecolor=bg_color)
ax4 = fig.add_subplot(spec[7:, :], facecolor=bg_color)

# One row per pie:
# (axes, port, caption x, "Survived" y, "Survived" size, percent x, percent y, percent size)
pie_layout = [
    (ax1, 'S', 0,   0.4, 30, -0.42,  0.1, 40),
    (ax2, 'C', 0,   0.2, 35, -0.46, -0.1, 50),
    (ax3, 'Q', 0.1, 0.3, 30, -0.46,  0.0, 43),
]
for pie_ax, port, caption_x, label_y, label_size, pct_x, pct_y, pct_size in pie_layout:
    alive = survived[port]
    # Two wedges: survivors (light) and everyone else (dark, pulled out of the pie).
    pie_ax.pie([alive, total[port]-alive], explode=explode, shadow=True, startangle=90, colors=[light, dark])
    pie_ax.text(s="Survived", x=-0.48, y=label_y, font=font, fontsize=label_size, va='center', ha='center', color=dark)
    pie_ax.text(s=f"{round(alive/total[port] * 100,1)}%", x=pct_x, y=pct_y, font=font, fontsize=pct_size, va='center', ha='center', color=dark)
    pie_ax.text(s=f"Embarked: {port}", x=caption_x, y=-1.3, font=font, fontsize=40, va='center', ha='center', color=light)

# Three bars drawn on top of each other on the same row; the differing alphas
# make the S / C / Q segments distinguishable.
for bar_width, bar_alpha in ((passenger_count, 0.4), (total['S'], 0.8), (total['S'] + total['C'], 0.7)):
    ax4.barh(y=0, width=bar_width, color=light, alpha=bar_alpha)

for port, text_x in (('Q', 815), ('C', 700), ('S', 250)):
    ax4.text(s=f"{port} : {round(total[port]/passenger_count * 100,2)}%", x=text_x, y=0, font=font, fontsize=30, va='center')

ax4.text(s="The proportion of Embarked", x=0, y=1, font=font, fontsize=30, color='white', va='top')

# The outer axes only carries the figure title; hide its frame and ticks.
ax.axis("off")
ax4.axis("off")
ax4.set_xlim(0, passenger_count)
ax.text(s="The influence of Embarked", x=0, y=1.05, font=font, color='#F5E9F5', fontsize=50)
plt.show()

The survival rate for Embarked: C is 55.4%. It is not shown here, but there were many 1st class passengers in C, which is presumed to explain this result.

# Data Preprocessing

I deleted some columns (PassengerId, Ticket, Cabin), reduced Name to a title, and capped Fare at a maximum of 100.


### Change to numeric
I will convert the categorical (object) columns to numeric ones using the pandas `get_dummies` function.

In [None]:
# Preview the cleaned training frame before its categorical columns are encoded.
train.head(3)

In [None]:
# One-hot encode the categorical columns; drop_first removes the first
# category of each column.
categorical_columns = ['Pclass','Name','Sex','Embarked']
train = pd.get_dummies(train, columns=categorical_columns, drop_first=True)
test = pd.get_dummies(test, columns=categorical_columns, drop_first=True)

# The two frames are encoded independently, so nothing guarantees they end up
# with the same dummy columns. Align test to train's feature columns (same
# set, same order): a category missing from test becomes an all-zero column
# and a category unseen in train is discarded. When the columns already match
# this changes nothing.
test = test.reindex(columns=train.columns.drop('Survived'), fill_value=0)
train.head(3)

In [None]:
# Separate the feature columns (X) from the target column (y).
X, y = train.drop(columns='Survived'), train['Survived']

#### Scaler
Now I will use MinMaxScaler.

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit the scaler on the training features only, then apply that same fitted
# scaling to both the training and the test features.
scaler = MinMaxScaler().fit(X)
X = scaler.transform(X)
test = scaler.transform(test)

# Make Model

This time, I am using PyTorch.

In [None]:
import torch
import torch.nn as nn
from torch import optim

from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset
from torch.utils.data.dataset import random_split

# Seed PyTorch's random number generator so repeated runs are comparable.
torch.manual_seed(100)

In [None]:
# Convert features, target and test features to float32 tensors.
X = torch.tensor(X,dtype=torch.float32)
y = torch.tensor(y,dtype = torch.float32)
test = torch.tensor(test,dtype=torch.float32)
# One target column per row, matching the model's single output unit.
y = y.reshape(-1,1)

dataset = TensorDataset(X, y)

# Hold out 20% of the rows for validation. The sizes are derived from the
# dataset length instead of being hard-coded, so the split keeps working if
# the number of rows changes (891 rows still gives 713 / 178).
VAL_FRACTION = 0.2
n_val = int(len(dataset) * VAL_FRACTION)
n_train = len(dataset) - n_val
train_dataset, val_dataset = random_split(dataset, [n_train, n_val])

# Note: from here on these two names hold DataLoaders, not datasets. Both
# yield one shuffled sample per batch.
train_dataset = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_dataset = DataLoader(val_dataset, batch_size=1, shuffle=True)

First, convert the data from NumPy arrays to `torch.tensor` and reshape `y`.
Then split the dataset into training and validation sets. (I split it 8:2.)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Width of the input layer follows the feature matrix instead of a hard-coded
# 13, so a change in the number of encoded columns does not break the forward
# pass. (X is expected to be 2-D: rows x features.)
n_features = X.shape[1]

# NOTE(review): .to(device) is called on the still-empty container, before any
# layer is registered, so it does not move the layers added below. The
# training and prediction cells feed tensors that are never moved to `device`
# either; if the model is ever moved for real, the inputs must follow.
model = nn.Sequential().to(device)

# Fully connected stack n_features -> 13 -> 13 -> 10 -> 5 -> 1 with light
# dropout after the first three linear layers and a sigmoid on the output.
model.add_module('fc1', nn.Linear(n_features, 13))
model.add_module('dropout1', nn.Dropout(0.1))
model.add_module('relu1', nn.ReLU())
model.add_module('fc2', nn.Linear(13, 13))
model.add_module('dropout2', nn.Dropout(0.1))
model.add_module('relu2', nn.ReLU())
model.add_module('fc3', nn.Linear(13, 10))
model.add_module('dropout3', nn.Dropout(0.1))
model.add_module('relu3', nn.ReLU())
model.add_module('fc4', nn.Linear(10, 5))
model.add_module('relu4', nn.ReLU())
model.add_module('fc5', nn.Linear(5, 1))
model.add_module('Sigmoid1', nn.Sigmoid())
print(model)

I made an MLP model.

In [None]:
# Binary cross-entropy on the sigmoid output, optimised with Adam.
LEARNING_RATE = 0.01
loss_fn = nn.BCELoss()
loss_fn = loss_fn.to(device)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

The loss function is binary cross-entropy (`BCELoss`), and the optimizer is Adam.

### Train

In [None]:
nb_epochs = 4
# Epochs are numbered 0..nb_epochs, so the loop makes nb_epochs + 1 passes.
for epoch in range(nb_epochs + 1):
    # --- training pass: dropout enabled ---
    model.train()
    for batch_idx, (x_train, y_train) in enumerate(train_dataset):
        optimizer.zero_grad()
        prediction = model(x_train)
        cost = loss_fn(prediction, y_train)
        cost.backward()
        optimizer.step()

        # Progress line every 250 batches.
        if batch_idx%250 == 0:
            print('Epoch {:4d}/{} Batch {}/{} Cost: {:.6f}'.format(
                epoch, nb_epochs, batch_idx+1, len(train_dataset),
                cost.item()
                ))

    # --- validation pass ---
    # Previously this ran in training mode: dropout was still active, which
    # made the reported cost noisy, and autograd graphs were built for nothing.
    # Switch to eval mode and disable gradient tracking.
    model.eval()
    validation_data_eval = []
    with torch.no_grad():
        for x_val, y_val in val_dataset:
            prediction = model(x_val)
            validation_data_eval.append(loss_fn(prediction, y_val).item())
    print("validation cost : ", np.mean(validation_data_eval))

### Predict

In [None]:
# Inference must run in eval mode: with dropout still active the predictions
# would be randomly perturbed and differ from run to run. Gradient tracking is
# not needed here either.
model.eval()
with torch.no_grad():
    pred = model(test)

# One row per test passenger, single column 0 holding the sigmoid output.
pred = pd.DataFrame(pred.tolist())

# Threshold the probability: >= 0.5 -> 1, anything else -> 0.
pred[0] = (pred[0] >= 0.5).astype(int)

The model output passes through a sigmoid, so I convert it to a label: 1 if x >= 0.5, and 0 otherwise.

In [None]:
# Put the predicted labels into the sample submission and write it out
# without the index column.
submission['Survived'] = pred[0]
submission.to_csv("test.csv", index=False)

It's finished. Now make the submission.