In [None]:
https://www.kaggle.com/dr1t10/titanicnet-with-pytorch?scriptVersionId=5419927

- Pclass: The ticket class;
- AgeBand: Age split into 4 bands using K-Means. Ranges from 0 to 3 with ascending Age;
- IsMale: Sex in binary form;
- InGroup: Is 1 if the passenger is in a group; otherwise 0;
- InWcg: Is 1 if the passenger is in a woman-child-group; otherwise 0;
- WcgAllSurvived: Equal to 1 if all members of its woman-child-group survived; otherwise 0;
- WcgAllDied: The opposite of WcgAllSurvived.

In [2]:
# import package
import pandas as pd
import numpy as np
import torch
import torch.nn.functional as F

import matplotlib.pyplot as plt

# set numpy seed for reproducibility
np.random.seed(seed=123)

In [31]:
dataset = pd.read_csv("../data/titanic.csv", sep=',')[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Survived']]
#dataset.columns = ['f1', 'f2', 'f3', 'f4', 'species']
dataset.head()

Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Survived
0,3,male,22.0,1,0,7.25,0
1,1,female,38.0,1,0,71.2833,1
2,3,female,26.0,0,0,7.925,1
3,1,female,35.0,1,0,53.1,1
4,3,male,35.0,0,0,8.05,0


In [32]:
dataset.dtypes

Pclass        int64
Sex          object
Age         float64
SibSp         int64
Parch         int64
Fare        float64
Survived      int64
dtype: object

In [33]:
dataset['Age'].fillna((dataset['Age'].mean()), inplace=True)

In [34]:
dataset = pd.get_dummies(dataset, prefix=['Sex'])

In [35]:
dataset.describe()

Unnamed: 0,Pclass,Age,SibSp,Parch,Fare,Survived,Sex_female,Sex_male
count,891.0,891.0,891.0,891.0,891.0,891.0,891.0,891.0
mean,2.308642,29.699118,0.523008,0.381594,32.204208,0.383838,0.352413,0.647587
std,0.836071,13.002015,1.102743,0.806057,49.693429,0.486592,0.47799,0.47799
min,1.0,0.42,0.0,0.0,0.0,0.0,0.0,0.0
25%,2.0,22.0,0.0,0.0,7.9104,0.0,0.0,0.0
50%,3.0,29.699118,0.0,0.0,14.4542,0.0,0.0,1.0
75%,3.0,35.0,1.0,0.0,31.0,1.0,1.0,1.0
max,3.0,80.0,8.0,6.0,512.3292,1.0,1.0,1.0


In [36]:
dataset = dataset[['Pclass', 'Sex_male', 'Sex_female', 'Age', 'SibSp', 'Parch', 'Fare', 'Survived']]

In [37]:
dataset = dataset.values
dataset

array([[ 3.    ,  1.    ,  0.    , ...,  0.    ,  7.25  ,  0.    ],
       [ 1.    ,  0.    ,  1.    , ...,  0.    , 71.2833,  1.    ],
       [ 3.    ,  0.    ,  1.    , ...,  0.    ,  7.925 ,  1.    ],
       ...,
       [ 3.    ,  0.    ,  1.    , ...,  2.    , 23.45  ,  0.    ],
       [ 1.    ,  1.    ,  0.    , ...,  0.    , 30.    ,  1.    ],
       [ 3.    ,  1.    ,  0.    , ...,  0.    ,  7.75  ,  0.    ]])

In [38]:
train_size = 0.8
train_size = int(len(dataset) * train_size)
np.random.shuffle(dataset)
train, test = dataset[:train_size,:], dataset[train_size:,:]
print ('Dataset size: {0}. Train size: {1} - Test size: {2}'.format(len(dataset), len(train), len(test)))

Dataset size: 891. Train size: 712 - Test size: 179


In [39]:
# get train and set data into numpy nd.array
x_train = train[:,:7]
x_test = test[:,:7]
y_train = train[:,7]
y_test = test[:,7]

In [47]:
batch_size = len(x_train) # 50
num_epochs = 1000
learning_rate = 0.01
size_hidden = 100
n_feature = 7
n_output = 2
batch_no = int(len(x_train) / batch_size)

In [48]:
net = torch.nn.Sequential(
    torch.nn.Linear(n_feature, size_hidden),
    torch.nn.ReLU(),
    torch.nn.Linear(size_hidden, size_hidden),
    torch.nn.Linear(size_hidden, n_output),
    torch.nn.Sigmoid()
)

In [49]:
criterion = torch.nn.CrossEntropyLoss()

In [50]:
optimizer = torch.optim.SGD(net.parameters(), lr=learning_rate)

In [56]:
y_train

array([1., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 1., 1., 0.,
       1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 1., 1., 1., 0.,
       0., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 1., 0., 0.,
       0., 1., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1.,
       1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 0., 1.,
       1., 0., 1., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
       1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0.,
       0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0.,
       1., 0., 0., 0., 0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 1., 0., 1.,
       0., 0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1.,
       0., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1., 0., 1., 0., 1., 1., 1.,
       0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 1., 0., 1., 0.

In [51]:
from sklearn.utils import shuffle
from torch.autograd import Variable
running_loss = 0.0

#inputs = Variable(torch.tensor(x_train).float())
#labels = Variable(torch.tensor(y_train).float())



for epoch in range(num_epochs):
    
    for i in range(batch_no):
        start = i * batch_size
        end = start + batch_size
        inputs = Variable(torch.tensor(x_train[start:end]).float())
        labels = Variable(torch.tensor(y_train[start:end]).long())
        
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        #print("outputs",outputs)
        #print("outputs",outputs,outputs.shape,"labels",labels, labels.shape)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        
    print('Epoch {}'.format(epoch+1), "loss: ",running_loss)
    running_loss = 0.0

Epoch 1 loss:  0.6872281432151794
Epoch 2 loss:  0.6829332709312439
Epoch 3 loss:  0.6744732856750488
Epoch 4 loss:  0.6512669920921326
Epoch 5 loss:  0.6395664215087891
Epoch 6 loss:  0.6365177631378174
Epoch 7 loss:  0.6341953873634338
Epoch 8 loss:  0.6323373913764954
Epoch 9 loss:  0.630798876285553
Epoch 10 loss:  0.6294891834259033
Epoch 11 loss:  0.6283511519432068
Epoch 12 loss:  0.6273512840270996
Epoch 13 loss:  0.6264707446098328
Epoch 14 loss:  0.6256937384605408
Epoch 15 loss:  0.6250089406967163
Epoch 16 loss:  0.6244032979011536
Epoch 17 loss:  0.623867392539978
Epoch 18 loss:  0.6233898401260376
Epoch 19 loss:  0.6229621171951294
Epoch 20 loss:  0.6225765347480774
Epoch 21 loss:  0.6222272515296936
Epoch 22 loss:  0.6219082474708557
Epoch 23 loss:  0.6216164231300354
Epoch 24 loss:  0.6213477849960327
Epoch 25 loss:  0.621099054813385
Epoch 26 loss:  0.6208681464195251
Epoch 27 loss:  0.6206521391868591
Epoch 28 loss:  0.6204505562782288
Epoch 29 loss:  0.62026023864746

Epoch 254 loss:  0.6065344214439392
Epoch 255 loss:  0.6065079569816589
Epoch 256 loss:  0.6064829230308533
Epoch 257 loss:  0.6064582467079163
Epoch 258 loss:  0.6064321994781494
Epoch 259 loss:  0.6064070463180542
Epoch 260 loss:  0.606381356716156
Epoch 261 loss:  0.6063567399978638
Epoch 262 loss:  0.6063315272331238
Epoch 263 loss:  0.6063067317008972
Epoch 264 loss:  0.6062818169593811
Epoch 265 loss:  0.6062573790550232
Epoch 266 loss:  0.6062324047088623
Epoch 267 loss:  0.6062079668045044
Epoch 268 loss:  0.6061834692955017
Epoch 269 loss:  0.6061585545539856
Epoch 270 loss:  0.6061344146728516
Epoch 271 loss:  0.6061095595359802
Epoch 272 loss:  0.6060853600502014
Epoch 273 loss:  0.6060611009597778
Epoch 274 loss:  0.6060373187065125
Epoch 275 loss:  0.6060129404067993
Epoch 276 loss:  0.605989396572113
Epoch 277 loss:  0.6059654355049133
Epoch 278 loss:  0.6059412360191345
Epoch 279 loss:  0.605917751789093
Epoch 280 loss:  0.6058944463729858
Epoch 281 loss:  0.605870723724

Epoch 504 loss:  0.6017979979515076
Epoch 505 loss:  0.6017833352088928
Epoch 506 loss:  0.601768434047699
Epoch 507 loss:  0.6017540693283081
Epoch 508 loss:  0.6017395257949829
Epoch 509 loss:  0.6017239689826965
Epoch 510 loss:  0.6017099618911743
Epoch 511 loss:  0.6016950011253357
Epoch 512 loss:  0.6016803979873657
Epoch 513 loss:  0.6016659140586853
Epoch 514 loss:  0.6016513705253601
Epoch 515 loss:  0.6016368865966797
Epoch 516 loss:  0.6016219854354858
Epoch 517 loss:  0.601607620716095
Epoch 518 loss:  0.6015931963920593
Epoch 519 loss:  0.6015779376029968
Epoch 520 loss:  0.6015642285346985
Epoch 521 loss:  0.601549506187439
Epoch 522 loss:  0.6015347838401794
Epoch 523 loss:  0.6015204787254333
Epoch 524 loss:  0.6015059947967529
Epoch 525 loss:  0.6014913320541382
Epoch 526 loss:  0.6014774441719055
Epoch 527 loss:  0.6014627814292908
Epoch 528 loss:  0.6014484167098999
Epoch 529 loss:  0.6014344096183777
Epoch 530 loss:  0.60141921043396
Epoch 531 loss:  0.60140556097030

Epoch 749 loss:  0.5985817313194275
Epoch 750 loss:  0.5985695719718933
Epoch 751 loss:  0.5985584855079651
Epoch 752 loss:  0.5985455513000488
Epoch 753 loss:  0.5985341668128967
Epoch 754 loss:  0.5985218286514282
Epoch 755 loss:  0.5985097289085388
Epoch 756 loss:  0.5984982848167419
Epoch 757 loss:  0.5984861254692078
Epoch 758 loss:  0.5984744429588318
Epoch 759 loss:  0.5984627604484558
Epoch 760 loss:  0.5984507203102112
Epoch 761 loss:  0.598438560962677
Epoch 762 loss:  0.5984261631965637
Epoch 763 loss:  0.5984150171279907
Epoch 764 loss:  0.5984027981758118
Epoch 765 loss:  0.5983913540840149
Epoch 766 loss:  0.5983796119689941
Epoch 767 loss:  0.5983681082725525
Epoch 768 loss:  0.5983553528785706
Epoch 769 loss:  0.5983434915542603
Epoch 770 loss:  0.5983316898345947
Epoch 771 loss:  0.5983197689056396
Epoch 772 loss:  0.5983078479766846
Epoch 773 loss:  0.5982961058616638
Epoch 774 loss:  0.5982844829559326
Epoch 775 loss:  0.598272442817688
Epoch 776 loss:  0.59826093912

Epoch 1000 loss:  0.5956859588623047


In [52]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

predict_out = net(torch.tensor(x_test).float())
_, y_pred = torch.max(predict_out, 1)

print (accuracy_score(y_test, y_pred))

print (precision_score(y_test, y_pred, average='macro'))
print (precision_score(y_test, y_pred, average='micro'))
print (recall_score(y_test, y_pred, average='macro'))
print (recall_score(y_test, y_pred, average='micro'))

0.7150837988826816
0.6887254901960784
0.7150837988826816
0.6673913043478261
0.7150837988826816


In [55]:
torch.max(predict_out, 1)

(tensor([0.9896, 0.9859, 0.9997, 0.9645, 0.9154, 0.9914, 1.0000, 0.9630, 0.9940,
         0.9248, 0.9881, 0.9993, 0.9999, 0.9967, 0.9997, 0.9996, 0.9923, 0.9808,
         1.0000, 0.9980, 0.9389, 0.9510, 0.9704, 0.9841, 0.9481, 1.0000, 0.9752,
         0.9394, 0.9944, 0.9280, 0.9923, 0.9412, 0.9993, 0.9865, 0.9576, 0.9810,
         0.9476, 0.9940, 0.9727, 0.9805, 0.9647, 0.9992, 0.9969, 0.9454, 0.9811,
         0.9908, 0.9683, 0.9997, 1.0000, 0.9927, 1.0000, 0.9868, 0.9995, 0.9973,
         0.9792, 0.9727, 0.9923, 0.9868, 0.9816, 1.0000, 0.9982, 1.0000, 1.0000,
         1.0000, 0.9941, 0.9985, 1.0000, 0.8917, 0.9995, 0.9426, 0.9985, 0.9938,
         0.9826, 0.9921, 0.9993, 0.9666, 0.9372, 0.9851, 1.0000, 0.9998, 1.0000,
         0.9939, 0.9933, 0.9959, 0.9985, 0.9942, 0.9826, 0.9877, 0.9189, 0.9734,
         0.9931, 0.9677, 0.9993, 0.9894, 0.8858, 0.9475, 0.9993, 1.0000, 0.9756,
         0.9991, 0.9866, 0.9521, 0.9757, 0.9995, 0.9919, 0.9956, 0.9721, 1.0000,
         0.9933, 0.9995, 0.9