In [1]:
import torch
from torch.utils.data import Dataset, DataLoader
from torch import nn, from_numpy, optim
import numpy as np
import pandas as pd

In [2]:
# The data
data = pd.read_csv('data/titanic_train.csv', delimiter=',')
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
data.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [4]:
new_data = data.select_dtypes(['int64', 'float64'])

In [5]:
new_data.sample()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
340,341,1,2,2.0,1,1,26.0


In [6]:
new_data.dropna(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [7]:
new_data.sample(5)

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
580,581,1,2,25.0,1,1,30.0
735,736,0,3,28.5,0,0,16.1
2,3,1,3,26.0,0,0,7.925
338,339,1,3,45.0,0,0,8.05
54,55,0,1,65.0,0,1,61.9792


In [8]:
# Save the data after removing categorical features and nones
new_data.to_csv("./data/preprocessed_titanic", index=False)

In [10]:
# test code before putting it in Dataset class
data = pd.read_csv('./data/preprocessed_titanic', delimiter=',', dtype=np.float32)
len = data.shape[0]
y_data = data.loc[:, 'Survived']
x_data = data.drop(['Survived'], axis=1)

In [11]:
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
0,1.0,0.0,3.0,22.0,1.0,0.0,7.25
1,2.0,1.0,1.0,38.0,1.0,0.0,71.283302
2,3.0,1.0,3.0,26.0,0.0,0.0,7.925
3,4.0,1.0,1.0,35.0,1.0,0.0,53.099998
4,5.0,0.0,3.0,35.0,0.0,0.0,8.05


In [12]:
x_data.head()

Unnamed: 0,PassengerId,Pclass,Age,SibSp,Parch,Fare
0,1.0,3.0,22.0,1.0,0.0,7.25
1,2.0,1.0,38.0,1.0,0.0,71.283302
2,3.0,3.0,26.0,0.0,0.0,7.925
3,4.0,1.0,35.0,1.0,0.0,53.099998
4,5.0,3.0,35.0,0.0,0.0,8.05


In [13]:
y_data.head()

0    0.0
1    1.0
2    1.0
3    1.0
4    0.0
Name: Survived, dtype: float32

In [14]:
x_data.loc[1]

PassengerId     2.000000
Pclass          1.000000
Age            38.000000
SibSp           1.000000
Parch           0.000000
Fare           71.283302
Name: 1, dtype: float32

In [15]:
class Titanic(Dataset):
    def __init__(self):
        data = pd.read_csv('./data/preprocessed_titanic', delimiter=',', dtype=np.float32)
        self.len = data.shape[0]
        self.y_data = torch.tensor(data.loc[:, 'Survived'].values)
        self.x_data = torch.tensor(data.drop(['Survived'], axis=1).values)
        
    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]
    
    def __len__(self):
        return self.len

In [16]:
titanic = Titanic()
testing_loader = DataLoader(titanic,
                       batch_size=1,
                       shuffle=True,
                    num_workers=4)

In [17]:
it = iter(testing_loader)
first = next(it)
first

[tensor([[506.0000,   1.0000,  18.0000,   1.0000,   0.0000, 108.9000]]),
 tensor([0.])]

In [18]:
titanic = Titanic()
train_loader = DataLoader(titanic,
                         batch_size=32,
                         num_workers=4)

In [19]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(6, 4)
        self.fc2 = nn.Linear(4, 2)
        self.fc3 = nn.Linear(2, 1)
        
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        out1 = self.sigmoid(self.fc1(x))
        out2 = self.sigmoid(self.fc2(out1))
        y_preds = self.sigmoid(self.fc3(out2))
        return y_preds

In [20]:
model = Model()
criterion = nn.MSELoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [21]:
# Training Loop
for epoch in range(2):
    for i, data in enumerate(train_loader, 0):
        # Get the data
        inputs, labels = data
        
        # Forward pass
        y_preds = model(inputs)
        
        # Loss
        loss = criterion(y_preds, labels)
        print(f"Epoch {epoch} | Batch: {i+1} | Loss: {loss.item():.4f}")
        
        # Zero grad, backward, upgrade
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        

Epoch 0 | Batch: 1 | Loss: 0.2717
Epoch 0 | Batch: 2 | Loss: 0.2693
Epoch 0 | Batch: 3 | Loss: 0.2972
Epoch 0 | Batch: 4 | Loss: 0.2750
Epoch 0 | Batch: 5 | Loss: 0.2688
Epoch 0 | Batch: 6 | Loss: 0.2580
Epoch 0 | Batch: 7 | Loss: 0.2603
Epoch 0 | Batch: 8 | Loss: 0.2517
Epoch 0 | Batch: 9 | Loss: 0.2542
Epoch 0 | Batch: 10 | Loss: 0.2586
Epoch 0 | Batch: 11 | Loss: 0.2555
Epoch 0 | Batch: 12 | Loss: 0.2530
Epoch 0 | Batch: 13 | Loss: 0.2547
Epoch 0 | Batch: 14 | Loss: 0.2508
Epoch 0 | Batch: 15 | Loss: 0.2508
Epoch 0 | Batch: 16 | Loss: 0.2557
Epoch 0 | Batch: 17 | Loss: 0.2596
Epoch 0 | Batch: 18 | Loss: 0.2538
Epoch 0 | Batch: 19 | Loss: 0.2543
Epoch 0 | Batch: 20 | Loss: 0.2517
Epoch 0 | Batch: 21 | Loss: 0.2527
Epoch 0 | Batch: 22 | Loss: 0.2509
Epoch 0 | Batch: 23 | Loss: 0.2522


  return F.mse_loss(input, target, reduction=self.reduction)
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1 | Batch: 1 | Loss: 0.2519
Epoch 1 | Batch: 2 | Loss: 0.2503
Epoch 1 | Batch: 3 | Loss: 0.2485
Epoch 1 | Batch: 4 | Loss: 0.2458
Epoch 1 | Batch: 5 | Loss: 0.2453
Epoch 1 | Batch: 6 | Loss: 0.2479
Epoch 1 | Batch: 7 | Loss: 0.2466
Epoch 1 | Batch: 8 | Loss: 0.2505
Epoch 1 | Batch: 9 | Loss: 0.2491
Epoch 1 | Batch: 10 | Loss: 0.2464
Epoch 1 | Batch: 11 | Loss: 0.2476
Epoch 1 | Batch: 12 | Loss: 0.2491
Epoch 1 | Batch: 13 | Loss: 0.2475
Epoch 1 | Batch: 14 | Loss: 0.2507
Epoch 1 | Batch: 15 | Loss: 0.2507
Epoch 1 | Batch: 16 | Loss: 0.2459
Epoch 1 | Batch: 17 | Loss: 0.2408
Epoch 1 | Batch: 18 | Loss: 0.2452
Epoch 1 | Batch: 19 | Loss: 0.2431
Epoch 1 | Batch: 20 | Loss: 0.2469
Epoch 1 | Batch: 21 | Loss: 0.2424
Epoch 1 | Batch: 22 | Loss: 0.2467
Epoch 1 | Batch: 23 | Loss: 0.2362
