In [10]:
%matplotlib inline
import pandas as pd 

import torch 
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


# Pick the compute device as a plain string ("cpu" when no accelerator is usable).
# `torch.accelerator` is only present in recent PyTorch releases, so fall back to a
# CUDA check on older installs instead of failing with AttributeError.
if hasattr(torch, "accelerator"):
    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# import torch.nn as nn
# import torch.nn.functional as F
# # remember, we are using data variable for torch.utils.data and not for our actual dataset
# import torch.utils.data as data
# import torch.optim as optim

# import matplotlib.pyplot as plt # to plot graphs 

Using cpu device


In [4]:
# Load the Titanic dataset: the train and eval splits are read from two separate CSV files.
titanic_train_csv_df, titanic_test_csv_df = (
    pd.read_csv(csv_url)
    for csv_url in (
        "https://storage.googleapis.com/tf-datasets/titanic/train.csv",
        "https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
    )
)

# Cell output: the shape tuple of each split.
(titanic_train_csv_df.shape, titanic_test_csv_df.shape)



((627, 10), (264, 10))

In [44]:

class TitanicDataset(torch.utils.data.Dataset):
    """Map-style dataset serving Titanic passenger rows as (features, label) tensors.

    The text columns listed in ``column_codes`` are replaced by numeric
    ``<column>_code`` columns. Every remaining column except ``survived`` is a
    feature; ``survived`` is the label.

    Attributes:
        df: Encoded copy of the input frame (the caller's frame is not modified).
        column_codes: ``{column: {text value: numeric code}}`` used for encoding.
        label_column: Name of the label column (``"survived"``).
        feature_columns: Alphabetically sorted feature column names; this is the
            order of the values inside each feature tensor.
    """

    def __init__(self, raw_df):
        """Encode the categorical columns and cache the data as float32 tensors.

        Args:
            raw_df: DataFrame holding ``survived``, the columns named in
                ``column_codes`` and any further numeric feature columns.

        Raises:
            ValueError: If a categorical column holds a non-null value that has
                no entry in its code mapping.
            KeyError: If a column named in ``column_codes`` is missing.
        """
        self.df = raw_df.copy()
        self.column_codes = {
            "sex": { 'male': 0, 'female': 1 },
            "class": { 'First': 1, 'Second': 2, 'Third': 3 },
            "embark_town": { 'Southampton': 1, 'Cherbourg': 2, 'Queenstown': 3, 'unknown': 4},
            "alone": { 'n': 0, 'y': 1},
            "deck": { "unknown" : 0, "A" : 1, "B" : 2, "C": 3, "D": 4, "E": 5, "F": 6, "G": 7},
        }

        for col, mapping in self.column_codes.items():
            codes = self.df[col].map(mapping)
            # Series.map yields NaN for values absent from the mapping; fail loudly
            # here rather than letting NaN features reach the model.
            unmapped = self.df.loc[codes.isna() & self.df[col].notna(), col].unique()
            if len(unmapped) > 0:
                raise ValueError(
                    f"Column '{col}' contains values with no code mapping: "
                    f"{sorted(str(v) for v in unmapped)}"
                )
            self.df[f'{col}_code'] = codes
            self.df.pop(col)

        self.label_column = "survived"
        self.feature_columns = sorted(self.df.columns)
        self.feature_columns.remove(self.label_column)

        # Convert once up front so __getitem__ is plain tensor indexing instead of
        # a per-sample DataFrame lookup. These tensors are a snapshot of self.df
        # taken at construction time.
        self._features = torch.tensor(
            self.df[self.feature_columns].to_numpy(dtype="float32")
        )
        self._labels = torch.tensor(
            self.df[self.label_column].to_numpy(dtype="float32")
        )

    def getCodeMappings(self, col):
        """Return the ``{text value: code}`` mapping used for column ``col``."""
        return self.column_codes[col]

    def __len__(self):
        """Return the number of rows in the dataset."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the row at position ``idx``.

        ``features`` is a float32 tensor ordered like ``feature_columns``;
        ``label`` is a float32 scalar tensor holding the ``survived`` value.
        """
        return self._features[idx], self._labels[idx]




In [69]:
# Wrap both splits so the categorical columns are encoded the same way.
train_ds = TitanicDataset(titanic_train_csv_df)
test_ds = TitanicDataset(titanic_test_csv_df)

# Size batches at one tenth of the training set, but never 0: DataLoader rejects a
# non-positive batch_size, which would happen for fewer than 10 records.
batch_size = max(1, len(train_ds) // 10)
print(f'{len(train_ds)} training records in with batch size {batch_size}')
# Reshuffle the training samples on every pass over the loader.
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
# Evaluation gains nothing from shuffling; a fixed order keeps results comparable between runs.
test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

# Cell output: the feature order used inside every feature tensor.
train_ds.feature_columns

627 training records in with batch size 62


['age',
 'alone_code',
 'class_code',
 'deck_code',
 'embark_town_code',
 'fare',
 'n_siblings_spouses',
 'parch',
 'sex_code']

In [None]:
# Default dropout probability applied after each of the first two hidden layers.
DROPOUT_RATE_01 = .40

class TitanicSurvivalNeuralNetwork(nn.Module):
    """Fully connected binary classifier producing one probability per input row.

    Architecture: input -> 64 -> 64 -> 32 -> 1, with ReLU after every hidden
    layer, dropout after the first two, and a final sigmoid.
    """

    def __init__(self, num_feature_columns, dropout_rate=None):
        """Build the layer stack.

        Args:
            num_feature_columns: Number of values in each input feature vector.
            dropout_rate: Dropout probability for both dropout layers. When
                omitted, the module-level ``DROPOUT_RATE_01`` is used.
        """
        super().__init__()
        if dropout_rate is None:
            # Resolved at construction time so changing the constant takes effect
            # for newly created models.
            dropout_rate = DROPOUT_RATE_01
        # Each Linear's in_features must equal the previous Linear's out_features.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(num_feature_columns, 64),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(64, 64),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run ``x`` through the layer stack and return the sigmoid outputs."""
        return self.linear_relu_stack(x)


# Size the input layer from the dataset's feature list, then print the layer stack.
num_feature_columns = len(train_ds.feature_columns)
model = TitanicSurvivalNeuralNetwork(num_feature_columns)
print(model)

TitanicSurvivalNeuralNetwork(
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=9, out_features=64, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.4, inplace=False)
    (3): Linear(in_features=64, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.4, inplace=False)
    (6): Linear(in_features=32, out_features=32, bias=True)
    (7): ReLU()
    (8): Linear(in_features=32, out_features=1, bias=True)
    (9): Sigmoid()
  )
)


In [118]:


# Seed PyTorch's global RNG before the model is created.
SEED = 123
torch.manual_seed(SEED)

# Train on CUDA when it is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)

# Start from a fresh model every time this cell runs.
model = TitanicSurvivalNeuralNetwork(len(train_ds.feature_columns)).to(device)

loss_fn = nn.BCELoss()  # binary cross entropy
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100

for epoch in range(num_epochs):
    # Running totals used for the per-epoch training accuracy.
    epoch_correct_count, epoch_pred_count = 0, 0

    for X, y in train_dataloader:
        X = X.to(device)
        # Reshape labels to a column so they line up with the model's (batch, 1) output.
        y = y.reshape(-1, 1).to(device)

        # Forward pass.
        y_pred = model(X)
        loss = loss_fn(y_pred, y)

        # Backward pass and parameter update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accuracy bookkeeping: round each output to 0 or 1 and compare with the labels.
        y_pred_guess = y_pred.round()
        epoch_correct_count += y_pred_guess.eq(y).sum()
        epoch_pred_count += y.size(0)

    print(f"Epoch [{epoch+1}/{num_epochs}], {epoch_correct_count} of {epoch_pred_count} correct {(100*epoch_correct_count/epoch_pred_count):.1f} %")



RuntimeError: mat1 and mat2 shapes cannot be multiplied (62x64 and 32x32)