In [None]:

%load_ext autoreload
%autoreload 2
%matplotlib inline

import pandas as pd 
import numpy as np

import torch 
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder

from modules.classifer_utils import ClassifierDataset, TrainingManager


# Pick the compute device name. `torch.accelerator` is only present in newer
# PyTorch releases, so look it up defensively and fall back to the older
# per-backend checks instead of raising AttributeError on import-time.
_accelerator = getattr(torch, "accelerator", None)
if _accelerator is not None and _accelerator.is_available():
    device = _accelerator.current_accelerator().type
elif torch.cuda.is_available():
    device = "cuda"
elif getattr(torch.backends, "mps", None) is not None and torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")




In [None]:
# Download the Titanic train and eval splits as DataFrames.
titanic_train_csv_df = pd.read_csv("https://storage.googleapis.com/tf-datasets/titanic/train.csv")
titanic_test_csv_df = pd.read_csv("https://storage.googleapis.com/tf-datasets/titanic/eval.csv")

# Report (rows, columns) for each split.
for _frame_name, _frame in (
    ("titanic_train_csv_df", titanic_train_csv_df),
    ("titanic_test_csv_df", titanic_test_csv_df),
):
    print(f'{_frame_name} shape: {_frame.shape}')

# Union of both splits (test rows first), kept for inspecting the full set of
# values of the non-numeric columns.
# NOTE(review): not referenced by any later cell visible in this notebook.
csv_union_df = pd.concat([titanic_test_csv_df, titanic_train_csv_df])



In [None]:

label_column_name = "survived"

# Encode both splits in a single call.
# NOTE(review): presumably this gives train and test the same one-hot columns —
# confirm against ClassifierDataset.onehot_encode_datafames.
[train_encoded_df, test_encoded_df] = ClassifierDataset.onehot_encode_datafames(
    [titanic_train_csv_df, titanic_test_csv_df]
)

train_ds = ClassifierDataset(train_encoded_df, label_column_name)
test_ds = ClassifierDataset(test_encoded_df, label_column_name)

# Aim for about ten mini-batches per epoch. Floor at 1 because DataLoader
# rejects batch_size=0, which the plain division yields for < 10 records.
batch_size = max(1, len(train_ds) // 10)

train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
# Evaluation does not need shuffling; a fixed order keeps runs comparable.
test_dataloader = DataLoader(test_ds, shuffle=False)
print(f'{len(train_ds)} training records in with batch size {batch_size}, {len(test_ds)} records for test')

print(f'train has {train_ds.get_feature_count()} features')
print(f'test has {test_ds.get_feature_count()} features')

# The same model consumes both splits, so their feature widths must agree.
assert train_ds.get_feature_count() == test_ds.get_feature_count(), (
    "train and test datasets expose a different number of features"
)




In [None]:
DROPOUT_RATE_01 = .20

# TODO have a config for layers and wire them from it

class TitanicSurvivalNeuralNetwork(nn.Module):
    """Fully connected network that maps a feature vector to one value in (0, 1).

    Architecture: Linear(n, 64) -> Linear(64, 64) -> Linear(64, 32) ->
    Linear(32, 32) -> Linear(32, 1). Every hidden Linear is followed by GELU;
    the first three hidden layers are additionally followed by Dropout. The
    final Linear is followed by Sigmoid.

    Args:
        num_feature_columns: Size of the last dimension of the input tensor.
        dropout_rate: Drop probability shared by all Dropout layers. When
            ``None`` (the default) the module-level ``DROPOUT_RATE_01`` is
            read at construction time.

    Raises:
        ValueError: From ``nn.Dropout`` if ``dropout_rate`` is outside [0, 1].
    """

    def __init__(self, num_feature_columns, dropout_rate=None):
        super().__init__()
        # `is None` rather than truthiness so an explicit 0.0 is honoured.
        if dropout_rate is None:
            dropout_rate = DROPOUT_RATE_01
        # The attribute keeps its original name (although the activations are
        # GELU, not ReLU) so that state_dict keys stay unchanged.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(num_feature_columns, 64),
            nn.GELU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(64, 64),
            nn.GELU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(32, 32),
            nn.GELU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run ``x`` through the layer stack; the last dimension of the result is 1."""
        return self.linear_relu_stack(x)


# Take the first (features, label) pair from the training set and use the size
# of the features' last dimension as the network's input width.
first_training_record, _ = train_ds[0]
num_features = first_training_record.shape[-1]

# Build the network and print its layer layout.
model = TitanicSurvivalNeuralNetwork(num_feature_columns=num_features)
print(model)

In [None]:

# Seed torch's global RNG before anything stochastic happens in this cell.
# Weight initialisation, dropout masks and the DataLoader's shuffle order all
# draw from it, so re-running the cell now reproduces the same training run.
SEED = 42
torch.manual_seed(SEED)

# Input width = size of the last dimension of one training sample's features.
first_training_record, _ = train_ds[0]
num_feature_columns = first_training_record.shape[-1]

# A fresh model is built here so re-running the cell restarts from new weights
# rather than continuing from a previously trained instance.
model = TitanicSurvivalNeuralNetwork(num_feature_columns)

training_mgr = TrainingManager(model)
# NOTE(review): the second argument is presumably the number of epochs —
# confirm against TrainingManager.train.
training_mgr.train(train_dataloader, 100)



In [None]:

# Evaluate the trained model on the held-out test loader. As the last
# expression in the cell, whatever eval() returns is shown as the cell output.
training_mgr.eval(test_dataloader)
