In [None]:

%load_ext autoreload
%autoreload 2

import pandas as pd
from torch.utils.data import DataLoader

from modules.classifer_utils import NormalizedClassifierDataset, NormalizedClassifierDatasetMetadata, TrainingManager, GeneralNN


In [None]:
# Download the Titanic train and eval splits into DataFrames.
# Note: the frame named "test" throughout this notebook is read from eval.csv.
train_csv_url = "https://storage.googleapis.com/tf-datasets/titanic/train.csv"
eval_csv_url = "https://storage.googleapis.com/tf-datasets/titanic/eval.csv"
titanic_train_csv_df = pd.read_csv(train_csv_url)
titanic_test_csv_df = pd.read_csv(eval_csv_url)

# Report the (rows, columns) shape of each split, one line per split.
print(
    f'titanic_train_csv_df shape: {titanic_train_csv_df.shape}',
    f'titanic_test_csv_df shape: {titanic_test_csv_df.shape}',
    sep='\n',
)

# Rich preview of the first rows of the eval split.
display(titanic_test_csv_df.head())


## Describe the dataset columns (label, categorical, ordinal, numeric) in a
## metadata object that the train and test datasets will share.

# Work on copies so the frames loaded from CSV are left untouched.
encoded_train_df = titanic_train_csv_df.copy()
encoded_test_df = titanic_test_csv_df.copy()

# Both splits stacked together; only used below to collect category values,
# so a value that appears in just one split is still listed.
union_df = pd.concat([encoded_train_df, encoded_test_df])

label_column_name = "survived"
ds_meta = NormalizedClassifierDatasetMetadata(label_column_name)

# Categorical columns: every distinct value observed across train + test.
categorical_columns = ['sex', 'embark_town', 'alone']
categorical_values = {
    name: list(union_df[name].unique()) for name in categorical_columns
}
ds_meta.set_categorical_map(categorical_values)

# Ordinal columns: values listed in an explicit, hand-written order.
# NOTE(review): presumably the list position is used as the rank - confirm
# against NormalizedClassifierDatasetMetadata.set_ordinal_map.
ordinal_values = {
    "class" : ['First', 'Second', 'Third'],
    "deck": ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'unknown'],
}
ds_meta.set_ordinal_map(ordinal_values)

# Columns that are already numeric.
numeric_columns = ["age", "n_siblings_spouses", "parch", "fare"]
ds_meta.set_ordinal_numeric_cols(numeric_columns)


# Wrap each split in a dataset built from the same metadata object, so both
# are encoded with identical column definitions.
train_ds = NormalizedClassifierDataset(encoded_train_df, ds_meta)
test_ds = NormalizedClassifierDataset(encoded_test_df, ds_meta)

# Target roughly 20 batches per epoch. Clamp to at least 1: for a dataset with
# fewer than 20 records the division yields 0, which DataLoader rejects.
batch_size = max(1, len(train_ds) // 20)

# Training: reshuffle every epoch and drop the trailing partial batch so every
# training batch has exactly `batch_size` records.
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True)

# Evaluation: iterate in a fixed order and keep every record, so results are
# reproducible and no test record can be silently discarded.
# batch_size is left at the DataLoader default (1), as before.
test_dataloader = DataLoader(test_ds, shuffle=False, drop_last=False)
print(f'{len(train_ds)} training records in with batch size {batch_size}, {len(test_ds)} records for test')

# Both counts are printed so a mismatch between the splits is visible at a glance.
print(f'train has {train_ds.get_feature_count()} features')
print(f'test has {test_ds.get_feature_count()} features')


In [None]:
# Build the network, sizing its input from an actual encoded training record.
dropoutRate = 0.2

# NOTE(review): presumably the layer widths, ending in a single output unit -
# confirm against GeneralNN's constructor.
layer_sizes = [32, 32, 16, 8, 1]

# Each dataset item unpacks into two values; the first is used as the feature
# tensor and its last dimension gives the number of input features.
first_training_record, _ = train_ds[0]
input_features = first_training_record.shape[-1]

model = GeneralNN(input_features, layer_sizes, dropoutRate)

# Print the model's text representation for a quick look at what was built.
print(model)


In [None]:

# Hand the model to a TrainingManager and train it on the training loader.
training_mgr = TrainingManager(model)
# NOTE(review): the second argument (100) is presumably the number of epochs -
# confirm against TrainingManager.train.
training_mgr.train(train_dataloader, 100)


In [None]:

# Evaluate the trained model on the test loader. As the last expression in the
# cell, whatever eval() returns is shown as the cell output.
training_mgr.eval(test_dataloader)
