In [None]:

%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, random_split

from modules.classifer_utils import NormalizedClassifierDataset, NormalizedClassifierDatasetMetadata, TrainingManager, GeneralNN


In [None]:

# Download the heart-disease CSV into a DataFrame.
# Fetched over HTTPS (the bucket serves the same object on both schemes) so the
# training data cannot be altered in transit.
file_url = "https://storage.googleapis.com/download.tensorflow.org/data/heart.csv"
heart_csv_df = pd.read_csv(file_url)

# Show the first rows as the cell output for a quick sanity check of the columns.
heart_csv_df.head()

In [None]:

# Describe the dataset: which column is the label, which are categorical, which are numeric.
label_column = "target"
ds_meta = NormalizedClassifierDatasetMetadata(label_column)
# Register every distinct value observed in the data for each categorical column.
ds_meta.set_categorical_map({
    col: list(heart_csv_df[col].unique()) for col in ['thal']
})

# Every numeric column except the label is handed to the metadata as a numeric feature.
# Filtering (rather than list.remove) does not raise if the label column is non-numeric.
numeric_cols = [
    col
    for col in heart_csv_df.select_dtypes(include=np.number).columns.tolist()
    if col != label_column
]
ds_meta.ordinal_numeric_cols = numeric_cols
overall_ds = NormalizedClassifierDataset(heart_csv_df, ds_meta)

# Seed the split with a dedicated generator so train/test membership is identical on
# every run. Without it, re-running this cell reshuffles records between the two sets,
# so an already-trained model could be evaluated on records it was trained on.
SPLIT_SEED = 42
train_ds, test_ds = random_split(
    overall_ds, [.75, .25], generator=torch.Generator().manual_seed(SPLIT_SEED)
)

# About ten batches per pass over the training set; clamp to 1 so a tiny dataset
# cannot produce batch_size == 0 (which DataLoader rejects).
batch_size = max(1, len(train_ds) // 10)
train_dataloader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, drop_last=True)
# Evaluation should see every test record exactly once: no shuffling and no dropped
# partial batch (drop_last=True silently discarded the remainder of the test set).
test_dataloader = DataLoader(test_ds, batch_size=batch_size, shuffle=False, drop_last=False)

print(f'{len(train_ds)} training records in with batch size {batch_size}, {len(test_ds)} records for test')

# Each dataset item unpacks to a (features, label) pair; the trailing dimension of the
# features tensor is the model's input width.
first_training_record, _ = train_ds[0]
num_features = first_training_record.shape[-1]
print(f'datasets have {num_features} features')


In [None]:

# Model construction: derive the input width from the data, then build the network.
dropoutRate = 0.2

# Take the features element of the first training item and read its trailing
# dimension, which is used as the network's input size.
first_training_record = train_ds[0][0]
input_features = first_training_record.shape[-1]

# NOTE(review): [32, 16, 1] is presumably the list of layer widths, ending in a
# single output unit, and dropoutRate the dropout probability — confirm against GeneralNN.
layer_spec = [32, 16, 1]
model = GeneralNN(input_features, layer_spec, dropoutRate)
print(model)

In [None]:

# Wrap the model in the project's TrainingManager and train it on the training loader.
# NOTE(review): the second argument (100) is presumably the number of epochs —
# confirm against TrainingManager.train.
training_mgr = TrainingManager(model)
training_mgr.train(train_dataloader, 100)

In [None]:

# Run the manager's evaluation over the test loader (built from the test side of the
# random split). As the cell's last expression, whatever eval() returns is displayed.
training_mgr.eval(test_dataloader)