In [1]:
from dml.utils import DiabetesDataset
from dml.model.dnn import DNN

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.loggers import WandbLogger

import pandas as pd

In [2]:
wandb = WandbLogger(project='dml')

In [3]:
AVAIL_GPUS = min(1, torch.cuda.device_count())
BATCH_SIZE = 256 if AVAIL_GPUS else 64

In [4]:
torch.manual_seed(42)

<torch._C.Generator at 0x23c6b726f50>

In [5]:
dataset = DiabetesDataset('../data/diabetes.csv')

train_len = int(len(dataset) * 0.8)
test_len = len(dataset) - train_len

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_len, test_len])
len(train_dataset), len(test_dataset)

(56553, 14139)

In [6]:
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE)

### Training the model

In [8]:
import os

def train_classifier():
    pl.seed_everything(42)
    
    root_dir = os.path.join('../dnnmodel', 'DiabetesModel')
    os.makedirs(root_dir, exist_ok=True)
    
    trainer = pl.Trainer(
        default_root_dir=root_dir,
        logger=wandb,
        callbacks=[ModelCheckpoint(save_weights_only=True, mode='max', monitor='val_acc')],
        gpus=AVAIL_GPUS,
        max_epochs=20,
        progress_bar_refresh_rate=0
    )
    
    trainer.logger._default_hp_metric = None
    pretrained_filename = os.path.join('../dnnmodel', 'DiabetesModelDNN.ckpt')
    
    if os.path.isfile(pretrained_filename):
        print('Found pretrained model, loading...')
        model = DNN.load_from_checkpoint(pretrained_filename)
    else:
        model = DNN(c_in=21, c_hidden=64, c_out=1, num_layers=3)
        trainer.fit(model, train_loader, val_loader)
        
        model = DNN.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
        
    train_result = trainer.test(model, test_dataloaders=train_loader, verbose=False)
    test_result = trainer.test(model, test_dataloaders=test_loader, verbose=False)
    
    result = {
        'test': test_result[0]['test_acc'],
        'train': train_result[0]['test_acc']
    }
    
    return model, result

In [9]:
model, result = train_classifier()

Global seed set to 42
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[34m[1mwandb[0m: Currently logged in as: [33mjbmed[0m (use `wandb login --relogin` to force relogin)



  | Name        | Type              | Params
--------------------------------------------------
0 | flatten     | Flatten           | 0     
1 | loss_module | BCEWithLogitsLoss | 0     
2 | layers      | ModuleList        | 9.7 K 
3 | head        | Sequential        | 65    
--------------------------------------------------
9.8 K     Trainable params
0         Non-trainable params
9.8 K     Total params
0.039     Total estimated model params size (MB)
  rank_zero_warn(
Global seed set to 42
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


In [10]:
result

{'test': 0.7504066824913025, 'train': 0.7543366551399231}