## Imports & helper functions

In [53]:
import torch
from torch import nn
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from timeit import default_timer as timer
from tqdm.notebook import tqdm
from torchinfo import summary
from pathlib import Path
import torch.nn.utils.prune as prune
import torch.nn.functional as F
from ptflops import get_model_complexity_info
import psutil
from model import SmokersBinaryClassification
from utils import *

device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [54]:
# Calculate accuracy
def accuracy_fn(y_pred, y_true):
    # print(f'prediction {y_pred} true: {y_true} ')
    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / len(y_pred)) * 100

In [55]:
def print_train_time(start: float, end: float, device: torch.device = None):
    """Prints difference between start and end time.
    """
    total_time = end - start
    print(f'Train time on {device}: {total_time:.3f} seconds')
    # return total_time

In [56]:
def training_loop(model, X_train, y_train, X_test, y_test, epochs, loss_fn, optimizer):
    X_train, y_train, X_test, y_test = X_train.to(device), y_train.to(device), X_test.to(device), y_test.to(device)

    start_time = timer()

    for epoch in tqdm(range(epochs), desc='Training...'):
        model.train()

        # 1. Forward passs
        y_hat = model(X_train)
        loss = loss_fn(y_hat, y_train)    

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Calculate prediction accuracy
        y_train_pred = torch.round(torch.sigmoid(y_hat))        #if not using sigmoid on model output
        # y_train_pred = torch.round(y_hat)                     #prediction probabilities -> prediction labels
        acc = accuracy_fn(y_pred=y_train_pred, y_true=y_train)

        ### Testing
        model.eval()
        with torch.inference_mode():
            y_hat = model(X_test)
            test_loss = loss_fn(y_hat, y_test)

            y_test_pred = torch.round(torch.sigmoid(y_hat))
            # y_test_pred = torch.round(y_hat)
            test_acc = accuracy_fn(y_pred=y_test_pred, y_true=y_test)

        if epoch % 50 == 0 and epoch != 0             :
            print(f'Epoch: {epoch:04d} | Loss: {loss:.4f}, Acc: {acc:.2f}% | Test Loss: {test_loss:.4f}, Test Acc: {test_acc:.2f}%')

    end_time = timer()
    return end_time - start_time, acc, test_acc
  

1. Initialize model
2. Save parameter values
3. Train network
4. Prune
5. Restore values from step 2 to pruned network
6. Repeat steps 3 - 5

## Data load and prep

In [57]:
dataset = pd.read_csv('dataset/smoking_driking_dataset_Ver01.csv')

In [58]:
dataset = dataset[dataset['SMK_stat_type_cd'] != 2]

In [59]:
dataset['sex'] = dataset['sex'].replace('Male', 0.0)
dataset['sex'] = dataset['sex'].replace('Female', 1.0)

In [60]:
dataset['SMK_stat_type_cd'] = dataset['SMK_stat_type_cd'].apply(lambda x: 1.0 if x == 3.0 else 0.0) # Smokers: 1 non smokers: 0

In [61]:
# Remove some input features from dataset
dataset = dataset.drop(['waistline','sight_left','sight_right','hear_left','hear_right','urine_protein','DRK_YN'], axis=1)

In [62]:
print(f'Number of smokers in the dataset: {len(dataset[dataset["SMK_stat_type_cd"]==1])}')
print(f'Number of non-smokers in the dataset: {len(dataset[dataset["SMK_stat_type_cd"]==0])}')

Number of smokers in the dataset: 213954
Number of non-smokers in the dataset: 602441


In [63]:
print(f'Number of smokers in the dataset: {len(dataset[dataset["SMK_stat_type_cd"]==1])}')
print(f'Number of non-smokers in the dataset: {len(dataset[dataset["SMK_stat_type_cd"]==0])}')

Number of smokers in the dataset: 213954
Number of non-smokers in the dataset: 602441


In [64]:
# Construct pandas dataframes for X and y values
df_X = dataset.drop(['SMK_stat_type_cd'], axis=1)
df_y = pd.DataFrame(dataset['SMK_stat_type_cd'])

In [65]:
# Train Test Split, Set X, y
X_train, X_test, y_train, y_test = train_test_split(df_X.to_numpy(), df_y.to_numpy(), test_size=0.2, random_state=1)

# Convert X features to float tensors
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)

# Conver y labels to float tensors
y_train = torch.FloatTensor(y_train)
y_test = torch.FloatTensor(y_test)

## Training

In [66]:
model = SmokersBinaryClassification(input_features=16).to(device)

In [67]:
original_params = save_parameters(model)

In [68]:
# loss_fn = torch.nn.BCELoss().to(device)
loss_fn = torch.nn.BCEWithLogitsLoss().to(device)
# optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
optimizer= torch.optim.Adam(model.parameters(), lr=0.01)

In [69]:
for i in range(10):
    print(summary(model))
    time, train_acc, test_acc = training_loop(model, X_train, y_train, X_test, y_test, 400, loss_fn, optimizer)
    print(f'Iteration {i} training accuracy: {train_acc} | testing accuracy: {test_acc}')
    prune.l1_unstructured(model.stack[0], name='weight', amount=0.3)
    prune.l1_unstructured(model.stack[2], name='weight', amount=0.3)   
    restore_original_parameters(original_params, model.state_dict())

Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       1,088
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       4,160
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total params: 5,313
Trainable params: 5,313
Non-trainable params: 0


Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.5003, Acc: 76.42% | Test Loss: 0.4920, Test Acc: 76.87%
Epoch: 0100 | Loss: 0.4415, Acc: 78.36% | Test Loss: 0.4381, Test Acc: 78.55%
Epoch: 0150 | Loss: 0.4214, Acc: 79.45% | Test Loss: 0.4192, Test Acc: 79.51%
Epoch: 0200 | Loss: 0.4929, Acc: 74.80% | Test Loss: 0.3985, Test Acc: 80.42%
Epoch: 0250 | Loss: 0.3916, Acc: 80.56% | Test Loss: 0.4061, Test Acc: 79.93%
Epoch: 0300 | Loss: 0.3680, Acc: 81.70% | Test Loss: 0.3666, Test Acc: 81.75%
Epoch: 0350 | Loss: 0.3666, Acc: 81.76% | Test Loss: 0.3626, Test Acc: 81.96%
Iteration 0 training accuracy: 81.65578549599152 | testing accuracy: 81.53712357376025
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       781
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       2,931
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total 

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3774, Acc: 80.44% | Test Loss: 0.3741, Test Acc: 80.59%
Epoch: 0100 | Loss: 0.3680, Acc: 81.18% | Test Loss: 0.3661, Test Acc: 81.22%
Epoch: 0150 | Loss: 0.3657, Acc: 81.45% | Test Loss: 0.3638, Test Acc: 81.48%
Epoch: 0200 | Loss: 0.3638, Acc: 81.65% | Test Loss: 0.3619, Test Acc: 81.67%
Epoch: 0250 | Loss: 0.3623, Acc: 81.82% | Test Loss: 0.3604, Test Acc: 81.94%
Epoch: 0300 | Loss: 0.3612, Acc: 81.93% | Test Loss: 0.3593, Test Acc: 82.03%
Epoch: 0350 | Loss: 0.3604, Acc: 82.02% | Test Loss: 0.3586, Test Acc: 82.13%
Iteration 1 training accuracy: 82.08572443486302 | testing accuracy: 82.21387931087281
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       566
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       2,071
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total 

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3697, Acc: 81.23% | Test Loss: 0.3673, Test Acc: 81.34%
Epoch: 0100 | Loss: 0.3643, Acc: 81.73% | Test Loss: 0.3626, Test Acc: 81.69%
Epoch: 0150 | Loss: 0.3627, Acc: 81.83% | Test Loss: 0.3611, Test Acc: 81.82%
Epoch: 0200 | Loss: 0.3616, Acc: 81.90% | Test Loss: 0.3600, Test Acc: 81.93%
Epoch: 0250 | Loss: 0.3606, Acc: 81.96% | Test Loss: 0.3590, Test Acc: 82.01%
Epoch: 0300 | Loss: 0.3596, Acc: 82.05% | Test Loss: 0.3581, Test Acc: 82.06%
Epoch: 0350 | Loss: 0.3585, Acc: 82.18% | Test Loss: 0.3573, Test Acc: 82.21%
Iteration 2 training accuracy: 82.24710464909755 | testing accuracy: 82.30329681097997
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       415
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       1,469
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total 

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3643, Acc: 81.55% | Test Loss: 0.3617, Test Acc: 81.69%
Epoch: 0100 | Loss: 0.3623, Acc: 81.73% | Test Loss: 0.3595, Test Acc: 81.89%
Epoch: 0150 | Loss: 0.3606, Acc: 81.93% | Test Loss: 0.3579, Test Acc: 82.09%
Epoch: 0200 | Loss: 0.3597, Acc: 82.02% | Test Loss: 0.3572, Test Acc: 82.21%
Epoch: 0250 | Loss: 0.3591, Acc: 82.08% | Test Loss: 0.3567, Test Acc: 82.25%
Epoch: 0300 | Loss: 0.3587, Acc: 82.12% | Test Loss: 0.3563, Test Acc: 82.30%
Epoch: 0350 | Loss: 0.3583, Acc: 82.17% | Test Loss: 0.3559, Test Acc: 82.31%
Iteration 3 training accuracy: 82.20392702062115 | testing accuracy: 82.35229270144967
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       310
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       1,047
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total 

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3691, Acc: 81.28% | Test Loss: 0.3656, Test Acc: 81.40%
Epoch: 0100 | Loss: 0.3649, Acc: 81.57% | Test Loss: 0.3619, Test Acc: 81.74%
Epoch: 0150 | Loss: 0.3640, Acc: 81.64% | Test Loss: 0.3611, Test Acc: 81.76%
Epoch: 0200 | Loss: 0.3635, Acc: 81.69% | Test Loss: 0.3605, Test Acc: 81.81%
Epoch: 0250 | Loss: 0.3631, Acc: 81.71% | Test Loss: 0.3603, Test Acc: 81.83%
Epoch: 0300 | Loss: 0.3627, Acc: 81.72% | Test Loss: 0.3600, Test Acc: 81.80%
Epoch: 0350 | Loss: 0.3610, Acc: 81.91% | Test Loss: 0.3581, Test Acc: 82.07%
Iteration 4 training accuracy: 81.95083262391367 | testing accuracy: 82.1311987457052
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       236
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       752
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total par

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3669, Acc: 81.56% | Test Loss: 0.3637, Test Acc: 81.76%
Epoch: 0100 | Loss: 0.3643, Acc: 81.89% | Test Loss: 0.3613, Test Acc: 82.02%
Epoch: 0150 | Loss: 0.3636, Acc: 81.90% | Test Loss: 0.3607, Test Acc: 82.04%
Epoch: 0200 | Loss: 0.3632, Acc: 81.95% | Test Loss: 0.3602, Test Acc: 82.08%
Epoch: 0250 | Loss: 0.3627, Acc: 81.97% | Test Loss: 0.3598, Test Acc: 82.10%
Epoch: 0300 | Loss: 0.3621, Acc: 82.00% | Test Loss: 0.3592, Test Acc: 82.11%
Epoch: 0350 | Loss: 0.3614, Acc: 82.04% | Test Loss: 0.3585, Test Acc: 82.14%
Iteration 5 training accuracy: 82.00932146816186 | testing accuracy: 82.21265441361106
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       184
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       546
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total pa

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3777, Acc: 80.45% | Test Loss: 0.3745, Test Acc: 80.61%
Epoch: 0100 | Loss: 0.3770, Acc: 80.52% | Test Loss: 0.3739, Test Acc: 80.67%
Epoch: 0150 | Loss: 0.3767, Acc: 80.55% | Test Loss: 0.3736, Test Acc: 80.70%
Epoch: 0200 | Loss: 0.3765, Acc: 80.58% | Test Loss: 0.3734, Test Acc: 80.71%
Epoch: 0250 | Loss: 0.3763, Acc: 80.62% | Test Loss: 0.3732, Test Acc: 80.73%
Epoch: 0300 | Loss: 0.3761, Acc: 80.65% | Test Loss: 0.3729, Test Acc: 80.74%
Epoch: 0350 | Loss: 0.3757, Acc: 80.68% | Test Loss: 0.3726, Test Acc: 80.79%
Iteration 6 training accuracy: 80.70572455735275 | testing accuracy: 80.79544828177536
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       148
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       401
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total pa

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3755, Acc: 80.70% | Test Loss: 0.3724, Test Acc: 80.83%
Epoch: 0100 | Loss: 0.3753, Acc: 80.72% | Test Loss: 0.3722, Test Acc: 80.80%
Epoch: 0150 | Loss: 0.3753, Acc: 80.73% | Test Loss: 0.3722, Test Acc: 80.81%
Epoch: 0200 | Loss: 0.3752, Acc: 80.72% | Test Loss: 0.3721, Test Acc: 80.81%
Epoch: 0250 | Loss: 0.3752, Acc: 80.72% | Test Loss: 0.3721, Test Acc: 80.82%
Epoch: 0300 | Loss: 0.3751, Acc: 80.72% | Test Loss: 0.3720, Test Acc: 80.83%
Epoch: 0350 | Loss: 0.3751, Acc: 80.73% | Test Loss: 0.3720, Test Acc: 80.83%
Iteration 7 training accuracy: 80.72409801627889 | testing accuracy: 80.83097030236588
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       123
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       300
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total pa

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3750, Acc: 80.73% | Test Loss: 0.3719, Test Acc: 80.84%
Epoch: 0100 | Loss: 0.3750, Acc: 80.73% | Test Loss: 0.3719, Test Acc: 80.85%
Epoch: 0150 | Loss: 0.3749, Acc: 80.74% | Test Loss: 0.3719, Test Acc: 80.85%
Epoch: 0200 | Loss: 0.3749, Acc: 80.73% | Test Loss: 0.3719, Test Acc: 80.86%
Epoch: 0250 | Loss: 0.3749, Acc: 80.73% | Test Loss: 0.3718, Test Acc: 80.87%
Epoch: 0300 | Loss: 0.3749, Acc: 80.73% | Test Loss: 0.3718, Test Acc: 80.88%
Epoch: 0350 | Loss: 0.3748, Acc: 80.74% | Test Loss: 0.3718, Test Acc: 80.86%
Iteration 8 training accuracy: 80.73175362416477 | testing accuracy: 80.86220518254031
Layer (type:depth-idx)                   Param #
SmokersBinaryClassification              --
├─Sequential: 1-1                        --
│    └─Linear: 2-1                       105
│    └─ReLU: 2-2                         --
│    └─Linear: 2-3                       229
│    └─ReLU: 2-4                         --
│    └─Linear: 2-5                       65
Total pa

Training...:   0%|          | 0/400 [00:00<?, ?it/s]

Epoch: 0050 | Loss: 0.3748, Acc: 80.73% | Test Loss: 0.3717, Test Acc: 80.88%
Epoch: 0100 | Loss: 0.3748, Acc: 80.73% | Test Loss: 0.3718, Test Acc: 80.86%
Epoch: 0150 | Loss: 0.3748, Acc: 80.73% | Test Loss: 0.3717, Test Acc: 80.87%
Epoch: 0200 | Loss: 0.3748, Acc: 80.73% | Test Loss: 0.3717, Test Acc: 80.86%
Epoch: 0250 | Loss: 0.3748, Acc: 80.73% | Test Loss: 0.3717, Test Acc: 80.86%
Epoch: 0300 | Loss: 0.3747, Acc: 80.74% | Test Loss: 0.3717, Test Acc: 80.86%
Epoch: 0350 | Loss: 0.3747, Acc: 80.74% | Test Loss: 0.3717, Test Acc: 80.86%
Iteration 9 training accuracy: 80.74094035362785 | testing accuracy: 80.86710477158728
