# Imports

In [1]:
import numpy as np
import pickle, os
from urllib.request import urlopen
from tqdm import tqdm

# Data

In [2]:
from datamodule import VLASSLoader

# Data module reading from ./data/ and serving batches of 32 samples.
datamodule = VLASSLoader(
    batch_size=32,
    pin_memory=True,
    root='./data/',
)

# Train

## Model

In [None]:
import torch
from models.cnn import CNN
from lightning import Trainer

In [None]:
# Four-class classifier; input_shape is presumably (channels, height, width)
# for single-channel 64 x 64 images -- confirm against models.cnn.CNN.
model = CNN(
    input_shape=(1, 64, 64),
    num_classes=4,
)

In [None]:
# Short training run: stop after two epochs.
trainer = Trainer(
    max_epochs=2,
)
trainer.fit(model, datamodule)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060 Laptop GPU') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision


Loading data from file: ./data/vlass_data_array.p


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Loading labels from file: ./data/vlass_labels.p
61351 images, each of size 64 x 64 pixels.
There are 61351 corresponding labels - one category for each image.



   | Name     | Type        | Params | Mode 
--------------------------------------------------
0  | conv1    | Conv2d      | 750    | train
1  | bn1      | BatchNorm2d | 150    | train
2  | pool1    | MaxPool2d   | 0      | train
3  | conv2    | Conv2d      | 33.8 K | train
4  | dropout1 | Dropout     | 0      | train
5  | bn2      | BatchNorm2d | 100    | train
6  | pool2    | MaxPool2d   | 0      | train
7  | conv3    | Conv2d      | 11.3 K | train
8  | bn3      | BatchNorm2d | 50     | train
9  | pool3    | MaxPool2d   | 0      | train
10 | fc1      | Linear      | 819 K  | train
11 | dropout2 | Dropout     | 0      | train
12 | fc2      | Linear      | 2.1 K  | train
--------------------------------------------------
867 K     Trainable params
0         Non-trainable params
867 K     Total params
3.472     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

c:\Users\mi3se\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Val size: 12270
Train size: 49081


c:\Users\mi3se\AppData\Local\Programs\Python\Python310\lib\site-packages\lightning\pytorch\trainer\connectors\data_connector.py:425: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=19` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]



Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

`Trainer.fit` stopped: `max_epochs=2` reached.


## Evaluate

In [39]:
# NOTE(review): this checkpoint (version_4, epoch=5) does not appear to come
# from the two-epoch `trainer.fit` call earlier in this notebook -- confirm it
# is the run you intend to evaluate before trusting the metrics below.
CHECKPOINT_PATH = './outputs/cnn/lightning_logs/version_4/checkpoints/epoch=5-step=4596.ckpt'

# map_location='cpu' loads the saved weights directly onto the CPU instead of
# first restoring them on the device they were saved from. The trailing
# .cpu() is kept as a harmless guard so the model is on the CPU regardless of
# the installed Lightning version. CUDA is not needed during inference.
model = CNN.load_from_checkpoint(CHECKPOINT_PATH, map_location='cpu').cpu()

In [43]:
import torch
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

def test_model(model, dataloader):
    model.eval()
    with torch.no_grad():
        y_trues, y_preds = [], []
        
        for inputs, labels in tqdm(dataloader):
            outputs = model(inputs)
            predicted = torch.argmax(outputs, 1)
            
            y_trues.extend(labels.detach().cpu().numpy())
            y_preds.extend(predicted.detach().cpu().numpy())
            
    acc = accuracy_score(y_trues, y_preds)
    f1 = f1_score(y_trues, y_preds, average='macro')
    
    return {
        'accuracy': accuracy_score(y_trues, y_preds),
        'f1': f1_score(y_trues, y_preds, average='macro'),
        'confusion_matrix': confusion_matrix(y_trues, y_preds),
        'classification_report': classification_report(y_trues, y_preds, zero_division=0)
    }

In [46]:
# Score the loaded model on the validation split.
val_loader = datamodule.val_dataloader()
result = test_model(model, val_loader)

Val size: 12270


100%|██████████| 383/383 [00:10<00:00, 36.16it/s]


In [47]:
# Headline numbers first, then the per-class breakdown and confusion matrix.
accuracy, macro_f1 = result["accuracy"], result["f1"]
print(f'Accuracy = {accuracy}, F1 = {macro_f1}')
print('Classification report:\n', result['classification_report'])
print('Confusion Matrix:\n', result['confusion_matrix'])

Accuracy = 0.8174771540469974, F1 = 0.6339822115663432
Classification report:
               precision    recall  f1-score   support

           0       0.74      0.95      0.83      2668
           1       0.94      0.93      0.93      5160
           2       0.72      0.83      0.77      3268
           3       0.00      0.00      0.00      1160

    accuracy                           0.82     12256
   macro avg       0.60      0.68      0.63     12256
weighted avg       0.75      0.82      0.78     12256

Confusion Matrix:
 [[2535   17  116    0]
 [  46 4774  340    0]
 [ 310  248 2710    0]
 [ 554   27  579    0]]
