# Machine Learning - Image Classification

In [43]:
#importing
import os

import torch
import torch.nn as nn
import torch.nn.functional as fun


from dotenv import load_dotenv
from sklearn.model_selection import train_test_split
from torch import optim
from tqdm import tqdm

from utils import *

## Database importing

In [44]:
# TODO: pouzi toto martin
load_dotenv()
DAT_PATH = os.getenv("TRAIN_DATASET_PATH")

ANIMALS_DATAFRAME = load_dataset_info("../data/archive/raw-img")

In [45]:
train_df, test_df = train_test_split(ANIMALS_DATAFRAME.sample(frac=0.5, random_state=37), test_size=0.01, random_state=37)
train_df, val_df = train_test_split(train_df, test_size=0.0526, random_state=37)

In [55]:
#setting up dataloaders

TARGET_SIZE = (256, 256)
MAX_SIZE = 500
BATCH_SIZE = 128
NUM_CLASSES = ANIMALS_DATAFRAME.label.nunique()
CLASS_LABELS = {name: idx for idx, name in enumerate(np.sort(ANIMALS_DATAFRAME.label.unique()))}

# vypocet frekvencie augmentacie -> aby nebol model biasnuty iba na majoritne categorie obrazkov
counts = train_df.label.value_counts()
max_count = counts.max()
aug_strength = (max_count / counts).to_dict()

print(aug_strength)

train_gen = AnimalImageGenerator(
    df=train_df,
    batch_size=BATCH_SIZE,
    target_size=TARGET_SIZE,
    num_classes=NUM_CLASSES,
    augment=True,
    shuffle=True,
    aug_strength=aug_strength,
    max_size=MAX_SIZE,
    class_mapping=CLASS_LABELS,
)

test_gen = AnimalImageGenerator(
    df=test_df,
    batch_size=BATCH_SIZE,
    target_size=TARGET_SIZE,
    num_classes=NUM_CLASSES,
    augment=False,
    shuffle=False,
    max_size=MAX_SIZE,
    class_mapping=CLASS_LABELS,
)

val_gen = AnimalImageGenerator(
    df=val_df,
    batch_size=BATCH_SIZE,
    target_size=TARGET_SIZE,
    num_classes=NUM_CLASSES,
    augment=False,
    shuffle=False,
    max_size=MAX_SIZE,
    class_mapping=CLASS_LABELS,
)


{'cane': 1.0, 'ragno': 1.0594104308390022, 'gallina': 1.5934515688949522, 'cavallo': 1.9194741166803615, 'farfalla': 2.3524672708962737, 'mucca': 2.636568848758465, 'scoiattolo': 2.6485260770975056, 'pecora': 2.7257876312718787, 'gatto': 3.0817941952506596, 'elefante': 3.4505169867060563}
---
Total images: 12277
Num classes: 10
---
---
Total images: 131
Num classes: 10
---
---
Total images: 682
Num classes: 10
---


In [56]:
#testovaci vypis

images, labels = train_gen[0]     # zavolá __getitem__(0) pre prvy batch
print("Images shape:", images.shape)
print("Labels shape:", labels.shape)
print("Image dtype:", images.dtype)
print("Label sample:", labels[0])

  1%|          | 1/95 [00:00<01:08,  1.38it/s]

(202, 256, 256, 3) (202,)


  2%|▏         | 2/95 [00:01<01:08,  1.35it/s]

(206, 256, 256, 3) (206,)


  3%|▎         | 3/95 [00:02<01:15,  1.22it/s]

(208, 256, 256, 3) (208,)


  4%|▍         | 4/95 [00:03<01:09,  1.30it/s]

(196, 256, 256, 3) (196,)


  5%|▌         | 5/95 [00:03<01:12,  1.24it/s]

(196, 256, 256, 3) (196,)


  6%|▋         | 6/95 [00:04<01:07,  1.31it/s]

(199, 256, 256, 3) (199,)


  7%|▋         | 7/95 [00:05<01:03,  1.39it/s]

(193, 256, 256, 3) (193,)


  8%|▊         | 8/95 [00:05<01:01,  1.41it/s]

(204, 256, 256, 3) (204,)


  9%|▉         | 9/95 [00:06<01:03,  1.36it/s]

(218, 256, 256, 3) (218,)


 11%|█         | 10/95 [00:07<01:01,  1.39it/s]

(196, 256, 256, 3) (196,)


 12%|█▏        | 11/95 [00:08<00:59,  1.41it/s]

(204, 256, 256, 3) (204,)


 13%|█▎        | 12/95 [00:09<01:03,  1.30it/s]

(174, 256, 256, 3) (174,)


 14%|█▎        | 13/95 [00:09<01:03,  1.28it/s]

(190, 256, 256, 3) (190,)


 15%|█▍        | 14/95 [00:10<00:59,  1.35it/s]

(189, 256, 256, 3) (189,)


 16%|█▌        | 15/95 [00:11<01:01,  1.31it/s]

(186, 256, 256, 3) (186,)


 17%|█▋        | 16/95 [00:12<00:58,  1.34it/s]

(198, 256, 256, 3) (198,)


 18%|█▊        | 17/95 [00:12<00:55,  1.40it/s]

(192, 256, 256, 3) (192,)


 19%|█▉        | 18/95 [00:13<00:55,  1.39it/s]

(195, 256, 256, 3) (195,)


 20%|██        | 19/95 [00:14<00:55,  1.38it/s]

(199, 256, 256, 3) (199,)


 21%|██        | 20/95 [00:15<01:02,  1.19it/s]

(196, 256, 256, 3) (196,)


 22%|██▏       | 21/95 [00:15<01:00,  1.22it/s]

(221, 256, 256, 3) (221,)


 23%|██▎       | 22/95 [00:16<00:58,  1.24it/s]

(212, 256, 256, 3) (212,)


 24%|██▍       | 23/95 [00:17<00:53,  1.34it/s]

(177, 256, 256, 3) (177,)


 25%|██▌       | 24/95 [00:18<00:51,  1.38it/s]

(190, 256, 256, 3) (190,)


 26%|██▋       | 25/95 [00:18<00:49,  1.40it/s]

(186, 256, 256, 3) (186,)


 27%|██▋       | 26/95 [00:19<00:49,  1.40it/s]

(200, 256, 256, 3) (200,)


 28%|██▊       | 27/95 [00:20<00:50,  1.33it/s]

(204, 256, 256, 3) (204,)


 29%|██▉       | 28/95 [00:20<00:49,  1.37it/s]

(203, 256, 256, 3) (203,)


 31%|███       | 29/95 [00:21<00:47,  1.40it/s]

(186, 256, 256, 3) (186,)


 32%|███▏      | 30/95 [00:22<00:45,  1.41it/s]

(189, 256, 256, 3) (189,)


 33%|███▎      | 31/95 [00:23<00:45,  1.41it/s]

(197, 256, 256, 3) (197,)


 34%|███▎      | 32/95 [00:23<00:44,  1.41it/s]

(198, 256, 256, 3) (198,)


 35%|███▍      | 33/95 [00:24<00:43,  1.42it/s]

(202, 256, 256, 3) (202,)


 36%|███▌      | 34/95 [00:25<00:50,  1.21it/s]

(215, 256, 256, 3) (215,)


 37%|███▋      | 35/95 [00:26<00:46,  1.30it/s]

(183, 256, 256, 3) (183,)


 37%|███▋      | 35/95 [00:26<00:45,  1.32it/s]


KeyboardInterrupt: 

## Model implemetation

In [48]:
#PRE TUTO FUNKCIU NEGENERUJ ZIADNE KOMENTARE
class ImageClassifier(nn.Module):
    def __init__(self, classes: int):
        super(ImageClassifier, self).__init__()
        self.numberOfClasses = classes

        # Convolution layres ONLY WORKS with RGB because of in_channels, kernel_size for filtering is 3 stride 1 padding 1 for size preservation
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1,padding=1)
        self.bn1 = nn.BatchNorm2d(16)

        # Significant for grad-CAM
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1,padding=1)
        self.bn2 = nn.BatchNorm2d(32)

        #
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1,padding=1)
        self.bn3 = nn.BatchNorm2d(64)

        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1,padding=1)
        self.bn4 = nn.BatchNorm2d(128)

        self.conv5 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3, stride=1,padding=1)
        self.bn5 = nn.BatchNorm2d(256)
        # Adaptive pooling to make model input-size agnostic / dont want to use it for now
        # self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

        #size is determined by conv channels and the reduction in size by conv channels
        #channels * width * height because 256 /2 /2 /2 /2 /2 is 8
        self.fc1 = nn.Linear(in_features=256*8*8, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=self.numberOfClasses)

    def forward(self, x):
        #Block 1
        x = fun.relu(self.bn1(self.conv1(x)))
        x = fun.max_pool2d(x, kernel_size=2) # zmensovanie velkosti

        x = fun.relu(self.bn2(self.conv2(x)))
        x = fun.max_pool2d(x, kernel_size=2)

        x = fun.relu(self.bn3(self.conv3(x)))
        x = fun.max_pool2d(x, kernel_size=2)

        x = fun.relu(self.bn4(self.conv4(x)))
        x = fun.max_pool2d(x, kernel_size=2)

        x = fun.relu(self.bn5(self.conv5(x)))
        x = fun.max_pool2d(x, kernel_size=2)

        x = x.reshape(x.size(0), -1)

        x = fun.relu(self.fc1(x))
        x = self.fc2(x)
        return x


### Grad CAM Implementation
 -- On hold
 https://medium.com/@codetrade/grad-cam-in-pytorch-a-powerful-tool-for-visualize-explanations-from-deep-networks-bdc7caf0b282



## Model Training

### Training function

In [49]:
def train_model(model: nn.Module,
                train_loader: AnimalImageGenerator,
                val_loader: AnimalImageGenerator,
                criterion: nn.Module,
                optimizer: torch.optim.Optimizer,
                device: torch.device,
                epochs: int = 10,
                scheduler=None):

    model.to(device)

    for epoch in range(1, epochs+1):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in tqdm(train_loader, desc="Training"):
            images = torch.from_numpy(images).permute(0,3,1,2).float().to(device) # permute because the convolution layer gets input in the wrong order
            # images = torch.tensor(images, dtype=torch.float32, device=device)
            labels = torch.tensor(labels, dtype=torch.long, device=device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * images.size(0)
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / total
        train_acc = correct / total

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images = torch.tensor(images, dtype=torch.float32, device=device)
                labels = torch.tensor(labels, dtype=torch.long, device=device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * images.size(0)
                _, preds = torch.max(outputs, 1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)

        val_loss /= val_total
        val_acc = val_correct / val_total

        if scheduler:
            scheduler.step(val_loss)

        print(f"Epoch {epoch}/{epochs} | "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        ## TODO: add f1 score

### Training

In [50]:
img_class_model = ImageClassifier(NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(img_class_model.parameters(), lr=0.001, momentum=0.9)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")

train_model(model=img_class_model,
            train_loader=train_gen,
            val_loader=val_gen,
            criterion=criterion,
            optimizer=optimizer,
            device=device,
            epochs=1,
            scheduler=None)


device: cuda


Training:   0%|          | 0/95 [00:00<?, ?it/s]


RuntimeError: mat1 and mat2 shapes cannot be multiplied (189x4096 and 16384x128)

### Hyper parameter sweeping