# Machine Learning - Image Classification

In [6]:
#importing
import os

import cv2
import torch
import torch.nn as nn
import torch.nn.functional as fun
from torch import Tensor


from dotenv import load_dotenv
from sklearn.model_selection import train_test_split
from torch import optim
from tqdm import tqdm

from utils import *

## Dataset loading

In [2]:
# Load variables from a .env file (python-dotenv) before reading the environment.
load_dotenv()

# NOTE(review): DAT_PATH is read here but no visible cell uses it; the dataset
# directory below is hard-coded instead -- confirm which of the two is intended.
DAT_PATH = os.environ.get("TRAIN_DATASET_PATH")

# Dataset index produced by the project helper. Later cells use it as a pandas
# DataFrame with a `label` column.
ANIMALS_DATAFRAME = load_dataset_info("../data/archive/raw-img")

In [3]:
# Two-stage hold-out split with a fixed seed:
#   1) 5% of all rows become the test set;
#   2) 5.26% of the remaining 95% (about 5% of the total) become validation.
SPLIT_SEED = 37

train_df, test_df = train_test_split(
    ANIMALS_DATAFRAME,
    test_size=0.05,
    random_state=SPLIT_SEED,
)
train_df, val_df = train_test_split(
    train_df,
    test_size=0.0526,
    random_state=SPLIT_SEED,
)

In [4]:
# Image and batching configuration shared by the data pipeline.
TARGET_SIZE = (256, 256)
MAX_SIZE = 500
BATCH_SIZE = 64

# Class bookkeeping: the sorted unique labels receive consecutive integer ids.
NUM_CLASSES = ANIMALS_DATAFRAME["label"].nunique()
_sorted_labels = np.sort(ANIMALS_DATAFRAME["label"].unique())
CLASS_LABELS = dict(zip(_sorted_labels, range(len(_sorted_labels))))

# Augmentation frequency per class (largest class count / class count), so the
# model does not become biased towards the majority image categories.
counts = train_df["label"].value_counts()
max_count = counts.max()
_relative_frequency = max_count / counts
aug_strength = _relative_frequency.to_dict()

print(aug_strength)


{'cane': 1.0, 'ragno': 1.0809676623056035, 'gallina': 1.5667262969588551, 'cavallo': 1.8399159663865545, 'mucca': 2.606547619047619, 'scoiattolo': 2.615890083632019, 'farfalla': 2.9748641304347827, 'pecora': 3.3684615384615384, 'gatto': 4.047134935304991, 'elefante': 4.4912820512820515}


In [5]:
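# Setting up data loaders -- legacy generator-based version.
#   SLOW: kept for reference only; the DataLoader-based cell below is used instead.
#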
# train_gen = AnimalImageGenerator(
#     df=train_df,
#     batch_size=BATCH_SIZE,
#     target_size=TARGET_SIZE,
#     num_classes=NUM_CLASSES,
#     augment=True,
#     shuffle=True,
#     aug_strength=aug_strength,
#     max_size=MAX_SIZE,
#     class_mapping=CLASS_LABELS,
# )
#
# test_gen = AnimalImageGenerator(
#     df=test_df,
#     batch_size=BATCH_SIZE,
#     target_size=TARGET_SIZE,
#     num_classes=NUM_CLASSES,
#     augment=False,
#     shuffle=False,
#     max_size=MAX_SIZE,
#     class_mapping=CLASS_LABELS,
# )
#
# val_gen = AnimalImageGenerator(
#     df=val_df,
#     batch_size=BATCH_SIZE,
#     target_size=TARGET_SIZE,
#     num_classes=NUM_CLASSES,
#     augment=False,
#     shuffle=False,
#     max_size=MAX_SIZE,
#     class_mapping=CLASS_LABELS,
# )


In [6]:
from torch.utils.data import WeightedRandomSampler


# Chatbot-generated code, but reportedly fast.

def create_sampler(df, class_mapping):
    """Build a class-balancing ``WeightedRandomSampler`` for ``df``.

    Every row is weighted by the inverse of the number of rows that share its
    class, so each class present in ``df`` has the same total probability of
    being drawn.

    Args:
        df: DataFrame with a ``label`` column.
        class_mapping: Dict mapping every label value to an integer class index.

    Returns:
        A ``WeightedRandomSampler`` that draws ``len(df)`` indices with
        replacement.

    Raises:
        ValueError: If ``df`` has no rows, or if it contains labels that are
            missing from ``class_mapping``.
    """
    mapped = df["label"].map(class_mapping)
    if len(mapped) == 0:
        raise ValueError("create_sampler: df contains no rows")

    # Labels absent from the mapping come back as NaN; report them explicitly
    # instead of letting np.bincount fail on a float array.
    unmapped = mapped.isna().to_numpy()
    if unmapped.any():
        unknown = sorted({str(value) for value in df["label"].to_numpy()[unmapped]})
        raise ValueError(
            f"create_sampler: labels missing from class_mapping: {unknown}"
        )

    labels = mapped.to_numpy(dtype=np.int64)
    class_counts = np.bincount(labels)

    # Only invert the counts of classes that actually occur. A class index with
    # zero rows would otherwise trigger a divide-by-zero warning and an inf weight.
    class_weights = np.zeros(len(class_counts), dtype=np.float64)
    present = class_counts > 0
    class_weights[present] = 1.0 / class_counts[present]

    sample_weights = class_weights[labels]
    sampler = WeightedRandomSampler(
        torch.from_numpy(sample_weights).float(),
        num_samples=len(sample_weights),
        replacement=True,
    )
    return sampler

# Datasets: both splits share the class mapping and target size; only the
# training split is augmented.
_dataset_common = dict(class_mapping=CLASS_LABELS, target_size=TARGET_SIZE)
train_dataset = AnimalDataset(df=train_df, augment=True, **_dataset_common)
val_dataset = AnimalDataset(df=val_df, augment=False, **_dataset_common)

# Training rows are drawn with replacement, weighted inversely to class
# frequency (see create_sampler), instead of being shuffled.
sampler = create_sampler(train_df, CLASS_LABELS)

# Loader settings common to training and validation.
_loader_common = dict(
    batch_size=BATCH_SIZE,
    num_workers=4,
    pin_memory=True,
    persistent_workers=True,
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, sampler=sampler, **_loader_common
)
val_loader = torch.utils.data.DataLoader(
    val_dataset, shuffle=False, **_loader_common
)


## Model implementation

In [7]:
#PRE TUTO FUNKCIU NEGENERUJ ZIADNE KOMENTARE
class ImageClassifier(nn.Module):
    """Small three-block CNN for RGB image classification.

    Each block is Conv2d(3x3, stride 1, padding 1) -> BatchNorm2d -> LeakyReLU
    -> 2x2 max-pool: the convolution preserves the spatial size and the pool
    halves it. The flattened feature map feeds a two-layer fully connected
    head that returns one raw score (logit) per class; no softmax is applied.

    Args:
        classes: Number of output classes (must be >= 1).
        input_size: ``(height, width)`` of the images the model will receive.
            It determines the size of the first fully connected layer. The
            default ``(256, 256)`` yields ``64 * 32 * 32`` input features.

    Raises:
        ValueError: If ``classes`` < 1, or if either side of ``input_size`` is
            smaller than 8 pixels (nothing would remain after three pools).
    """

    def __init__(self, classes: int, input_size: tuple = (256, 256)):
        super().__init__()
        if classes < 1:
            raise ValueError(f"classes must be >= 1, got {classes}")

        height, width = input_size
        # Three 2x2 max-pools with floor rounding reduce each side by a factor of 8.
        pooled_h, pooled_w = height // 8, width // 8
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {input_size} is too small: each side must be at least 8"
            )

        self.numberOfClasses = classes

        # Convolution layers. in_channels=3 means RGB input only; kernel 3,
        # stride 1, padding 1 preserve the spatial size.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(32)

        # Last convolutional layer; the notebook's Grad-CAM cell hooks this one.
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(64)

        # Adaptive pooling would make the model input-size agnostic; not used for now.
        # self.adaptive_pool = nn.AdaptiveAvgPool2d((4, 4))

        # Flattened size = channels * height * width after the three pools,
        # e.g. 64 * 32 * 32 for 256x256 input (256 / 2 / 2 / 2 = 32).
        self.fc1 = nn.Linear(in_features=64 * pooled_h * pooled_w, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=self.numberOfClasses)

    def forward(self, x):
        """Return class logits of shape (batch, classes) for a batch of images."""
        # Block 1
        x = fun.leaky_relu(self.bn1(self.conv1(x)))
        x = fun.max_pool2d(x, kernel_size=2)  # halve the spatial size

        # Block 2
        x = fun.leaky_relu(self.bn2(self.conv2(x)))
        x = fun.max_pool2d(x, kernel_size=2)

        # Block 3
        x = fun.leaky_relu(self.bn3(self.conv3(x)))
        x = fun.max_pool2d(x, kernel_size=2)

        # Flatten everything except the batch dimension.
        x = x.reshape(x.size(0), -1)

        x = fun.leaky_relu(self.fc1(x))
        x = self.fc2(x)
        return x


## Model Training

### Training function

In [8]:
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(loss_sum, correct, seen)``.

    With an ``optimizer`` every batch also performs the backward pass and an
    optimizer step; without one only the forward pass is evaluated. The caller
    sets train/eval mode and decides whether gradients are tracked.
    """
    loss_sum = 0.0
    correct = 0
    seen = 0
    for images, labels in loader:
        images = images.float().to(device)
        labels = labels.long().to(device)

        if optimizer is not None:
            optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        # Weight the batch loss by the batch size so the epoch value is a
        # per-sample average (assumes criterion returns a batch mean).
        loss_sum += loss.item() * images.size(0)
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        seen += labels.size(0)
    return loss_sum, correct, seen


def train_model(model: nn.Module,
                train_loader: torch.utils.data.DataLoader,
                val_loader: torch.utils.data.DataLoader,
                criterion: nn.Module,
                optimizer: torch.optim.Optimizer,
                device: torch.device,
                epochs: int = 10,
                scheduler=None):
    """Train ``model`` and print loss/accuracy for both splits after every epoch.

    Args:
        model: Network to train; it is moved to ``device``.
        train_loader: Iterable of ``(images, labels)`` batches used for training.
        val_loader: Iterable of ``(images, labels)`` batches used for validation.
        criterion: Loss taking ``(outputs, labels)``.
        optimizer: Optimizer over ``model``'s parameters.
        device: Device on which batches and the model are placed.
        epochs: Number of passes over ``train_loader``.
        scheduler: Optional learning-rate scheduler, stepped once per epoch.
            ``ReduceLROnPlateau`` receives the validation loss; any other
            scheduler is stepped without arguments.

    Returns:
        None. Metrics are only printed.
    """
    model.to(device)
    nan = float("nan")

    for epoch in tqdm(range(1, epochs+1), desc="Training model"):
        model.train()
        running_loss, correct, total = _run_epoch(
            model, train_loader, criterion, device, optimizer=optimizer
        )
        # An empty loader yields NaN metrics instead of a ZeroDivisionError.
        train_loss = running_loss / total if total else nan
        train_acc = correct / total if total else nan

        model.eval()
        with torch.no_grad():
            val_loss_sum, val_correct, val_total = _run_epoch(
                model, val_loader, criterion, device
            )
        val_loss = val_loss_sum / val_total if val_total else nan
        val_acc = val_correct / val_total if val_total else nan

        if scheduler is not None:
            # Only ReduceLROnPlateau takes a metric; other schedulers would
            # interpret the argument as an epoch number.
            if isinstance(scheduler, torch.optim.lr_scheduler.ReduceLROnPlateau):
                scheduler.step(val_loss)
            else:
                scheduler.step()

        print(f"Epoch {epoch}/{epochs} | "
              f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f} | "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        ## TODO: add f1 score

## Training

### Set-up

In [9]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model, loss and optimizer handed to train_model in the training cell.
img_class_model = ImageClassifier(NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(
    img_class_model.parameters(),
    lr=0.001,
    momentum=0.9,
)

print(f"device: {device}")


device: cuda


In [10]:
# Training run: 50 epochs with the model, loaders, loss and optimizer defined
# in the cells above. No learning-rate scheduler is used (train_model's default).
train_model(
    img_class_model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    device,
    epochs=50,
)

Training model:   2%|▏         | 1/50 [01:11<58:43, 71.90s/it]

Epoch 1/50 | Train Loss: 1.9547, Train Acc: 0.3053 | Val Loss: 1.6509, Val Acc: 0.4041


Training model:   4%|▍         | 2/50 [01:35<34:40, 43.35s/it]

Epoch 2/50 | Train Loss: 1.7794, Train Acc: 0.3697 | Val Loss: 1.6030, Val Acc: 0.4355


Training model:   6%|▌         | 3/50 [01:58<26:49, 34.25s/it]

Epoch 3/50 | Train Loss: 1.7072, Train Acc: 0.3959 | Val Loss: 1.5181, Val Acc: 0.4430


Training model:   8%|▊         | 4/50 [02:22<22:58, 29.97s/it]

Epoch 4/50 | Train Loss: 1.6601, Train Acc: 0.4165 | Val Loss: 1.6030, Val Acc: 0.4149


Training model:  10%|█         | 5/50 [02:45<20:43, 27.62s/it]

Epoch 5/50 | Train Loss: 1.6066, Train Acc: 0.4328 | Val Loss: 1.5260, Val Acc: 0.4711


Training model:  12%|█▏        | 6/50 [03:09<19:13, 26.21s/it]

Epoch 6/50 | Train Loss: 1.5791, Train Acc: 0.4436 | Val Loss: 1.4121, Val Acc: 0.4901


Training model:  14%|█▍        | 7/50 [03:32<18:08, 25.32s/it]

Epoch 7/50 | Train Loss: 1.5492, Train Acc: 0.4571 | Val Loss: 1.4020, Val Acc: 0.4959


Training model:  16%|█▌        | 8/50 [03:55<17:18, 24.72s/it]

Epoch 8/50 | Train Loss: 1.5118, Train Acc: 0.4698 | Val Loss: 1.4242, Val Acc: 0.4868


Training model:  18%|█▊        | 9/50 [04:19<16:37, 24.34s/it]

Epoch 9/50 | Train Loss: 1.5030, Train Acc: 0.4737 | Val Loss: 1.4983, Val Acc: 0.4843


Training model:  20%|██        | 10/50 [04:42<16:02, 24.06s/it]

Epoch 10/50 | Train Loss: 1.4788, Train Acc: 0.4810 | Val Loss: 1.3499, Val Acc: 0.5207


Training model:  22%|██▏       | 11/50 [05:06<15:34, 23.96s/it]

Epoch 11/50 | Train Loss: 1.4647, Train Acc: 0.4832 | Val Loss: 1.3592, Val Acc: 0.5198


Training model:  24%|██▍       | 12/50 [05:30<15:08, 23.91s/it]

Epoch 12/50 | Train Loss: 1.4320, Train Acc: 0.4991 | Val Loss: 1.3036, Val Acc: 0.5388


Training model:  26%|██▌       | 13/50 [05:54<14:43, 23.89s/it]

Epoch 13/50 | Train Loss: 1.4206, Train Acc: 0.5008 | Val Loss: 1.3549, Val Acc: 0.5157


Training model:  28%|██▊       | 14/50 [06:18<14:18, 23.85s/it]

Epoch 14/50 | Train Loss: 1.4085, Train Acc: 0.5052 | Val Loss: 1.2835, Val Acc: 0.5488


Training model:  30%|███       | 15/50 [06:41<13:53, 23.82s/it]

Epoch 15/50 | Train Loss: 1.3848, Train Acc: 0.5180 | Val Loss: 1.2863, Val Acc: 0.5496


Training model:  32%|███▏      | 16/50 [07:05<13:30, 23.83s/it]

Epoch 16/50 | Train Loss: 1.3782, Train Acc: 0.5178 | Val Loss: 1.2991, Val Acc: 0.5438


Training model:  34%|███▍      | 17/50 [07:29<13:06, 23.82s/it]

Epoch 17/50 | Train Loss: 1.3710, Train Acc: 0.5195 | Val Loss: 1.2772, Val Acc: 0.5496


Training model:  36%|███▌      | 18/50 [07:53<12:42, 23.82s/it]

Epoch 18/50 | Train Loss: 1.3537, Train Acc: 0.5297 | Val Loss: 1.2086, Val Acc: 0.5645


Training model:  38%|███▊      | 19/50 [08:17<12:18, 23.82s/it]

Epoch 19/50 | Train Loss: 1.3340, Train Acc: 0.5341 | Val Loss: 1.2426, Val Acc: 0.5661


Training model:  40%|████      | 20/50 [08:40<11:54, 23.81s/it]

Epoch 20/50 | Train Loss: 1.3200, Train Acc: 0.5426 | Val Loss: 1.2272, Val Acc: 0.5645


Training model:  42%|████▏     | 21/50 [09:04<11:30, 23.80s/it]

Epoch 21/50 | Train Loss: 1.3209, Train Acc: 0.5372 | Val Loss: 1.1796, Val Acc: 0.5736


Training model:  44%|████▍     | 22/50 [09:28<11:06, 23.80s/it]

Epoch 22/50 | Train Loss: 1.3257, Train Acc: 0.5380 | Val Loss: 1.1949, Val Acc: 0.5810


Training model:  46%|████▌     | 23/50 [09:52<10:43, 23.83s/it]

Epoch 23/50 | Train Loss: 1.3155, Train Acc: 0.5414 | Val Loss: 1.2141, Val Acc: 0.5752


Training model:  48%|████▊     | 24/50 [10:16<10:19, 23.83s/it]

Epoch 24/50 | Train Loss: 1.3114, Train Acc: 0.5457 | Val Loss: 1.1590, Val Acc: 0.5810


Training model:  50%|█████     | 25/50 [10:40<09:56, 23.84s/it]

Epoch 25/50 | Train Loss: 1.2845, Train Acc: 0.5563 | Val Loss: 1.1612, Val Acc: 0.5975


Training model:  52%|█████▏    | 26/50 [11:06<09:49, 24.55s/it]

Epoch 26/50 | Train Loss: 1.2799, Train Acc: 0.5519 | Val Loss: 1.1641, Val Acc: 0.5744


Training model:  54%|█████▍    | 27/50 [11:33<09:46, 25.51s/it]

Epoch 27/50 | Train Loss: 1.2872, Train Acc: 0.5509 | Val Loss: 1.1344, Val Acc: 0.6124


Training model:  56%|█████▌    | 28/50 [12:01<09:34, 26.12s/it]

Epoch 28/50 | Train Loss: 1.2543, Train Acc: 0.5610 | Val Loss: 1.1201, Val Acc: 0.6025


Training model:  58%|█████▊    | 29/50 [12:28<09:16, 26.51s/it]

Epoch 29/50 | Train Loss: 1.2568, Train Acc: 0.5624 | Val Loss: 1.1450, Val Acc: 0.5942


Training model:  60%|██████    | 30/50 [12:56<08:55, 26.77s/it]

Epoch 30/50 | Train Loss: 1.2568, Train Acc: 0.5622 | Val Loss: 1.0285, Val Acc: 0.6388


Training model:  62%|██████▏   | 31/50 [13:23<08:32, 26.95s/it]

Epoch 31/50 | Train Loss: 1.2295, Train Acc: 0.5715 | Val Loss: 1.0269, Val Acc: 0.6207


Training model:  64%|██████▍   | 32/50 [13:51<08:07, 27.07s/it]

Epoch 32/50 | Train Loss: 1.2351, Train Acc: 0.5713 | Val Loss: 1.1317, Val Acc: 0.6050


Training model:  66%|██████▌   | 33/50 [14:18<07:41, 27.14s/it]

Epoch 33/50 | Train Loss: 1.2296, Train Acc: 0.5729 | Val Loss: 1.1539, Val Acc: 0.5909


Training model:  68%|██████▊   | 34/50 [14:45<07:15, 27.19s/it]

Epoch 34/50 | Train Loss: 1.2137, Train Acc: 0.5815 | Val Loss: 1.0480, Val Acc: 0.6289


Training model:  70%|███████   | 35/50 [15:12<06:47, 27.19s/it]

Epoch 35/50 | Train Loss: 1.2414, Train Acc: 0.5725 | Val Loss: 1.1352, Val Acc: 0.6165


Training model:  72%|███████▏  | 36/50 [15:40<06:21, 27.22s/it]

Epoch 36/50 | Train Loss: 1.2143, Train Acc: 0.5777 | Val Loss: 1.0695, Val Acc: 0.6207


Training model:  74%|███████▍  | 37/50 [16:07<05:54, 27.25s/it]

Epoch 37/50 | Train Loss: 1.2032, Train Acc: 0.5807 | Val Loss: 1.1034, Val Acc: 0.6025


Training model:  76%|███████▌  | 38/50 [16:34<05:27, 27.26s/it]

Epoch 38/50 | Train Loss: 1.2072, Train Acc: 0.5803 | Val Loss: 1.0340, Val Acc: 0.6289


Training model:  78%|███████▊  | 39/50 [17:01<04:59, 27.25s/it]

Epoch 39/50 | Train Loss: 1.1981, Train Acc: 0.5879 | Val Loss: 1.0546, Val Acc: 0.6207


Training model:  80%|████████  | 40/50 [17:29<04:32, 27.24s/it]

Epoch 40/50 | Train Loss: 1.1936, Train Acc: 0.5865 | Val Loss: 1.0529, Val Acc: 0.6223


Training model:  82%|████████▏ | 41/50 [17:56<04:05, 27.23s/it]

Epoch 41/50 | Train Loss: 1.1983, Train Acc: 0.5861 | Val Loss: 1.0641, Val Acc: 0.6107


Training model:  84%|████████▍ | 42/50 [18:23<03:37, 27.22s/it]

Epoch 42/50 | Train Loss: 1.1785, Train Acc: 0.5936 | Val Loss: 1.1106, Val Acc: 0.6149


Training model:  86%|████████▌ | 43/50 [18:50<03:10, 27.22s/it]

Epoch 43/50 | Train Loss: 1.1762, Train Acc: 0.5967 | Val Loss: 0.9739, Val Acc: 0.6512


Training model:  88%|████████▊ | 44/50 [19:17<02:43, 27.21s/it]

Epoch 44/50 | Train Loss: 1.1535, Train Acc: 0.6032 | Val Loss: 1.0319, Val Acc: 0.6215


Training model:  90%|█████████ | 45/50 [19:45<02:15, 27.20s/it]

Epoch 45/50 | Train Loss: 1.1601, Train Acc: 0.5965 | Val Loss: 1.0567, Val Acc: 0.6397


Training model:  92%|█████████▏| 46/50 [20:12<01:48, 27.22s/it]

Epoch 46/50 | Train Loss: 1.1459, Train Acc: 0.6039 | Val Loss: 1.0654, Val Acc: 0.6347


Training model:  94%|█████████▍| 47/50 [20:39<01:21, 27.21s/it]

Epoch 47/50 | Train Loss: 1.1497, Train Acc: 0.6005 | Val Loss: 1.0036, Val Acc: 0.6347


Training model:  96%|█████████▌| 48/50 [21:06<00:54, 27.21s/it]

Epoch 48/50 | Train Loss: 1.1432, Train Acc: 0.6047 | Val Loss: 1.0679, Val Acc: 0.6281


Training model:  98%|█████████▊| 49/50 [21:34<00:27, 27.20s/it]

Epoch 49/50 | Train Loss: 1.1470, Train Acc: 0.6060 | Val Loss: 0.9951, Val Acc: 0.6628


Training model: 100%|██████████| 50/50 [22:01<00:00, 26.42s/it]

Epoch 50/50 | Train Loss: 1.1330, Train Acc: 0.6055 | Val Loss: 0.9942, Val Acc: 0.6579





### Grad CAM Implementation
 -- On hold
 https://medium.com/@codetrade/grad-cam-in-pytorch-a-powerful-tool-for-visualize-explanations-from-deep-networks-bdc7caf0b282

## Single image predictions

In [1]:
class SingleImageInput:
    """Read one image from disk and turn it into a normalized tensor.

    The pipeline converts the array to a PIL image, resizes it to
    ``target_size``, converts it to a tensor and normalizes each channel with
    the commonly used ImageNet mean/std values.

    NOTE(review): ``transforms`` is not imported in any visible cell --
    presumably torchvision.transforms arriving via ``from utils import *``.
    NOTE(review): this preprocessing should match what AnimalDataset applies
    during training; that code is not visible here -- confirm.

    Args:
        target_size: Size passed to ``transforms.Resize``.
    """

    def __init__(self, target_size=(256, 256)):
        self.target_size = target_size
        self.transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(target_size),
            transforms.ToTensor(),
            transforms.Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225]
            ),
        ])

    def read_image(self, path) -> torch.Tensor:
        """Load the image at ``path`` and return the transformed tensor.

        Args:
            path: Location of the image file, passed to ``cv2.imread``.

        Returns:
            The preprocessed image tensor, without a batch dimension.

        Raises:
            OSError: If OpenCV cannot read the file (missing, unreadable or
                unsupported format).
        """
        print(path)
        img = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with a clear message instead of inside cvtColor.
        if img is None:
            raise OSError(f"Could not read image file: {path!r}")
        # OpenCV loads images as BGR; the transform pipeline expects RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = self.transform(img)
        return img

NameError: name 'torch' is not defined

In [5]:
# Pick one random sample labelled "cane". pick_random is a project helper;
# its result is passed to cv2.imread, so it is presumably a file path.
ran_img_sample = pick_random(ANIMALS_DATAFRAME, ANIMALS_DATAFRAME["label"] == "cane")
print(ran_img_sample)

# Preprocess the image and move it to the training device.
ImageReader = SingleImageInput(target_size=(256, 256))
img = ImageReader.read_image(ran_img_sample)
img = img.to(device)
# In-place: insert a leading batch dimension so the model receives a batch of one.
img.unsqueeze_(0)
print(ran_img_sample)

NameError: name 'ANIMALS_DATAFRAME' is not defined

In [None]:
img_class_model.eval()

# Grad-CAM target: the last convolutional layer of the classifier.
target_layer = img_class_model.conv3

activations = []
gradients = []

def forward_hook(module, input, output):
    """Store the target layer's output feature maps."""
    activations.append(output)

def backward_hook(module, grad_input, grad_output):
    """Store the gradient flowing into the target layer's output."""
    gradients.append(grad_output[0])

f_handle = target_layer.register_forward_hook(forward_hook)
b_handle = target_layer.register_full_backward_hook(backward_hook)


img.requires_grad_(True)


# Always detach the hooks, even if the forward or backward pass fails.
try:
    img_class_model.zero_grad()
    out = img_class_model(img)
    predicted_l = out.argmax(dim=1).item()

    # Back-propagate the logit of the predicted class only.
    score = out[0, predicted_l]
    score.backward()
finally:
    f_handle.remove()
    b_handle.remove()

print(len(gradients))

# Channel importance = spatial mean of the gradients. keepdim=True keeps the
# shape (N, C, 1, 1) so it broadcasts against the (N, C, H, W) activations;
# without it the (N, C) result cannot be multiplied with the feature maps.
weights = torch.mean(gradients[0], dim=[2, 3], keepdim=True)

heatmap = torch.sum(weights * activations[0], dim=1).squeeze()
heatmap = np.maximum(heatmap.cpu().detach().numpy(), 0)
# Normalize to [0, 1]; skip when the map is all zeros to avoid 0/0 -> NaN.
heatmap_peak = np.max(heatmap)
if heatmap_peak > 0:
    heatmap /= heatmap_peak

image_bgr = cv2.imread(ran_img_sample)
image = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)

heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
heatmap = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
# applyColorMap returns BGR and cv2.imshow expects BGR, so blend with the BGR
# image; mixing in the RGB copy would swap the photo's red and blue channels.
superimposed_img = cv2.addWeighted(image_bgr, 0.6, heatmap, 0.4, 0)

# NOTE(review): cv2.imshow opens a native window and needs a GUI event loop
# (cv2.waitKey); it may not work from a headless or remote Jupyter kernel.
cv2.imshow('Grad-CAM', superimposed_img)
cv2