# Ucitavanje podataka

U ovoj svesci bavićemo se, takođe, zadatkom klasifikacije MNIST slika. U pristupu ćemo koristiti konvolutivne neuronske mreže.



In [19]:
import numpy as np
from matplotlib import pyplot as plt
import pandas as pd
import seaborn as sns

from tqdm import tqdm

from sklearn.model_selection import train_test_split

In [None]:
from sklearn.metrics import confusion_matrix, accuracy_score, precision_recall_fscore_support

import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data.sampler import SubsetRandomSampler


In [21]:
def get_device():
  return torch.device("cuda" if torch.cuda.is_available() else "cpu")

def bind_gpu(data):
  device = get_device()
  if isinstance(data, (list, tuple)):
    return [bind_gpu(data_elem) for data_elem in data]
  else:
    return data.to(device, non_blocking=True)

In [22]:
data_dir = '~/datasets/'
norm_mean = 0
norm_stddev = 1
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((norm_mean,), (norm_stddev,))
])

valid_size = 0.2
shuffle = True
num_workers = 8
pin_memory = False
RANDOM_STATE = 1389
BATCH_SIZE = 2048

In [23]:
train_dataset = datasets.MNIST(root=data_dir, train=True, download=True, transform=train_transform)
valid_dataset = datasets.MNIST(root=data_dir, train=True, download=True, transform=train_transform)
test_dataset = datasets.MNIST(root=data_dir, train=False, download=True, transform=train_transform)

num_train = len(train_dataset)
indices = list(range(num_train))
split = int(np.floor(valid_size * num_train))

if shuffle == True:
  np.random.seed(RANDOM_STATE)
  np.random.shuffle(indices)

train_idx, valid_idx = indices[split:], indices[:split]

train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=train_sampler,
                                           num_workers=num_workers, pin_memory=pin_memory)
valid_loader = torch.utils.data.DataLoader(valid_dataset, batch_size=BATCH_SIZE, sampler=valid_sampler,
                                           num_workers=num_workers, pin_memory=pin_memory)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, sampler=None,
                                           num_workers=num_workers, pin_memory=pin_memory)

# Dizajn Konvolutivnog modela


Naš model će se sastojati iz više konvolutivnih i agregacionih slojeva.

Konvolutivni slojevi se na nivou biblioteke kreiraju korišćenjem funkcije `Conv2D`.  Prvo se navodi broj filtera (parametar `filters`, obično neimenovan), zatim veličine kernela (`kernel_size` parametar), veličina pomeraja (parametar `strides`) i uokvirenje (`padding` parametar). <img src='assets/convolution_operation.gif' style='height: 300px'>

Na primer, na slici je prikazan kernel veličine 3x3 (kvadratić sive boje) koji prolazi kroz ulaz (kvadratić plave boje) sa horizontalnim i vertikalnim pomerajem veličine 2. Dodati beli skup kvadrata predstavlja uokvirenje i u zavisnosti od njegovog prisustva veličina izlazne slike (kvadratić zelene boje) može biti istih dimenzija (u Keras biblioteci se ovo naglašava vrednošću `same` parametra `padding`) ili nešto manja (u Keras biblioteci se ovo naglašava vrednošću `valid` parametra `padding`).

Agregacioni slojevi (`MaxPooling2D` i `AvgPooling2D`) vrše redukciju slojeva svođenjem blokova zadatih večina na njihove maksimalne ili prosečne vrednosti. Veličina bloka zadaje se parametrom `pool_size`. <img src='assets/pooling.png' style='width: 300px'>

Prilikom treniranja koristićemo i `Dropout` tehniku regularizacije. Podsetimo se da na ovaj način isključujemo nasumično odabrane neurone, omogućavamo drugačiji protok podataka kroz mrežu i smanjujemo zavisnosti izlaza od ulaza. <img src='assets/dropout.png'> Da bi se primenila ova tehnika regularizacije, dovoljno je mreži dodati `Dropout` sloj. Njegov parametar je verovatnoća isključivanja pojedinačnih neurona.

Sloj ispravljanja, sloj `Flatten`, se korist za transformisanje matrica vrednosti u vektore i obično se koristi kao priprema za prelazak sa konvolutivnog dela na dalji gusti deo mreže.

In [24]:
import torch.nn.functional as F

In [25]:
class MNISTConvClassifier(nn.Module):
    def __init__(self, number_of_classes):
        super(MNISTConvClassifier, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(in_features=9216, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=number_of_classes)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(input=x, kernel_size=2)
        x = self.dropout1(x)

        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        logits = self.fc2(x)

        probs = F.softmax(x, dim=1)

        return probs, logits

# Trening Modela

In [26]:
number_of_classes = 10
model = MNISTConvClassifier(number_of_classes)
bind_gpu(model)
model

MNISTConvClassifier(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (dropout1): Dropout(p=0.25, inplace=False)
  (dropout2): Dropout(p=0.5, inplace=False)
  (fc1): Linear(in_features=9216, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [27]:
def count_parameters(model):
  total_params = sum(p.numel() for p in model.parameters())

  # Iterate through the layers and print their details
  for name, layer in model.named_children():
        num_params = sum(p.numel() for p in layer.parameters())
        print(f"Layer: {name}, Parameters: {num_params}")

  return total_params

In [17]:
count_parameters(model)

Layer: conv1, Parameters: 320
Layer: conv2, Parameters: 18496
Layer: dropout1, Parameters: 0
Layer: dropout2, Parameters: 0
Layer: fc1, Parameters: 1179776
Layer: fc2, Parameters: 1290


1199882

In [28]:
N_EPOCHS = 30
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters())

metrics = {
    'train_loss': [],
    'train_accuracy': [],
    'train_steps': [],
    'val_loss': [],
    'val_accuracy': [],
    'val_steps': [],
}

device = get_device()
training_step = 0
pbar = tqdm(total=N_EPOCHS, desc="Training Progress")
pbar.set_postfix({"loss": -1, "accuracy": -1})
for epoch in range(N_EPOCHS):
    model.train()
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        probs, logits = model(inputs)

        loss = nn.functional.cross_entropy(logits, labels)
        predicted = torch.argmax(probs, dim=1)
        correct = (predicted == labels).sum().item()
        accuracy = correct / labels.size(0)
        metrics['train_loss'].append(loss.item())
        metrics['train_accuracy'].append(accuracy)
        metrics['train_steps'].append(training_step)
        training_step += 1
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        val_loss = 0
        val_correct = 0
        val_samples = 0
        for inputs, labels in valid_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            probs, logits = model(inputs)

            loss = nn.functional.cross_entropy(logits, labels, reduction='sum')
            val_loss += loss.item()

            predicted = torch.argmax(probs, dim=1)
            val_correct += (predicted == labels).sum().item()
            val_samples += labels.size(0)

        val_loss /= val_samples
        val_accuracy = val_correct / val_samples
        metrics['val_loss'].append(val_loss)
        metrics['val_accuracy'].append(val_accuracy)
        metrics['val_steps'].append(training_step)

    pbar.set_postfix({"loss": val_loss, "accuracy": val_accuracy})
    pbar.update(1)


Training Progress:  10%|█         | 3/30 [02:09<19:23, 43.08s/it, loss=0.147, accuracy=0.00167]Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x77432fd3e310>>
Traceback (most recent call last):
  File "/home/ognjen/miniconda3/envs/ai/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 794, in _clean_thread_parent_frames
    for identity in list(thread_to_parent_header.keys()):
                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
KeyboardInterrupt: 


KeyboardInterrupt: 

# Evaluacija modela

In [None]:
plt.figure(figsize=(15, 5))
plt.subplot(1, 2, 1)
plt.plot(metrics['train_steps'], metrics['train_loss'], label='Train Loss')
plt.plot(metrics['val_steps'], metrics['val_loss'], label='Validation Loss')
plt.xlabel('Training Steps')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(metrics['train_steps'], metrics['train_accuracy'], label='Train Accuracy')
plt.plot(metrics['val_steps'], metrics['val_accuracy'], label='Validation Accuracy')
plt.xlabel('Training Steps')
plt.ylabel('Accuracy')
plt.legend()
plt.show()

In [None]:
model.eval()
total_loss = 0.0
total_correct = 0
total_samples = 0
predicted_labels, true_labels = [], []
device = get_device()

with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        probs, logits = model(inputs)
        total_loss += nn.functional.cross_entropy(logits, labels).item()

        predicted = torch.argmax(probs, dim=1)
        
        predicted_labels.extend(predicted.squeeze().tolist())
        true_labels.extend(labels.tolist())

        total_samples += labels.size(0)
        total_correct += (predicted.squeeze() == labels).sum().item()

# compute metrics
accuracy = accuracy_score(true_labels, predicted_labels)
precision, recall, f1_score, _ = precision_recall_fscore_support(true_labels, predicted_labels, average='weighted')
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1_score:.4f}")

# plot
cm = confusion_matrix(true_labels, predicted_labels)
sns.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Actual')
plt.ylabel('Predicted')

avg_loss = total_loss / len(test_loader)
accuracy = total_correct / total_samples
print(f'Train Set: Loss: {avg_loss:.4f}, Accuracy: {accuracy:.4f}')