# Прокопчук Роман, ШІ-2

Варіант 11
- Задача 1: Зображення для знаходження відстані: `KR2/picture/`. Зображення з відомою
відстанню: `picture_1.jpg`. Знаходження кутів: LOG. Знаходження ключових точок: SURF.
- Задача 2: Натренувати класифікатор на класах: `plate`, `elephant`, `mouse`

# Задача 2

In [1]:
from google.colab import drive
# Mount point for Google Drive; every data path below is built on top of it.
GOOGLE_DRIVE_ROOT = '/content/drive'
# Attach Google Drive at that mount point.
drive.mount(GOOGLE_DRIVE_ROOT)

Mounted at /content/drive


In [2]:
import os

# Project folder on the mounted Google Drive.
ROOT_PATH_TO_DATA = os.path.join(GOOGLE_DRIVE_ROOT, "MyDrive", "KNU_CV_CW_2")
# Working directory for task 2; the dataset files and saved model are placed under it.
TASK_2_PATH_TO_DATA = os.path.join(ROOT_PATH_TO_DATA, "task_2")

In [7]:
# CIFAR-100 class names the classifier is trained on.
# The position in this list becomes the remapped label id (0, 1, 2).
SUB_CLASSES = ['plate', 'elephant', 'mouse']

In [8]:
# Directory handed to torchvision as the `root` of the CIFAR-100 dataset files.
CIFAR100_ROOT = os.path.join(TASK_2_PATH_TO_DATA, "cifar100")

In [9]:
import torch
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision import transforms

class Cifar100Subset(Dataset):
    """CIFAR-100 restricted to selected classes, with labels remapped to ``0..k-1``.

    The wrapped ``datasets.CIFAR100`` is kept whole; only the positions of the
    samples that belong to ``sub_classes`` are stored, and the original label
    of each sample is translated to the index of its class in ``sub_classes``.

    Args:
        sub_classes: class names to keep; their order defines the new labels.
        root: directory of the CIFAR-100 files (forwarded to torchvision).
        train: select the training split (``True``) or the test split.
        transform: transform applied by the wrapped dataset to every image.
        download: forwarded to ``datasets.CIFAR100``.

    Raises:
        KeyError: if a name in ``sub_classes`` is not a CIFAR-100 class.
    """

    def __init__(self, sub_classes, root, train=True, transform=None, download=False):
        self.dataset = datasets.CIFAR100(root=root, train=train, transform=transform, download=download)
        self.sub_classes = sub_classes

        class_to_idx = self.dataset.class_to_idx
        try:
            self.original_sub_classes_indices = [class_to_idx[name] for name in self.sub_classes]
        except KeyError as err:
            # Same exception type as before, but say what went wrong.
            raise KeyError(f"{err.args[0]!r} is not a CIFAR-100 class name") from err

        # Set membership keeps the scan over all targets O(n) regardless of
        # how many classes are selected.
        wanted = set(self.original_sub_classes_indices)
        self.target_indices = [i for i, target in enumerate(self.dataset.targets) if target in wanted]
        self.label_mapping = {orig_idx: new_idx for new_idx, orig_idx in enumerate(self.original_sub_classes_indices)}

    def __len__(self):
        """Return the number of samples belonging to the selected classes."""
        return len(self.target_indices)

    def __getitem__(self, index):
        """Return ``(image, new_label)`` for the ``index``-th selected sample."""
        # Translate the subset position into a position in the full dataset.
        index = self.target_indices[index]
        image, original_label = self.dataset[index]
        new_label = self.label_mapping[original_label]

        return image, new_label

# Per-channel mean / std shared by both pipelines
# (presumably the CIFAR-100 channel statistics; confirm if the dataset changes).
_CHANNEL_MEAN = (0.507, 0.487, 0.441)
_CHANNEL_STD = (0.267, 0.256, 0.276)

# Training pipeline: random flip and padded random crop, then tensor conversion and normalisation.
_train_steps = [
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
train_transform = transforms.Compose(_train_steps)

# Test pipeline: no random steps, only tensor conversion and the same normalisation.
_test_steps = [
    transforms.ToTensor(),
    transforms.Normalize(_CHANNEL_MEAN, _CHANNEL_STD),
]
test_transform = transforms.Compose(_test_steps)

# Arguments common to both splits: same classes, same storage folder, download enabled.
_subset_options = {
    "sub_classes": SUB_CLASSES,
    "root": CIFAR100_ROOT,
    "download": True,
}

# Training split with the augmenting transform.
cifar100_train = Cifar100Subset(train=True, transform=train_transform, **_subset_options)

# Test split with the deterministic transform.
cifar100_test = Cifar100Subset(train=False, transform=test_transform, **_subset_options)

In [10]:
import torch.nn as nn

class Cifar100CnnClassifier(nn.Module):
    """Small convolutional classifier: three conv stages followed by an MLP head.

    Each stage is built from 3x3 convolution + batch norm + ReLU units and
    ends with a 2x2 max-pool. The first linear layer takes ``256 * 4 * 4``
    features, so after three poolings the input is expected to be 32x32.

    Args:
        n_classes: number of output logits.
    """

    def __init__(self, n_classes):
        super().__init__()

        def conv_unit(c_in, c_out):
            # One "conv -> batch norm -> ReLU" unit; padding=1 keeps the spatial size.
            return [
                nn.Conv2d(in_channels=c_in, out_channels=c_out, kernel_size=3, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
            ]

        # Stage 1: 3 -> 32 -> 64 channels, then halve the resolution.
        self.conv1 = nn.Sequential(*conv_unit(3, 32), *conv_unit(32, 64), nn.MaxPool2d(2, 2))
        # Stage 2: 64 -> 128 -> 128 channels, then halve the resolution.
        self.conv2 = nn.Sequential(*conv_unit(64, 128), *conv_unit(128, 128), nn.MaxPool2d(2, 2))
        # Stage 3: 128 -> 256 channels, then halve the resolution.
        self.conv3 = nn.Sequential(*conv_unit(128, 256), nn.MaxPool2d(2, 2))

        self.flatten = nn.Flatten()
        # Classification head with dropout after each hidden layer.
        head_layers = [
            nn.Linear(256 * 4 * 4, 1024),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, n_classes),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return the class logits for a batch of images ``x``."""
        features = self.conv3(self.conv2(self.conv1(x)))
        return self.fc(self.flatten(features))

In [11]:
# Number of samples per mini-batch for every data loader below.
BATCH_SIZE = 64

In [12]:
from torch.utils.data import DataLoader

# Settings shared by both loaders.
_loader_options = {"batch_size": BATCH_SIZE, "num_workers": 2}

# Training batches are reshuffled every epoch.
train_dataloader = DataLoader(dataset=cifar100_train, shuffle=True, **_loader_options)

# Test batches keep the dataset order.
test_dataloader = DataLoader(dataset=cifar100_test, shuffle=False, **_loader_options)

In [28]:
# Prefer the GPU when CUDA is available, otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
DEVICE = torch.device(_device_name)
print(f"Using device: {DEVICE}")

Using device: cuda


In [29]:
def train(model, loader, criterion, optimizer, device=None):
    """Run one optimisation epoch over ``loader`` and report loss and accuracy.

    Args:
        model: network to optimise; it is switched to training mode.
        loader: iterable of ``(images, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``. It is
            assumed to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        optimizer: optimiser that updates ``model``'s parameters.
        device: device the batches are moved to. ``None`` (the default) means
            the module-level ``DEVICE``, looked up when the function is
            called rather than when it is defined.

    Returns:
        Tuple ``(avg_loss, acc)``: mean loss per sample and accuracy in percent.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    if device is None:
        device = DEVICE

    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight the batch mean by the batch size so that a shorter last
        # batch does not count as much as a full one.
        batch_size = labels.size(0)
        loss_sum += loss.item() * batch_size
        predicted = outputs.argmax(dim=1)
        total += batch_size
        correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("loader yielded no samples")

    avg_loss = loss_sum / total
    acc = 100 * correct / total

    return avg_loss, acc

In [30]:
def evaluate(model, loader, criterion, device=None):
    """Measure loss and accuracy of ``model`` on ``loader`` without updating it.

    Args:
        model: network to evaluate; it is switched to evaluation mode.
        loader: iterable of ``(images, labels)`` batches.
        criterion: loss callable applied to ``(outputs, labels)``. It is
            assumed to return the mean loss of the batch (the default
            reduction of ``nn.CrossEntropyLoss``).
        device: device the batches are moved to. ``None`` (the default) means
            the module-level ``DEVICE``, looked up when the function is
            called rather than when it is defined.

    Returns:
        Tuple ``(avg_loss, acc)``: mean loss per sample and accuracy in percent.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    if device is None:
        device = DEVICE

    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight the batch mean by the batch size so that a shorter last
            # batch does not count as much as a full one.
            batch_size = labels.size(0)
            loss_sum += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)
            total += batch_size
            correct += (predicted == labels).sum().item()

    if total == 0:
        raise ValueError("loader yielded no samples")

    avg_loss = loss_sum / total
    acc = 100 * correct / total

    return avg_loss, acc

In [31]:
# Folder for the saved model weights.
TASK_2_MODEL_OUTPUT = os.path.join(TASK_2_PATH_TO_DATA, "model")
# Create it up front (no error if it already exists) so the checkpoint can be written later.
os.makedirs(TASK_2_MODEL_OUTPUT, exist_ok=True)

In [27]:
# Learning rate passed to the Adam optimiser.
LEARNING_RATE = 1e-3
# Number of passes over the training loader.
N_EPOCHS = 40

In [32]:
import time
import torch.optim as optim

# Fresh network, loss and optimiser for the selected classes.
model = Cifar100CnnClassifier(n_classes=len(SUB_CLASSES)).to(DEVICE)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)

# The best weights seen so far are always written to the same file.
best_model_path = os.path.join(TASK_2_MODEL_OUTPUT, "best_model.pth")

start_time = time.time()
best_acc = 0.0

# NOTE(review): the checkpoint is selected by accuracy on the test split, so a
# test accuracy reported later for that checkpoint is optimistically biased.
for epoch in range(N_EPOCHS):
    epoch_start_time = time.time()
    print(f"--------------Epoch {epoch + 1}/{N_EPOCHS} started--------------")

    train_loss, train_acc = train(model, train_dataloader, criterion, optimizer)
    test_loss, test_acc = evaluate(model, test_dataloader, criterion)

    report_lines = [
        f"Epoch {epoch + 1}/{N_EPOCHS}",
        f"Train loss: {train_loss:.4f} | Train accuracy: {train_acc:.2f}%",
        f"Test loss: {test_loss:.4f} | Test accuracy: {test_acc:.2f}%",
        f"Time elapsed: {time.time() - epoch_start_time:.2f} sec",
    ]
    print("\n".join(report_lines))

    # Only a strict improvement replaces the stored checkpoint.
    if test_acc <= best_acc:
        continue
    best_acc = test_acc
    torch.save(model.state_dict(), best_model_path)

print(f"Training time: {time.time() - start_time:.2f} sec")

--------------Epoch 1/40 started--------------
Epoch 1/40
Train loss: 1.1934 | Train accuracy: 47.73%
Test loss: 0.9404 | Test accuracy: 51.33%
Time elapsed: 1.29 sec
--------------Epoch 2/40 started--------------
Epoch 2/40
Train loss: 0.8328 | Train accuracy: 59.87%
Test loss: 1.1587 | Test accuracy: 46.67%
Time elapsed: 1.65 sec
--------------Epoch 3/40 started--------------
Epoch 3/40
Train loss: 0.7382 | Train accuracy: 68.53%
Test loss: 0.8905 | Test accuracy: 66.67%
Time elapsed: 1.90 sec
--------------Epoch 4/40 started--------------
Epoch 4/40
Train loss: 0.6716 | Train accuracy: 71.87%
Test loss: 0.6748 | Test accuracy: 67.67%
Time elapsed: 0.84 sec
--------------Epoch 5/40 started--------------
Epoch 5/40
Train loss: 0.6554 | Train accuracy: 70.27%
Test loss: 0.6993 | Test accuracy: 72.00%
Time elapsed: 0.80 sec
--------------Epoch 6/40 started--------------
Epoch 6/40
Train loss: 0.6175 | Train accuracy: 73.53%
Test loss: 0.5699 | Test accuracy: 77.67%
Time elapsed: 0.83 sec

In [48]:
def load_model(model, path, map_location="cpu"):
    """Load a saved ``state_dict`` from ``path`` into ``model`` and return the model.

    Args:
        model: module whose parameters and buffers are overwritten in place.
        path: file previously written with ``torch.save(model.state_dict(), ...)``.
        map_location: forwarded to ``torch.load``. The default ``"cpu"`` lets
            a checkpoint saved on a GPU be read on a machine without CUDA;
            ``load_state_dict`` then copies the values into ``model``'s own
            tensors, so the model stays on whatever device it already is.

    Returns:
        The same ``model`` object, with the loaded weights.
    """
    # NOTE: torch.load unpickles the file, so only load checkpoints from a trusted source.
    state_dict = torch.load(path, map_location=map_location)
    model.load_state_dict(state_dict)

    return model

# Restore the best checkpoint written during training and switch to inference mode.
checkpoint_path = os.path.join(TASK_2_MODEL_OUTPUT, "best_model.pth")
model = load_model(model, checkpoint_path).to(DEVICE)
model.eval()

# Separate loader over the test split, in dataset order.
final_test_dataloader = torch.utils.data.DataLoader(cifar100_test, batch_size=BATCH_SIZE, shuffle=False)

correct = 0
total = 0

# Count correct top-1 predictions over the whole test split.
with torch.no_grad():
    for images, labels in final_test_dataloader:
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        logits = model(images)
        predicted = torch.max(logits.data, 1)[1]

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

accuracy = 100 * correct / total
print(f'Accuracy of the network on the test images: {accuracy:.2f}%')

Accuracy of the network on the test images: 92.33%
