<a href="https://colab.research.google.com/github/saltysallysmine/MIPT-CV-Homeworks/blob/main/CV_HW_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Computer Vision. Homework 1. Тренировочный цикл и linear probe на ViT-Tiny

Датасет: CIFAR-100

In [61]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset, random_split
from torch.utils.tensorboard import SummaryWriter
import numpy as np
import random

In [62]:
# SEEDS #
seed = 124
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [63]:
# CONSTANTS #
CIFAR_MEAN = (0.5071, 0.4867, 0.4408)
# CIFAR_STD = (0.2675, 0.2565, 0.2761)
CIFAR_STD = (0.2470, 0.2435, 0.2616)

LR = 1e-3

In [64]:
# 5 классов выбора
selected_classes = [0, 1, 2, 3, 4]

# Трансформации с аугментациями
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(32),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
])
val_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(CIFAR_MEAN, CIFAR_STD),
])

In [65]:
# DATASET #

full_train = datasets.CIFAR100(root='./data', train=True, download=True, transform=train_transform)

# Фильтрация по выбранным классам
indices = [i for i, label in enumerate(full_train.targets) if label in selected_classes]
subset = Subset(full_train, indices)

train_size = int(0.8 * len(subset))
val_size = len(subset) - train_size
train_subset, val_subset = random_split(subset, [train_size, val_size])

val_subset.dataset.transform = val_transform

# Даталоадеры
train_loader = DataLoader(train_subset, batch_size=64, shuffle=True, num_workers=2)
val_loader = DataLoader(val_subset, batch_size=64, shuffle=False, num_workers=2)

In [66]:
print(train_size, val_size)

2000 500


In [67]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes=5):
        super().__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, padding=1),  # 32x32x32
            nn.LeakyReLU(),
            nn.MaxPool2d(2),                             # 32x16x16
            nn.Conv2d(32, 64, kernel_size=3, padding=1), # 64x16x16
            nn.LeakyReLU(),
            nn.MaxPool2d(2),                             # 64x8x8
            nn.Conv2d(64, 128, kernel_size=3, padding=1),# 128x8x8
            nn.LeakyReLU(),
            nn.MaxPool2d(2),                             # 128x4x4
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128*4*4, 256),
            nn.LeakyReLU(),
            nn.Linear(256, num_classes),
        )

    def forward(self, x):
        x = self.features(x)
        x = self.classifier(x)
        return x

In [74]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [75]:
def validate(model, epoch):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    model.eval()
    running_loss = 0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            running_loss += loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    acc = correct / total * 100.0
    avg_loss = running_loss / len(val_loader)
    print(f"Epoch {epoch}, Val loss: {avg_loss:.4f}, Accuracy: {acc:.2f}%")

    writer.add_scalar('Val/Loss', avg_loss, epoch)
    writer.add_scalar('Val/Accuracy', acc, epoch)

In [76]:
# TensorBoard логгер
writer = SummaryWriter('runs/cifar100_simple_cnn')

def sanity_check(model):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    model.train()
    for i, (images, labels) in enumerate(train_loader):
        if i > 35:
            break
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        print(f'Sanity check batch {i}, loss: {loss.item():.4f}')
        # validate(i)

model = SimpleCNN(num_classes=len(selected_classes)).to(device)
sanity_check(model)
validate(model, len(train_loader))

Sanity check batch 0, loss: 1.6123
Sanity check batch 1, loss: 1.5639
Sanity check batch 2, loss: 1.6870
Sanity check batch 3, loss: 1.5574
Sanity check batch 4, loss: 1.6079
Sanity check batch 5, loss: 1.5413
Sanity check batch 6, loss: 1.5277
Sanity check batch 7, loss: 1.4804
Sanity check batch 8, loss: 1.3927
Sanity check batch 9, loss: 1.5398
Sanity check batch 10, loss: 1.3808
Sanity check batch 11, loss: 1.4129
Sanity check batch 12, loss: 1.2783
Sanity check batch 13, loss: 1.2901
Sanity check batch 14, loss: 1.3628
Sanity check batch 15, loss: 1.3661
Sanity check batch 16, loss: 1.3550
Sanity check batch 17, loss: 1.2543
Sanity check batch 18, loss: 1.2968
Sanity check batch 19, loss: 1.4190
Sanity check batch 20, loss: 1.3337
Sanity check batch 21, loss: 1.3424
Sanity check batch 22, loss: 1.2490
Sanity check batch 23, loss: 1.3561
Sanity check batch 24, loss: 1.1820
Sanity check batch 25, loss: 1.1241
Sanity check batch 26, loss: 1.3438
Sanity check batch 27, loss: 1.2382
Sa

In [23]:
def train_epoch(model, epoch):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=LR)

    model.train()
    running_loss = 0
    correct = 0
    total = 0
    for batch_idx, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        # Логируем гистограммы весов и градиентов для первых слоев
        for name, param in model.named_parameters():
            writer.add_histogram(name, param, epoch)
            if param.grad is not None:
                writer.add_histogram(name + '/grad', param.grad, epoch)

    acc = correct / total * 100.0
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch}, Train loss: {avg_loss:.4f}, Accuracy: {acc:.2f}%")

    writer.add_scalar('Train/Loss', avg_loss, epoch)
    writer.add_scalar('Train/Accuracy', acc, epoch)
    writer.add_scalar('Train/Learning_Rate', optimizer.param_groups[0]['lr'], epoch)

In [25]:
# MAIN CODE #

num_epochs = 10
for epoch in range(1, num_epochs + 1):
    train_epoch(model, epoch)
    validate(model, epoch)

writer.close()

Epoch 1, Train loss: 1.1228, Accuracy: 53.55%
Epoch 1, Val loss: 1.1290, Accuracy: 51.80%
Epoch 2, Train loss: 0.9875, Accuracy: 59.55%
Epoch 2, Val loss: 0.9597, Accuracy: 60.20%
Epoch 3, Train loss: 0.9300, Accuracy: 62.35%
Epoch 3, Val loss: 0.9265, Accuracy: 61.80%
Epoch 4, Train loss: 0.9156, Accuracy: 61.65%
Epoch 4, Val loss: 0.8714, Accuracy: 65.40%
Epoch 5, Train loss: 0.8760, Accuracy: 64.65%
Epoch 5, Val loss: 0.8973, Accuracy: 64.80%
Epoch 6, Train loss: 0.8499, Accuracy: 65.85%
Epoch 6, Val loss: 0.8590, Accuracy: 65.40%
Epoch 7, Train loss: 0.8352, Accuracy: 66.35%
Epoch 7, Val loss: 0.8476, Accuracy: 67.60%
Epoch 8, Train loss: 0.7921, Accuracy: 68.35%
Epoch 8, Val loss: 0.8784, Accuracy: 67.60%
Epoch 9, Train loss: 0.7701, Accuracy: 68.70%
Epoch 9, Val loss: 0.8763, Accuracy: 65.40%
Epoch 10, Train loss: 0.7700, Accuracy: 68.45%
Epoch 10, Val loss: 0.8390, Accuracy: 66.60%
