In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
from torchvision.models import resnet18
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
import copy
import torch.optim as optim
import os, pathlib

base_path = pathlib.Path("/home/mpuscian/Desktop/repozytoria/MINI_projects/anvil/models/")
model_path = base_path.joinpath("cifar_model3.pth")


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


In [2]:


def accuracy(outputs, labels):
    _, preds = torch.max(outputs, dim=1)
    return torch.tensor(torch.sum(preds == labels).item() / len(preds))



class ImageClassificationBase(nn.Module):
    def training_step(self, batch):
        images, labels = batch 
        out = self(images)                  # Generate predictions
        loss = F.cross_entropy(out, labels) # Calculate loss
        acc = accuracy(out, labels)  
        return loss,acc
    
    def validation_step(self, batch):
        images, labels = batch 
        out = self(images)                    # Generate predictions
        loss = F.cross_entropy(out, labels)   # Calculate loss
        acc = accuracy(out, labels)           # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc}
        
    def validation_epoch_end(self, outputs):
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}
    
    def epoch_end(self, epoch, result):
        print("Epoch [{}], train_loss: {:.4f}, train_acc: {:.4f}, val_loss: {:.4f}, val_acc: {:.4f}, last_lr: {:.5f}".format(
            epoch+1, result['train_loss'], result['train_accuracy'], result['val_loss'], result['val_acc'], result['lrs'][-1]))



def conv_block(in_channels, out_channels, pool=False):
    layers = [nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1), 
              nn.BatchNorm2d(out_channels), 
              nn.ReLU(inplace=True)]
    if pool: layers.append(nn.MaxPool2d(2))
    return nn.Sequential(*layers)

class ResNet9(ImageClassificationBase):
    def __init__(self, in_channels, num_classes):
        super().__init__()
        
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        
        self.conv3 = conv_block(128, 256, pool=True)
        self.conv4 = conv_block(256, 512, pool=True)
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        
        self.classifier = nn.Sequential(nn.AdaptiveMaxPool2d((1,1)), 
                                        nn.Flatten(), 
                                        nn.Dropout(0.2),
                                        nn.Linear(512, num_classes))
        
    def forward(self, xb):
        out = self.conv1(xb)
        out = self.conv2(out)
        out = self.res1(out) + out
        out = self.conv3(out)
        out = self.conv4(out)
        out = self.res2(out) + out
        out = self.classifier(out)
        return out

model = ResNet9(3, 10)

# Model without quantization

In [3]:
print(f"Using device: {device}")

# 2. Transforms
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
])

# 3. Datasets
full_train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_size = int(0.9 * len(full_train_dataset))  # 45,000
val_size = len(full_train_dataset) - train_size  # 5,000
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# 4. DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=6)
val_dataset.dataset.transform = transform_test
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=6)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=6)

# 5. Model
cifar_model = copy.deepcopy(model)
cifar_model = cifar_model.to(device)

# 6. Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cifar_model.parameters(), lr=0.0001, weight_decay=5e-4)

# 7. Training with validation
best_val_acc = 0.0

for epoch in range(20):
    cifar_model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = cifar_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validation
    cifar_model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = cifar_model(inputs)
            _, predicted = outputs.max(1)
            val_correct += predicted.eq(targets).sum().item()
            val_total += targets.size(0)

    val_acc = 100.0 * val_correct / val_total
    print(f"[{epoch+1}/20] Loss: {train_loss/len(train_loader):.4f} | Val Accuracy: {val_acc:.2f}%")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(cifar_model.state_dict(), model_path)
        print(f"✅ Nowy najlepszy model zapisany ({val_acc:.2f}%)")

# 8. Test best model
cifar_model.load_state_dict(torch.load(model_path))
cifar_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = cifar_model(inputs)
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += targets.size(0)

accuracy = 100 * correct / total
print(f"🎯 Dokładność na zbiorze testowym: {accuracy:.2f}%")
print(f"📦 Model zapisany do: {model_path}")

Using device: cuda
[1/20] Loss: 1.4100 | Val Accuracy: 66.24%
✅ Nowy najlepszy model zapisany (66.24%)
[2/20] Loss: 0.9040 | Val Accuracy: 71.20%
✅ Nowy najlepszy model zapisany (71.20%)
[3/20] Loss: 0.6724 | Val Accuracy: 73.18%
✅ Nowy najlepszy model zapisany (73.18%)
[4/20] Loss: 0.5247 | Val Accuracy: 75.92%
✅ Nowy najlepszy model zapisany (75.92%)
[5/20] Loss: 0.3949 | Val Accuracy: 71.10%
[6/20] Loss: 0.3007 | Val Accuracy: 78.94%
✅ Nowy najlepszy model zapisany (78.94%)
[7/20] Loss: 0.2119 | Val Accuracy: 73.86%
[8/20] Loss: 0.1537 | Val Accuracy: 80.76%
✅ Nowy najlepszy model zapisany (80.76%)
[9/20] Loss: 0.1086 | Val Accuracy: 78.04%
[10/20] Loss: 0.0817 | Val Accuracy: 80.56%
[11/20] Loss: 0.0691 | Val Accuracy: 76.72%
[12/20] Loss: 0.0681 | Val Accuracy: 74.96%
[13/20] Loss: 0.0570 | Val Accuracy: 79.72%
[14/20] Loss: 0.0538 | Val Accuracy: 76.54%
[15/20] Loss: 0.0550 | Val Accuracy: 78.98%
[16/20] Loss: 0.0500 | Val Accuracy: 75.56%
[17/20] Loss: 0.0449 | Val Accuracy: 74.