In [1]:
%load_ext autoreload
%autoreload 2

import torch
import torch.nn as nn
from torchvision.models import resnet18
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import torch.nn.functional as F
import copy
import torch.optim as optim
import os, pathlib

base_path = pathlib.Path("/home/mpuscian/Desktop/repozytoria/MINI_projects/anvil/models/")
model_path = base_path.joinpath("cifar_model2.pth")
adaround_model_path = base_path.joinpath("adaround_model.pth")


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = resnet18(weights=None)
model.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False)
model.maxpool = nn.Identity()
model.fc = nn.Linear(512, 10)

# Model without quantization

In [11]:
print(f"Using device: {device}")

# 2. Transforms
transform_train = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
])

transform_test = transforms.Compose([
    transforms.ToTensor(),
])

# 3. Datasets
full_train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)

train_size = int(0.9 * len(full_train_dataset))  # 45,000
val_size = len(full_train_dataset) - train_size  # 5,000
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# 4. DataLoaders
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, num_workers=6)
val_dataset.dataset.transform = transform_test
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False, num_workers=6)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=6)

# 5. Model
cifar_model = copy.deepcopy(model)
cifar_model = cifar_model.to(device)

# 6. Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cifar_model.parameters(), lr=0.0001, weight_decay=5e-4)

# 7. Training with validation
best_val_acc = 0.0

for epoch in range(20):
    cifar_model.train()
    train_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = cifar_model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()

    # Validation
    cifar_model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, targets in val_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = cifar_model(inputs)
            _, predicted = outputs.max(1)
            val_correct += predicted.eq(targets).sum().item()
            val_total += targets.size(0)

    val_acc = 100.0 * val_correct / val_total
    print(f"[{epoch+1}/20] Loss: {train_loss/len(train_loader):.4f} | Val Accuracy: {val_acc:.2f}%")

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(cifar_model.state_dict(), model_path)
        print(f"✅ Nowy najlepszy model zapisany ({val_acc:.2f}%)")

# 8. Test best model
cifar_model.load_state_dict(torch.load(model_path))
cifar_model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        outputs = cifar_model(inputs)
        _, predicted = outputs.max(1)
        correct += predicted.eq(targets).sum().item()
        total += targets.size(0)

accuracy = 100 * correct / total
print(f"🎯 Dokładność na zbiorze testowym: {accuracy:.2f}%")
print(f"📦 Model zapisany do: {model_path}")

Using device: cuda
[1/20] Loss: 1.3887 | Val Accuracy: 55.14%
✅ Nowy najlepszy model zapisany (55.14%)
[2/20] Loss: 0.9243 | Val Accuracy: 64.82%
✅ Nowy najlepszy model zapisany (64.82%)
[3/20] Loss: 0.6023 | Val Accuracy: 59.34%
[4/20] Loss: 0.2969 | Val Accuracy: 66.20%
✅ Nowy najlepszy model zapisany (66.20%)
[5/20] Loss: 0.1183 | Val Accuracy: 63.60%
[6/20] Loss: 0.0643 | Val Accuracy: 64.16%
[7/20] Loss: 0.0503 | Val Accuracy: 63.00%
[8/20] Loss: 0.0639 | Val Accuracy: 62.18%
[9/20] Loss: 0.0859 | Val Accuracy: 65.62%
[10/20] Loss: 0.0624 | Val Accuracy: 65.60%
[11/20] Loss: 0.0401 | Val Accuracy: 66.40%
✅ Nowy najlepszy model zapisany (66.40%)
[12/20] Loss: 0.0393 | Val Accuracy: 65.02%
[13/20] Loss: 0.0578 | Val Accuracy: 64.48%
[14/20] Loss: 0.0470 | Val Accuracy: 64.72%
[15/20] Loss: 0.0475 | Val Accuracy: 60.56%
[16/20] Loss: 0.0433 | Val Accuracy: 63.92%
[17/20] Loss: 0.0362 | Val Accuracy: 64.06%
[18/20] Loss: 0.0483 | Val Accuracy: 66.40%
[19/20] Loss: 0.0458 | Val Accurac

# Adaround

In [2]:
import anvil
import anvil.adaround

adaround_model = copy.deepcopy(model)
adaround_model.load_state_dict(torch.load(model_path, map_location=device))

# Stwórz wejście testowe (np. batch 1 obrazka)
sample_input = torch.randn(1, 3, 32, 32)

# Kwantyzuj
adarounder = anvil.adaround.AdaRoundModelWrapper(adaround_model, sample_input)
adarounder.apply_adaround_to_conv_layers()
adarounder.save_model(adaround_model_path)

[AdaRound] Processing layer: conv1
final h_alpha: tensor([[[[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]],

         [[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]],

         [[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]]],


        [[[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]],

         [[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]],

         [[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]]],


        [[[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]],

         [[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5000]],

         [[0.5000, 0.5000, 0.5000],
          [0.5000, 0.5000, 0.5

KeyboardInterrupt: 