In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader

# Select the compute device: CUDA when it is available, otherwise the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Data transforms.
# Both pipelines end with the same tensor conversion and per-channel
# normalization (mean 0.5, std 0.5); only the training pipeline puts a random
# 32-pixel crop (after 4 pixels of padding) and a random horizontal flip first.
_CHANNEL_MEAN = [0.5, 0.5, 0.5]
_CHANNEL_STD = [0.5, 0.5, 0.5]

transform_train = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)

transform_val = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ]
)

# Datasets and data loaders (CIFAR-10 train split for training, the
# train=False split for validation). Only the training loader shuffles.
_DATA_ROOT = './data'
_LOADER_OPTIONS = dict(batch_size=64, num_workers=2)

train_dataset = datasets.CIFAR10(
    root=_DATA_ROOT, train=True, transform=transform_train, download=True
)
val_dataset = datasets.CIFAR10(
    root=_DATA_ROOT, train=False, transform=transform_val, download=True
)

train_loader = DataLoader(train_dataset, shuffle=True, **_LOADER_OPTIONS)
val_loader = DataLoader(val_dataset, shuffle=False, **_LOADER_OPTIONS)

# Model definitions: two untrained ResNets with a 10-class output.
# The small model is built first so the weight-initialisation order is stable.
small_model = models.resnet18(pretrained=False, num_classes=10)
small_model = small_model.to(device)
large_model = models.resnet50(pretrained=False, num_classes=10)
large_model = large_model.to(device)

# Loss function and one Adam optimizer per model
criterion = nn.CrossEntropyLoss()
optimizer_small = optim.Adam(small_model.parameters(), lr=0.001)
optimizer_large = optim.Adam(large_model.parameters(), lr=0.001)

# Model training
def train_model(model, optimizer, epochs, dataloader, name):
    """Train ``model`` on ``dataloader`` for ``epochs`` epochs.

    Uses the module-level ``criterion`` as the loss and moves every batch to
    the module-level ``device``. After each epoch the mean of the per-batch
    losses is printed, prefixed with ``name``.

    Args:
        model: Network to optimise; it is switched to training mode once,
            before the first epoch.
        optimizer: Optimizer stepped once per batch (expected to hold the
            parameters of ``model``; this is not checked here).
        epochs: Number of passes over ``dataloader``.
        dataloader: Iterable yielding ``(inputs, targets)`` batches; must
            support ``len()``.
        name: Label used in the progress line.
    """
    model.train()
    for epoch_number in range(1, epochs + 1):
        running_loss = 0
        for batch_inputs, batch_targets in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            optimizer.zero_grad()
            batch_loss = criterion(model(batch_inputs), batch_targets)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()

        # Average over batches, not over samples
        mean_loss = running_loss / len(dataloader)
        print(f"{name} - Epoch {epoch_number}/{epochs}, Loss: {mean_loss:.4f}")

# Model evaluation
def evaluate_model(model, dataloader, name):
    """Print the top-1 accuracy of ``model`` over ``dataloader``.

    The model is put in evaluation mode and run without gradient tracking.
    Batches are moved to the module-level ``device``. Nothing is returned;
    the accuracy is printed as a percentage prefixed with ``name``.

    Args:
        model: Network to evaluate (left in evaluation mode afterwards).
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        name: Label used in the printed result line.
    """
    model.eval()
    num_correct = 0
    num_seen = 0
    with torch.no_grad():
        for batch_inputs, batch_targets in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            # Index of the largest output along dim 1 is the predicted class
            predicted = model(batch_inputs).max(dim=1).indices

            num_seen += batch_targets.shape[0]
            num_correct += predicted.eq(batch_targets).sum().item()

    print(f"{name} Accuracy: {100 * num_correct / num_seen:.2f}%")

# Generate "soft targets" from a model (the small model in this notebook)
def generate_soft_targets(model, dataloader):
    """Return the softmax outputs of ``model`` for every batch in ``dataloader``.

    The model is put in evaluation mode and run without gradient tracking.
    Inputs are moved to the module-level ``device``; each batch of softmax
    outputs (taken over dim 1) is moved back to the CPU.

    Args:
        model: Network whose outputs are converted to probabilities.
        dataloader: Iterable yielding ``(inputs, _)`` batches; the second
            element of each batch is ignored.

    Returns:
        A CPU tensor with the per-batch outputs concatenated along dim 0, in
        the order the dataloader produced them.
    """
    model.eval()
    with torch.no_grad():
        per_batch_probs = [
            torch.softmax(model(batch_inputs.to(device)), dim=1).cpu()
            for batch_inputs, _ in dataloader
        ]
    return torch.cat(per_batch_probs, dim=0)

# Training with distillation
def train_with_distillation(large_model, small_model, dataloader, optimizer, epochs, alpha=0.5):
    """Train ``large_model`` against labels and ``small_model``'s soft targets.

    For every batch the loss is
    ``alpha * distillation_loss + (1 - alpha) * classification_loss`` where
    the distillation term is the mean squared difference between the two
    models' softmax outputs and the classification term is the module-level
    ``criterion`` applied to ``large_model``'s outputs. Batches are moved to
    the module-level ``device``. The mean per-batch loss is printed after
    each epoch.

    Args:
        large_model: Network being trained (set to training mode).
        small_model: Network providing soft targets; set to evaluation mode
            and run without gradient tracking, so it is not updated here.
        dataloader: Iterable yielding ``(inputs, targets)`` batches; must
            support ``len()``.
        optimizer: Optimizer stepped once per batch (expected to hold the
            parameters of ``large_model``; this is not checked here).
        epochs: Number of passes over ``dataloader``.
        alpha: Weight of the distillation term; the classification term gets
            ``1 - alpha``.
    """
    large_model.train()
    small_model.eval()
    for epoch_number in range(1, epochs + 1):
        running_loss = 0
        for batch_inputs, batch_targets in dataloader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)
            optimizer.zero_grad()

            # Soft targets come from the small model; no gradients are needed
            with torch.no_grad():
                reference_probs = torch.softmax(small_model(batch_inputs), dim=1)

            logits = large_model(batch_inputs)
            predicted_probs = torch.softmax(logits, dim=1)

            # Combined loss: squared gap between distributions + label loss
            squared_gap = (predicted_probs - reference_probs) ** 2
            distillation_loss = squared_gap.mean()
            classification_loss = criterion(logits, batch_targets)
            loss = alpha * distillation_loss + (1 - alpha) * classification_loss

            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # Average over batches, not over samples
        mean_loss = running_loss / len(dataloader)
        print(f"Distillation - Epoch {epoch_number}/{epochs}, Loss: {mean_loss:.4f}")

# Pipeline
_NUM_EPOCHS = 20

# 1) Train and evaluate the small model on its own
print("Training small model...")
train_model(
    small_model,
    optimizer_small,
    epochs=_NUM_EPOCHS,
    dataloader=train_loader,
    name="Small Model",
)

print("Evaluating small model...")
evaluate_model(small_model, val_loader, name="Small Model")

# 2) Train the large model using the trained small model's soft targets
print("Training large model with distillation...")
train_with_distillation(
    large_model,
    small_model,
    train_loader,
    optimizer_large,
    epochs=_NUM_EPOCHS,
)

print("Evaluating large model (with distillation)...")
evaluate_model(large_model, val_loader, name="Large Model (Distillation)")

# 3) Baseline: a fresh large model trained without distillation
large_model_without_distillation = models.resnet50(pretrained=False, num_classes=10)
large_model_without_distillation = large_model_without_distillation.to(device)
optimizer_large_without_distillation = optim.Adam(
    large_model_without_distillation.parameters(),
    lr=0.001,
)

print("Training large model without distillation...")
train_model(
    large_model_without_distillation,
    optimizer_large_without_distillation,
    epochs=_NUM_EPOCHS,
    dataloader=train_loader,
    name="Large Model (No Distillation)",
)

print("Evaluating large model (without distillation)...")
evaluate_model(
    large_model_without_distillation,
    val_loader,
    name="Large Model (No Distillation)",
)


Using device: cuda
Files already downloaded and verified
Files already downloaded and verified




Training small model...
Small Model - Epoch 1/20, Loss: 1.5388
Small Model - Epoch 2/20, Loss: 1.1813
Small Model - Epoch 3/20, Loss: 1.0212
Small Model - Epoch 4/20, Loss: 0.9223
Small Model - Epoch 5/20, Loss: 0.8462
Small Model - Epoch 6/20, Loss: 0.7902
Small Model - Epoch 7/20, Loss: 0.7430
Small Model - Epoch 8/20, Loss: 0.6997
Small Model - Epoch 9/20, Loss: 0.6756
Small Model - Epoch 10/20, Loss: 0.6397
Small Model - Epoch 11/20, Loss: 0.6103
Small Model - Epoch 12/20, Loss: 0.5959
Small Model - Epoch 13/20, Loss: 0.5736
Small Model - Epoch 14/20, Loss: 0.5493
Small Model - Epoch 15/20, Loss: 0.5249
Small Model - Epoch 16/20, Loss: 0.5107
Small Model - Epoch 17/20, Loss: 0.4962
Small Model - Epoch 18/20, Loss: 0.4911
Small Model - Epoch 19/20, Loss: 0.4654
Small Model - Epoch 20/20, Loss: 0.4545
Evaluating small model...
Small Model Accuracy: 81.94%
Training large model with distillation...
Distillation - Epoch 1/20, Loss: 1.0534
Distillation - Epoch 2/20, Loss: 0.9175
Distilla