# YOLOv11 finetuning
Training run ~5min on Google Colab T5

In [None]:
!wget https://github.com/ultralytics/assets/releases/download/v8.3.0/yolo11n-cls.pt

In [None]:
!pip install ultralytics

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from ultralytics import YOLO

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define transforms for CIFAR-10
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                      download=True, transform=transform)
subset_indices = torch.randperm(len(trainset))[:20000]  # i have negative patience
trainset = Subset(trainset, subset_indices)

trainloader = DataLoader(trainset, batch_size=1024, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                     download=True, transform=transform)
subset_indices = torch.randperm(len(testset))[:500]
testset = Subset(testset, subset_indices)
testloader = DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Load YOLOv11 model
model = YOLO('yolo11n-cls.pt')
yolo_model = model.model

yolo_model.train()

# Freeze all layers except the last one
for param in yolo_model.parameters():
    param.requires_grad = False

yolo_model.model[10].linear = nn.Linear(1280, 10)

yolo_model = yolo_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(yolo_model.model[10].linear.parameters(), lr=0.002, betas=(0.9, 0.999))

num_epochs = 5

def train():
    for epoch in range(num_epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = yolo_model(inputs)
            loss = criterion(outputs, labels)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i%10 == 9:
                print(f'[{epoch + 1}, {i + 1}] loss: {running_loss / 10:.3f}')
                running_loss = 0.0
        evaluate()

def evaluate():
    correct = 0
    total = 0
    yolo_model.eval()
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = yolo_model(images)
            _, predicted = torch.max(outputs[0].data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    yolo_model.train()
    print(f'Accuracy on test images: {100 * correct / total}%')

if __name__ == "__main__":
    evaluate()
    train()

Files already downloaded and verified
Files already downloaded and verified
Accuracy on test images: 8.2%
[1, 10] loss: 1.524
[1, 20] loss: 0.823
Accuracy on test images: 78.2%
[2, 10] loss: 0.676
[2, 20] loss: 0.608
Accuracy on test images: 79.2%
[3, 10] loss: 0.559
[3, 20] loss: 0.548
Accuracy on test images: 82.4%
[4, 10] loss: 0.516
[4, 20] loss: 0.494
Accuracy on test images: 82.6%
[5, 10] loss: 0.476
[5, 20] loss: 0.475
Accuracy on test images: 83.8%
Finished Training
Accuracy on test images: 83.8%


In [2]:
torch.save(yolo_model.state_dict(), 'yolov11_cifar10.pth')

In [3]:
model = YOLO('yolo11n-cls.pt')
yolo_model = model.model
yolo_model.model[10].linear = nn.Linear(1280, 10)
yolo_model.load_state_dict(torch.load('yolov11_cifar10.pth', map_location=device))
yolo_model = yolo_model.to(device)
yolo_model.eval()
evaluate()

Accuracy on test images: 83.8%
