In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import timm  # Import timm library

In [2]:
# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Data loading and preprocessing
transform = transforms.Compose([
    transforms.Resize(224),  # Resize the images to 224x224 for ViT
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])


In [3]:
trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=64, shuffle=True)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=64, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Define the Vision Transformer model
model = timm.create_model('vit_small_patch16_224', pretrained=False, num_classes=10)
model = model.to(device)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
# Training the model
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if i % 20 == 19:  # Print every 200 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 200:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,    20] loss: 0.213
[1,    40] loss: 0.215
[1,    60] loss: 0.213
[1,    80] loss: 0.212
[1,   100] loss: 0.210
[1,   120] loss: 0.209
[1,   140] loss: 0.210
[1,   160] loss: 0.205
[1,   180] loss: 0.209
[1,   200] loss: 0.207
[1,   220] loss: 0.208
[1,   240] loss: 0.209
[1,   260] loss: 0.210
[1,   280] loss: 0.208
[1,   300] loss: 0.204
[1,   320] loss: 0.206
[1,   340] loss: 0.203
[1,   360] loss: 0.205
[1,   380] loss: 0.204
[1,   400] loss: 0.204
[1,   420] loss: 0.203
[1,   440] loss: 0.202
[1,   460] loss: 0.201
[1,   480] loss: 0.202
[1,   500] loss: 0.197
[1,   520] loss: 0.204
[1,   540] loss: 0.204
[1,   560] loss: 0.194
[1,   580] loss: 0.200
[1,   600] loss: 0.199
[1,   620] loss: 0.204
[1,   640] loss: 0.198
[1,   660] loss: 0.194
[1,   680] loss: 0.197
[1,   700] loss: 0.199
[1,   720] loss: 0.202
[1,   740] loss: 0.202
[1,   760] loss: 0.193
[1,   780] loss: 0.194
[2,    20] loss: 0.201
[2,    40] loss: 0.202
[2,    60] loss: 0.196
[2,    80] loss: 0.200
[2,   100] 

[10,   120] loss: 0.203
[10,   140] loss: 0.202
[10,   160] loss: 0.205
[10,   180] loss: 0.203
[10,   200] loss: 0.204
[10,   220] loss: 0.198
[10,   240] loss: 0.202
[10,   260] loss: 0.204
[10,   280] loss: 0.204
[10,   300] loss: 0.202
[10,   320] loss: 0.203
[10,   340] loss: 0.206
[10,   360] loss: 0.208
[10,   380] loss: 0.200
[10,   400] loss: 0.204
[10,   420] loss: 0.204
[10,   440] loss: 0.203
[10,   460] loss: 0.200
[10,   480] loss: 0.205
[10,   500] loss: 0.204
[10,   520] loss: 0.203
[10,   540] loss: 0.200
[10,   560] loss: 0.201
[10,   580] loss: 0.201
[10,   600] loss: 0.203
[10,   620] loss: 0.203
[10,   640] loss: 0.201
[10,   660] loss: 0.201
[10,   680] loss: 0.206
[10,   700] loss: 0.203
[10,   720] loss: 0.204
[10,   740] loss: 0.199


In [None]:
# Evaluating the model
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy of the network on the 10000 test images: {100 * correct / total:.2f} %')