In [None]:
pip install timm #needed when run in Google Colab

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.9.2-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m78.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.15.1 safetensors-0.3.1 timm-0.9.2


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
import timm
import random
import numpy as np

In [None]:
# Experiment configuration read by the training cell below.
model_num = 5 # number of models to train; model k is trained with random seed k (0 .. model_num-1)
total_epoch = 55 # number of training epochs run for each model
lr = 0.01 # initial learning rate given to the optimizer; also embedded in the checkpoint file name

In [None]:
# Mount Google Drive at /content/drive; the training cell writes its
# checkpoints underneath this mount point, so this must run first.
from google.colab import drive # Colab-only helper for mounting Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# ---------------------------------------------------------------------------
# Seed-independent setup: built once and shared by every model trained below.
# None of these steps draws random numbers, so hoisting them out of the seed
# loop does not change what each seeded run computes.
# ---------------------------------------------------------------------------

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

# Training-time preprocessing: upscale the CIFAR-10 images to 256, take a random
# 224x224 crop, then apply flip / AutoAugment / RandomErasing augmentation.
transform_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.AutoAugment(transforms.AutoAugmentPolicy.CIFAR10),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    transforms.RandomErasing()  # operates on tensors, so it must come after ToTensor
])

# Evaluation-time preprocessing: deterministic resize + centre crop, same normalisation.
transform_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the CIFAR-10 dataset (downloaded into ./data on first use).
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)


def train(model, loader, criterion, optimizer, device, epoch):
    """Run one training epoch over `loader`, updating `model` in place.

    Args:
        model: network to optimise; switched to training mode here.
        loader: iterable yielding `(inputs, labels)` batches.
        criterion: loss function called as `criterion(outputs, labels)`.
        optimizer: optimizer stepping `model`'s parameters.
        device: device the batches are moved to before the forward pass.
        epoch: zero-based epoch index, used only for the progress message.

    Prints the mean loss of every 100 consecutive batches. Batches left over
    after the last full group of 100 are trained on but not reported.
    """
    model.train()
    running_loss = 0.0

    for i, (inputs, labels) in enumerate(loader):
        inputs, labels = inputs.to(device), labels.to(device)  # Move the input data to the device
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        if i % 100 == 99:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0


def test(model, loader, device):
    """Evaluate `model` on `loader` and report top-1 accuracy.

    Args:
        model: network to evaluate; switched to eval mode here.
        loader: iterable yielding `(images, labels)` batches.
        device: device the batches are moved to before the forward pass.

    Returns:
        Accuracy in percent (0-100) over all samples seen. The same value is
        printed.
    """
    model.eval()

    correct = 0
    total = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)  # Move the input data to the device
            outputs = model(images)
            # Index of the largest logit along the class dimension is the predicted class.
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print('Accuracy of the network on the 10000 test images: %f %%' % accuracy)
    return accuracy


# ---------------------------------------------------------------------------
# Train `model_num` independent models; model k uses random seed k.
# ---------------------------------------------------------------------------
for s in range(model_num):
    # Fix every random source before anything stochastic (classifier-head
    # initialisation, shuffling, augmentation) happens for this model.
    seed_number = s
    random.seed(seed_number)
    np.random.seed(seed_number)
    torch.manual_seed(seed_number)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    # Fresh loaders per model so each run starts from its own seeded state.
    # NOTE(review): 16 workers may exceed the CPU count of the runtime — confirm or lower.
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=16)
    testloader = torch.utils.data.DataLoader(testset, batch_size=100, shuffle=False, num_workers=16)

    # ResNet-18 with pre-trained weights and a 10-class output layer.
    model = timm.create_model('resnet18', pretrained=True, num_classes=10)
    model = model.to(device)  # Move the model to the selected device

    # Loss, optimizer, and a scheduler that multiplies the learning rate by 0.1 every 30 epochs.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adamax(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)

    # Train the model, evaluating on the test split after every epoch.
    for epoch in range(total_epoch):
        train(model, trainloader, criterion, optimizer, device, epoch)
        test(model, testloader, device)
        scheduler.step()

    print('Finished Training')

    # Save the weights of the final epoch; requires Google Drive to be mounted at /content/drive.
    PATH = '/content/drive/My Drive/Colab Notebooks/resnet18_cifar10_%f_%d.pth' % (lr, seed_number)
    torch.save(model.state_dict(), PATH)

cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:01<00:00, 91500693.39it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data




Files already downloaded and verified


Downloading model.safetensors:   0%|          | 0.00/46.8M [00:00<?, ?B/s]

[1,   100] loss: 1.341
[1,   200] loss: 0.877
[1,   300] loss: 0.733
Accuracy of the network on the 10000 test images: 86.550000 %
[2,   100] loss: 0.611
[2,   200] loss: 0.574
[2,   300] loss: 0.543
Accuracy of the network on the 10000 test images: 87.580000 %
[3,   100] loss: 0.489
[3,   200] loss: 0.486
[3,   300] loss: 0.483
Accuracy of the network on the 10000 test images: 90.170000 %
[4,   100] loss: 0.459
[4,   200] loss: 0.440
[4,   300] loss: 0.435
Accuracy of the network on the 10000 test images: 90.560000 %
[5,   100] loss: 0.409
[5,   200] loss: 0.404
[5,   300] loss: 0.406
Accuracy of the network on the 10000 test images: 91.640000 %
[6,   100] loss: 0.376
[6,   200] loss: 0.377
[6,   300] loss: 0.375
Accuracy of the network on the 10000 test images: 92.730000 %
[7,   100] loss: 0.344
[7,   200] loss: 0.359
[7,   300] loss: 0.355
Accuracy of the network on the 10000 test images: 90.820000 %
[8,   100] loss: 0.331
[8,   200] loss: 0.337
[8,   300] loss: 0.351
Accuracy of th