In [1]:
from intromlproject.utils.read_dataset import read_dataset, transform_dataset, get_data_loader

# Build the transform once, then read the three dataset splits with it.
# read_dataset / get_data_loader are project helpers; their exact behaviour
# is not visible from this notebook.
dataset_transform = transform_dataset()
train_data, val_data, test_data = read_dataset(
    "intromlproject/cub",
    dataset_transform,
)

# Wrap each split in a loader; every loader uses the same batch size.
train_loader, val_loader, test_loader = get_data_loader(
    train_data,
    val_data,
    test_data,
    batch_size=32,
)


  from .autonotebook import tqdm as notebook_tqdm


Read dataset successfully
Data loader successfully


In [2]:
# Loaders keyed by split name so that training/evaluation code can look them
# up by phase.
dataloaders = {
    'train': train_loader,
    'val': val_loader,
    'test': test_loader,
}

# Number of samples in the dataset behind each loader, under the same keys.
dataset_sizes = {
    split: len(loader.dataset)
    for split, loader in dataloaders.items()
}

In [3]:
# Class labels come from the training split; their count sizes the new
# classification head further down.
class_names = train_data.classes
num_classes = len(class_names)

print(class_names)
print(num_classes)

['001.Black_footed_Albatross', '002.Laysan_Albatross', '003.Sooty_Albatross', '004.Groove_billed_Ani', '005.Crested_Auklet', '006.Least_Auklet', '007.Parakeet_Auklet', '008.Rhinoceros_Auklet', '009.Brewer_Blackbird', '010.Red_winged_Blackbird', '011.Rusty_Blackbird', '012.Yellow_headed_Blackbird', '013.Bobolink', '014.Indigo_Bunting', '015.Lazuli_Bunting', '016.Painted_Bunting', '017.Cardinal', '018.Spotted_Catbird', '019.Gray_Catbird', '020.Yellow_breasted_Chat', '021.Eastern_Towhee', '022.Chuck_will_Widow', '023.Brandt_Cormorant', '024.Red_faced_Cormorant', '025.Pelagic_Cormorant', '026.Bronzed_Cowbird', '027.Shiny_Cowbird', '028.Brown_Creeper', '029.American_Crow', '030.Fish_Crow', '031.Black_billed_Cuckoo', '032.Mangrove_Cuckoo', '033.Yellow_billed_Cuckoo', '034.Gray_crowned_Rosy_Finch', '035.Purple_Finch', '036.Northern_Flicker', '037.Acadian_Flycatcher', '038.Great_Crested_Flycatcher', '039.Least_Flycatcher', '040.Olive_sided_Flycatcher', '041.Scissor_tailed_Flycatcher', '042.Ver

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
import timm
import copy

In [5]:
# Instantiate the timm architecture named below with pretrained weights.
model_name = 'vit_base_patch16_224'
model = timm.create_model(
    model_name,
    pretrained=True,
)

In [17]:
# List every parameter tensor of the model together with its shape.
for name, param in model.named_parameters():
    print(f'{name} {param.shape}')

cls_token torch.Size([1, 1, 768])
pos_embed torch.Size([1, 197, 768])
patch_embed.proj.weight torch.Size([768, 3, 16, 16])
patch_embed.proj.bias torch.Size([768])
blocks.0.norm1.weight torch.Size([768])
blocks.0.norm1.bias torch.Size([768])
blocks.0.attn.qkv.weight torch.Size([2304, 768])
blocks.0.attn.qkv.bias torch.Size([2304])
blocks.0.attn.proj.weight torch.Size([768, 768])
blocks.0.attn.proj.bias torch.Size([768])
blocks.0.norm2.weight torch.Size([768])
blocks.0.norm2.bias torch.Size([768])
blocks.0.mlp.fc1.weight torch.Size([3072, 768])
blocks.0.mlp.fc1.bias torch.Size([3072])
blocks.0.mlp.fc2.weight torch.Size([768, 3072])
blocks.0.mlp.fc2.bias torch.Size([768])
blocks.1.norm1.weight torch.Size([768])
blocks.1.norm1.bias torch.Size([768])
blocks.1.attn.qkv.weight torch.Size([2304, 768])
blocks.1.attn.qkv.bias torch.Size([2304])
blocks.1.attn.proj.weight torch.Size([768, 768])
blocks.1.attn.proj.bias torch.Size([768])
blocks.1.norm2.weight torch.Size([768])
blocks.1.norm2.bias to

In [7]:
# Freeze every parameter currently in the model so that no gradients are
# computed for them.
model.requires_grad_(False)

TypeError: object of type 'generator' has no len()

In [8]:
# Replace the classification head with a fresh linear layer that has one
# output per class, and make sure its parameters are trainable.
head_in_features = model.head.in_features
model.head = nn.Linear(in_features=head_in_features, out_features=num_classes)
model.head.requires_grad_(True)

In [19]:
# Tally parameter element counts in a single pass, split by whether the
# parameter will receive gradients.
total_params = 0
trainable_params = 0
for param in model.parameters():
    n_elements = param.numel()
    total_params += n_elements
    if param.requires_grad:
        trainable_params += n_elements
frozen_params = total_params - trainable_params

print(f'Total parameters: {total_params}')
print(f'Trainable parameters: {trainable_params}')
print(f'Frozen parameters: {frozen_params}')

Total parameters: 85952456
Trainable parameters: 153800
Frozen parameters: 85798656


In [9]:
# Cross-entropy loss for classification.
criterion = nn.CrossEntropyLoss()

# Hand the optimizer only the parameters that still require gradients.
trainable_parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_parameters, lr=1e-4)

In [10]:
# Use the first CUDA device when one is available, otherwise the CPU, and
# move the model there.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model = model.to(device)

In [11]:
# Number of training epochs passed to train_model.
# NOTE(review): the training log recorded in this notebook shows only 5 epochs
# (Epoch 0/4 .. 4/4), so this value appears to have been edited after the last
# run -- confirm the intended setting and re-run the notebook.
num_epochs = 10

In [12]:
def train_model(model, criterion, optimizer, num_epochs=10, loaders=None):
    """Train ``model`` and return it with the best-validation weights loaded.

    Every epoch runs a 'train' phase (gradients enabled, optimizer stepped
    after each batch) followed by a 'val' phase (gradients disabled). The
    weights from the epoch with the highest validation accuracy are restored
    into ``model`` before it is returned.

    Args:
        model: ``torch.nn.Module`` to train. Batches are moved to the device
            of its first parameter (CPU if the model has no parameters).
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. The loss is assumed to be the batch mean (the default
            reduction of ``nn.CrossEntropyLoss``), since it is re-weighted by
            the batch size when accumulated.
        optimizer: optimizer whose ``zero_grad()`` / ``step()`` are called once
            per training batch.
        num_epochs: number of train+val passes.
        loaders: optional mapping with ``'train'`` and ``'val'`` entries, each
            an iterable of ``(inputs, labels)`` batches. When omitted, the
            notebook-level ``dataloaders`` dict is used.

    Returns:
        The same ``model`` object that was passed in.

    Raises:
        ValueError: if a phase yields no samples, so no metric can be computed.
    """
    if loaders is None:
        # Fall back to the notebook-level loader dict for existing callers.
        loaders = dataloaders

    # Follow the model's own device instead of relying on a global variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            is_train = phase == 'train'
            model.train(is_train)  # train(False) is equivalent to eval()

            running_loss = 0.0
            running_corrects = 0
            num_samples = 0

            for inputs, labels in loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                batch_size = inputs.size(0)

                with torch.set_grad_enabled(is_train):
                    if is_train:
                        # Gradients only exist (and need clearing) when training.
                        optimizer.zero_grad()

                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    if is_train:
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels).item()
                num_samples += batch_size

            if num_samples == 0:
                raise ValueError(f"'{phase}' loader yielded no samples")

            # Normalise by the samples actually seen, so the metrics stay
            # correct even if the loader does not cover the whole dataset.
            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects / num_samples

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model


In [13]:
# Run the training loop. train_model returns the same model object it was
# given, so model_ft and model refer to one network.
model_ft = train_model(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
)

Epoch 0/4
----------
train Loss: 4.4347 Acc: 0.1623
val Loss: 3.3686 Acc: 0.3994

Epoch 1/4
----------
train Loss: 2.6611 Acc: 0.5383
val Loss: 2.1796 Acc: 0.6221

Epoch 2/4
----------
train Loss: 1.7800 Acc: 0.6999
val Loss: 1.6333 Acc: 0.7139

Epoch 3/4
----------
train Loss: 1.3316 Acc: 0.7819
val Loss: 1.3315 Acc: 0.7468

Epoch 4/4
----------
train Loss: 1.0711 Acc: 0.8224
val Loss: 1.1530 Acc: 0.7697

Best val Acc: 0.769653


In [14]:
def evaluate_model(model, dataloader, criterion):
    """Evaluate ``model`` on ``dataloader`` and report mean loss and accuracy.

    The model is switched to eval mode and run without gradient tracking.
    Metrics are averaged over the samples actually yielded by ``dataloader``,
    so the function is correct for any loader, not only the test split.

    Args:
        model: ``torch.nn.Module`` to evaluate. Batches are moved to the device
            of its first parameter (CPU if the model has no parameters).
        dataloader: iterable of ``(inputs, labels)`` tensor batches.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. The loss is assumed to be the batch mean (the default
            reduction of ``nn.CrossEntropyLoss``), since it is re-weighted by
            the batch size when accumulated.

    Returns:
        ``(total_loss, total_acc)``: the mean loss as a Python float and the
        accuracy as a 0-dim double tensor.

    Raises:
        ValueError: if ``dataloader`` yields no samples.
    """
    model.eval()

    # Follow the model's own device instead of relying on a global variable.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    running_loss = 0.0
    running_corrects = 0
    num_samples = 0

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            batch_size = inputs.size(0)

            outputs = model(inputs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            running_loss += loss.item() * batch_size
            running_corrects += torch.sum(preds == labels)
            num_samples += batch_size

    if num_samples == 0:
        raise ValueError('dataloader yielded no samples to evaluate')

    # Divide by the number of samples seen rather than a fixed dataset size.
    total_loss = running_loss / num_samples
    total_acc = running_corrects.double() / num_samples

    print(f'Test Loss: {total_loss:.4f} Acc: {total_acc:.4f}')

    return total_loss, total_acc

In [15]:
# Score the trained model on the held-out test loader.
test_loss, test_acc = evaluate_model(
    model=model_ft,
    dataloader=dataloaders['test'],
    criterion=criterion,
)

Test Loss: 1.1378 Acc: 0.7717


In [16]:
# Persist the fine-tuned weights. The epoch count in the file name is taken
# from num_epochs so the checkpoint label cannot drift from the configured
# run length (the previous hard-coded "5e" disagreed with num_epochs = 10).
torch.save(model_ft.state_dict(), f'fine_tuned_vit_1lf_{num_epochs}e.pth')