In [1]:
import time
import os

import numpy as np
import torch
import torch.optim as optim
from torch import nn
from torch.autograd import Variable

import torchvision
import torchvision.models as models
import torch.utils.model_zoo as model_zoo
import torchvision.transforms as transforms
from torchvision import datasets

from itertools import accumulate
from functools import reduce


In [2]:
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
    'densenet121': 'https://download.pytorch.org/models/densenet121-241335ed.pth',
    'densenet169': 'https://download.pytorch.org/models/densenet169-6f0f7f60.pth',
    'densenet201': 'https://download.pytorch.org/models/densenet201-4c113574.pth',
    'densenet161': 'https://download.pytorch.org/models/densenet161-17b70270.pth',
    'inception_v3': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
    'squeezenet1_0': 'https://download.pytorch.org/models/squeezenet1_0-a815701f.pth',
    'squeezenet1_1': 'https://download.pytorch.org/models/squeezenet1_1-f364aa15.pth',
    'vgg11': 'https://download.pytorch.org/models/vgg11-bbd30ac9.pth',
    'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'vgg19': 'https://download.pytorch.org/models/vgg19-dcbb9e9d.pth',
}


In [3]:
# model_names = model_urls.keys()

input_sizes = {
    'alexnet' : (224,224),
    'resnet' : (224,224),
    'inception' : (299,299),
    'vgg' : (224,224)
}

models_to_test = ['alexnet',  'inception_v3', \
                  'resnet34', 'vgg13']
# models_to_test = ['vgg13']

batch_size = 30
use_gpu = torch.cuda.is_available()

In [4]:
# import torch
# import torchvision.models as models
# from torch.utils import model_zoo
# import torchvision

# def diff_states(dict_canonical, dict_subset):
#     names1, names2 = list(dict_canonical.keys()), list(dict_subset.keys())

#     not_in_1 = [n for n in names1 if n not in names2]
#     not_in_2 = [n for n in names2 if n not in names1]

#     if not_in_1:
#         print(f"Parameters in canonical dict but not in subset dict: {not_in_1}")
#     if not_in_2:
#         print(f"Parameters in subset dict but not in canonical dict: {not_in_2}")

#     # Only return differences for matching parameters
#     for name in set(names1).intersection(names2):
#         v1, v2 = dict_canonical[name], dict_subset[name]
#         if v1.size() != v2.size():
#             yield (name, v1)

In [None]:
import torch
import torchvision.models as models
from torch.utils import model_zoo

def load_defined_model(name, num_classes, device='cuda'):
    model = models.__dict__[name](pretrained=False, num_classes=num_classes)
    

    if name == 'densenet121':
        model.classifier = torch.nn.Linear(model.classifier.in_features, num_classes)
    elif name == 'inception_v3':
        model.aux_logits = False
        model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
        if 'AuxLogits' in model.__dict__:
            model.AuxLogits.fc = torch.nn.Linear(model.AuxLogits.fc.in_features, num_classes)
    elif name == 'alexnet':
        model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, num_classes)
    elif name == 'vgg13':
        model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, num_classes)
    elif name == 'resnet34':
        model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
    
    model_urls = {
        'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
        'densenet121': 'https://download.pytorch.org/models/densenet121-241335ed.pth',
        'inception_v3': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
        'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
        'vgg13': 'https://download.pytorch.org/models/vgg13-c768596a.pth',
    }
    if name in model_urls:
        pretrained_state = model_zoo.load_url(model_urls[name])
    else:
        raise ValueError(f"No URL found for model '{name}'")
    
    if name in ['inception_v3', 'alexnet', 'vgg13', 'resnet34']:
        # if name == 'densenet121':
        #     pretrained_state.pop('classifier.weight', None)
        #     pretrained_state.pop('classifier.bias', None)
        if name == 'inception_v3':
            pretrained_state.pop('fc.weight', None)
            pretrained_state.pop('fc.bias', None)
            if 'AuxLogits.fc.weight' in pretrained_state:
                pretrained_state.pop('AuxLogits.fc.weight', None)
            if 'AuxLogits.fc.bias' in pretrained_state:
                pretrained_state.pop('AuxLogits.fc.bias', None)
        elif name == 'alexnet':
            pretrained_state.pop('classifier.6.weight', None)
            pretrained_state.pop('classifier.6.bias', None)
        elif name == 'vgg13':
            pretrained_state.pop('classifier.6.weight', None)
            pretrained_state.pop('classifier.6.bias', None)
        elif name == 'resnet34':
            pretrained_state.pop('fc.weight', None)
            pretrained_state.pop('fc.bias', None)

    diff = [s for s in diff_states(model.state_dict(), pretrained_state)]
    if diff:
        print(f"Replacing the following state from initialized {name}: {[d[0] for d in diff]}")

    model.load_state_dict(pretrained_state, strict=False)
    
    model.to(device)
    return model, diff

def diff_states(dict_canonical, dict_subset):
    names1, names2 = list(dict_canonical.keys()), list(dict_subset.keys())

    not_in_1 = [n for n in names1 if n not in names2]
    not_in_2 = [n for n in names2 if n not in names1]

    if not_in_1:
        print(f"Parameters in canonical dict but not in subset dict: {not_in_1}")
    if not_in_2:
        print(f"Parameters in subset dict but not in canonical dict: {not_in_2}")

    for name in set(names1).intersection(names2):
        v1, v2 = dict_canonical[name], dict_subset[name]
        if v1.size() != v2.size():
            yield (name, v1)


In [None]:
def filtered_params(net, param_list=None):
    def in_param_list(s):
        for p in param_list:
            if s.endswith(p):
                return True
        return False    
    params = net.named_parameters() if param_list is None \
    else (p for p in net.named_parameters() if in_param_list(p[0]))
    return params

In [None]:
from torch.utils.data import random_split, DataLoader
from torchvision import datasets, transforms

def load_data(resize, val_split=0.2):
    data_transforms = transforms.Compose([
        transforms.RandomResizedCrop(max(resize)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    data_dir = './plantvillage dataset/color'
    full_dataset = datasets.ImageFolder(data_dir, transform=data_transforms)

    val_size = int(len(full_dataset) * val_split)
    train_size = len(full_dataset) - val_size
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    valloader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

    return trainloader, valloader


In [None]:
import torch
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm 

def train(net, trainloader, param_list=None, epochs=1, use_gpu=True):
    net = net.train()

    device = torch.device("cuda" if use_gpu and torch.cuda.is_available() else "cpu")
    net.to(device)

    if param_list:
        params = [p for n, p in net.named_parameters() if n in param_list and p.requires_grad]
        if not params:
            raise ValueError("No valid parameters found in param_list for optimization.")
    else:
        params = [p for p in net.parameters() if p.requires_grad]

    if not params:
        raise ValueError("Optimizer got an empty parameter list: no parameters to optimize.")

    optimizer = optim.SGD(params, lr=0.001, momentum=0.9)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)  

    losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(tqdm(trainloader), 0):
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = torch.nn.functional.cross_entropy(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if i % 100 == 99:
                print(f"[Epoch {epoch + 1}, Batch {i + 1}] loss: {running_loss / 100:.3f}")
                running_loss = 0.0

        scheduler.step()

        losses.append(running_loss / len(trainloader))

    print("Finished Training")
    return losses


In [9]:
def train_stats(m, trainloader, param_list = None):
    stats = {}
    params = filtered_params(m, param_list)    
    counts = 0,0
    for counts in enumerate(accumulate((reduce(lambda d1,d2: d1*d2, p[1].size()) for p in params)) ):
        pass
    stats['variables_optimized'] = counts[0] + 1
    stats['params_optimized'] = counts[1]
    
    before = time.time()
    losses = train(m, trainloader, param_list=param_list)
    stats['training_time'] = time.time() - before

    stats['training_loss'] = losses[-1] if len(losses) else float('nan')
    stats['training_losses'] = losses
    
    return stats

In [10]:
def evaluate_stats(net, testloader):
    stats = {}
    correct = 0
    total = 0
    
    before = time.time()
    for i, data in enumerate(testloader, 0):
        images, labels = data

        if use_gpu:
            images, labels = (images.cuda()), (labels.cuda())

        outputs = net(Variable(images))
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum()
    accuracy = correct / total
    stats['accuracy'] = accuracy
    stats['eval_time'] = time.time() - before
    
    print('Accuracy on test images: %f' % accuracy)
    return stats

In [None]:
def train_eval(net, trainloader, testloader, param_list=None):
    print("Training..." if not param_list else "Retraining...")
    stats_train = train_stats(net, trainloader, param_list=param_list)
    
    print("Evaluating...")
    net = net.eval()
    stats_eval = evaluate_stats(net, testloader)
    
    return {**stats_train, **stats_eval}

stats = []
num_classes = 39
print("RETRAINING")

for name in models_to_test:
    print("")
    print("Targeting %s with %d classes" % (name, num_classes))
    print("------------------------------------------")
    model_pretrained, diff = load_defined_model(name, num_classes)
    final_params = [d[0] for d in diff]
    #final_params = None
    
    resize = [s[1] for s in input_sizes.items() if s[0] in name][0]
    print("Resizing input images to max of", resize)
    trainloader, testloader = load_data(resize)
    
    if use_gpu:
        print("Transfering models to GPU(s)")
        model_pretrained = torch.nn.DataParallel(model_pretrained).cuda()
        
    pretrained_stats = train_eval(model_pretrained, trainloader, testloader, final_params)
    pretrained_stats['name'] = name
    pretrained_stats['retrained'] = True
    pretrained_stats['shallow_retrain'] = True
    stats.append(pretrained_stats)
    
    print("")

print("---------------------")
print("TRAINING from scratch")
for name in models_to_test:
    print("")    
    print("Targeting %s with %d classes" % (name, num_classes))
    print("------------------------------------------")
    model_blank = models.__dict__[name](num_classes=num_classes)

    resize = [s[1] for s in input_sizes.items() if s[0] in name][0]
    print("Resizing input images to max of", resize)
    trainloader, testloader = load_data(resize)
    
    if use_gpu:
        print("Transfering models to GPU(s)")
        model_blank = torch.nn.DataParallel(model_blank).cuda()    
        
    blank_stats = train_eval(model_blank, trainloader, testloader)
    blank_stats['name'] = name
    blank_stats['retrained'] = False
    blank_stats['shallow_retrain'] = False
    stats.append(blank_stats)
    
    print("")

t = 0.0
for s in stats:
    t += s['eval_time'] + s['training_time']
print("Total time for training and evaluation", t)
print("FINISHED")

print("RETRAINING deep")

for name in models_to_test:
    print("")
    print("Targeting %s with %d classes" % (name, num_classes))
    print("------------------------------------------")
    model_pretrained, diff = load_defined_model(name, num_classes)
    
    resize = [s[1] for s in input_sizes.items() if s[0] in name][0]
    print("Resizing input images to max of", resize)
    trainloader, testloader = load_data(resize)
    
    if use_gpu:
        print("Transfering models to GPU(s)")
        model_pretrained = torch.nn.DataParallel(model_pretrained).cuda()
        
    pretrained_stats = train_eval(model_pretrained, trainloader, testloader, None)
    pretrained_stats['name'] = name
    pretrained_stats['retrained'] = True
    pretrained_stats['shallow_retrain'] = False
    stats.append(pretrained_stats)
    
    print("")

import csv
with open('stats.csv', 'w') as csvfile:
    fieldnames = stats[0].keys()
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for s in stats:
        writer.writerow(s)

RETRAINING

Targeting alexnet with 39 classes
------------------------------------------




Parameters in canonical dict but not in subset dict: ['classifier.6.weight', 'classifier.6.bias']


AssertionError: Torch not compiled with CUDA enabled

In [13]:
import torch
print(torch.__version__)

2.4.1+cpu
