In [18]:
import time
import os

import numpy as np
import torch
import torch.optim as optim
from torch import nn
from torch.autograd import Variable

import torchvision
import torchvision.models as models
import torch.utils.model_zoo as model_zoo
import torchvision.transforms as transforms
from torchvision import datasets

from itertools import accumulate
from functools import reduce

In [19]:
model_urls = {
    'alexnet': 'https://download.pytorch.org/models/alexnet-owt-4df8aa71.pth',
    'densenet121': 'https://download.pytorch.org/models/densenet121-241335ed.pth',
    'inception_v3': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',   
}

In [20]:
model_names = model_urls.keys()

In [21]:
input_sizes = {
    'alexnet' : (224,224),
    'densenet': (224,224),
    'resnet' : (224,224),
    'inception' : (299,299),
    'squeezenet' : (224,224),#not 255,255 acc. to https://github.com/pytorch/pytorch/issues/1120
    'vgg' : (224,224)
}

In [22]:
models_to_test = ['alexnet','densenet121', 'inception_v3', 'resnet50', 'vgg16']

In [23]:
batch_size = 20
use_gpu = torch.cuda.is_available()
device = 'cuda' if use_gpu else 'cpu'

In [24]:
from torch.hub import load_state_dict_from_url

def diff_states(state_dict1, state_dict2):
    """Helper function to compare model state dicts."""
    diff = []
    for key in state_dict1:
        if key in state_dict2 and not torch.equal(state_dict1[key], state_dict2[key]):
            diff.append((key, state_dict2[key]))
    return diff

In [25]:
import torch
from torch.hub import load_state_dict_from_url
from torchvision import models

# Define URLs for the pretrained models (example: ResNet, VGG, DenseNet, etc.)
model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth',
    'vgg16': 'https://download.pytorch.org/models/vgg16-397923af.pth',
    'densenet121': 'https://download.pytorch.org/models/densenet121-a639ec97.pth',
    'inception_v3': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth'
}

def load_defined_model(name, num_classes, device='cpu'):
    model = models.__dict__[name](pretrained=False)  # Load the base model architecture

    # Adjust the final classifier layer according to the architecture type
    if name.startswith('resnet'):
        model.fc = torch.nn.Linear(model.fc.in_features, num_classes)
    elif name.startswith('vgg'):
        model.classifier[6] = torch.nn.Linear(model.classifier[6].in_features, num_classes)
    elif name.startswith('densenet'):
        model.classifier = torch.nn.Linear(model.classifier.in_features, num_classes)
    elif name == 'inception_v3':
        model.fc = torch.nn.Linear(model.fc.in_features, num_classes)

    # Move the model to the specified device
    model = model.to(device)

    # Load pretrained weights if the URL is available
    if name in model_urls:
        try:
            pretrained_state = load_state_dict_from_url(model_urls[name], map_location=device)
            print(f"Pretrained weights loaded successfully for {name}")
        except Exception as e:
            print(f"Error loading pretrained weights for {name}: {e}")
            pretrained_state = None
    else:
        print(f"No pretrained model URL found for {name}")
        pretrained_state = None

    # If pretrained weights are loaded, update the model's state dict with matching parameters
    if pretrained_state:
        model_state_dict = model.state_dict()
        diff = []
        for key in pretrained_state:
            if key in model_state_dict and pretrained_state[key].shape == model_state_dict[key].shape:
                model_state_dict[key] = pretrained_state[key]
            else:
                diff.append((key, pretrained_state[key]))

        # Load the updated state dict into the model
        model.load_state_dict(model_state_dict)
    else:
        diff = []  # No pretrained state, nothing to compare

    return model, diff


In [26]:
def print_model_params(net):
    print("Model parameters:")
    for name, param in net.named_parameters():
        print(name)


In [27]:
def filtered_params(net, param_list=None):
    def in_param_list(s):
        for p in param_list:
            if s.endswith(p) or s.endswith(f".module.{p}"):
                return True
        return False

    params = net.named_parameters() if param_list is None \
        else (p for p in net.named_parameters() if in_param_list(p[0]))
    
    params = list(params)  # Convert generator to a list for debugging
    
    if len(params) == 0:
        raise ValueError("No parameters matched the provided param_list.")

    filtered_params = [p for p in params if p[1].requires_grad]
    
    if len(filtered_params) == 0:
        raise ValueError("No parameters with requires_grad=True were found to optimize.")
    
    return filtered_params


In [28]:
from torch.utils.data import random_split, DataLoader
from torchvision import datasets, transforms

def load_data(resize, val_split=0.2):
    data_transforms = transforms.Compose([
        transforms.RandomResizedCrop(max(resize)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])

    data_dir = '../plantvillage dataset/color'
    full_dataset = datasets.ImageFolder(data_dir, transform=data_transforms)

    # Split the dataset into training and validation sets
    val_size = int(len(full_dataset) * val_split)
    train_size = len(full_dataset) - val_size
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    trainloader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=4)
    valloader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=4)

    return trainloader, valloader


In [29]:
def train(net, trainloader, param_list=None, epochs=2):
    criterion = nn.CrossEntropyLoss()
    if use_gpu:
        criterion = criterion.cuda()

    params = list(filtered_params(net, param_list))
    print("Filtered parameters:", params)

    if len(params) == 0:
        raise ValueError("No parameters to optimize were found.")

    if param_list:
        for p_fixed in (p for p in net.named_parameters() if not any(p_name in p[0] for p_name in param_list)):
            p_fixed[1].requires_grad = False            

    optimizer = optim.SGD((p[1] for p in params), lr=0.001, momentum=0.9)
    print("Optimizer parameters:", list((p[1] for p in params)))

    losses = []
    for epoch in range(epochs):
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            inputs, labels = data
            if use_gpu:
                inputs, labels = Variable(inputs.cuda()), Variable(labels.cuda())
            else:
                inputs, labels = Variable(inputs), Variable(labels)

            optimizer.zero_grad()

            outputs = net(inputs)
            
            loss = None
            if isinstance(outputs, tuple):
                loss = sum((criterion(o, labels) for o in outputs))
            else:
                loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.data.item()
            if i % 30 == 29:
                avg_loss = running_loss / 30
                losses.append(avg_loss)
                print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, avg_loss))
                running_loss = 0.0

    print('Finished Training')
    return losses


In [30]:
def train_stats(m, trainloader, param_list=None):
    stats = {}
    params = filtered_params(m, param_list)
    
    counts = 0, 0
    for counts in enumerate(accumulate((reduce(lambda d1, d2: d1 * d2, p[1].size()) for p in params))):
        pass
    stats['variables_optimized'] = counts[0] + 1
    stats['params_optimized'] = counts[1]
    
    before = time.time()
    losses = train(m, trainloader, param_list=param_list)
    stats['training_time'] = time.time() - before

    stats['training_loss'] = losses[-1] if len(losses) else float('nan')
    stats['training_losses'] = losses
    
    return stats


In [31]:
def evaluate_model_performance(network, test_loader):
    evaluation_stats = {}
    correct_predictions = 0
    total_samples = 0
    
    start_time = time.time()
    for index, batch in enumerate(test_loader, 0):
        images, labels = batch

        if use_gpu:
            images, labels = images.cuda(), labels.cuda()

        outputs = network(Variable(images))
        _, predicted_labels = torch.max(outputs.data, 1)
        total_samples += labels.size(0)
        correct_predictions += (predicted_labels == labels).sum()
    
    accuracy = correct_predictions / total_samples
    evaluation_stats['accuracy'] = accuracy
    evaluation_stats['evaluation_time'] = time.time() - start_time
    
    print('Accuracy on test images: %f' % accuracy)
    return evaluation_stats


In [32]:
import torch
from torch import nn, optim
import csv

def train_eval(network, training_loader, test_loader, param_list=None):
    print("Training..." if not param_list else "Retraining...")
    stats_train = train_stats(network, training_loader, param_list=param_list)
    
    print("Evaluating...")
    network = network.eval()
    stats_eval = evaluate_model_performance(network, test_loader)
    
    return {**stats_train, **stats_eval}

stats = []
num_classes = 39
print("RETRAINING")

for name in models_to_test:
    print("")
    print(f"Targeting {name} with {num_classes} classes")
    print("------------------------------------------")
    model_pretrained, diff = load_defined_model(name, num_classes, device='cuda' if use_gpu else 'cpu')
    final_params = [p[0] for p in diff]
    
    resize = [s[1] for s in input_sizes.items() if s[0] in name][0]
    print(f"Resizing input images to max of {resize}")
    trainloader, testloader = load_data(resize)
    
    if use_gpu:
        print("Transferring models to GPU(s)")
        model_pretrained = torch.nn.DataParallel(model_pretrained).cuda()
        
    pretrained_stats = train_eval(model_pretrained, trainloader, testloader, final_params)
    pretrained_stats['name'] = name
    pretrained_stats['retrained'] = True
    pretrained_stats['shallow_retrain'] = True
    stats.append(pretrained_stats)
    
    print("")

print("---------------------")
print("TRAINING from scratch")
for name in models_to_test:
    print("")    
    print(f"Targeting {name} with {num_classes} classes")
    print("------------------------------------------")
    model_blank = models.__dict__[name](pretrained=False)

    if name == 'inception_v3':
        model_blank.aux_logits = False
    
    resize = [s[1] for s in input_sizes.items() if s[0] in name][0]
    print(f"Resizing input images to max of {resize}")
    trainloader, testloader = load_data(resize)
    
    if use_gpu:
        print("Transferring models to GPU(s)")
        model_blank = torch.nn.DataParallel(model_blank).cuda()    
        
    blank_stats = train_eval(model_blank, trainloader, testloader)
    blank_stats['name'] = name
    blank_stats['retrained'] = False
    blank_stats['shallow_retrain'] = False
    stats.append(blank_stats)
    
    print("")

t = 0.0
for s in stats:
    t += s.get('evaluation_time', 0) + s.get('training_time', 0)
print(f"Total time for training and evaluation: {t}")
print("FINISHED")

print("RETRAINING deep")

for name in models_to_test:
    print("")
    print(f"Targeting {name} with {num_classes} classes")
    print("------------------------------------------")
    model_pretrained, diff = load_defined_model(name, num_classes, device='cuda' if use_gpu else 'cpu')
    
    resize = [s[1] for s in input_sizes.items() if s[0] in name][0]
    print(f"Resizing input images to max of {resize}")
    trainloader, testloader = load_data(resize)
    
    if use_gpu:
        print("Transferring models to GPU(s)")
        model_pretrained = torch.nn.DataParallel(model_pretrained).cuda()
        
    pretrained_stats = train_eval(model_pretrained, trainloader, testloader, None)
    pretrained_stats['name'] = name
    pretrained_stats['retrained'] = True
    pretrained_stats['shallow_retrain'] = False
    stats.append(pretrained_stats)
    
    print("")

# Export stats as .csv
with open('stats.csv', 'w') as csvfile:
    fieldnames = stats[0].keys() if stats else []
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)

    writer.writeheader()
    for s in stats:
        writer.writerow(s)


RETRAINING

Targeting alexnet with 39 classes
------------------------------------------




No pretrained model URL found for alexnet
Resizing input images to max of (224, 224)
Transferring models to GPU(s)
Training...


ValueError: No parameters matched the provided param_list.

In [33]:
print("Final params to retrain:", final_params)


Final params to retrain: []


In [34]:
print("Diff:", diff)


Diff: []
