https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html#inputs

In [2]:
from __future__ import print_function 
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
# Report the versions of the two libraries this notebook depends on.
for label, module in (("PyTorch Version: ", torch), ("Torchvision Version: ", torchvision)):
    print(label, module.__version__)

PyTorch Version:  1.7.0+cu101
Torchvision Version:  0.8.1+cu101


In [3]:
# Root directory of the dataset. It is expected to follow the torchvision
#   ImageFolder layout; this notebook reads its "train" and "val" sub-folders.
data_dir = "/content/drive/MyDrive/data/intel_images_small2/"

# Key selecting the architecture built by initialize_model
model_name = "resnet"

# Number of classes in the dataset (size of the new output layer)
num_classes = 6

# Batch size for training (change depending on how much memory you have)
batch_size = 8

# Number of epochs to train the pretrained model for
#   (the from-scratch run uses twice this value)
num_epochs = 50
# Flag for feature extracting. When False, we finetune the whole model,
#   when True we only update the reshaped layer params
feature_extract = True

In [4]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False):
    """Train ``model`` with a validation pass after every epoch and early stopping.

    Each epoch runs a ``'train'`` phase followed by a ``'val'`` phase. The
    weights that reach the highest validation accuracy are kept and loaded
    back into ``model`` before returning. Training stops early once the
    validation accuracy has not improved for 10 consecutive epochs.

    Args:
        model: ``torch.nn.Module`` to train. Batches are moved to the device of
            its first parameter (CPU when the model has no parameters), so the
            function does not depend on a global ``device`` variable.
        dataloaders: Mapping with ``'train'`` and ``'val'`` entries. Each entry
            is iterated for ``(inputs, labels)`` batches and must expose a
            ``.dataset`` attribute that supports ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: Optimizer whose ``zero_grad()`` / ``step()`` are called for
            every training batch.
        num_epochs: Maximum number of epochs to run.
        is_inception: When True, the model is expected to return
            ``(outputs, aux_outputs)`` in training mode; the loss is then the
            main loss plus 0.4 times the auxiliary loss.

    Returns:
        Tuple ``(model, val_acc_history)``: the same model object with the best
        validation weights loaded, and the list of per-epoch validation
        accuracies (scalar tensors as produced by the accuracy computation).
    """
    since = time.time()

    # Follow the model's own device instead of relying on a module-level
    # ``device`` that may not exist (or may differ) when this function is called.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    # Early stopping: give up after `patience` validation epochs without improvement.
    patience = 10
    epochs_without_improvement = 0

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; gradients are only tracked during the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        # Inception returns an auxiliary output in training mode;
                        # combine both losses (auxiliary one weighted by 0.4).
                        # From https://discuss.pytorch.org/t/how-to-optimize-inception-model-with-auxiliary-classifiers/7958
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics: loss is weighted by the batch size so the epoch
                # value below is a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            if phase == 'val':
                val_acc_history.append(epoch_acc)
                if epoch_acc > best_acc:
                    # New best: snapshot the weights and reset the counter
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                    epochs_without_improvement = 0
                else:
                    epochs_without_improvement += 1

        if epochs_without_improvement >= patience:
            print(f'early stopping at epoch: {epoch}')
            break

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:.4f}'.format(best_acc))

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

In [5]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of ``model`` when ``feature_extracting`` is truthy.

    With a falsy flag the model is left untouched. Nothing is returned; the
    parameters are modified in place.
    """
    if not feature_extracting:
        return
    for tensor in model.parameters():
        tensor.requires_grad = False

In [6]:
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build the network selected by ``model_name`` with a new output layer.

    Args:
        model_name: Architecture key; only ``"resnet"`` (ResNet-18) is supported.
        num_classes: Number of outputs of the replacement final layer.
        feature_extract: Passed to ``set_parameter_requires_grad``; when true,
            the existing parameters are frozen. The final layer is replaced
            afterwards, so the new layer remains trainable.
        use_pretrained: Forwarded to torchvision as ``pretrained``.

    Returns:
        Tuple ``(model, input_size)`` where ``input_size`` is the image side
        length the notebook's transforms crop/resize to (224 for ResNet-18).

    Raises:
        ValueError: If ``model_name`` is not a supported architecture.
    """
    # Initialize these variables which will be set in this if statement. Each of these
    #   variables is model specific.
    model_ft = None
    input_size = 0

    if model_name == "resnet":
        # ResNet-18
        model_ft = models.resnet18(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        model_ft.fc = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    else:
        # Raise instead of print + exit(): exit() terminates the interpreter /
        # kernel, whereas an exception can be reported and handled by the caller.
        raise ValueError(
            "Invalid model name {!r}; supported values: 'resnet'".format(model_name)
        )

    return model_ft, input_size

# Build the network for this run from the configuration values.
model_ft, input_size = initialize_model(
    model_name,
    num_classes,
    feature_extract,
    use_pretrained=True,
)

# Show the architecture that was just created.
print(model_ft)

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [7]:

# Parameters to change for the experiments

momentum = 0.9 # 0 or 0.9
weight_decay = 0.1 # 0 or 0.1

# Candidate activation functions
neg_slope=0.01
leaky_relu = nn.LeakyReLU(neg_slope, inplace = True)
sigmoid = nn.Sigmoid()
elu = nn.ELU(alpha = 1.0, inplace = True)
# Build a fresh ReLU instead of aliasing `model_ft.relu`: after the swap below,
# `model_ft.relu` is the replacement activation, so re-running this cell would
# otherwise make `relu` silently point at it.
relu = nn.ReLU(inplace=True)

# change this to the desired activation function
act_func = leaky_relu # for the experiments relu and leaky relu were used

# Swap the stem activation and the activation of every residual block.
model_ft.relu = act_func
for _stage in (model_ft.layer1, model_ft.layer2, model_ft.layer3, model_ft.layer4):
    for _block in _stage:
        _block.relu = act_func

print(model_ft)




ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): LeakyReLU(negative_slope=0.01, inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): LeakyReLU(negative_slope=0.01, inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_run

In [8]:
# Training images get a random resized crop and a random horizontal flip;
# validation images are only resized and centre-cropped. Both splits are then
# converted to tensors and normalised with the same per-channel values.
data_transforms = dict(
    train=transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
    val=transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
)

print("Initializing Datasets and Dataloaders...")

# One ImageFolder dataset per split, read from <data_dir>/<split>.
image_datasets = {
    split: datasets.ImageFolder(os.path.join(data_dir, split), data_transforms[split])
    for split in ('train', 'val')
}

# One shuffled DataLoader per split.
dataloaders_dict = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )
    for split in ('train', 'val')
}

# Use the first CUDA device when one is available, otherwise the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

Initializing Datasets and Dataloaders...
cuda:0


In [9]:
# Move the network onto the selected device.
model_ft = model_ft.to(device)

# Choose what the optimizer receives. In feature-extract mode only the
# parameters that still have requires_grad set (the newly created layer) are
# handed over; otherwise the full parameter iterator is used. Either way the
# names of the trainable parameters are listed.
print("Params to learn:")
trainable = [
    (name, param)
    for name, param in model_ft.named_parameters()
    if param.requires_grad
]
for name, _param in trainable:
    print("\t", name)

if feature_extract:
    params_to_update = [param for _name, param in trainable]
else:
    params_to_update = model_ft.parameters()

# SGD over the selected parameters, using the experiment's momentum / weight decay.
optimizer_ft = optim.SGD(
    params_to_update,
    lr=0.001,
    momentum=momentum,
    weight_decay=weight_decay,
)


#optimizer_ft = optim.RMSprop(params_to_update, lr=0.001, momentum = 0)



Params to learn:
	 fc.weight
	 fc.bias


In [10]:
# Loss function used for both the training and the validation phase.
criterion = nn.CrossEntropyLoss()

# Run training; keep the returned model and its validation-accuracy history.
model_ft, hist = train_model(
    model_ft,
    dataloaders_dict,
    criterion,
    optimizer_ft,
    num_epochs=num_epochs,
    is_inception=(model_name == "inception"),
)

Epoch 0/49
----------
train Loss: 1.1651 Acc: 0.6005
val Loss: 0.6145 Acc: 0.8144

Epoch 1/49
----------
train Loss: 0.7846 Acc: 0.7525
val Loss: 0.4660 Acc: 0.8433

Epoch 2/49
----------
train Loss: 0.6900 Acc: 0.7730
val Loss: 0.4071 Acc: 0.8722

Epoch 3/49
----------
train Loss: 0.6567 Acc: 0.7788
val Loss: 0.3837 Acc: 0.8722

Epoch 4/49
----------
train Loss: 0.6298 Acc: 0.7892
val Loss: 0.3717 Acc: 0.8711

Epoch 5/49
----------
train Loss: 0.6172 Acc: 0.7945
val Loss: 0.3560 Acc: 0.8756

Epoch 6/49
----------
train Loss: 0.6115 Acc: 0.7875
val Loss: 0.3478 Acc: 0.8878

Epoch 7/49
----------
train Loss: 0.5968 Acc: 0.7958
val Loss: 0.3425 Acc: 0.8789

Epoch 8/49
----------
train Loss: 0.5873 Acc: 0.7997
val Loss: 0.3523 Acc: 0.8778

Epoch 9/49
----------
train Loss: 0.5889 Acc: 0.7965
val Loss: 0.3437 Acc: 0.8867

Epoch 10/49
----------
train Loss: 0.5739 Acc: 0.8090
val Loss: 0.3330 Acc: 0.8789

Epoch 11/49
----------
train Loss: 0.5911 Acc: 0.8007
val Loss: 0.3384 Acc: 0.8822

Ep

In [11]:
# Same architecture without pretrained weights and with every parameter trainable.
scratch_model, _ = initialize_model(
    model_name,
    num_classes,
    feature_extract=False,
    use_pretrained=False,
)
scratch_model = scratch_model.to(device)
scratch_optimizer = optim.SGD(
    scratch_model.parameters(),
    lr=0.001,
    momentum=momentum,
    weight_decay=weight_decay,
)

# Architecture before the activation swap.
print(scratch_model)

# Replace the stem activation and the activation of every residual block.
scratch_model.relu = act_func
for stage in (
    scratch_model.layer1,
    scratch_model.layer2,
    scratch_model.layer3,
    scratch_model.layer4,
):
    for block in stage:
        block.relu = act_func

# Architecture after the activation swap.
print(scratch_model)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [12]:
# Train the from-scratch model for twice as many epochs as the pretrained run;
# only the validation-accuracy history is kept.
scratch_criterion = nn.CrossEntropyLoss()
_, scratch_hist = train_model(
    scratch_model,
    dataloaders_dict,
    scratch_criterion,
    scratch_optimizer,
    num_epochs=num_epochs * 2,
    is_inception=(model_name == "inception"),
)


Epoch 0/99
----------
train Loss: 1.4543 Acc: 0.4013
val Loss: 1.1672 Acc: 0.5667

Epoch 1/99
----------
train Loss: 1.2661 Acc: 0.4820
val Loss: 1.0870 Acc: 0.5767

Epoch 2/99
----------
train Loss: 1.1915 Acc: 0.5277
val Loss: 0.9819 Acc: 0.6400

Epoch 3/99
----------
train Loss: 1.1533 Acc: 0.5510
val Loss: 0.9453 Acc: 0.6422

Epoch 4/99
----------
train Loss: 1.0851 Acc: 0.5787
val Loss: 0.8517 Acc: 0.6956

Epoch 5/99
----------
train Loss: 1.0535 Acc: 0.5977
val Loss: 0.7546 Acc: 0.7444

Epoch 6/99
----------
train Loss: 0.9982 Acc: 0.6262
val Loss: 0.7462 Acc: 0.7311

Epoch 7/99
----------
train Loss: 0.9671 Acc: 0.6370
val Loss: 0.7337 Acc: 0.7356

Epoch 8/99
----------
train Loss: 0.9418 Acc: 0.6482
val Loss: 0.6682 Acc: 0.7722

Epoch 9/99
----------
train Loss: 0.9452 Acc: 0.6543
val Loss: 0.6548 Acc: 0.7856

Epoch 10/99
----------
train Loss: 0.9103 Acc: 0.6605
val Loss: 0.6571 Acc: 0.7644

Epoch 11/99
----------
train Loss: 0.8972 Acc: 0.6690
val Loss: 0.6538 Acc: 0.7711

Ep