In [16]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
import numpy as np
import torchvision
from torchvision import datasets, transforms
from torchvision.models import inception_v3, Inception_V3_Weights
import matplotlib.pyplot as plt
import time
import os
import copy


In [17]:
# Use the first CUDA GPU when one is actually usable, otherwise fall back to the CPU.
# `is_available` has to be *called*: the bare function object is always truthy, which
# would select "cuda:0" even on a machine without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Print the installed PyTorch / Torchvision versions and the selected device for reference.
print(f'PyTorch Version: {torch.__version__}')
print(f'Torchvision Version: {torchvision.__version__}')
print(f'Device: {device}')

PyTorch Version: 1.13.1+cu117
Torchvision Version: 0.14.1+cu117
Device: cuda:0


In [18]:
# Notebook-level settings read by the cells below.
# Root folder of the image data; the loader reads its 'train' and 'val' sub-folders.
data_dir = "./data/hymenoptera_data"
# Architecture selector passed to initialize_model (only "inception" is handled there).
model_name = "inception"
# Output size of the replacement classifier layers.
num_classes = 2
# Batch size given to both DataLoaders.
batch_size = 8
# Number of training epochs passed to train_model.
epochs = 15
# When True, the pretrained parameters are frozen and only the newly added layers are trained.
feature_extract = True

In [19]:
def load_datasets_and_dataloaders(input_size):
    """Build the train/val ``ImageFolder`` datasets and their ``DataLoader`` objects.

    Images are read from ``<data_dir>/train`` and ``<data_dir>/val``; ``data_dir``
    and ``batch_size`` are the notebook-level settings.

    Args:
        input_size: Target crop size in pixels for both splits.

    Returns:
        ``(image_datasets, dataloaders_dict)``; both are dicts keyed by
        ``'train'`` and ``'val'``.
    """
    # Announce the work before it starts rather than after it has finished.
    print("Loading Datasets and Initializing DataLoaders...")

    # Per-channel mean / std used to normalise both splits.
    norm_mean = [0.485, 0.456, 0.406]
    norm_std = [0.229, 0.224, 0.225]

    data_transforms = {
        # Training: random crop + horizontal flip as augmentation.
        'train': transforms.Compose([
            transforms.RandomResizedCrop(input_size),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std)
        ]),
        # Validation: fixed resize + centre crop, no augmentation.
        'val': transforms.Compose([
            transforms.Resize(input_size),
            transforms.CenterCrop(input_size),
            transforms.ToTensor(),
            transforms.Normalize(norm_mean, norm_std)
        ]),
    }

    image_datasets = {
        x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val']
    }

    # Only the training split is shuffled; validation metrics do not depend on
    # sample order, so the validation loader keeps a fixed order.
    dataloaders_dict = {
        x: DataLoader(image_datasets[x], batch_size=batch_size, shuffle=(x == 'train'),
                      num_workers=4) for x in ['train', 'val']
    }

    return image_datasets, dataloaders_dict

In [20]:
def train_model(model, dataloaders, criterion, optimizer, epochs=25, is_inception=False):
    """Train ``model`` and restore the weights of its best validation epoch.

    Each epoch runs a "train" phase followed by a "val" phase over
    ``dataloaders[phase]``. After the last epoch, the weights from the epoch
    with the highest validation accuracy are loaded back into ``model``.

    Args:
        model: Network to optimise. Batches are moved to the device of its
            first parameter (CPU if the model has no parameters), so the
            model should already be on the intended device.
        dataloaders: Mapping with "train" and "val" loaders that yield
            ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer stepped once per training batch.
        epochs: Number of passes over both phases.
        is_inception: When True, the training-phase forward pass must return
            ``(outputs, aux_outputs)``; the auxiliary loss is added with
            weight 0.4.

    Returns:
        ``(model, val_acc_history)`` where the history holds one validation
        accuracy (a 0-dim tensor) per epoch.

    Raises:
        ValueError: If the dataset behind a phase's loader is empty.
    """
    since = time.time()

    # Follow the model's own device instead of relying on a notebook global.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = list()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(epochs):
        print(f'Epoch {epoch}/{epochs - 1}')
        print("-" * 10)

        for phase in ["train", "val"]:
            is_training = phase == "train"

            num_samples = len(dataloaders[phase].dataset)
            if num_samples == 0:
                # Fail with a clear message instead of dividing by zero below.
                raise ValueError(f"dataloaders['{phase}'] has an empty dataset")

            if is_training:
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            # A tensor accumulator keeps `.double()` valid even if the loader
            # yields no batches (e.g. drop_last on a tiny dataset).
            running_corrects = torch.zeros((), dtype=torch.long, device=run_device)

            # Iterate over the data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                optimizer.zero_grad()  # to zero the parameter gradients

                # forward, track history only in train mode
                with torch.set_grad_enabled(is_training):
                    if is_inception and is_training:
                        # Training-mode Inception also returns auxiliary logits;
                        # their loss is added with a 0.4 weight.
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward, optimize only in train mode
                    if is_training:
                        loss.backward()
                        optimizer.step()

                # statistics: the loss is weighted by the batch size so the
                # epoch value is a per-sample average
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels)

            epoch_loss = running_loss / num_samples
            epoch_acc = running_corrects.double() / num_samples

            print(f'{phase} Loss: {epoch_loss} Accuracy: {epoch_acc}')

            # deepcopy the weights whenever validation accuracy improves
            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
            if phase == 'val':
                val_acc_history.append(epoch_acc)
        print()

    time_elapsed = time.time() - since
    print('Training Complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best Validation Accuracy: {:.04f}'.format(best_acc))

    # Hand back the best-performing weights, not the last epoch's.
    model.load_state_dict(best_model_wts)

    return model, val_acc_history

In [21]:
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when feature extracting.

    If ``feature_extracting`` is truthy, ``requires_grad`` is set to False on
    every parameter yielded by ``model.parameters()``; otherwise the model is
    left untouched. Nothing is returned.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

In [22]:
def initialize_model(model_name, num_classes, feature_extract, weights, use_pretrained=True):
    """Create an InceptionV3 model whose classifier heads output ``num_classes`` logits.

    InceptionV3 expects (299, 299) inputs and has an auxiliary output; both the
    auxiliary and the main fully connected layers are replaced.

    Args:
        model_name: Architecture selector; only ``"inception"`` is supported.
        num_classes: Output size of the two replacement ``nn.Linear`` layers.
        feature_extract: When True, all loaded parameters are frozen before
            the new layers are attached, so only the new layers stay trainable.
        weights: Weights passed to ``inception_v3`` (for example
            ``Inception_V3_Weights.DEFAULT``).
        use_pretrained: When False, ``weights`` is ignored and the network is
            built without pretrained weights.

    Returns:
        ``(model_ft, input_size)`` with ``input_size`` equal to 299.

    Raises:
        ValueError: If ``model_name`` is not ``"inception"``.
    """
    if model_name != "inception":
        # Raise instead of calling exit(), which would shut down the notebook kernel.
        raise ValueError(f"Invalid Model Name! Got {model_name!r}; only 'inception' is supported.")

    print("Initializing InceptionV3 . . .")

    # Honour the caller's choice of weights instead of hard-coding DEFAULT.
    model_ft = inception_v3(weights=weights if use_pretrained else None)
    set_parameter_requires_grad(model_ft, feature_extract)

    # The replacement layers are created after freezing, so they keep
    # requires_grad=True and remain trainable.
    num_ftrs = model_ft.AuxLogits.fc.in_features
    model_ft.AuxLogits.fc = nn.Linear(num_ftrs, num_classes)

    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    input_size = 299

    return model_ft, input_size

In [23]:
# Build the InceptionV3 model with its classifier layers resized to `num_classes`,
# then print the architecture and the input size the data pipeline must produce.
model_ft, input_size = initialize_model(model_name, num_classes, feature_extract, weights=Inception_V3_Weights.DEFAULT)
print(model_ft)
print(f'InputSize: {input_size}')

Initializing InceptionV3 . . .
Inception3(
  (Conv2d_1a_3x3): BasicConv2d(
    (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2a_3x3): BasicConv2d(
    (conv): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_2b_3x3): BasicConv2d(
    (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn): BatchNorm2d(64, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (maxpool1): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  (Conv2d_3b_1x1): BasicConv2d(
    (conv): Conv2d(64, 80, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (bn): BatchNorm2d(80, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  )
  (Conv2d_4a_3x3): BasicConv2d(
    (conv): Conv2d(80

In [24]:
# Create the train/val datasets and loaders at the input size the model expects.
dsets, dataloaders_dict = load_datasets_and_dataloaders(input_size)

Loading Datasets and Initializing DataLoaders...


In [25]:
# Display the loader dict to confirm both the 'train' and 'val' entries exist.
dataloaders_dict

{'train': <torch.utils.data.dataloader.DataLoader at 0x1fb083177f0>,
 'val': <torch.utils.data.dataloader.DataLoader at 0x1fb126ce3d0>}

In [27]:
# Move the model onto the selected device (GPU or CPU) before creating the optimizer.
model_ft = model_ft.to(device)

# When feature extracting, only the parameters that still require gradients are
# handed to the optimizer; otherwise every parameter of the model is.
params_to_update = list() if feature_extract else model_ft.parameters()
print("Parameters to learn:")
for name, param in model_ft.named_parameters():
    if not param.requires_grad:
        continue
    if feature_extract:
        params_to_update.append(param)
    print("\t", name)

optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)

Parameters to learn:
	 AuxLogits.fc.weight
	 AuxLogits.fc.bias
	 fc.weight
	 fc.bias


In [28]:

# Cross-entropy loss for the classification task.
criterion = nn.CrossEntropyLoss()

# Train and validate; the Inception-specific auxiliary-loss path is enabled
# when the configured model is "inception".
model_ft, hist = train_model(model_ft, dataloaders_dict, criterion, optimizer_ft, epochs = epochs, 
                             is_inception = (model_name=="inception"))

Epoch 0/14
----------
train Loss: 0.8997164182975644 Accuracy: 0.5942622950819673
val Loss: 0.47012581135712417 Accuracy: 0.9215686274509804

Epoch 1/14
----------
train Loss: 0.6291178406262007 Accuracy: 0.8114754098360656
val Loss: 0.35187018107549817 Accuracy: 0.934640522875817

Epoch 2/14
----------
train Loss: 0.4987450491209499 Accuracy: 0.8524590163934427
val Loss: 0.3529934236426759 Accuracy: 0.869281045751634

Epoch 3/14
----------
train Loss: 0.5302177833729103 Accuracy: 0.8278688524590164
val Loss: 0.27143554827746225 Accuracy: 0.9215686274509804

Epoch 4/14
----------
train Loss: 0.41407076061749065 Accuracy: 0.8852459016393444
val Loss: 0.2559073158338958 Accuracy: 0.9215686274509804

Epoch 5/14
----------
train Loss: 0.4395282945183457 Accuracy: 0.8852459016393444
val Loss: 0.3662660218580486 Accuracy: 0.8431372549019608

Epoch 6/14
----------
train Loss: 0.4209987346510418 Accuracy: 0.8770491803278689
val Loss: 0.23878500079796985 Accuracy: 0.9215686274509804

Epoch 7/14

In [29]:
# from torchvision.models.inception import Inception3
# from torch.hub import load_state_dict_from_url  # was torchvision.models.utils, which is missing in torchvision 0.14.1 (see the ModuleNotFoundError below)
# import torch.nn.functional as F


# model_urls = {
#     # Inception v3 ported from TensorFlow
#     'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
# }
# def inception_v3_sliced(pretrained=True, progress=True, stop_layer=5, **kwargs):
#     if pretrained:
#         if 'transform_input' not in kwargs:
#             kwargs['transform_input'] = True
#         if 'aux_logits' in kwargs:
#             original_aux_logits = kwargs['aux_logits']
#             kwargs['aux_logits'] = True
#         else:
#             original_aux_logits = True
#         kwargs['init_weights'] = False  # we are loading weights from a pretrained model
        
#         class Inception3Mod(Inception3):
#             def __init__(self, stop_layer, **kwargs):
#                 super(Inception3Mod, self).__init__(**kwargs)
#                 self.stop_layer = stop_layer
#             def _forward(self, x):
#                 layers = [
#                  self.Conv2d_1a_3x3,
#                  self.Conv2d_2a_3x3,
#                  self.Conv2d_2b_3x3,
#                  'maxpool',
#                  self.Conv2d_3b_1x1,
#                  self.Conv2d_4a_3x3,
#                  'maxpool',
#                  self.Mixed_5b,
#                  self.Mixed_5c,
#                  self.Mixed_5d,
#                  self.Mixed_6a,
#                  self.Mixed_6b,
#                  self.Mixed_6c,
#                  self.Mixed_6d,
#                  self.Mixed_6e,
#                  self.Mixed_7a,
#                  self.Mixed_7b,
#                  self.Mixed_7c,
#                 ]

#                 for idx in range(self.stop_layer):
#                     layer = layers[idx]
#                     if layer == 'maxpool':
#                         x = F.max_pool2d(x, kernel_size=3, stride=2)
#                     else:
#                         x = layer(x)
#                 return x, None

#         model = Inception3Mod(**kwargs, stop_layer=stop_layer)
#         state_dict = load_state_dict_from_url(model_urls['inception_v3_google'], progress=progress)
#         model.load_state_dict(state_dict)
#         if not original_aux_logits:
#             model.aux_logits = False
#             del model.AuxLogits
#         return model
    
#     return Inception3Mod(**kwargs)

ModuleNotFoundError: No module named 'torchvision.models.utils'

In [None]:
# from torchvision.models.inception import Inception3
# from torch.hub import load_state_dict_from_url  # was torchvision.models.utils, which does not exist in the torchvision 0.14.1 used by this notebook
# import torch.nn.functional as F


# model_urls = {
#     # Inception v3 ported from TensorFlow
#     'inception_v3_google': 'https://download.pytorch.org/models/inception_v3_google-1a9a5a14.pth',
# }
# def inception_v3_sliced(pretrained=False, progress=True, stop_layer=3, **kwargs):
#     r"""Inception v3 model architecture from
#     `"Rethinking the Inception Architecture for Computer Vision" <http://arxiv.org/abs/1512.00567>`_.
#     .. note::
#         **Important**: In contrast to the other models the inception_v3 expects tensors with a size of
#         N x 3 x 299 x 299, so ensure your images are sized accordingly.
#     Args:
#         pretrained (bool): If True, returns a model pre-trained on ImageNet
#         progress (bool): If True, displays a progress bar of the download to stderr
#         aux_logits (bool): If True, add an auxiliary branch that can improve training.
#             Default: *True*
#         transform_input (bool): If True, preprocesses the input according to the method with which it
#             was trained on ImageNet. Default: *False*
#     """
#     if pretrained:
#         if 'transform_input' not in kwargs:
#             kwargs['transform_input'] = True
#         if 'aux_logits' in kwargs:
#             original_aux_logits = kwargs['aux_logits']
#             kwargs['aux_logits'] = True
#         else:
#             original_aux_logits = True
#         kwargs['init_weights'] = False  # we are loading weights from a pretrained model
#         class Inception3Mod(Inception3):
#             def __init__(self, stop_layer, **kwargs):
#                 super(Inception3Mod, self).__init__(**kwargs)
#                 self.stop_layer = stop_layer
#             def _forward(self, x):
#                 layers = [
#                  self.Conv2d_1a_3x3,
#                  self.Conv2d_2a_3x3,
#                  self.Conv2d_2b_3x3,
#                  'maxpool',
#                  self.Conv2d_3b_1x1,
#                  self.Conv2d_4a_3x3,
#                  'maxpool',
#                  self.Mixed_5b,
#                  self.Mixed_5c,
#                  self.Mixed_5d,
#                  self.Mixed_6a,
#                  self.Mixed_6b,
#                  self.Mixed_6c,
#                  self.Mixed_6d,
#                  self.Mixed_6e,
#                  self.Mixed_7a,
#                  self.Mixed_7b,
#                  self.Mixed_7c,
#                 ]

#                 for idx in range(self.stop_layer):
#                     layer = layers[idx]
#                     if layer == 'maxpool':
#                         x = F.max_pool2d(x, kernel_size=3, stride=2)
#                     else:
#                         x = layer(x)
#                 return x, None


#         model = Inception3Mod(**kwargs, stop_layer=stop_layer)
#         state_dict = load_state_dict_from_url(model_urls['inception_v3_google'],
#                                               progress=progress)
#         model.load_state_dict(state_dict)
#         if not original_aux_logits:
#             model.aux_logits = False
#             del model.AuxLogits
#         return model

#     return Inception3Mod(**kwargs)