# This notebook investigates whether data augmentation can help boost the final CV / test results of an EfficientNet (EffNetB4) model

In [1]:
import pathlib
import os
import torchvision
from torch.mps import is_available
from torch import nn
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision.transforms import v2
import torchinfo
from time import time
import numpy as np

# The helper modules (`dataset_setup`, `utilities`, `model_setup`) are imported from a local `scripts` package.
# If that import fails, the package is fetched from the GitHub repository and the imports are retried.
try:
    from scripts import dataset_setup
    from scripts import utilities
    from scripts import model_setup
except ImportError:
    print('The helper scripts are not present in the working folder... downloading them from github repository:\n\t'
          'https://github.com/thomaspierantozzi/PyTorch_Course_by_Daniel_Bourke/tree/main/05_PyTorch_Food101/scripts')
    # Clone the whole course repository, move its `scripts` folder into the working directory, then delete the clone.
    !git clone https://github.com/thomaspierantozzi/PyTorch_Course_by_Daniel_Bourke
    !mv ./PyTorch_Course_by_Daniel_Bourke/05_PyTorch_Food101/scripts .
    !rm -rf ./PyTorch_Course_by_Daniel_Bourke 
    # Second attempt: an ImportError raised here is not caught and stops the cell.
    from scripts import dataset_setup
    from scripts import utilities
    from scripts import model_setup
    print('Dependencies properly imported...')


## 1. Importing the model chosen for the test (the goal is to deploy a hypothetical app that runs on mobile devices)

In [2]:
# Instantiate the EfficientNet wrapper class provided by the project's `model_setup` helper module.
# NOTE(review): the class is `EffNetB4` but the instance is labelled 'EfficientNetB2' -- confirm which label is intended.
model = model_setup.EffNetB4(name='EfficientNetB2')

### Testing the model functionality via torchinfo and getting some info

In [3]:
# Layer-by-layer report for a single 3-channel 256x256 image: shapes, parameter counts, trainability
summary_columns = ['input_size', 'output_size', 'num_params', 'trainable']
torchinfo.summary(
    model,
    input_size=(1, 3, 256, 256),
    col_names=summary_columns,
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Trainable
EffNetB4                                 [1, 3, 256, 256]          [1, 1000]                 --                        True
├─ModuleList: 1-1                        --                        --                        --                        True
│    └─Sequential: 2-1                   [1, 3, 256, 256]          [1, 1792, 8, 8]           17,548,616                True
│    └─Conv2dNormActivation: 2-2         [1, 3, 256, 256]          [1, 48, 128, 128]         1,392                     True
│    └─Conv2d: 2-3                       [1, 3, 256, 256]          [1, 48, 128, 128]         1,296                     True
│    └─BatchNorm2d: 2-4                  [1, 48, 128, 128]         [1, 48, 128, 128]         96                        True
│    └─SiLU: 2-5                         [1, 48, 128, 128]         [1, 48, 128, 128]         --                        --
│    

In [4]:
# Prefer the Apple-silicon GPU backend when it is available, otherwise stay on the CPU
if is_available():
    device = 'mps'
else:
    device = 'cpu'
model.to(device)


EffNetB4(
  (architecture): ModuleList(
    (0): EfficientNet(
      (features): Sequential(
        (0): Conv2dNormActivation(
          (0): Conv2d(3, 48, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
          (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): SiLU(inplace=True)
        )
        (1): Sequential(
          (0): MBConv(
            (block): Sequential(
              (0): Conv2dNormActivation(
                (0): Conv2d(48, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=48, bias=False)
                (1): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
                (2): SiLU(inplace=True)
              )
              (1): SqueezeExcitation(
                (avgpool): AdaptiveAvgPool2d(output_size=1)
                (fc1): Conv2d(48, 12, kernel_size=(1, 1), stride=(1, 1))
                (fc2): Conv2d(12, 48, kernel_size=(1, 1), stride=(1, 1))
        

### Freezing the layers belonging to the 'Feature Extractor'

In [5]:
# Switch off gradient tracking for every weight of the feature extractor,
# so that only the layers outside `model.features` remain trainable
for frozen_param in model.features.parameters():
    frozen_param.requires_grad = False

In [6]:
# Same report after freezing, this time on a 224x224 input: the 'trainable' column shows what was frozen
summary_columns = ['input_size', 'output_size', 'num_params', 'trainable']
torchinfo.summary(
    model,
    input_size=(1, 3, 224, 224),
    col_names=summary_columns,
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Trainable
EffNetB4                                 [1, 3, 224, 224]          [1, 1000]                 --                        Partial
├─ModuleList: 1-1                        --                        --                        --                        Partial
│    └─Sequential: 2-1                   [1, 3, 224, 224]          [1, 1792, 7, 7]           17,548,616                False
│    └─Conv2dNormActivation: 2-2         [1, 3, 224, 224]          [1, 48, 112, 112]         1,392                     False
│    └─Conv2d: 2-3                       [1, 3, 224, 224]          [1, 48, 112, 112]         (1,296)                   False
│    └─BatchNorm2d: 2-4                  [1, 48, 112, 112]         [1, 48, 112, 112]         (96)                      False
│    └─SiLU: 2-5                         [1, 48, 112, 112]         [1, 48, 112, 112]         --                      

### Resetting the classifier layer

In [7]:
# Inspect the current classification head (in particular the input width of its Linear layer),
# so that the replacement head built for transfer learning can be sized to match it
model.classifier

Sequential(
  (0): Dropout(p=0.4, inplace=True)
  (1): Linear(in_features=1792, out_features=1000, bias=True)
)

In [8]:
# Replace the classification head with a fresh one producing 5 outputs (one per food class).
# The input width is read from the last layer of the current head instead of being hard-coded,
# so the cell keeps working if the backbone changes and can safely be re-run.
head_in_features = model.classifier[-1].in_features
model.classifier = nn.Sequential(
    nn.Dropout(p=0.2, inplace=True),
    nn.Linear(in_features=head_in_features, out_features=5, bias=True),
)
# The new layers are created on the CPU: move them to the device selected earlier
model.classifier.to(device)
model.classifier

Sequential(
  (0): Dropout(p=0.2, inplace=True)
  (1): Linear(in_features=1792, out_features=5, bias=True)
)

In [9]:
# Report once more with the new head in place: the model output shape should now be [1, 5]
summary_columns = ['input_size', 'output_size', 'num_params', 'trainable']
torchinfo.summary(
    model,
    input_size=(1, 3, 224, 224),
    col_names=summary_columns,
)

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Trainable
EffNetB4                                 [1, 3, 224, 224]          [1, 5]                    --                        Partial
├─ModuleList: 1-1                        --                        --                        --                        Partial
│    └─Sequential: 2-1                   [1, 3, 224, 224]          [1, 1792, 7, 7]           17,548,616                False
│    └─Conv2dNormActivation: 2-2         [1, 3, 224, 224]          [1, 48, 112, 112]         1,392                     False
│    └─Conv2d: 2-3                       [1, 3, 224, 224]          [1, 48, 112, 112]         (1,296)                   False
│    └─BatchNorm2d: 2-4                  [1, 48, 112, 112]         [1, 48, 112, 112]         (96)                      False
│    └─SiLU: 2-5                         [1, 48, 112, 112]         [1, 48, 112, 112]         --                      

## 2. Setting up the training environment

In [10]:
# --- Filesystem layout: every location is resolved against the current working directory ---
working_dir = pathlib.Path(os.getcwd())
datasets_dir = working_dir / 'Datasets'

MODEL_FOLDER = working_dir / 'saved_models' / 'new_model'  # default folder for saved models
LOG_FOLDER = MODEL_FOLDER                                   # the log is written next to the models
FOOD101_PATH = datasets_dir / 'food-101'                    # folder holding the original Food101 dataset
DATASET_TRAIN_FOLDER = datasets_dir / 'train'
DATASET_TEST_FOLDER = datasets_dir / 'test'

# --- Hyper-parameters ---
NUMBER_OF_CLASSES = 5   # number of classes used for the training
BATCH_SIZE = 32
LEARNING_RATE = 0.01

# Logger agent writing into LOG_FOLDER
logger = utilities.Logging_Agent(LOG_FOLDER)

### Downloading the data from Food101 library

In [11]:
# Sanity check: display the current working directory, which all the path constants above are built from
test = os.getcwd()
test

'/Users/thomaspierantozzi/PycharmProjects/PyTorch_Train/06_PyTorch_TransferLearning'

In [12]:
# The train and test folders share one parent directory: create it on the first run only
datasets_root = DATASET_TRAIN_FOLDER.parent
datasets_root_missing = not datasets_root.exists()
if datasets_root_missing:
    os.mkdir(datasets_root)
    print('Dataset folder created...')

In [13]:
# Food classes to use for this experiment
selected_foods = ['sushi', 'steak', 'pizza', 'french_fries', 'cup_cakes']

# Build (or reuse) the train / test folders through the project's helper; its return value is displayed
dataset_setup.dataset_creation(
    food101_folder=FOOD101_PATH,
    dataset_train_folder=DATASET_TRAIN_FOLDER,
    dataset_test_folder=DATASET_TEST_FOLDER,
    logger=logger,
    nr_of_classes=NUMBER_OF_CLASSES,
    picked_foods=selected_foods,
)

The folders already exist: 
	/Users/thomaspierantozzi/PycharmProjects/PyTorch_Train/06_PyTorch_TransferLearning/Datasets/train
	/Users/thomaspierantozzi/PycharmProjects/PyTorch_Train/06_PyTorch_TransferLearning/Datasets/test
	Foods in the train and test folders: 
		['cup_cakes', 'steak', 'sushi', 'pizza', 'french_fries']


['cup_cakes', 'steak', 'sushi', 'pizza', 'french_fries']

In [14]:
# Preprocessing pipeline bundled with torchvision's default EfficientNet-B0 weights
# (its resize / crop / normalisation settings are shown when the object is displayed).
# NOTE(review): the model instantiated earlier is `EffNetB4`, whereas these are the B0 transforms --
# confirm that pairing is intentional.
pre_trained_transform = torchvision.models.EfficientNet_B0_Weights.DEFAULT.transforms()

pre_trained_transform

ImageClassification(
    crop_size=[224]
    resize_size=[256]
    mean=[0.485, 0.456, 0.406]
    std=[0.229, 0.224, 0.225]
    interpolation=InterpolationMode.BICUBIC
)

### Adding the data augmentation operation

In [15]:
# Random augmentation step, applied before the standard pre-trained preprocessing
trivial_augment = v2.TrivialAugmentWide(num_magnitude_bins=15)

# Chain the two steps: augmentation first, then the pre-trained transform
augmented_transforms = nn.Sequential(trivial_augment, pre_trained_transform)
augmented_transforms

Sequential(
  (0): TrivialAugmentWide(interpolation=InterpolationMode.NEAREST, num_magnitude_bins=15)
  (1): ImageClassification(
      crop_size=[224]
      resize_size=[256]
      mean=[0.485, 0.456, 0.406]
      std=[0.229, 0.224, 0.225]
      interpolation=InterpolationMode.BICUBIC
  )
)

In [16]:
# The augmentation is applied to the training dataset only; the test set gets the plain pre-trained transform
train_dataset = ImageFolder(root=DATASET_TRAIN_FOLDER, transform=augmented_transforms)
test_dataset = ImageFolder(root=DATASET_TEST_FOLDER, transform=pre_trained_transform)

# Training batches are reshuffled every epoch, test batches keep the dataset order
train_batches = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_batches = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE)

In [17]:
# Display the training dataset summary (number of datapoints, root folder, attached transform)
train_dataset

Dataset ImageFolder
    Number of datapoints: 4000
    Root location: /Users/thomaspierantozzi/PycharmProjects/PyTorch_Train/06_PyTorch_TransferLearning/Datasets/train
    StandardTransform
Transform: Sequential(
             (0): TrivialAugmentWide(interpolation=InterpolationMode.NEAREST, num_magnitude_bins=15)
             (1): ImageClassification(
                 crop_size=[224]
                 resize_size=[256]
                 mean=[0.485, 0.456, 0.406]
                 std=[0.229, 0.224, 0.225]
                 interpolation=InterpolationMode.BICUBIC
             )
           )

In [18]:
# Log the class-name -> index mapping exposed by the training ImageFolder
logger.write_log(f'The DataLoaders are up and ready!\n\tClasses: {train_dataset.class_to_idx}')

The DataLoaders are up and ready!
	Classes: {'cup_cakes': 0, 'french_fries': 1, 'pizza': 2, 'steak': 3, 'sushi': 4}


In [19]:
# Move the whole model to the selected device, then read the device back from its first parameter
model.to(device)
model_device = next(model.parameters()).device
print(f'The model is on: {model_device}')

The model is on: mps:0


In [20]:
import torch

# Smoke test: push the first picture of the first training batch through the model
# to check that the output is consistent with what we expect (one logit per class).
first_batch = next(iter(train_batches))
test_pic = first_batch[0][0].unsqueeze(dim=0).to(device=device)  # add the batch dimension back
print(f'The dummy tensor is on: {test_pic.device}')
print(f'The model is on: {next(model.parameters()).device}')

# Inference only: evaluation mode, no gradient tracking
model.eval()
with torch.no_grad():
    outcome_test = model(test_pic)

probabilities = torch.softmax(outcome_test, dim=1)
predicted_class = torch.argmax(probabilities, dim=1).item()
logger.write_log(
    f'The outcome of the tast performed on the first picture fetched from the dataloader is: '
    f'\n\tLOGITS: {outcome_test}'
    f'\n\tPROBABILITIES: {probabilities}'
    f'\n\tPREDICTED CLASS: {predicted_class}'
)

The dummy tensor is on: mps:0
The model is on: mps:0
The outcome of the tast performed on the first picture fetched from the dataloader is: 
	LOGITS: tensor([[-0.0187, -0.0342,  0.0265, -0.0535,  0.1365]], device='mps:0')
	PROBABILITIES: tensor([[0.1936, 0.1906, 0.2026, 0.1870, 0.2261]], device='mps:0')
	PREDICTED CLASS: 4


In [21]:
# Multi-class classification loss
ce_loss = nn.CrossEntropyLoss()

# Adam over all the model parameters, replicating what is done in the course lesson
optimizer = torch.optim.Adam(
    model.parameters(),
    lr=LEARNING_RATE,
)

optimizer_ready_message = f'The Learning Rate is set to {LEARNING_RATE} | Optimizer is on and ready...'
logger.write_log(optimizer_ready_message)

The Learning Rate is set to 0.01 | Optimizer is on and ready...


In [None]:
from tqdm.auto import tqdm  # NOTE(review): imported but not used in this cell

# Training driver.
# For every epoch: run one optimisation step per training batch, evaluate on the test batches,
# log the aggregated metrics and append them to the model history. The model and the optimizer
# are saved once at the end, plus at intermediate checkpoints every `checkpoint_every` epochs.
epochs = 0              # 0-based index of the first epoch to run
end_epochs = 20         # the loop stops once this many epochs have been reached
checkpoint_every = 20   # an intermediate checkpoint is saved every N completed epochs

start_time = time()
for epoch in range(epochs, end_epochs):
    logger.write_log(f'EPOCH NUMBER: {epoch + 1} out of {end_epochs}')
    # per-batch metrics collected during the current epoch
    train_loss = []
    cv_loss = []
    train_acc = []
    cv_acc = []
    start_time_epoch = time() #marking the starting time for the epoch training
    for batch_index, (batch_X, batch_y) in enumerate(train_batches):

        start_time_batch = time()

        #training step per every batch. the function returns the loss and accuracy
        model.train() #switches on the train mode from torch module
        loss_batch, acc_batch = model.train_step(batch_X=batch_X,
                                                  batch_y=batch_y,
                                                  optimizer=optimizer,
                                                  loss_foo=ce_loss,
                                                  device=device)

        #appending the metrics computed for the batch to the containers for the whole epoch
        train_loss.append(loss_batch)
        train_acc.append(acc_batch)

        end_time_batch = time()

        #intermediate report every 50 batches
        if batch_index % 50 == 0:
            resume_batch = model.write_minibatch_results_class(
                batch_index=batch_index,
                batch_quantity=len(train_batches),
                train_loss=train_loss,
                batch_loss_train=loss_batch,
                train_acc=train_acc,
                batch_acc_train=acc_batch,
                start_time_iteration=start_time_batch,
                end_time_iteration=end_time_batch
            )
            logger.write_log(resume_batch)

    #cross-validation step (once per epoch), run in evaluation mode
    # NOTE(review): confirm that `eval_step` disables gradient tracking internally
    model.eval()
    for batch_test_X, batch_test_y in test_batches:
        loss_batch_cv, acc_batch_cv = model.eval_step(
            batch_test_X=batch_test_X,
            batch_test_y=batch_test_y,
            loss_foo=ce_loss,
            device=device
        )

        #appending the metrics computed for the batch to the containers for the whole epoch
        cv_loss.append(loss_batch_cv)
        cv_acc.append(acc_batch_cv)

    #marking the end time for the epoch training
    end_time_epoch = time()
    epoch_resume = model.write_epoch_results_class(
        curr_iteration=epoch,
        last_iteration=end_epochs,
        train_loss=train_loss,
        train_acc=train_acc,
        cv_loss=cv_loss,
        cv_acc=cv_acc,
        end_time_iteration=end_time_epoch,
        start_time_iteration=start_time_epoch
    )
    logger.write_log(epoch_resume)

    #keep track of the history of the training metrics (one value per epoch and per metric)
    model.history_update('train_loss', np.mean(train_loss).item())
    model.history_update('train_acc', np.mean(train_acc).item())
    model.history_update('cv_loss', np.mean(cv_loss).item())
    model.history_update('cv_acc', np.mean(cv_acc).item())

    #intermediate checkpoint.
    #The previous test `epoch % 20 == 0 & epoch != 0` could never be true: `&` binds tighter than
    #the comparisons, so it was evaluated as the chain `epoch % 20 == (0 & epoch) != 0`, i.e. `... and 0 != 0`.
    #The final epoch is skipped here because the model is saved unconditionally right after the loop.
    completed_epochs = epoch + 1
    if completed_epochs % checkpoint_every == 0 and completed_epochs < end_epochs:
        utilities.save_model(
            path=MODEL_FOLDER,
            model=model,
            optimizer=optimizer,
            epoch_nr=completed_epochs,  #epochs actually completed (was `end_epochs`, i.e. mislabelled)
            learning_rate=LEARNING_RATE,
            batch_size=BATCH_SIZE,
            nr_classes=NUMBER_OF_CLASSES
        )
        logger.write_log(f'Model and optimizer saved for the checkpoint @epoch nr.: {completed_epochs}')

end_time = time()
#final save, once every epoch has been run
utilities.save_model(
    path=MODEL_FOLDER,
    model=model,
    optimizer=optimizer,
    epoch_nr=end_epochs,
    learning_rate=LEARNING_RATE,
    batch_size=BATCH_SIZE,
    nr_classes=NUMBER_OF_CLASSES
)

EPOCH NUMBER: 1 out of 20
Intermediate results for batch 000 out of 125: Train Loss epoch:    1.606 (last:    1.606) | Train Acc. epoch: 18.75% (last: 18.75%) | Elapsed Time: 0.5 sec.
Intermediate results for batch 050 out of 125: Train Loss epoch:    0.622 (last:    0.280) | Train Acc. epoch: 79.29% (last: 90.62%) | Elapsed Time: 0.2 sec.
Intermediate results for batch 100 out of 125: Train Loss epoch:    0.512 (last:    0.441) | Train Acc. epoch: 82.58% (last: 87.50%) | Elapsed Time: 0.2 sec.
Epoch number: 1 out of 20
	Train loss: 0.492 | Train Accuracy: 83.100%
	CV loss:    0.252 | CV Accuracy:    92.773%
	Time taken: 47.59 seconds
EPOCH NUMBER: 2 out of 20
Intermediate results for batch 000 out of 125: Train Loss epoch:    0.284 (last:    0.284) | Train Acc. epoch: 87.50% (last: 87.50%) | Elapsed Time: 0.2 sec.
Intermediate results for batch 050 out of 125: Train Loss epoch:    0.318 (last:    0.420) | Train Acc. epoch: 88.54% (last: 87.50%) | Elapsed Time: 0.2 sec.
Intermediate re

In [None]:
import matplotlib.pyplot as plt

# One panel per dataset split: (loss series, accuracy series) as stored in `model.history`
metrics = [('train_loss', 'train_acc'),
           ('cv_loss', 'cv_acc')]

fig = plt.figure(figsize=[15, 5], dpi=150)
# The loop variables keep the names `loss` / `acc`: later cells read `loss` after this loop has finished
for panel_number, (loss, acc) in enumerate(metrics, start=1):
    ax = fig.add_subplot(1, 2, panel_number)
    epoch_axis = np.arange(len(model.history[loss]))
    # solid red line for the loss, dashed black line for the accuracy
    ax.plot(epoch_axis, model.history[loss], label=loss, color='red', linewidth=0.75)
    ax.plot(epoch_axis, model.history[acc], label=acc, color='black', linewidth=0.75, linestyle='--')
    ax.legend()
    ax.set_title(f'{loss } VS. {acc}')
    ax.set_yticks(np.arange(0.2, 1.2, 0.1))
    ax.grid(linestyle='--', linewidth=0.5)
fig.savefig(MODEL_FOLDER / 'loss_acc_history.jpg')

In [None]:
# Train loss vs. CV loss, one point per epoch, saved next to the model.
# The x axis is built from an explicitly named history series: the previous version indexed
# `model.history[loss]`, where `loss` was just the leftover loop variable of an earlier plotting cell.
epoch_axis = np.arange(len(model.history['cv_loss']))

fig = plt.figure(figsize=[15, 5], dpi=150)
plt.plot(epoch_axis, model.history['train_loss'], color='orange', label='train_loss')
plt.plot(epoch_axis, model.history['cv_loss'], color='blue', label='cv_loss')
plt.title('LOSS COMPARISON')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(linewidth=0.5, linestyle='--', color='gray')
plt.savefig(MODEL_FOLDER / 'loss_comparison.jpg')

In [None]:
from numpy import polyfit, arange

# Linear trend of the CV loss over the window of epochs [period, period2) (0-based indices).
# The displayed result is the array of fitted coefficients, highest degree first: [slope, intercept].
period = 0
period2 = 10
cv_loss_window = model.history['cv_loss'][period:period2]
# x is sized from the slice actually obtained, so x and y still match if fewer than `period2` epochs were logged
polyfit(x=arange(len(cv_loss_window)), y=cv_loss_window, deg=1)

In [None]:
# Train loss vs. CV loss, plus the linear trend of the CV loss over the epochs [period, period2).
# Changes with respect to the previous version of this cell:
#   * the fit is computed here instead of pasting slope / intercept values from an earlier run;
#   * the x axis no longer depends on the leftover loop variable `loss` of another cell;
#   * the trend line is drawn at its real epoch positions, has its own label, and is plotted
#     before the legend is built so that it shows up in it.
epoch_axis = np.arange(len(model.history['cv_loss']))

cv_loss_window = model.history['cv_loss'][period:period2]
fit_epochs = np.arange(period, period + len(cv_loss_window))
slope, intercept = np.polyfit(fit_epochs, cv_loss_window, deg=1)
polyfit_line = slope * fit_epochs + intercept

fig = plt.figure(figsize=[15, 5], dpi=150)
plt.plot(epoch_axis, model.history['train_loss'], color='orange', label='train_loss')
plt.plot(epoch_axis, model.history['cv_loss'], color='blue', label='cv_loss')
plt.plot(fit_epochs, polyfit_line, color='purple', label='cv_loss linear fit')
plt.title('LOSS COMPARISON')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(linewidth=0.5, linestyle='--', color='gray')