In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from PIL import Image
import os
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, models, transforms
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import splitfolders
from datetime import datetime
import yaml
from sklearn.metrics import classification_report, confusion_matrix
import pandas as pd

from pipeline_torch_models import *

In [3]:
# Read the experiment configuration. The context manager closes the file
# handle as soon as parsing is done instead of leaving it open.
# NOTE(review): FullLoader is kept so existing configs parse as before;
# yaml.safe_load is the stricter choice if config.yml only contains plain
# scalars, lists and mappings.
with open('config.yml', 'r') as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Seed torch and NumPy's global RNG from the configured value.
seed = config['model_config']['initial_seed']
torch.manual_seed(seed)
np.random.seed(seed)

In [4]:
# Run-mode switches read by later cells.
# LVL_all:       True  -> train/validation/test are all split out of one folder;
#                False -> train/validation are split, the test set comes from
#                         its own directory.
# skip_training: True  -> the training cells only build their pipelines and
#                         skip the train + rename step.
LVL_all = False
skip_training = False

### Load Data

In [None]:
# Source directory handed to splitfolders (left blank here; fill in before
# running).
data_dir = r''
train_valid_dir = os.path.join(data_dir)

# First split, shared by both modes: 60 % of the images go to
# 'split_data/train', the remaining 40 % to 'split_data/val'.
splitfolders.ratio(input=train_valid_dir, output='split_data', ratio=(0.6, 0.4))
train_dir = 'split_data/train'
valid_dir = 'split_data/val'

if LVL_all:
    # Halve the 40 % hold-out into validation and test. splitfolders names
    # its two outputs 'train' and 'val', hence the mapping below.
    splitfolders.ratio(input='split_data/val', output='Valid_Test', ratio=(0.5, 0.5))
    valid_dir = 'Valid_Test/train'
    test_dir = 'Valid_Test/val'
else:
    # The test set lives in its own directory (left blank here; fill in
    # before running). The earlier os.path.join(data_dir, 'test') value was
    # always overwritten, so it is no longer computed.
    data_dir_test = r''
    test_dir = data_dir_test

# Selecting mean and std values according to ImageNet dataset
mean = torch.tensor([0.485, 0.456, 0.406])
std = torch.tensor([0.229, 0.224, 0.225])

In [6]:
# Every split gets its own (identical) preprocessing pipeline:
# resize to 224x224, convert to a tensor, then normalize with mean/std.
data_transforms = {
    split_name: transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    for split_name in ('train', 'validation', 'test')
}

# Wrap each image directory in an ImageFolder dataset using the matching
# pipeline.
train_data, valid_data, test_data = (
    datasets.ImageFolder(image_dir, transform=data_transforms[split_name])
    for image_dir, split_name in (
        (train_dir, 'train'),
        (valid_dir, 'validation'),
        (test_dir, 'test'),
    )
)

### Load models and prepare for transfer learning

In [None]:
## MODEL 1: ResNet-18
# Build the pretrained network once. This cell used to contain the whole
# load/freeze/replace sequence twice, so the first model was constructed and
# immediately discarded. Dropping the duplicate also means fewer random
# draws, so the new head's initial weights differ from earlier runs.
# NOTE(review): newer torchvision releases prefer the `weights=` argument;
# `pretrained=True` is kept to match the installed version - confirm.
model_1 = models.resnet18(pretrained=True)
layers = list(model_1._modules.keys())

# Freeze the first eight top-level modules. In torchvision's ResNet these are
# conv1, bn1, relu, maxpool and layer1..layer4, i.e. the whole backbone.
layers_frozen = layers[0:8]

for layer in layers_frozen:
    for param in model_1._modules[layer].parameters():
        param.requires_grad = False

# Replace the final fully connected layer with one output per class in the
# training set; the new layer's parameters are trainable by default.
n_inputs = model_1.fc.in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_1.fc = last_layer

# NOTE(review): requires a CUDA device; kept hard-coded because the device
# handling inside PipelineTorch is not visible here.
model_1 = model_1.to('cuda')

In [None]:
## MODEL 2: ResNet-50
model_2 = models.resnet50(pretrained=True)

# Turn off gradients for the first eight top-level modules of the pretrained
# network.
layers = list(model_2._modules.keys())
layers_frozen = layers[0:8]
for frozen_name in layers_frozen:
    model_2._modules[frozen_name].requires_grad_(False)

# Swap the final fully connected layer for a fresh one with one output per
# class in the training set.
model_2.fc = nn.Linear(model_2.fc.in_features, len(train_data.classes))

model_2 = model_2.to('cuda')

In [None]:
## MODEL 3: VGG-16

model_3 = models.vgg16(pretrained=True)
layers = list(model_3._modules.keys())

# torchvision's VGG exposes only three top-level modules (features, avgpool,
# classifier), so the former `layers[0:30]` slice selected all of them even
# though 30 reads like an index into `features`. The freeze-everything
# behaviour is kept, but stated explicitly: only the replacement output layer
# below is trained.
# NOTE(review): if the intent was to fine-tune the whole classifier, freeze
# model_3.features only.
layers_frozen = layers

for layer in layers_frozen:
    for param in model_3._modules[layer].parameters():
        param.requires_grad = False

# Replace the last classifier layer with one output per class in the training
# set; the new layer's parameters are trainable by default.
n_inputs = model_3.classifier[6].in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_3.classifier[6] = last_layer

model_3 = model_3.to('cuda')

In [None]:
# MODEL 4: AlexNet

model_4 = models.alexnet(pretrained=True)
layers = list(model_4._modules.keys())

# torchvision's AlexNet exposes only three top-level modules (features,
# avgpool, classifier), so the former `layers[0:12]` slice selected all of
# them even though 12 reads like an index into `features`. The
# freeze-everything behaviour is kept, but stated explicitly: only the
# replacement output layer below is trained.
# NOTE(review): if the intent was to fine-tune the whole classifier, freeze
# model_4.features only.
layers_frozen = layers

for layer in layers_frozen:
    for param in model_4._modules[layer].parameters():
        param.requires_grad = False

# Replace the last classifier layer with one output per class in the training
# set; the new layer's parameters are trainable by default.
n_inputs = model_4.classifier[6].in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_4.classifier[6] = last_layer

model_4 = model_4.to('cuda')

In [None]:
# MODEL 5: GoogLeNet

model_5 = models.googlenet(pretrained=True)

# Disable gradient updates for the first sixteen top-level modules of the
# pretrained network.
layers = list(model_5._modules.keys())
layers_frozen = layers[:16]
for module_name in layers_frozen:
    for weight in model_5._modules[module_name].parameters():
        weight.requires_grad = False

# Install a new fully connected output layer with one unit per class in the
# training set.
num_classes = len(train_data.classes)
model_5.fc = nn.Linear(model_5.fc.in_features, num_classes)

model_5 = model_5.to('cuda')

### Training

In [14]:
def rename_model_file(old_name, new_name, directory='models'):
    """Rename a file inside ``directory`` and print what happened.

    Args:
        old_name: Current file name, relative to ``directory``.
        new_name: Target file name, relative to ``directory``.
        directory: Folder containing the file. Defaults to ``'models'``.

    Returns:
        None. A missing source file is reported with a printed message
        rather than an exception.
    """
    old_file_path = os.path.join(directory, old_name)
    new_file_path = os.path.join(directory, new_name)

    if not os.path.exists(old_file_path):
        print(f"File {old_name} does not exist in the directory {directory}")
        return

    # os.replace overwrites an existing target on every platform; os.rename
    # raises FileExistsError on Windows when the target is already there,
    # which happens whenever training is re-run for the same version.
    os.replace(old_file_path, new_file_path)
    print(f"File renamed from {old_name} to {new_name}")

In [None]:
# The pipeline is built unconditionally so later cells can use it even when
# training is skipped.
pipeline1 = PipelineTorch(model_1, config)
if not skip_training:
    version = config['model_config']['version']
    pipeline1.train(train_data, valid_data, version)
    # Give the checkpoint a model-specific name so the next model's run does
    # not reuse the same file name.
    # NOTE(review): presumably train() writes 'model_<version>.pth' into
    # 'models/' - confirm against PipelineTorch.
    rename_model_file('model_' + version + '.pth', 'model1_' + version + '.pth')

In [None]:
# The pipeline is built unconditionally so later cells can use it even when
# training is skipped.
pipeline2 = PipelineTorch(model_2, config)
if not skip_training:
    version = config['model_config']['version']
    pipeline2.train(train_data, valid_data, version)
    # Give the checkpoint a model-specific name so the next model's run does
    # not reuse the same file name.
    # NOTE(review): presumably train() writes 'model_<version>.pth' into
    # 'models/' - confirm against PipelineTorch.
    rename_model_file('model_' + version + '.pth', 'model2_' + version + '.pth')

In [None]:
# The pipeline is built unconditionally so later cells can use it even when
# training is skipped.
pipeline3 = PipelineTorch(model_3, config)
if not skip_training:
    version = config['model_config']['version']
    pipeline3.train(train_data, valid_data, version)
    # Give the checkpoint a model-specific name so the next model's run does
    # not reuse the same file name.
    # NOTE(review): presumably train() writes 'model_<version>.pth' into
    # 'models/' - confirm against PipelineTorch.
    rename_model_file('model_' + version + '.pth', 'model3_' + version + '.pth')

In [None]:
# The pipeline is built unconditionally so later cells can use it even when
# training is skipped.
pipeline4 = PipelineTorch(model_4, config)
if not skip_training:
    version = config['model_config']['version']
    pipeline4.train(train_data, valid_data, version)
    # Give the checkpoint a model-specific name so the next model's run does
    # not reuse the same file name.
    # NOTE(review): presumably train() writes 'model_<version>.pth' into
    # 'models/' - confirm against PipelineTorch.
    rename_model_file('model_' + version + '.pth', 'model4_' + version + '.pth')

In [None]:
# The pipeline is built unconditionally so later cells can use it even when
# training is skipped.
pipeline5 = PipelineTorch(model_5, config)
if not skip_training:
    version = config['model_config']['version']
    pipeline5.train(train_data, valid_data, version)
    # Give the checkpoint a model-specific name.
    # NOTE(review): presumably train() writes 'model_<version>.pth' into
    # 'models/' - confirm against PipelineTorch.
    rename_model_file('model_' + version + '.pth', 'model5_' + version + '.pth')

### Post-processing

In [None]:
# Call load_checkpoint on every pipeline with the configured version and the
# pipeline's model number (1-5) as nb_model.
checkpoint_version = config['model_config']['version']
for model_number, current_pipeline in enumerate(
        (pipeline1, pipeline2, pipeline3, pipeline4, pipeline5), start=1):
    current_pipeline.load_checkpoint(checkpoint_version, nb_model=model_number)

In [22]:
# Run every pipeline on the test set. Each predict() call yields five values,
# unpacked here into the per-model variables used by later cells.
(
    (losses1_test, average_loss1_test, predictions1_test, real_labels1_test, acc1_test),
    (losses2_test, average_loss2_test, predictions2_test, real_labels2_test, acc2_test),
    (losses3_test, average_loss3_test, predictions3_test, real_labels3_test, acc3_test),
    (losses4_test, average_loss4_test, predictions4_test, real_labels4_test, acc4_test),
    (losses5_test, average_loss5_test, predictions5_test, real_labels5_test, acc5_test),
) = [
    pipe.predict(test_data)
    for pipe in (pipeline1, pipeline2, pipeline3, pipeline4, pipeline5)
]


In [23]:
# Run every pipeline on the validation set. Each predict() call yields five
# values, unpacked here into the per-model variables used by later cells.
(
    (losses1_valid, average_loss1_valid, predictions1_valid, real_labels1_valid, acc1_valid),
    (losses2_valid, average_loss2_valid, predictions2_valid, real_labels2_valid, acc2_valid),
    (losses3_valid, average_loss3_valid, predictions3_valid, real_labels3_valid, acc3_valid),
    (losses4_valid, average_loss4_valid, predictions4_valid, real_labels4_valid, acc4_valid),
    (losses5_valid, average_loss5_valid, predictions5_valid, real_labels5_valid, acc5_valid),
) = [
    pipe.predict(valid_data)
    for pipe in (pipeline1, pipeline2, pipeline3, pipeline4, pipeline5)
]

In [24]:

# Run every pipeline on the training set. Each predict() call yields five
# values, unpacked here into the per-model variables used by later cells.
(
    (losses1_train, average_loss1_train, predictions1_train, real_labels1_train, acc1_train),
    (losses2_train, average_loss2_train, predictions2_train, real_labels2_train, acc2_train),
    (losses3_train, average_loss3_train, predictions3_train, real_labels3_train, acc3_train),
    (losses4_train, average_loss4_train, predictions4_train, real_labels4_train, acc4_train),
    (losses5_train, average_loss5_train, predictions5_train, real_labels5_train, acc5_train),
) = [
    pipe.predict(train_data)
    for pipe in (pipeline1, pipeline2, pipeline3, pipeline4, pipeline5)
]

In [25]:
# Collect the per-model metrics produced by the predict() cells.
# Only the first element of each accuracy result is kept.
# NOTE(review): what the remaining elements hold is defined by
# PipelineTorch.predict - confirm there.
train_accuracies = [acc[0] for acc in (acc1_train, acc2_train, acc3_train, acc4_train, acc5_train)]
valid_accuracies = [acc[0] for acc in (acc1_valid, acc2_valid, acc3_valid, acc4_valid, acc5_valid)]
test_accuracies = [acc[0] for acc in (acc1_test, acc2_test, acc3_test, acc4_test, acc5_test)]

train_losses = [
    average_loss1_train, average_loss2_train, average_loss3_train,
    average_loss4_train, average_loss5_train,
]
valid_losses = [
    average_loss1_valid, average_loss2_valid, average_loss3_valid,
    average_loss4_valid, average_loss5_valid,
]
test_losses = [
    average_loss1_test, average_loss2_test, average_loss3_test,
    average_loss4_test, average_loss5_test,
]

# One row per model, one column per metric/split combination.
summary_headers = (
    'Model',
    'Train Accuracy', 'Validation Accuracy', 'Test Accuracy',
    'Train Loss', 'Validation Loss', 'Test Loss',
)
summary_values = (
    [f'Model {model_number}' for model_number in range(1, 6)],
    train_accuracies, valid_accuracies, test_accuracies,
    train_losses, valid_losses, test_losses,
)
df = pd.DataFrame(dict(zip(summary_headers, summary_values)))