In [1]:
# Enable IPython's autoreload extension in mode 2, which reloads imported
# modules before each cell runs so edits to local .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [43]:
from PIL import Image
import os
import torch
from torch import nn
import torch.nn.functional as F
from torch import optim
from torchvision import datasets, models, transforms
import torchvision
from torch.utils.data.sampler import SubsetRandomSampler
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import splitfolders
from datetime import datetime
import yaml
from sklearn.metrics import classification_report, confusion_matrix

from pipeline_torch_models import *

In [48]:
# Load the run configuration. The context manager closes the file handle as
# soon as parsing is done instead of leaving it open for the kernel's lifetime.
with open('config.yml', 'r') as config_file:
    # NOTE(review): FullLoader can build more than plain data types. If
    # config.yml only contains scalars, lists and mappings, yaml.safe_load
    # would be the stricter choice.
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Seed torch and NumPy from the configured value so runs are repeatable.
seed = config['model_config']['initial_seed']
torch.manual_seed(seed)
np.random.seed(seed)

### Load Data

In [4]:
# define training and test data directories
# NOTE(review): machine-specific absolute path; the notebook only runs as-is
# on a machine where this folder exists.
data_dir  = r'C:\Users\FINAELB\Documents\Aalto\Dayyan\TL\DL_input_data\ScenarioAlpha\Sensor1'
train_valid_dir = os.path.join(data_dir)  # single-argument join: same path as data_dir
#test_dir  = os.path.join(data_dir, 'test')

# First split: 60% / 40% of the input, written under split_data/train and split_data/val.
splitfolders.ratio(input=train_valid_dir, output='split_data', ratio=(0.6, 0.4))
train_dir='split_data/train'
valid_dir='split_data/val'  # temporary value; reassigned after the second split below

# Second split: halve split_data/val into Valid_Test/train and Valid_Test/val,
# which are used from here on as the validation and test directories.
splitfolders.ratio(input='split_data/val', output='Valid_Test', ratio=(0.5, 0.5))
valid_dir='Valid_Test/train'
test_dir='Valid_Test/val'

# Selecting mean and std values according to ImageNet dataset
mean = torch.tensor( [0.485, 0.456, 0.406])
std = torch.tensor([0.229, 0.224, 0.225])

Copying files: 1159 files [00:11, 102.35 files/s]
Copying files: 464 files [00:04, 105.30 files/s]


In [5]:
# Every split gets the same preprocessing: resize to 224x224, convert to a
# tensor, then normalise with the mean/std defined earlier. Each key still
# receives its own Compose instance.
data_transforms = {
    split: transforms.Compose([
        transforms.Resize([224, 224]),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])
    for split in ('train', 'validation', 'test')
}

# Wrap each split directory in an ImageFolder dataset with its transform.
train_data = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
valid_data = datasets.ImageFolder(valid_dir, transform=data_transforms['validation'])
test_data = datasets.ImageFolder(test_dir, transform=data_transforms['test'])

### Load models and prepare for transfer learning (TL)

In [9]:
## MODEL 1: Resnet 18
# Build the pretrained network once; the setup below is everything model_1 needs.
model_1 = models.resnet18(pretrained=True)
layers = list(model_1._modules.keys())

# Freeze the first eight top-level modules so their pretrained weights are
# not updated during training.
layers_frozen = layers[0:8]
for layer in layers_frozen:
    model_1._modules[layer].requires_grad_(False)

# Replace the final fully connected layer with a new one that has one output
# per class in the training set (new layers are trainable by default).
n_inputs = model_1.fc.in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_1.fc = last_layer

model_1 = model_1.to('cuda')



In [10]:
## MODEL 2: Resnet 50
model_2 = models.resnet50(pretrained=True)
layers = list(model_2._modules.keys())

# Switch off gradient updates for every parameter in the first eight
# top-level modules.
layers_frozen = layers[:8]
for layer in layers_frozen:
    model_2._modules[layer].requires_grad_(False)

# Swap the final fully connected layer for one sized to our classes.
n_inputs = model_2.fc.in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_2.fc = last_layer

model_2 = model_2.to('cuda')



In [40]:
## MODEL 3: VGG 16
model_3 = models.vgg16(pretrained=True)
layers = list(model_3._modules.keys())

# NOTE(review): `_modules` lists only the top-level children of the model.
# If this VGG16 exposes just a few of them (torchvision's is presumably
# features/avgpool/classifier - TODO confirm for the installed version), the
# 0:30 slice selects all of them, so the whole pretrained network is frozen
# rather than only the first 30 layers of the convolutional stack.
layers_frozen = layers[:30]
for layer in layers_frozen:
    model_3._modules[layer].requires_grad_(False)

# Swap the last classifier layer for a new one sized to our classes.
n_inputs = model_3.classifier[6].in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_3.classifier[6] = last_layer

model_3 = model_3.to('cuda')



In [39]:
# MODEL 4: AlexNet
model_4 = models.alexnet(pretrained=True)
layers = list(model_4._modules.keys())

# NOTE(review): `_modules` lists only the top-level children of the model.
# If this AlexNet exposes just a few of them (torchvision's is presumably
# features/avgpool/classifier - TODO confirm for the installed version), the
# 0:12 slice selects all of them, so the whole pretrained network is frozen
# rather than only the first 12 layers of the convolutional stack.
layers_frozen = layers[:12]
for layer in layers_frozen:
    model_4._modules[layer].requires_grad_(False)

# Swap the last classifier layer for a new one sized to our classes.
n_inputs = model_4.classifier[6].in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_4.classifier[6] = last_layer

model_4 = model_4.to('cuda')



In [25]:
# MODEL 5: GoogleNet
model_5 = models.googlenet(pretrained=True)
layers = list(model_5._modules.keys())

# Switch off gradient updates for every parameter in the first sixteen
# top-level modules.
layers_frozen = layers[:16]
for layer in layers_frozen:
    model_5._modules[layer].requires_grad_(False)

# Swap the final fully connected layer for one sized to our classes.
n_inputs = model_5.fc.in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_5.fc = last_layer

model_5 = model_5.to('cuda')

In [38]:
# MODEL 6: Inception
model_6 = models.inception_v3(pretrained=True)
layers = list(model_6._modules.keys())

# Freeze the first nineteen top-level modules. The previous loop body only
# read `param.requires_grad` without assigning to it, so nothing was frozen
# and the whole pretrained network stayed trainable.
layers_frozen = layers[0:19]
for layer in layers_frozen:
    model_6._modules[layer].requires_grad_(False)

# Replace the final fully connected layer with a new one that has one output
# per class in the training set (new layers are trainable by default).
n_inputs = model_6.fc.in_features
last_layer = nn.Linear(n_inputs, len(train_data.classes))
model_6.fc = last_layer

model_6 = model_6.to('cuda')

### Training

In [59]:
def rename_model_file(old_name, new_name, directory='models'):
    """Rename a saved model file inside ``directory``.

    Args:
        old_name: Current file name, relative to ``directory``.
        new_name: File name to give it, relative to ``directory``.
        directory: Folder that holds the file. Defaults to ``'models'``.

    Returns:
        None. A message describing the outcome is printed instead.

    An existing ``new_name`` is overwritten. ``os.replace`` is used rather
    than ``os.rename`` because the latter raises ``FileExistsError`` on
    Windows when the destination already exists, which makes re-running a
    rename cell fail.
    """
    source_path = os.path.join(directory, old_name)
    target_path = os.path.join(directory, new_name)

    # Nothing to move: report it and leave the directory untouched.
    if not os.path.exists(source_path):
        print(f"File {old_name} does not exist in the directory {directory}")
        return

    os.replace(source_path, target_path)
    print(f"File renamed from {old_name} to {new_name}")

In [60]:
# Train model_1 through the project's PipelineTorch wrapper, with valid_data
# as the validation set. The third argument is the configured version string
# (presumably used to tag the saved checkpoint - confirm in PipelineTorch.train).
pipeline1 = PipelineTorch(model_1, config)
pipeline1.train(train_data, valid_data, config['model_config']['version'])

Epoch [1/25], Learning Rate: 0.000909
Duration of training at epoch 1 is : 0:00:05.075020 seconds.
Epoch: 1 	Training Loss: 0.090089 	Validation Loss: 0.066050
Validation loss has descreased (inf-->0.066050). Saving model...
Epoch [2/25], Learning Rate: 0.000800
Duration of training at epoch 2 is : 0:00:04.281344 seconds.
Epoch: 2 	Training Loss: 0.086719 	Validation Loss: 0.065087
Validation loss has descreased (0.066050-->0.065087). Saving model...
Epoch [3/25], Learning Rate: 0.000661
Duration of training at epoch 3 is : 0:00:04.266618 seconds.
Epoch: 3 	Training Loss: 0.065116 	Validation Loss: 0.055699
Validation loss has descreased (0.065087-->0.055699). Saving model...
Epoch [4/25], Learning Rate: 0.000507
Duration of training at epoch 4 is : 0:00:04.253103 seconds.
Epoch: 4 	Training Loss: 0.060250 	Validation Loss: 0.060367
seed has been changed. The new torch seed is 40
Epoch [5/25], Learning Rate: 0.000352
Duration of training at epoch 5 is : 0:00:05.172459 seconds.
Epoch: 5

In [55]:
# Move the checkpoint 'model_<version>.pth' to a name specific to model 1
# (presumably so the next training run does not overwrite it).
old_file_name = f"model_{config['model_config']['version']}.pth"
new_file_name = f"model1_{config['model_config']['version']}.pth"
rename_model_file(old_file_name, new_file_name)

File renamed from model_v1.pth to model1_v1.pth


In [57]:
# Train model_2 through the project's PipelineTorch wrapper, with valid_data
# as the validation set. The third argument is the configured version string
# (presumably used to tag the saved checkpoint - confirm in PipelineTorch.train).
pipeline2 = PipelineTorch(model_2, config)
pipeline2.train(train_data, valid_data, config['model_config']['version'])

Epoch [1/25], Learning Rate: 0.000909
Duration of training at epoch 1 is : 0:00:04.568402 seconds.
Epoch: 1 	Training Loss: 0.115180 	Validation Loss: 0.128568
Validation loss has descreased (inf-->0.128568). Saving model...
Epoch [2/25], Learning Rate: 0.000800
Duration of training at epoch 2 is : 0:00:04.311720 seconds.
Epoch: 2 	Training Loss: 0.094763 	Validation Loss: 0.122616
Validation loss has descreased (0.128568-->0.122616). Saving model...
Epoch [3/25], Learning Rate: 0.000661
Duration of training at epoch 3 is : 0:00:04.360997 seconds.
Epoch: 3 	Training Loss: 0.084205 	Validation Loss: 0.107301
Validation loss has descreased (0.122616-->0.107301). Saving model...
Epoch [4/25], Learning Rate: 0.000507
Duration of training at epoch 4 is : 0:00:04.345000 seconds.
Epoch: 4 	Training Loss: 0.078588 	Validation Loss: 0.107207
Validation loss has descreased (0.107301-->0.107207). Saving model...
Epoch [5/25], Learning Rate: 0.000352
Duration of training at epoch 5 is : 0:00:04.36

<pipeline_torch.PipelineTorch at 0x2dcbf5d9810>

In [58]:
# Move the checkpoint 'model_<version>.pth' to a name specific to model 2
# (presumably so the next training run does not overwrite it).
old_file_name = f"model_{config['model_config']['version']}.pth"
new_file_name = f"model2_{config['model_config']['version']}.pth"
rename_model_file(old_file_name, new_file_name)

File renamed from model_v1.pth to model2_v1.pth


In [59]:
# Train model_3 through the project's PipelineTorch wrapper, with valid_data
# as the validation set. The third argument is the configured version string
# (presumably used to tag the saved checkpoint - confirm in PipelineTorch.train).
pipeline3 = PipelineTorch(model_3, config)
pipeline3.train(train_data, valid_data, config['model_config']['version'])

Epoch [1/25], Learning Rate: 0.000909
Duration of training at epoch 1 is : 0:00:10.224354 seconds.
Epoch: 1 	Training Loss: 0.186895 	Validation Loss: 0.124466
Validation loss has descreased (inf-->0.124466). Saving model...
Epoch [2/25], Learning Rate: 0.000800
Duration of training at epoch 2 is : 0:00:16.479202 seconds.
Epoch: 2 	Training Loss: 0.183883 	Validation Loss: 0.126677
seed has been changed. The new torch seed is 20
Epoch [3/25], Learning Rate: 0.000661
Duration of training at epoch 3 is : 0:00:16.510850 seconds.
Epoch: 3 	Training Loss: 0.140070 	Validation Loss: 0.088208
Validation loss has descreased (0.124466-->0.088208). Saving model...
Epoch [4/25], Learning Rate: 0.000507
Duration of training at epoch 4 is : 0:00:16.738884 seconds.
Epoch: 4 	Training Loss: 0.140528 	Validation Loss: 0.087358
Validation loss has descreased (0.088208-->0.087358). Saving model...
Epoch [5/25], Learning Rate: 0.000352
Duration of training at epoch 5 is : 0:00:17.150605 seconds.
Epoch: 5

<pipeline_torch.PipelineTorch at 0x2de2468ced0>

In [60]:
# Move the checkpoint 'model_<version>.pth' to a name specific to model 3
# (presumably so the next training run does not overwrite it).
old_file_name = f"model_{config['model_config']['version']}.pth"
new_file_name = f"model3_{config['model_config']['version']}.pth"
rename_model_file(old_file_name, new_file_name)

File renamed from model_v1.pth to model3_v1.pth


In [61]:
# Train model_4 through the project's PipelineTorch wrapper, with valid_data
# as the validation set. The third argument is the configured version string
# (presumably used to tag the saved checkpoint - confirm in PipelineTorch.train).
pipeline4 = PipelineTorch(model_4, config)
pipeline4.train(train_data, valid_data, config['model_config']['version'])

Epoch [1/25], Learning Rate: 0.000909
Duration of training at epoch 1 is : 0:00:04.957389 seconds.
Epoch: 1 	Training Loss: 0.922505 	Validation Loss: 0.472903
Validation loss has descreased (inf-->0.472903). Saving model...
Epoch [2/25], Learning Rate: 0.000800
Duration of training at epoch 2 is : 0:00:05.957597 seconds.
Epoch: 2 	Training Loss: 0.451330 	Validation Loss: 0.371416
Validation loss has descreased (0.472903-->0.371416). Saving model...
Epoch [3/25], Learning Rate: 0.000661
Duration of training at epoch 3 is : 0:00:04.579316 seconds.
Epoch: 3 	Training Loss: 0.358429 	Validation Loss: 0.252785
Validation loss has descreased (0.371416-->0.252785). Saving model...
Epoch [4/25], Learning Rate: 0.000507
Duration of training at epoch 4 is : 0:00:04.958660 seconds.
Epoch: 4 	Training Loss: 0.343535 	Validation Loss: 0.224966
Validation loss has descreased (0.252785-->0.224966). Saving model...
Epoch [5/25], Learning Rate: 0.000352
Duration of training at epoch 5 is : 0:00:04.38

<pipeline_torch.PipelineTorch at 0x2dd86755f10>

In [62]:
# Move the checkpoint 'model_<version>.pth' to a name specific to model 4
# (presumably so the next training run does not overwrite it).
old_file_name = f"model_{config['model_config']['version']}.pth"
new_file_name = f"model4_{config['model_config']['version']}.pth"
rename_model_file(old_file_name, new_file_name)

File renamed from model_v1.pth to model4_v1.pth


In [63]:
# Train model_5 through the project's PipelineTorch wrapper, with valid_data
# as the validation set. The third argument is the configured version string
# (presumably used to tag the saved checkpoint - confirm in PipelineTorch.train).
pipeline5 = PipelineTorch(model_5, config)
pipeline5.train(train_data, valid_data, config['model_config']['version'])

Epoch [1/25], Learning Rate: 0.000909
Duration of training at epoch 1 is : 0:00:05.143527 seconds.
Epoch: 1 	Training Loss: 1.137219 	Validation Loss: 0.950582
Validation loss has descreased (inf-->0.950582). Saving model...
Epoch [2/25], Learning Rate: 0.000800
Duration of training at epoch 2 is : 0:00:04.353127 seconds.
Epoch: 2 	Training Loss: 0.786643 	Validation Loss: 0.641497
Validation loss has descreased (0.950582-->0.641497). Saving model...
Epoch [3/25], Learning Rate: 0.000661
Duration of training at epoch 3 is : 0:00:04.323425 seconds.
Epoch: 3 	Training Loss: 0.618177 	Validation Loss: 0.504361
Validation loss has descreased (0.641497-->0.504361). Saving model...
Epoch [4/25], Learning Rate: 0.000507
Duration of training at epoch 4 is : 0:00:04.331063 seconds.
Epoch: 4 	Training Loss: 0.502021 	Validation Loss: 0.445196
Validation loss has descreased (0.504361-->0.445196). Saving model...
Epoch [5/25], Learning Rate: 0.000352
Duration of training at epoch 5 is : 0:00:04.35

<pipeline_torch.PipelineTorch at 0x2dd3589ddd0>

In [66]:
# Move the checkpoint 'model_<version>.pth' to a name specific to model 5
# (presumably so a later training run does not overwrite it).
old_file_name = f"model_{config['model_config']['version']}.pth"
new_file_name = f"model5_{config['model_config']['version']}.pth"
rename_model_file(old_file_name, new_file_name)

File renamed from model_v1.pth to model5_v1.pth


### Post-processing

In [98]:
# Trained pipelines to evaluate, in model order.
# NOTE(review): model_6 is built earlier but no pipeline for it appears in
# the visible cells, so it is not evaluated here.
pipelines = [pipeline1, pipeline2, pipeline3, pipeline4, pipeline5]
data_splits = dict(train=train_data, validation=valid_data, test=test_data)

# One empty list per split; the evaluation loop appends one entry per pipeline.
results_acc_loss = {split_name: [] for split_name in data_splits}
results_long = {split_name: [] for split_name in data_splits}

In [102]:
# Run every trained pipeline on every split and collect what predict() returns.
# NOTE: this cell appends to the result dicts, so re-running it without
# re-creating them first duplicates the entries.
for pipeline in pipelines:
    for split_name, split_data in data_splits.items():
        losses, average_loss, predictions, real_labels, acc = pipeline.predict(split_data)

        # Summary entry for this pipeline on this split. 'predictions' now
        # holds the predictions returned by predict(); it previously stored
        # average_loss a second time.
        results_acc_loss[split_name].append({
            'average_loss': average_loss,
            'predictions': predictions,
            'accuracy': acc,
        })

        # Raw losses and ground-truth labels as returned by predict().
        results_long[split_name].append({
            'loss': losses,
            'real_labels': real_labels,
        })

In [100]:
#print(classification_report(real_labels, predictions))
#pprint(confusion_matrix(real_labels, predictions))