In [16]:
import torch
import os
import torchvision
from input_pipeline import ImageLoader
from data_manager.manage_csv import *
import pandas as pd
from evaluation_metrics import eval_scores, print_eval_scores
from input_pipeline import create_dataset_split
import numpy as np

In [17]:
""" Define batch size, number of epochs for training and dataset paths """

PATH_TO_IMAGES = 'data/img_align_celeba'
PATH_TO_LABELS = 'data/list_attr_celeba.csv'
PATH_TO_MODELS = 'models/'
PATH_TO_VALIDATION_SCORES = 'metadata/validation_scores.csv'
BATCH_SIZE = 128
NUM_EPOCHS = 2


In [18]:
""" Load pretrained model and dataset """ 

model = torch.hub.load('pytorch/vision:v0.10.0', 'mobilenet_v2', weights=torchvision.models.MobileNet_V2_Weights.IMAGENET1K_V2)
dataset = ImageLoader(PATH_TO_IMAGES, PATH_TO_LABELS, augment=False)
train_data, val_data, test_data = create_dataset_split(dataset=dataset, batch_size=BATCH_SIZE)

Using cache found in /Users/peterbrezovcsik/.cache/torch/hub/pytorch_vision_v0.10.0


In [19]:
""" Create classifier for the core model """

classifier_1 = torch.nn.Sequential(
    torch.nn.Dropout(0.3),
    torch.nn.Linear(1280, 128),
    torch.nn.BatchNorm1d(128),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(128, 64),
    torch.nn.BatchNorm1d(64),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.1),
    torch.nn.Linear(64, 40)
)


In [20]:
classifier_2 = torch.nn.Sequential(
    torch.nn.Dropout(0.3),
    torch.nn.Linear(1280, 128),
    torch.nn.BatchNorm1d(128),
    torch.nn.ReLU(),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(128, 40)
)

In [21]:
classifier_3 = torch.nn.Sequential(
    torch.nn.Dropout(0.3),
    torch.nn.Linear(1280, 40)
)
model.classifier = classifier_3

In [22]:
""" Reset weight parameters of the classifier """

for layer in model.classifier.children():
    if hasattr(layer, 'reset_parameters'):
        layer.reset_parameters()

In [23]:
""" Define loss function and optimizer """

loss = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [1]:
""" Define model metadata to store in model_metadata.csv """

VERSION_NUM = 4

MODEL_ID = 'MOBILE_NET_V2' + '_' + str(VERSION_NUM)
NUM_OF_HEAD_LAYERS = len(model.classifier) 
LOSS_NAME = loss.__class__.__name__
OPTI_NAME = optimizer.__class__.__name__
LEARNING_RATE = optimizer.defaults['lr']
CLASSIFICATION_THRESHOLD = 0.6



""" Define model metadata file path and header """

MODEL_METADATA_PATH = 'metadata/'
MODEL_METADATA_FILE = 'model_metadata.csv'
MODEL_METADATA_HEADER = ['model_id', 'num_of_head_layers', 'batch_size', 'num_epochs', 'loss_fn', 'optimizer', 'learning_rate', 'threshold']
MODEL_ARGS = [MODEL_ID, NUM_OF_HEAD_LAYERS, BATCH_SIZE, NUM_EPOCHS, LOSS_NAME, OPTI_NAME, LEARNING_RATE, CLASSIFICATION_THRESHOLD]


""" Define class_wise_accuracy file path and header """

CLASS_WISE_ACCURACY_FILE = 'class_wise_accuracy.csv'
CLASS_WISE_ACCURACY_HEADER = dataset.attr_names


""" Define validation scores file path and header """

VALIDATION_SCORES_FILE = 'validation_scores.csv' 
VALIDATION_SCORES_HEADER = ['model_id', 'epoch', 'f1_score', 'recall_score', 'precision_score', 'hamming_loss', 'hamming_score', 'partial_accuracy', 'loss']

NameError: name 'model' is not defined

In [25]:
def save_model(model_id, current_accuracy):
    df = pd.read_csv(PATH_TO_VALIDATION_SCORES)
    best_accuracy = df.loc[df['model_id'] == model_id]['partial_accuracy'].max()
    if current_accuracy > best_accuracy or best_accuracy is np.nan:
        print('Saving model...')
        model_path = model_id + '.pth'
        path = os.path.join(PATH_TO_MODELS, model_path)
        torch.save(model.state_dict(), path)

In [26]:
metric_saver = MetricSaver(MODEL_METADATA_PATH)

metric_saver.create_model_metadata_csv(MODEL_METADATA_FILE, MODEL_METADATA_HEADER)
metric_saver.create_class_wise_acc_csv(CLASS_WISE_ACCURACY_FILE, *CLASS_WISE_ACCURACY_HEADER)
metric_saver.create_validation_scores_csv(VALIDATION_SCORES_FILE, VALIDATION_SCORES_HEADER)

metric_saver.save_model_metadata(*MODEL_ARGS)

model_metadata.csv.csv already exists.
class_wise_accuracy.csv already exists.
validation_scores.csv already exists.
Saving model metadata...


In [27]:
""" Validation function for the classifier """

def validate(model, val_data, epoch, model_id, loss_fn):
    model.eval()
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    
    avg_partial_accuracy = avg_f1_score = avg_recall_score = avg_precision_score = avg_hamming_loss = avg_hamming_score = avg_loss = 0.0
    avg_label_wise_accuracy_score = np.zeros(shape=(40))

    NUM_OF_BATCHES = len(val_data)
    
    for i, batch in enumerate(val_data):
        with torch.no_grad():
            images, labels = batch
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)

            loss = loss_fn(outputs, labels)

            result = outputs > CLASSIFICATION_THRESHOLD

            f1_score, recall, precision, hamming_loss, ham_score, partial_accuracy, label_wise_accuracy =\
                        eval_scores(labels.cpu().detach().numpy(), result.cpu().detach().numpy(), loss.item(), print_out=True, epoch=epoch, batch=i)
            
            ### Increase average scores by the current batch scores
            avg_f1_score += f1_score
            avg_recall_score += recall
            avg_precision_score += precision
            avg_hamming_loss += hamming_loss
            avg_hamming_score += ham_score
            avg_partial_accuracy += partial_accuracy
            avg_label_wise_accuracy_score += label_wise_accuracy
            avg_loss += loss.item()
            
    ### Calculate average scores
    avg_f1_score /= NUM_OF_BATCHES
    avg_recall_score /= NUM_OF_BATCHES
    avg_precision_score /= NUM_OF_BATCHES
    avg_hamming_loss /= NUM_OF_BATCHES
    avg_hamming_score /= NUM_OF_BATCHES
    avg_partial_accuracy /= NUM_OF_BATCHES
    avg_label_wise_accuracy_score /= NUM_OF_BATCHES
    avg_loss /= NUM_OF_BATCHES
    
    

    ### Save validation scores to csv file
    save_model(model_id, avg_partial_accuracy)
    metric_saver.save_class_wise_accuracy(model_id, epoch+1, *avg_label_wise_accuracy_score)
    metric_saver.save_validation_scores(model_id, epoch+1, *(avg_f1_score, avg_recall_score, avg_precision_score, avg_hamming_loss, avg_hamming_score, avg_partial_accuracy, avg_loss))

    print_eval_scores(avg_f1_score, avg_recall_score, avg_precision_score, avg_hamming_loss, avg_hamming_score, avg_partial_accuracy, avg_label_wise_accuracy_score, avg_loss)


In [28]:
""" Training function for the classifier """

def fit(model, train_data, val_data, optimizer, loss_fn, epochs, model_id):
        
        # Froze feature layers
        for param in model.features.parameters():
            param.requires_grad = False

        device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

        for epoch in range(epochs):
            model.train(mode=True)
            
            for i, batch in enumerate(train_data):
                images, labels = batch
                images = images.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()
                outputs = model(images)
                loss = loss_fn(outputs, labels)
                loss.backward()
                optimizer.step()
                
                result = outputs > CLASSIFICATION_THRESHOLD
                # Measure model performance for every batch
                f1_score, recall, precision, hamming_loss, ham_score, partial_accuracy, label_wise_accuracy =\
                      eval_scores(labels.cpu().detach().numpy(), result.cpu().detach().numpy(), loss.item(), print_out=True, epoch=epoch, batch=i)
            
            # Evaluetion metrics for every epoch
                
            validate(model, val_data, epoch, model_id, loss_fn)

In [29]:
""" Train the model """

fit(model, train_data, val_data, optimizer, loss, NUM_EPOCHS, MODEL_ID)



F1 score:
0.008987713675213676
Recall:
0.005072206439393939
Precision:
0.045572916666666664
Hamming loss:
0.2193359375
Hamming score:
0.00490817775974026
Partial accuracy:
0.7806640625
Label wise accuracy:
[0.890625  0.7734375 0.515625  0.8125    0.953125  0.890625  0.7890625
 0.8046875 0.7578125 0.84375   0.9375    0.7578125 0.8828125 0.9375
 0.9375    0.921875  0.921875  0.96875   0.7109375 0.59375   0.53125
 0.578125  0.9453125 0.9140625 0.1484375 0.6796875 0.9296875 0.7109375
 0.9140625 0.9375    0.9375    0.546875  0.796875  0.65625   0.84375
 0.9375    0.6015625 0.8984375 0.8984375 0.21875  ]
Loss:
0.6944727301597595



F1 score:
0.017355647824397823
Recall:
0.00962374533146592
Precision:
0.09375
Hamming loss:
0.21640625
Hamming score:
0.00962374533146592
Partial accuracy:
0.78359375
Label wise accuracy:
[0.890625  0.7109375 0.5234375 0.75      0.9765625 0.8984375 0.7578125
 0.828125  0.71875   0.875     0.9375    0.8125    0.890625  0.96875
 0.9609375 0.921875  0.90625   0.9687