In [1]:
import os
import torch
import time
import copy
import numpy as np
import pandas as pd

from torch import nn, optim
from torch.utils.data import Dataset
from torchvision import models, transforms, datasets
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Seed the torch, NumPy and stdlib `random` generators for reproducibility.
# NOTE(review): GPU kernels and DataLoader worker processes may still introduce
# run-to-run differences - confirm if bit-exact repeatability is required.
torch.manual_seed(42)
np.random.seed(42)
import random
random.seed(42)

In [2]:
# Global hyper-parameters shared by every model in this notebook.
num_classes = 5  # output size of the classification head that replaces model.fc
num_epochs = 15  # epochs per fold, forwarded to train_model
batch_size = 4  # mini-batch size of the train and test DataLoaders

In [3]:
# Per-split preprocessing. Both splits share resize -> centre crop -> tensor -> normalise
# (mean/std are the standard ImageNet statistics used by torchvision's pretrained models);
# only the "train" split inserts random rotation and horizontal flip augmentation
# between the crop and the tensor conversion.
data_transforms = {
    split: transforms.Compose(
        [transforms.Resize(256), transforms.CenterCrop(224)]
        + augmentations
        + [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]
    )
    for split, augmentations in (
        ("train", [transforms.RandomRotation(degrees=180), transforms.RandomHorizontalFlip()]),
        ("test", []),
    )
}

In [4]:
def evaluate_model(model, data_loaders, device, average="micro"):
    """Score ``model`` on ``data_loaders["test"]``.

    The model is switched to evaluation mode for the duration of the call and its
    previous train/eval mode is restored afterwards, so the result does not depend on
    the mode the caller left the model in.

    Args:
        model: network whose forward pass returns one row of class scores per sample.
        data_loaders: mapping with a "test" entry that yields (inputs, labels) batches.
        device: device the input batches are moved to before the forward pass.
        average: averaging mode forwarded to precision_score, recall_score and
            f1_score. The default "micro" keeps the original behaviour. Note that for
            single-label multiclass predictions micro-averaged precision, recall and
            F1 are all equal to the accuracy; pass "macro" or "weighted" to obtain
            numbers that are sensitive to per-class performance.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``.
    """
    y_pred = []
    y_true = []

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for inputs, labels in data_loaders["test"]:
                outputs = model(inputs.to(device))

                # Arg-max directly on the raw scores: exp() is monotonic, so it cannot
                # change the winner, but it can overflow to inf and create ties.
                y_pred.extend(torch.argmax(outputs, dim=1).cpu().numpy())
                y_true.extend(labels.cpu().numpy())
    finally:
        # Leave the model in whatever mode the caller had it in.
        model.train(was_training)

    return (
        accuracy_score(y_true, y_pred),
        precision_score(y_true, y_pred, average=average),
        recall_score(y_true, y_pred, average=average),
        f1_score(y_true, y_pred, average=average),
    )

In [5]:
def crossValidateModel(init_function, train_function, device,
                       base_dir="../data/classification/kFold_5/", n_folds=5, num_workers=6):
    """Train and evaluate a freshly initialised model on every fold.

    For each fold ``i`` the images under ``<base_dir>/fold_<i>/train`` and
    ``<base_dir>/fold_<i>/test`` are loaded with ``datasets.ImageFolder`` using the
    module-level ``data_transforms`` and ``batch_size``.

    NOTE: the same "test" loader is handed to ``train_function`` and to
    ``evaluate_model``. If ``train_function`` uses that split to pick its best
    checkpoint (as ``train_model`` in this notebook does), the reported metrics are
    optimistically biased.

    Args:
        init_function: zero-argument callable whose return values are unpacked as the
            leading positional arguments of ``train_function``.
        train_function: callable invoked as
            ``train_function(*init_function(), data_loaders, dataset_sizes)``; must
            return the trained model.
        device: device forwarded to ``evaluate_model``.
        base_dir: directory that contains the ``fold_<i>`` sub-directories.
        n_folds: number of folds to iterate over (``fold_0`` .. ``fold_<n_folds-1>``).
        num_workers: worker processes used by each DataLoader.

    Returns:
        pandas.DataFrame with one row per fold and the columns
        ``accuracy``, ``precision``, ``recall`` and ``f1``.
    """
    splits = ["train", "test"]
    fold_metrics = []

    for fold in range(n_folds):
        fold_dir = os.path.join(base_dir, "fold_{}".format(fold))

        image_datasets = {
            x: datasets.ImageFolder(os.path.join(fold_dir, x), data_transforms[x]) for x in splits
        }
        dataset_sizes = {x: len(image_datasets[x]) for x in splits}
        data_loaders = {
            x: torch.utils.data.DataLoader(
                image_datasets[x],
                batch_size=batch_size,
                # Only the training split benefits from shuffling; the metrics computed
                # on the test split do not depend on sample order.
                shuffle=(x == "train"),
                num_workers=num_workers,
            )
            for x in splits
        }

        model = train_function(*init_function(), data_loaders, dataset_sizes)

        # evaluate_model returns (accuracy, precision, recall, f1) for this fold.
        fold_metrics.append(evaluate_model(model, data_loaders, device))

    return pd.DataFrame(fold_metrics, columns=["accuracy", "precision", "recall", "f1"])

In [6]:
# Run on the first CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# ResNet18

In [7]:
def train_model(model, criterion, optimizer, scheduler, data_loaders, dataset_sizes, device, num_epochs=25):
    """Train ``model`` and return it loaded with its best-scoring weights.

    Every epoch runs a "train" phase (forward, backward, optimizer step) followed by
    a "test" phase (forward only). The scheduler is stepped once per epoch after the
    train phase. The weights of the epoch with the highest "test" accuracy are kept
    and loaded back into the model before it is returned.

    NOTE: checkpoint selection uses the "test" split, so metrics computed later on
    that same split are optimistically biased.

    Args:
        model: network to optimise; it is modified in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer over the model's parameters.
        scheduler: learning-rate scheduler, stepped once per epoch.
        data_loaders: mapping with "train" and "test" iterables of (inputs, labels).
        dataset_sizes: mapping with the number of samples in "train" and "test";
            used to average the loss and accuracy.
        device: device that each batch is moved to.
        num_epochs: number of epochs to run.

    Returns:
        ``model`` with the best "test"-accuracy weights loaded.
    """
    since = time.time()

    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch+1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and testing phase
        for phase in ["train", "test"]:
            is_train = phase == "train"
            model.train(is_train)  # train mode for "train", eval mode for "test"

            # Plain Python accumulators: they stay valid when a loader yields no
            # batches and keep best_acc a float instead of a device tensor.
            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in data_loaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # track gradient history only while training
                with torch.set_grad_enabled(is_train):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # backward + optimize only if in training phase
                    if is_train:
                        loss.backward()
                        optimizer.step()

                # loss is a batch mean; weight it by the batch size so the epoch
                # average is per sample.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += (preds == labels).sum().item()

            if is_train:
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # snapshot the weights whenever the test accuracy improves
            if not is_train and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

        print()

    time_elapsed = time.time() - since
    print(f'Training complete in {time_elapsed // 60:.0f}m {time_elapsed % 60:.0f}s')
    # ".4f" (four decimals) instead of the former "4f" (minimum width 4, six decimals).
    print(f'Best val Acc: {best_acc:.4f}')

    # load best model weights
    model.load_state_dict(best_model_wts)
    return model

In [8]:
def resnet18():
    """Create the ResNet18 fine-tuning setup.

    Loads torchvision's ResNet18 with its default pretrained weights, replaces the
    final fully connected layer with a ``num_classes``-way linear head and moves the
    network to ``device``.

    Returns:
        Tuple ``(model, criterion, optimizer, scheduler)``: the network, a
        cross-entropy loss, an SGD optimizer (lr=0.001, momentum=0.9) over all model
        parameters, and a StepLR schedule that multiplies the learning rate by 0.1
        every 7 scheduler steps.
    """
    net = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)

    # Swap the pretrained classifier for a head sized to this task.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    net = net.to(device)

    loss_fn = nn.CrossEntropyLoss()

    # Every parameter is fine-tuned, not just the new head.
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    step_decay = optim.lr_scheduler.StepLR(sgd, step_size=7, gamma=0.1)

    return net, loss_fn, sgd, step_decay

# Cross-validate the fine-tuned ResNet18. The lambda adapts train_model to the
# six-argument callback that crossValidateModel invokes, binding device and num_epochs.
df_resnet18 = crossValidateModel(
    resnet18,
    lambda model, criterion, optimizer, scheduler, data_loaders, dataset_sizes: train_model(
        model, criterion, optimizer, scheduler, data_loaders, dataset_sizes,
        device, num_epochs=num_epochs,
    ),
    device,
)

Epoch 1/15
----------
train Loss: 1.3192 Acc: 0.5115
test Loss: 3.6831 Acc: 0.1485

Epoch 2/15
----------
train Loss: 0.9644 Acc: 0.6548
test Loss: 4.3349 Acc: 0.2848

Epoch 3/15
----------
train Loss: 0.7158 Acc: 0.7498
test Loss: 1.9586 Acc: 0.4980

Epoch 4/15
----------
train Loss: 0.6104 Acc: 0.7905
test Loss: 2.7854 Acc: 0.4051

Epoch 5/15
----------
train Loss: 0.4834 Acc: 0.8322
test Loss: 3.0663 Acc: 0.4354

Epoch 6/15
----------
train Loss: 0.4302 Acc: 0.8539
test Loss: 4.3997 Acc: 0.4051

Epoch 7/15
----------
train Loss: 0.3107 Acc: 0.8989
test Loss: 4.3451 Acc: 0.3081

Epoch 8/15
----------
train Loss: 0.1921 Acc: 0.9373
test Loss: 2.8645 Acc: 0.5131

Epoch 9/15
----------
train Loss: 0.1736 Acc: 0.9449
test Loss: 3.0373 Acc: 0.5121

Epoch 10/15
----------
train Loss: 0.1546 Acc: 0.9527
test Loss: 3.0042 Acc: 0.5293

Epoch 11/15
----------
train Loss: 0.1397 Acc: 0.9583
test Loss: 3.1494 Acc: 0.4848

Epoch 12/15
----------
train Loss: 0.1540 Acc: 0.9543
test Loss: 2.6739 Ac

In [9]:
# Per-fold ResNet18 metrics (one row per fold). The four columns coincide because the
# metrics are micro-averaged, which equals accuracy for single-label multiclass output.
df_resnet18

Unnamed: 0,accuracy,precision,recall,f1
0,0.529293,0.529293,0.529293,0.529293
1,0.691919,0.691919,0.691919,0.691919
2,0.788675,0.788675,0.788675,0.788675
3,0.699697,0.699697,0.699697,0.699697
4,0.498483,0.498483,0.498483,0.498483


In [13]:
# Mean ResNet18 accuracy across the folds.
df_resnet18["accuracy"].mean()

0.6416135061433343

# ResNet50

In [10]:
def resnet50():
    """Create the ResNet50 fine-tuning setup.

    Loads torchvision's ResNet50 with its default pretrained weights, replaces the
    final fully connected layer with a ``num_classes``-way linear head and moves the
    network to ``device``.

    Returns:
        Tuple ``(model, criterion, optimizer, scheduler)``: the network, a
        cross-entropy loss, an SGD optimizer (lr=0.001, momentum=0.9) over all model
        parameters, and a StepLR schedule that multiplies the learning rate by 0.1
        every 7 scheduler steps.
    """
    net = models.resnet50(weights=models.ResNet50_Weights.DEFAULT)

    # Swap the pretrained classifier for a head sized to this task.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    net = net.to(device)

    loss_fn = nn.CrossEntropyLoss()

    # Every parameter is fine-tuned, not just the new head.
    sgd = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    step_decay = optim.lr_scheduler.StepLR(sgd, step_size=7, gamma=0.1)

    return net, loss_fn, sgd, step_decay

# Cross-validate the fine-tuned ResNet50. The lambda adapts train_model to the
# six-argument callback that crossValidateModel invokes, binding device and num_epochs.
df_resnet50 = crossValidateModel(
    resnet50,
    lambda model, criterion, optimizer, scheduler, data_loaders, dataset_sizes: train_model(
        model, criterion, optimizer, scheduler, data_loaders, dataset_sizes,
        device, num_epochs=num_epochs,
    ),
    device,
)

Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to C:\Users\marcg/.cache\torch\hub\checkpoints\resnet50-11ad3fa6.pth
100.0%


Epoch 1/15
----------
train Loss: 1.0957 Acc: 0.5615
test Loss: 1.9070 Acc: 0.2929

Epoch 2/15
----------
train Loss: 0.5245 Acc: 0.8135
test Loss: 2.1914 Acc: 0.3990

Epoch 3/15
----------
train Loss: 0.3101 Acc: 0.8918
test Loss: 3.0108 Acc: 0.2515

Epoch 4/15
----------
train Loss: 0.2014 Acc: 0.9297
test Loss: 2.9953 Acc: 0.3121

Epoch 5/15
----------
train Loss: 0.1474 Acc: 0.9469
test Loss: 3.2556 Acc: 0.3030

Epoch 6/15
----------
train Loss: 0.1080 Acc: 0.9654
test Loss: 2.3715 Acc: 0.3737

Epoch 7/15
----------
train Loss: 0.0802 Acc: 0.9735
test Loss: 3.2044 Acc: 0.3172

Epoch 8/15
----------
train Loss: 0.0494 Acc: 0.9843
test Loss: 3.0202 Acc: 0.3747

Epoch 9/15
----------
train Loss: 0.0425 Acc: 0.9861
test Loss: 2.9194 Acc: 0.3596

Epoch 10/15
----------
train Loss: 0.0436 Acc: 0.9866
test Loss: 3.1926 Acc: 0.3535

Epoch 11/15
----------
train Loss: 0.0365 Acc: 0.9889
test Loss: 2.9226 Acc: 0.3606

Epoch 12/15
----------
train Loss: 0.0336 Acc: 0.9909
test Loss: 3.1883 Ac

In [11]:
# Per-fold ResNet50 metrics (one row per fold). The four columns coincide because the
# metrics are micro-averaged, which equals accuracy for single-label multiclass output.
df_resnet50

Unnamed: 0,accuracy,precision,recall,f1
0,0.39899,0.39899,0.39899,0.39899
1,0.767677,0.767677,0.767677,0.767677
2,0.782609,0.782609,0.782609,0.782609
3,0.757331,0.757331,0.757331,0.757331
4,0.574317,0.574317,0.574317,0.574317


In [12]:
# Mean ResNet50 accuracy across the folds.
df_resnet50["accuracy"].mean()

0.6561846983485001