In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import time

from tqdm.auto import tqdm

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
from torch.optim.lr_scheduler import ReduceLROnPlateau

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')

In [3]:
# Required constants.
ROOT_DIR = '.'
VALID_SPLIT = 0.15
IMAGE_SIZE = 224  # Image size of resize when applying transforms.
BATCH_SIZE = 64
NUM_WORKERS = 4 # Number of parallel processes for data preparation.

In [4]:
pretrained = True
lr = 0.0001
epochs = 50

## Utils

In [5]:
def save_model(epochs, model, optimizer, criterion, pretrained):
    """
    Function to save the trained model to disk.
    """
    torch.save(model.state_dict(), f"{ROOT_DIR}/model.pth")

In [6]:
def get_data_loaders(dataset_train, dataset_valid):
    """
    Prepares the training and validation data loaders.

    :param dataset_train: The training dataset.
    :param dataset_valid: The validation dataset.

    Returns the training and validation data loaders.
    """
    train_loader = DataLoader(
        dataset_train, batch_size=BATCH_SIZE, 
        shuffle=True, num_workers=NUM_WORKERS
    )
    valid_loader = DataLoader(
        dataset_valid, batch_size=BATCH_SIZE, 
        shuffle=False, num_workers=NUM_WORKERS
    )
    return train_loader, valid_loader 

In [7]:
# Image normalization transforms.
def normalize_transform(pretrained):
    if pretrained: # Normalization for pre-trained weights.
        normalize = transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225]
            )
    else: # Normalization when training from scratch.
        normalize = transforms.Normalize(
            mean=[0.5, 0.5, 0.5],
            std=[0.5, 0.5, 0.5]
        )
    return normalize

In [8]:
# Training transforms
def get_train_transform(IMAGE_SIZE, pretrained):
    train_transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.RandomRotation(30,),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        normalize_transform(pretrained)
    ])
    return train_transform

In [9]:
# Validation transforms
def get_valid_transform(IMAGE_SIZE, pretrained):
    valid_transform = transforms.Compose([
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.ToTensor(),
        normalize_transform(pretrained)
    ])
    return valid_transform

In [10]:
def get_datasets(pretrained):

    DATA_DIR = f"{ROOT_DIR}/train"

    dataset = datasets.ImageFolder(
        DATA_DIR, 
        transform=(get_train_transform(IMAGE_SIZE, pretrained))
    )
    dataset_test = datasets.ImageFolder(
        DATA_DIR, 
        transform=(get_valid_transform(IMAGE_SIZE, pretrained))
    )
    dataset_size = len(dataset)

    # Calculate the validation dataset size.
    valid_size = int(VALID_SPLIT*dataset_size)
    # Radomize the data indices.
    indices = torch.randperm(len(dataset)).tolist()
    # Training and validation sets.
    dataset_train = Subset(dataset, indices[:-valid_size])
    dataset_valid = Subset(dataset_test, indices[-valid_size:])

    class_weights = 1/torch.Tensor([dataset.targets.count(i) for i in range(len(dataset.classes))])

    return dataset_train, dataset_valid, dataset.classes, class_weights

In [11]:
def get_data_loaders(dataset_train, dataset_valid):
    """
    Prepares the training and validation data loaders.

    :param dataset_train: The training dataset.
    :param dataset_valid: The validation dataset.

    Returns the training and validation data loaders.
    """
    train_loader = DataLoader(
        dataset_train, batch_size=BATCH_SIZE, 
        shuffle=True, num_workers=NUM_WORKERS
    )
    valid_loader = DataLoader(
        dataset_valid, batch_size=BATCH_SIZE, 
        shuffle=False, num_workers=NUM_WORKERS
    )
    return train_loader, valid_loader 

In [12]:
import torchvision.models as models
import torch.nn as nn
def build_model(pretrained=True, fine_tune=True, num_classes=10):
    if pretrained:
        print('[INFO]: Loading pre-trained weights')
    else:
        print('[INFO]: Not loading pre-trained weights')
    model = models.efficientnet_b0(pretrained=pretrained)
    if fine_tune:
        print('[INFO]: Fine-tuning all layers...')
        for params in model.parameters():
            params.requires_grad = True
    elif not fine_tune:
        print('[INFO]: Freezing hidden layers...')
        for params in model.parameters():
            params.requires_grad = False
            
    num_features = model.classifier[-1].in_features
    # Change the final classification head.
    model.classifier[-1] = nn.Sequential(
      nn.Dropout(p=0.5),
      nn.Linear(num_features,num_classes),
      nn.Softmax(dim=1) 
    )
#     model.classifier[-1] =  nn.Linear(num_features,num_classes)

    return model


In [13]:
device = ('cuda' if torch.cuda.is_available() else 'cpu')

In [14]:
# Training function.
def train(model, trainloader, optimizer, criterion):
    model.train()
    print('Training')
    train_running_loss = 0.0
    train_running_correct = 0
    counter = 0
    for i, data in tqdm(enumerate(trainloader), total=len(trainloader)):
        counter += 1
        image, labels = data
        image = image.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        # Forward pass.
        outputs = model(image)
        # Calculate the loss.
        loss = criterion(outputs, labels)
        train_running_loss += loss.item()
        # Calculate the accuracy.
        _, preds = torch.max(outputs.data, 1)
        train_running_correct += (preds == labels).sum().item()
        # Backpropagation
        loss.backward()
        # Update the weights.
        optimizer.step()
    
    # Loss and accuracy for the complete epoch.
    epoch_loss = train_running_loss / counter
    epoch_acc = 100. * (train_running_correct / len(trainloader.dataset))
    return epoch_loss, epoch_acc

In [15]:
# Validation function.
def validate(model, testloader, criterion):
    model.eval()
    print('Validation')
    valid_running_loss = 0.0
    valid_running_correct = 0
    counter = 0
    with torch.no_grad():
        for i, data in tqdm(enumerate(testloader), total=len(testloader)):
            counter += 1
            
            image, labels = data
            image = image.to(device)
            labels = labels.to(device)
            # Forward pass.
            outputs = model(image)
            # Calculate the loss.
            loss = criterion(outputs, labels)
            valid_running_loss += loss.item()
            # Calculate the accuracy.
            _, preds = torch.max(outputs.data, 1)
            valid_running_correct += (preds == labels).sum().item()
        
    # Loss and accuracy for the complete epoch.
    epoch_loss = valid_running_loss / counter
    epoch_acc = 100. * (valid_running_correct / len(testloader.dataset))
    return epoch_loss, epoch_acc

## Main

In [16]:
dataset_train, dataset_valid, dataset_classes, class_weights = get_datasets(pretrained=pretrained)
print(f"[INFO]: Number of training images: {len(dataset_train)}")
print(f"[INFO]: Number of validation images: {len(dataset_valid)}")
print(f"[INFO]: Class weights: {class_weights}")
print(f"[INFO]: Class names: {dataset_classes}\n")

[INFO]: Number of training images: 11189
[INFO]: Number of validation images: 1974
[INFO]: Class weights: tensor([0.0008, 0.0007, 0.0005, 0.0008, 0.0003, 0.0015, 0.0017, 0.0010, 0.0018,
        0.0021, 0.0018])
[INFO]: Class names: ['canal', 'electric', 'flooding', 'light', 'road', 'sanitary', 'sewer', 'sidewalk', 'spam', 'stray', 'traffic']



In [17]:
train_loader, valid_loader = get_data_loaders(dataset_train, dataset_valid)

In [18]:
print(f"Computation device: {device}")
print(f"Learning rate: {lr}")
print(f"Epochs to train for: {epochs}\n")


Computation device: cuda
Learning rate: 0.0001
Epochs to train for: 50



In [19]:
model = build_model(
    pretrained=pretrained, 
    fine_tune=True, 
    num_classes=len(dataset_classes)
).to(device)

[INFO]: Loading pre-trained weights
[INFO]: Fine-tuning all layers...




In [20]:
# Total parameters and trainable parameters.
total_params = sum(p.numel() for p in model.parameters())
print(f"{total_params:,} total parameters.")
total_trainable_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad)
print(f"{total_trainable_params:,} training parameters.")

4,021,639 total parameters.
4,021,639 training parameters.


In [21]:
# Optimizer.
optimizer = optim.Adam(model.parameters(), lr=lr)
# Loss function.
criterion = nn.CrossEntropyLoss(weight=class_weights.to(device))


In [22]:
# Lists to keep track of losses and accuracies.
train_loss, valid_loss = [], []
train_acc, valid_acc = [], []
min_val_loss, counter  = 1000, 0
# Start the training.
for epoch in range(epochs):
    print(f"[INFO]: Epoch {epoch+1} of {epochs}")
    train_epoch_loss, train_epoch_acc = train(model, train_loader, 
                                            optimizer, criterion)
    valid_epoch_loss, valid_epoch_acc = validate(model, valid_loader,  
                                                criterion)
    train_loss.append(train_epoch_loss)
    valid_loss.append(valid_epoch_loss)
    train_acc.append(train_epoch_acc)
    valid_acc.append(valid_epoch_acc)
    print(f"Training loss: {train_epoch_loss:.3f}, training acc: {train_epoch_acc:.3f}")
    print(f"Validation loss: {valid_epoch_loss:.3f}, validation acc: {valid_epoch_acc:.3f}")
    print('[INFO]: Learning rate: ', optimizer.param_groups[0]['lr'])
    if valid_epoch_loss < min_val_loss :
        print(f"[INFO]: Save model state at ecpoch {epoch+1} (min validation loss)")
        save_model(epochs, model, optimizer, criterion, pretrained)
        min_val_loss = valid_epoch_loss
        counter = 0
    else :
        counter += 1
    
    if counter >= 5:
        print("[INFO]: stop training validation loss didn't change after 10 epoch")
        break
    print('-'*50)
    


[INFO]: Epoch 1 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 2.266, training acc: 29.842
Validation loss: 2.117, validation acc: 43.617
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 1 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 2 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 2.037, training acc: 46.009
Validation loss: 1.993, validation acc: 50.912
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 2 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 3 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.959, training acc: 52.301
Validation loss: 1.958, validation acc: 56.130
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 3 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 4 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.923, training acc: 56.743
Validation loss: 1.942, validation acc: 57.497
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 4 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 5 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.897, training acc: 59.094
Validation loss: 1.937, validation acc: 58.359
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 5 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 6 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.882, training acc: 60.774
Validation loss: 1.932, validation acc: 58.663
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 6 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 7 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.868, training acc: 61.507
Validation loss: 1.927, validation acc: 58.561
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 7 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 8 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.853, training acc: 63.321
Validation loss: 1.921, validation acc: 58.916
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 8 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 9 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.841, training acc: 64.268
Validation loss: 1.917, validation acc: 60.537
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 9 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 10 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.833, training acc: 64.849
Validation loss: 1.917, validation acc: 60.588
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 10 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 11 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.821, training acc: 65.940
Validation loss: 1.919, validation acc: 60.537
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 12 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.815, training acc: 66.887
Validation loss: 1.925, validation acc: 60.385
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 13 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.810, training acc: 67.370
Validation loss: 1.921, validation acc: 60.740
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 14 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.800, training acc: 68.675
Validation loss: 1.925, validation acc: 59.625
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 15 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.796, training acc: 68.424
Validation loss: 1.917, validation acc: 61.145
[INFO]: Learning rate:  0.0001
[INFO]: Save model state at ecpoch 15 (min validation loss)
--------------------------------------------------
[INFO]: Epoch 16 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.790, training acc: 69.443
Validation loss: 1.918, validation acc: 61.044
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 17 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.787, training acc: 69.622
Validation loss: 1.921, validation acc: 60.334
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 18 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.780, training acc: 70.775
Validation loss: 1.925, validation acc: 60.588
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 19 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.776, training acc: 71.141
Validation loss: 1.923, validation acc: 61.246
[INFO]: Learning rate:  0.0001
--------------------------------------------------
[INFO]: Epoch 20 of 50
Training


  0%|          | 0/175 [00:00<?, ?it/s]

Validation


  0%|          | 0/31 [00:00<?, ?it/s]

Training loss: 1.772, training acc: 71.633
Validation loss: 1.932, validation acc: 60.486
[INFO]: Learning rate:  0.0001
[INFO]: stop training validation loss didn't change after 10 epoch


In [23]:
# torch.save(model.state_dict(), f"{ROOT_DIR}/model_overfit.pth")
# print('saved')