# Import modules

In [1]:
import os
import cv2
import time
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from sklearn.metrics import confusion_matrix

from torch.utils.data import DataLoader, Dataset
from torch.utils.data import RandomSampler

import torchvision.transforms as T
import torchvision.models as models
from torchvision.utils import make_grid
from torchvision.datasets import ImageFolder

from matplotlib import pyplot as plt
from dataloader import *

In [2]:
# Dataset roots, given relative to the notebook's working directory
TRAIN_DIR = '../dataset/train/'
TEST_DIR = '../dataset/test/'

# Check hardware accelerator: use the first CUDA GPU when available, else the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

cuda:0


In [3]:
### Exploring Dataset

# Every non-hidden sub-directory of TRAIN_DIR is treated as one class.
# Stray files (e.g. .DS_Store) and hidden folders (e.g. .ipynb_checkpoints)
# are skipped: they are not classes and listing a plain file would raise.
classes = [
    entry for entry in os.listdir(TRAIN_DIR)
    if not entry.startswith('.') and os.path.isdir(os.path.join(TRAIN_DIR, entry))
]
print("Total Classes: ",len(classes))

train_count = 0
test_count = 0

for _class in classes:
    # os.path.join keeps this correct whether or not the roots end with a slash
    train_count += len(os.listdir(os.path.join(TRAIN_DIR, _class)))

    # A class may have no folder in the test split; count it as zero images
    test_class_dir = os.path.join(TEST_DIR, _class)
    if os.path.isdir(test_class_dir):
        test_count += len(os.listdir(test_class_dir))

print("Total train images: ",train_count)
print("Total test images: ",test_count)

Total Classes:  4
Total train images:  7330
Total test images:  34


In [4]:
# Utility to apply transforms
def get_transform():
  """Build the preprocessing pipeline used for both dataset splits.

  The pipeline is a single Normalize step with mean 127.5 and std 127.5,
  i.e. (x - 127.5) / 127.5. Both values are plain scalars.
  NOTE(review): presumably the dataset yields pixel values in 0..255, which
  this would map to roughly -1..1 -- confirm against CustomDataset.

  Returns:
    A T.Compose wrapping the Normalize transform.
  """
  half_range = 127.5
  steps = [T.Normalize(mean=half_range, std=half_range)]
  return T.Compose(steps)

# Loading Classification Dataset

In [5]:
# Each split gets its own, identically configured transform pipeline
train_dataset = CustomDataset(TRAIN_DIR, transforms=get_transform())
test_dataset = CustomDataset(TEST_DIR, transforms=get_transform())

# Report the number of samples in the train split, then in the test split
for split in (train_dataset, test_dataset):
    print(len(split))

7330
34


In [6]:
# Training batches are reshuffled every epoch; test batches keep dataset order
train_data_loader = DataLoader(
    dataset=train_dataset,
    batch_size=64,
    shuffle=True,
)
test_data_loader = DataLoader(
    dataset=test_dataset,
    batch_size=64,
    shuffle=False,
)

In [7]:
# Number of mini-batches the training loader reports per epoch
num_train_batches = len(train_data_loader)
print(num_train_batches)

58


In [8]:
def set_device():
  """Return the torch.device to run on.

  Returns:
    torch.device("cuda:0") when CUDA is available, otherwise torch.device("cpu").
  """
  use_gpu = torch.cuda.is_available()
  return torch.device("cuda:0" if use_gpu else "cpu")

In [9]:
# Folder (inside the current working directory) that receives model checkpoints
name = 'models'
modelDir = os.path.join(os.getcwd(), name)

try:
    os.makedirs(modelDir)
except FileExistsError:
    # Re-running the notebook is fine: reuse the folder from a previous run
    print("Directory already exists!")

Directory already exists!


# Define Training Loop

In [10]:
# Training function
def train_model(model, train_loader, test_loader, criterion, optimizer, scheduler, num_epochs):
  """Train `model` for `num_epochs` epochs and evaluate it after every epoch.

  Args:
    model: network to optimise. Batches are moved to the device returned by
      set_device(); the model is assumed to already be on that device.
    train_loader: iterable yielding (images, labels) batches for optimisation.
    test_loader: iterable of (images, labels) batches, passed to evaluate_model().
    criterion: loss callable invoked as criterion(outputs, labels).
    optimizer: optimizer whose zero_grad()/step() perform the weight updates.
    scheduler: learning-rate scheduler stepped (without arguments) once per
      epoch, after that epoch's optimizer steps. May be None to disable.
    num_epochs: number of passes over `train_loader`.

  Returns:
    Tuple (model, train_acc, train_loss, test_acc):
      train_acc  -- list with one training accuracy (percent) per epoch,
      train_loss -- list with one mean per-batch loss per epoch,
      test_acc   -- list collecting what evaluate_model() returned each epoch.
  """
  device = set_device()
  train_loss = []
  train_acc = []
  test_acc = []  # defined up front so num_epochs == 0 still returns cleanly

  for epoch in range(num_epochs):
    print("Epoch number {}".format(epoch + 1))
    start = time.time()
    model.train()
    running_loss = 0.0
    running_correct = 0
    total = 0
    num_batches = 0

    # Training: iterate the loader passed in, not a notebook-level global
    for images, labels in train_loader:
      images = images.to(device)
      labels = labels.to(device)
      total += labels.size(0)
      num_batches += 1

      #Reset Grads
      optimizer.zero_grad()

      #Forward ->
      outputs = model(images)

      # Predicted class = index of the largest score along dim 1
      predicted = outputs.detach().argmax(dim=1)

      #Calculate Loss & Backward, Update Weights (Step)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      running_loss += loss.item()
      running_correct += (predicted == labels).sum().item()

    # Advance the LR schedule once per epoch, after the optimizer updates
    if scheduler is not None:
      scheduler.step()

    # Guard the averages so an empty loader reports 0 instead of dividing by zero
    epoch_loss = running_loss / num_batches if num_batches else 0.0
    epoch_acc = 100.00 * running_correct / total if total else 0.0

    train_loss.append(epoch_loss)
    train_acc.append(epoch_acc)

    print("  - Training dataset: Got %d out of %d images correctly (%.3f%%). \nEpoch loss: %.3f"
        % (running_correct, total, epoch_acc, epoch_loss))

    # evaluate_model returns a list, so extend keeps test_acc flat
    test_acc.extend(evaluate_model(model, test_loader))

    end = time.time()

    print("-  Epoch Time : {} \n".format(int(end-start)))

  print('Finished')
  return model, train_acc, train_loss, test_acc

In [11]:
# Testing function
def evaluate_model(model, test_loader):
  """Measure the accuracy of `model` on `test_loader` and checkpoint new bests.

  The model is switched to eval mode and run without gradient tracking.
  When the measured accuracy beats the best value seen by previous calls,
  the model's state_dict is written to <modelDir>/best_model.pth.

  The best accuracy is remembered on the function object itself
  (`evaluate_model.best_acc`), because a local variable would be reset on
  every call and the "best" checkpoint would be overwritten every epoch.
  Re-running the defining cell, or setting `evaluate_model.best_acc = 0.0`,
  resets it before a new training run.

  Args:
    model: network to evaluate. Batches are moved to the device returned by
      set_device(); the model is assumed to already be on that device.
    test_loader: iterable yielding (images, labels) batches.

  Returns:
    A single-element list holding the accuracy in percent (0.0 when the
    loader yields no samples).
  """
  model.eval()
  device = set_device()
  predicted_correctly_on_epoch = 0
  total = 0

  with torch.no_grad():
    # Iterate the loader passed in, not a notebook-level global
    for images, labels in test_loader:
      images = images.to(device)
      labels = labels.to(device)
      total += labels.size(0)

      outputs = model(images)
      # Predicted class = index of the largest score along dim 1
      predicted = outputs.argmax(dim=1)

      predicted_correctly_on_epoch += (predicted == labels).sum().item()

  # Guard against an empty loader instead of dividing by zero
  epoch_acc = 100.0 * predicted_correctly_on_epoch / total if total else 0.0

  # Only overwrite the checkpoint when this evaluation is a new best
  best_acc = getattr(evaluate_model, "best_acc", 0.0)
  if epoch_acc > best_acc:
    evaluate_model.best_acc = epoch_acc
    torch.save(model.state_dict(), os.path.join(modelDir, 'best_model.pth'))

  print("  - Testing dataset: Got %d out of %d images correctly (%.3f%%)"
        % (predicted_correctly_on_epoch, total, epoch_acc))

  return [epoch_acc]

# Define the model

In [12]:
# Alternative backbone (ResNet-18), kept for reference -- swap in by
# uncommenting these lines and commenting out the MobileNet block below:
# model = models.resnet18(pretrained=True)
# num_features = model.fc.in_features
# num_classes = 4
# model.fc = nn.Linear(num_features, num_classes)
# print(model)

# MobileNetV2 with pretrained weights: replace the final classifier layer
# (index 1 of model.classifier) with a fresh Linear sized for our classes
num_classes = 4
model = models.mobilenet_v2(pretrained=True)
num_features = model.classifier[1].in_features
new_head = nn.Linear(num_features, num_classes)
model.classifier[1] = new_head
print(model)

MobileNetV2(
  (features): Sequential(
    (0): ConvNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU6(inplace=True)
    )
    (1): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU6(inplace=True)
        )
        (1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (2): InvertedResidual(
      (conv): Sequential(
        (0): ConvNormActivation(
          (0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (1): BatchNorm2d(96, eps=1e-05,

# Define Hyperparameters

In [13]:
# Training length
EPOCHS = 20

# Move the network to the selected device before handing its parameters to Adam
model = model.to(device)

# Loss, optimizer, and a step schedule (gamma 0.1 with step_size 10)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)

# Training phase

In [14]:
# train_model returns (model, train_acc, train_loss, test_acc); unpack in that
# order so the accuracy and loss histories land in the correctly named variables
model_trained, TRAIN_ACC, TRAIN_LOSS, TEST_ACC = train_model(
    model,
    train_data_loader,
    test_data_loader,
    criterion,
    optimizer,
    lr_scheduler,
    num_epochs=EPOCHS,
)

Epoch number 1


RuntimeError: CUDA out of memory. Tried to allocate 114.00 MiB (GPU 0; 12.00 GiB total capacity; 9.75 GiB already allocated; 0 bytes free; 9.79 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Save the final model as well (weights only, next to the other checkpoints)
final_model_path = os.path.join(modelDir, 'model.pth')
torch.save(model.state_dict(), final_model_path)