Importing required libraries

In [1]:
import os
import matplotlib.pyplot as plt
import cv2
import random
import numpy as np
from tqdm import tqdm
import copy
import time

import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import random_split
import torchvision.transforms as transforms
from torchvision import models
from torch import optim, nn

In [2]:
device = 'cpu'

if torch.cuda.is_available() :
    device = 'cuda'

Creating dataset

In [3]:
class ImageDataset(Dataset) :

    def __init__(self, transform) :
        self.root_path = 'PACS/kfold/'

        # Listing the domains
        self.domains = os.listdir(self.root_path)

        # Listing the classes 
        self.classes = os.listdir(self.root_path+'cartoon')

        # Transformations
        self.transforms = transform

        self.images = []
        self.domains_y = []
        self.classes_y = []

        for i_dom, domain in enumerate(self.domains) :
            for i_cla, cla in enumerate(self.classes) :
                for image in os.listdir(self.root_path+domain+'/'+cla) :
                    # Finding image path
                    image_path = self.root_path+domain+'/'+cla+'/'+image
                    img = cv2.imread(image_path)
                    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                    self.images.append(img)

                    # One hot encoding domain
                    domainVector = np.zeros(5)
                    domainVector[-1] = 1
                    domainVector[i_dom] = 1
                    self.domains_y.append(domainVector)

                    # One hot encoding class
                    classVector = np.zeros(7)
                    classVector[i_cla] = 1
                    self.classes_y.append(classVector)

        self.images = np.array(self.images)
        self.domains_y = np.array(self.domains_y)
        self.classes_y = np.array(self.classes_y)

    def __getitem__(self, index) :

        return self.transforms(self.images[index].astype('float')/255), self.domains_y[index], self.classes_y[index]

    def __len__(self) :
        return len(self.images)

Defining transforms

In [4]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Resize(256),
                                transforms.CenterCrop(224),
                                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

In [5]:
dataset = ImageDataset(transform=transform)

In [6]:
# Train and test split
train_dataset, val_dataset, test_dataset = random_split(dataset, [6000, 1000, 2991])

In [7]:
train_dataloader = DataLoader(dataset=train_dataset, batch_size=4, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=4, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=4, shuffle=True)

In [8]:
dataloaders = {
    'train' : train_dataloader,
    'val' : val_dataloader,
    'test' : test_dataloader
}

dataset_sizes = {
    'train' : 6000,
    'val': 1000,
    'test' : 2991
}

In [9]:
def train_model(model, criterion, optimizer, epochs=1):
    since = time.time()
    
    best_model_wts = copy.deepcopy(model.state_dict())
    best_loss = 0.0
    best_acc = 0
    
    for ep in range(epochs):
        print(f"Epoch {ep}/{epochs-1}")
        print("-"*10)
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
                
            running_loss = 0.0
            running_corrects = 0
                
            for images, domain, labels in tqdm(dataloaders[phase]):
                images = images.to(device)
                labels = labels.to(device)
                
                _, labels_list = torch.max(labels, 1)
                labels_list = labels_list.to(device)
                
                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(images.float())
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                
                if phase == 'train':
                    loss.backward()
                    optimizer.step()
                    
                running_loss += loss.item() * images.size(0)
                running_corrects += torch.sum(preds == labels_list)
                
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            
            print(f"{phase} Loss:{epoch_loss:.4f} Acc:{epoch_acc:.4f}")
            
            if phase == 'val':
                if ep == 0:
                    best_loss = epoch_loss
                    best_acc = epoch_acc
                    best_model_wts = copy.deepcopy(model.state_dict())
                else:
                    if epoch_loss < best_loss:
                        best_loss = epoch_loss
                        best_acc = epoch_acc
                        best_model_wts = copy.deepcopy(model.state_dict())
            
        print()
        
    time_elapsed = time.time() - since
    
    print(f'Training complete in {time_elapsed // 60}m {time_elapsed % 60}s')
    print(f'Best val loss: {best_loss:.4f}')
    print(f'Best acc: {best_acc}')
    
    model.load_state_dict(best_model_wts)
    
    return model

In [10]:
model = models.resnet18(pretrained=True)

# Changing the last layer of the model
model.fc = nn.Linear(512, 7)

model = nn.Sequential(model, 
                        nn.Softmax(1))

# To GPU
model = model.to(device)

# Defining loss function
criterion = nn.CrossEntropyLoss()

# Defining optimizer
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [11]:
model = train_model(model, criterion, optimizer, 2)

Epoch 0/1
----------


100%|██████████| 1500/1500 [14:49<00:00,  1.69it/s]


train Loss:1.5536 Acc:0.6487


100%|██████████| 250/250 [00:45<00:00,  5.51it/s]


val Loss:1.2941 Acc:0.8780

Epoch 1/1
----------


100%|██████████| 1500/1500 [12:27<00:00,  2.01it/s]


train Loss:1.3601 Acc:0.8212


100%|██████████| 250/250 [00:36<00:00,  6.80it/s]

val Loss:1.2690 Acc:0.8970

Training complete in 28.0m 39.850985288619995s
Best val loss: 1.2690
Best acc: 0.897





In [12]:
# Testing loop
running_correct = 0
total = 0

for img, dom, cla in tqdm(test_dataloader) :
  img = img.to(device)
  dom = dom.to(device)
  cla = cla.to(device)

  model.eval()

  pred = model(img.float())

  _, pred_list = torch.max(pred, 1)
  pred_list = pred_list.to(device)

  _, cla_list = torch.max(cla, 1)
  cla_list = cla_list.to(device)

  running_correct += torch.sum(pred_list == cla_list)

print(f'Accuracy of the test dataset : {running_correct/len(test_dataset)}')

100%|██████████| 748/748 [02:10<00:00,  5.75it/s]

Accuracy of the test dataset : 0.8963557481765747



