In [None]:
from __future__ import print_function
from __future__ import division
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import pandas as pd
from skimage import io, transform
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.optim import lr_scheduler
import torch.nn.functional as F
from torch.autograd import Variable
import albumentations as A
import cv2
from albumentations.pytorch.transforms import ToTensorV2 as ToTensorV2
import pandas as pd

In [None]:
# Seed every random number generator this notebook relies on so that a
# "Restart & Run All" reproduces the same results.
SEED = 42
np.random.seed(SEED)               # NumPy's global RNG (previously left unseeded)
torch.manual_seed(SEED)            # PyTorch CPU RNG
torch.cuda.manual_seed_all(SEED)   # PyTorch RNGs of all CUDA devices
# Ask cuDNN for deterministic kernels and disable its auto-tuner, which may
# otherwise select different algorithms from run to run.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [None]:
class IITAR13K(Dataset):
    """Image dataset with one bounding box and one class label per CSV row.

    The CSV is read positionally: column 2 holds the image file name
    (joined with ``root_dir``), columns 14-17 hold the four bounding-box
    values and column 18 holds the class label.

    Args:
        csv_file: Path of the CSV annotation file.
        root_dir: Directory that contains the image files.
        transform: Optional callable applied to the loaded RGB PIL image.
        target_transform: Stored on the instance but currently not applied
            to the targets.
        box_scale: Value the raw box values are divided by. Defaults to 600,
            the divisor that used to be hard-coded.
            NOTE(review): presumably the annotations are expressed in a
            600x600 pixel frame - confirm against the CSV.
    """

    # Positional CSV layout used by this dataset.
    IMAGE_COL = 2
    BOX_COLS = slice(14, 18)
    LABEL_COL = 18

    def __init__(self, csv_file, root_dir, transform=None, target_transform=None,
                 box_scale=600):
        if box_scale == 0:
            raise ValueError("box_scale must be non-zero")
        self.boxes = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.target_transform = target_transform
        self.box_scale = box_scale

    def __len__(self):
        """Return the number of annotation rows."""
        return len(self.boxes)

    def _read_target(self, idx):
        """Return ``(bound_boxes, label)`` for row ``idx`` without touching the image.

        ``bound_boxes`` is a float array of shape (4,) holding the raw box
        values divided by ``box_scale``.
        """
        label = self.boxes.iloc[idx, self.LABEL_COL]
        raw_boxes = self.boxes.iloc[idx, self.BOX_COLS]
        bound_boxes = (np.array([raw_boxes]) / self.box_scale).astype('float').reshape(4)
        return bound_boxes, label

    def __getitem__(self, idx):
        """Return ``(image, bound_boxes, label)`` for row ``idx``."""
        if torch.is_tensor(idx):
            idx = idx.tolist()

        img_name = os.path.join(self.root_dir,
                                self.boxes.iloc[idx, self.IMAGE_COL])
        img = Image.open(img_name).convert("RGB")

        if self.transform:
            img = self.transform(img)

        bound_boxes, label = self._read_target(idx)
        return img, bound_boxes, label

In [None]:
# Image preprocessing shared by every split: resize to a fixed 600x600 frame,
# convert to a tensor, then normalize each channel with the mean/std values
# that torchvision documents for its ImageNet-pretrained models.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

data_transform = transforms.Compose(
    [
        transforms.Resize((600, 600)),
        transforms.ToTensor(),
        transforms.Normalize(mean=_channel_mean, std=_channel_std),
    ]
)

In [None]:
# Build one dataset object per split; all splits share the same preprocessing.
# NOTE(review): `dataset_val` reads the *test* CSV/images and `dataset_test`
# reads the *validation* CSV/images - confirm that this swap is intentional.
dataset = IITAR13K(
    csv_file="../input/labels-224/ch_graph_obj_train.csv",
    root_dir='../input/iit-ar-13k-dataset/training_images/training_images',
    transform=data_transform,
)

dataset_val = IITAR13K(
    csv_file='../input/labels-224/ch_graph_obj_test.csv',
    root_dir='../input/iit-ar-13k-dataset/test_images/test_images',
    transform=data_transform,
)

dataset_test = IITAR13K(
    csv_file='../input/labels-224/ch_graph_obj_val.csv',
    root_dir='../input/iit-ar-13k-dataset/validation_images/validation_images',
    transform=data_transform,
)

In [None]:
# Report the number of annotation rows in each split (train, val, test order).
for _split in (dataset, dataset_val, dataset_test):
    print(len(_split))

In [None]:
class BB_model(nn.Module):
    """ResNet-50 backbone with a bounding-box regression head and a class head.

    Args:
        fine_tuning: When true, every parameter of the pretrained ResNet is
            frozen (``requires_grad = False``) so only the two heads train.
            NOTE(review): the name suggests the opposite - confirm intent.
        num_classes: Number of outputs of the classification head
            (defaults to 5, the value that used to be hard-coded).

    ``forward`` returns ``(boxes, class_logits)``: ``boxes`` has 4 values per
    sample squashed into (0, 1) by a sigmoid, ``class_logits`` has
    ``num_classes`` raw scores per sample.
    """

    def __init__(self, fine_tuning=False, num_classes=5):
        super(BB_model, self).__init__()
        resnet = models.resnet50(pretrained=True)
        if fine_tuning:
            for param in resnet.parameters():
                param.requires_grad = False
        # Width of the pooled feature vector (2048 for ResNet-50), read from
        # the backbone's own classifier instead of being hard-coded.
        num_features = resnet.fc.in_features
        # Drop the backbone's global pooling and fully connected layers.
        self.features = nn.Sequential(*list(resnet.children())[:-2])
        self.classifier = nn.Sequential(nn.Linear(num_features, num_classes))
        self.bb = nn.Sequential(nn.Dropout(),
                                nn.Linear(num_features, 1024), nn.ReLU(),
                                nn.Linear(1024, 512), nn.ReLU(),
                                nn.Linear(512, 256), nn.ReLU(),
                                nn.Linear(256, 128), nn.ReLU(),
                                nn.BatchNorm1d(128),
                                nn.Dropout(),
                                nn.Linear(128, 4), nn.Sigmoid())

    def forward(self, x):
        x = self.features(x)
        x = F.relu(x)
        # Functional pooling avoids constructing a new module on every call.
        x = F.adaptive_avg_pool2d(x, (1, 1))
        x = x.view(x.shape[0], -1)
        return self.bb(x), self.classifier(x)

In [None]:
def bb_iou(boxA, boxB, offset=0.0):
    """Intersection-over-union of two axis-aligned boxes.

    Args:
        boxA, boxB: Indexable ``(x1, y1, x2, y2)`` boxes (lists, arrays or
            tensors) with ``x1 <= x2`` and ``y1 <= y2``.
        offset: Value added to every width/height. Use 0 (default) for
            continuous coordinates such as boxes normalized to [0, 1]; use 1
            for inclusive integer pixel coordinates. The previous
            implementation always added 1, which badly inflates the IoU of
            normalized boxes (disjoint boxes scored well above 0).

    Returns:
        The IoU, or ``0.0`` when the union has no area.
    """
    # Corners of the intersection rectangle.
    xA = max(boxA[0], boxB[0])
    yA = max(boxA[1], boxB[1])
    xB = min(boxA[2], boxB[2])
    yB = min(boxA[3], boxB[3])

    # Clamp at zero so disjoint boxes yield an empty intersection.
    interArea = max(0, xB - xA + offset) * max(0, yB - yA + offset)

    boxAArea = (boxA[2] - boxA[0] + offset) * (boxA[3] - boxA[1] + offset)
    boxBArea = (boxB[2] - boxB[0] + offset) * (boxB[3] - boxB[1] + offset)

    union = boxAArea + boxBArea - interArea
    if union <= 0:
        # Two degenerate (zero-area) boxes: avoid dividing by zero.
        return 0.0
    return interArea / float(union)

In [None]:
# Layout of every counter array: index 0 = TP, index 1 = FP, index 2 = FN
def metric(out_bb, out_class, y_bb, y_class, batch_size = 4):
    """Count TP/FP/FN per IoU threshold and per ground-truth class for one batch.

    For each of the first ``batch_size`` samples the IoU between the
    ground-truth box and the predicted box is compared with the thresholds
    0.5, 0.55, ..., 0.95. At or above a threshold the sample is a TP when the
    arg-max of ``out_class`` equals the true class and an FP otherwise; below
    the threshold it is an FN. Counts are filed under the *true* class.

    Returns:
        ``{str(threshold): {str(class_id): np.array([TP, FP, FN])}}`` for the
        class ids 0-4.
    """
    TP_SLOT, FP_SLOT, FN_SLOT = 0, 1, 2
    iou_levels = np.arange(0.5, 1, 0.05)
    counts = {
        str(level): {str(cls): np.array([0, 0, 0]) for cls in range(5)}
        for level in iou_levels
    }

    for sample in range(batch_size):
        overlap = bb_iou(y_bb[sample], out_bb[sample])
        true_class = str(int(y_class[sample]))
        for level in iou_levels:
            if overlap >= level:
                _, predicted = torch.max(out_class[sample], 0)
                slot = TP_SLOT if predicted == y_class[sample] else FP_SLOT
            else:
                slot = FN_SLOT
            counts[str(level)][true_class][slot] += 1

    return counts

In [None]:
class FocalLoss(nn.Module):
    """Multi-class focal loss: ``-alpha * (1 - p_t) ** gamma * log(p_t)``.

    ``p_t`` is the softmax probability assigned to the target class. With
    ``gamma == 0`` and ``alpha is None`` this equals cross-entropy.

    The previous implementation did not compute this quantity: it used
    log-probabilities in the modulating factor, multiplied by the batch-mean
    NLL (``NLLLoss(reduce='none')`` is a truthy legacy flag, not
    ``reduction='none'``), summed over all classes, ignored ``ignore_index``
    and crashed for the default ``alpha=None``.

    Args:
        ignore_index: Target value whose samples contribute no loss.
        gamma: Focusing exponent applied to ``1 - p_t``.
        reduction: ``'none'`` (per-sample losses), ``'mean'`` (average over
            the non-ignored samples) or ``'sum'``.
        alpha: Optional scalar weight applied to the loss; ``None`` means no
            weighting.
    """

    def __init__(self, ignore_index: int = -100, gamma: float = 1.0,
                 reduction: str = 'mean', alpha=None) -> None:
        super(FocalLoss, self).__init__()
        self.ignore_index = ignore_index
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction

    def forward(  # type: ignore
            self,
            input: torch.Tensor,
            target: torch.Tensor) -> torch.Tensor:
        """Compute the loss for ``input`` logits of shape (N, C) and integer
        ``target`` class indices of shape (N,).

        Raises:
            NotImplementedError: If ``reduction`` is not one of
                ``'none'``, ``'mean'``, ``'sum'``.
        """
        log_probs = F.log_softmax(input, dim=1)

        # Ignored targets may be out of range for gather(); point them at
        # class 0 temporarily and zero their loss afterwards.
        valid = target != self.ignore_index
        safe_target = target.masked_fill(~valid, 0)

        log_pt = log_probs.gather(1, safe_target.unsqueeze(1)).squeeze(1)
        pt = log_pt.exp()

        loss_tmp = -torch.pow(1 - pt, self.gamma) * log_pt
        if self.alpha is not None:
            loss_tmp = self.alpha * loss_tmp
        loss_tmp = loss_tmp * valid.to(loss_tmp.dtype)

        if self.reduction == 'none':
            loss = loss_tmp
        elif self.reduction == 'mean':
            # Average over the samples that actually contribute.
            loss = loss_tmp.sum() / valid.sum().clamp(min=1)
        elif self.reduction == 'sum':
            loss = torch.sum(loss_tmp)
        else:
            raise NotImplementedError("Invalid reduction mode: {}".format(self.reduction))
        return loss

In [None]:
def val_metrics(model, valid_dl, epochs, loss_type, floss_a, floss_g, C = 1):
    """Evaluate ``model`` on ``valid_dl`` and return ``(mean_loss, accuracy)``.

    The per-batch loss is the classification loss plus the summed L1 distance
    between predicted and target boxes; the total is divided by the number of
    samples seen. Accuracy is the fraction of samples whose arg-max class
    equals the label.

    Args:
        model: Module returning ``(boxes, class_logits)`` for an image batch.
        valid_dl: Iterable of ``(images, boxes, labels)`` batches.
        epochs: Unused; kept so existing callers keep working.
        loss_type: ``'cross'`` (summed cross-entropy) or ``'foc'`` (focal loss).
        floss_a, floss_g: ``alpha`` and ``gamma`` passed to ``FocalLoss``.
        C: Unused; kept so existing callers keep working.

    Raises:
        ValueError: If ``loss_type`` is unknown or ``valid_dl`` yields no
            samples (these used to surface as UnboundLocalError and
            ZeroDivisionError respectively).
    """
    # Build the classification criterion once instead of once per batch.
    if loss_type == 'cross':
        def class_criterion(logits, labels):
            return F.cross_entropy(logits, labels, reduction="sum")
    elif loss_type == 'foc':
        # NOTE(review): this uses FocalLoss' default reduction while the
        # 'cross' branch sums - confirm the difference in scale is intended.
        class_criterion = FocalLoss(alpha=floss_a, gamma=floss_g)
    else:
        raise ValueError("Unknown loss_type: {!r} (expected 'cross' or 'foc')".format(loss_type))

    # Run on whatever device the model lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    with torch.no_grad():
        for x, y_bb, y_class in valid_dl:
            batch = y_class.shape[0]
            x = x.to(device)
            y_class = y_class.to(device)
            y_bb = y_bb.to(device)
            out_bb, out_class = model(x)

            loss_class = class_criterion(out_class, y_class)
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1).sum()
            loss = loss_class + loss_bb

            _, pred = torch.max(out_class, 1)
            correct += pred.eq(y_class).sum().item()
            sum_loss += loss.item()
            total += batch

    if total == 0:
        raise ValueError("valid_dl yielded no samples")
    return sum_loss/total, correct/total

In [None]:
def train_epocs(model, optimizer, train_dl, val_dl, floss_a, floss_g, epochs=40, loss_type='foc'):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Each batch loss is the classification loss plus the summed L1 distance
    between predicted and target boxes.

    Args:
        model: Module returning ``(boxes, class_logits)`` for an image batch.
        optimizer: Optimizer stepping the model's trainable parameters.
        train_dl, val_dl: Iterables of ``(images, boxes, labels)`` batches.
        floss_a, floss_g: ``alpha`` and ``gamma`` passed to ``FocalLoss``.
        epochs: Number of passes over ``train_dl``.
        loss_type: ``'cross'`` (summed cross-entropy) or ``'foc'`` (focal loss).

    Returns:
        ``(last_train_loss, train_losses, val_losses)`` where the lists hold
        one per-sample mean loss per epoch. ``last_train_loss`` is NaN when
        ``epochs`` is 0 (this used to raise NameError).

    Raises:
        ValueError: If ``loss_type`` is unknown.
    """
    # Build the classification criterion once instead of once per batch.
    if loss_type == 'cross':
        def class_criterion(logits, labels):
            return F.cross_entropy(logits, labels, reduction="sum")
    elif loss_type == 'foc':
        class_criterion = FocalLoss(alpha=floss_a, gamma=floss_g)
    else:
        raise ValueError("Unknown loss_type: {!r} (expected 'cross' or 'foc')".format(loss_type))

    # Run on whatever device the model lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    train_losses = []
    val_losses = []
    for i in range(epochs):
        start = time.time()
        model.train()
        total = 0
        sum_loss = 0
        for x, y_bb, y_class in train_dl:
            batch = y_class.shape[0]
            x = x.to(device)
            y_class = y_class.to(device)
            y_bb = y_bb.to(device)
            out_bb, out_class = model(x)

            loss_class = class_criterion(out_class, y_class)
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1).sum()
            loss = loss_class + loss_bb

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total += batch
            sum_loss += loss.item()

        train_loss = sum_loss/total
        # Validate with the same loss as training; this used to be hard-coded
        # to 'foc' regardless of the `loss_type` argument.
        val_loss, val_acc = val_metrics(model, val_dl, epochs=i, loss_type=loss_type,
                                        floss_a=floss_a, floss_g=floss_g)
        train_losses.append(train_loss)
        val_losses.append(val_loss)
        print("train_loss %.3f val_loss %.3f val_acc %.3f" % (train_loss, val_loss, val_acc))
        print(f'time for one epoch: {time.time()-start}, epoch: {i}')

    last_train_loss = train_losses[-1] if train_losses else float('nan')
    return last_train_loss, train_losses, val_losses

In [None]:
def train_epocs_hyper(model, optimizer, train_dl, val_dl, epochs, loss_type, floss_a, floss_g, C = 1):
    """Train ``model`` and record the validation accuracy of every epoch.

    Used by the hyper-parameter search: the training step is the same as in
    the regular training loop (classification loss plus summed L1 box loss).

    Args:
        model: Module returning ``(boxes, class_logits)`` for an image batch.
        optimizer: Optimizer stepping the model's trainable parameters.
        train_dl, val_dl: Iterables of ``(images, boxes, labels)`` batches.
        epochs: Number of passes over ``train_dl``.
        loss_type: ``'cross'`` (summed cross-entropy) or ``'foc'`` (focal loss).
        floss_a, floss_g: ``alpha`` and ``gamma`` passed to ``FocalLoss``.
        C: Unused; kept so existing callers keep working.

    Returns:
        Dict mapping ``"epoch_<i>"`` to the validation accuracy after epoch i.

    Raises:
        ValueError: If ``loss_type`` is unknown (this used to surface as an
            UnboundLocalError inside the batch loop).
    """
    # Build the classification criterion once instead of once per batch.
    if loss_type == 'cross':
        def class_criterion(logits, labels):
            return F.cross_entropy(logits, labels, reduction="sum")
    elif loss_type == "foc":
        class_criterion = FocalLoss(alpha=floss_a, gamma=floss_g)
    else:
        raise ValueError("Unknown loss_type: {!r} (expected 'cross' or 'foc')".format(loss_type))

    # Run on whatever device the model lives on rather than a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    epoch_acc = {}
    for i in range(epochs):
        start = time.time()
        model.train()
        total = 0
        sum_loss = 0
        for x, y_bb, y_class in train_dl:
            batch = y_class.shape[0]
            x = x.to(device)
            y_class = y_class.to(device)
            y_bb = y_bb.to(device)
            out_bb, out_class = model(x)

            loss_class = class_criterion(out_class, y_class)
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1).sum()
            loss = loss_class + loss_bb

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            total += batch
            sum_loss += loss.item()

        train_loss = sum_loss/total
        val_loss, val_acc = val_metrics(model, val_dl, epochs=i, loss_type=loss_type,
                                        floss_a=floss_a, floss_g=floss_g)
        epoch_acc["epoch_{}".format(i)] = val_acc

        print("train_loss %.3f val_loss %.3f val_acc %.3f" % (train_loss, val_loss, val_acc))
        print(f'time for one epoch: {time.time()-start}, epoch: {i}')
    return epoch_acc

In [None]:
# Grid search over batch size, SGD settings and focal-loss parameters.
# Every combination trains a fresh model for `num_epochs` epochs and stores
# the per-epoch validation accuracies in `accur`.
dataloaders = {'val': torch.utils.data.DataLoader(dataset_val, batch_size=4,
                                             shuffle=False, num_workers=4, drop_last=True),
              }

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
num_epochs = 10

num_batches = [16]
wdecays = [0]
learn_rates = [0.0001, 0.001]
mom = [0.02]

floss_alpha = [0.6, 0.8, 0.9]
floss_gamma = [1]

loss_types = ["foc"]

# (learning rate, momentum, weight decay) combinations, in the same order as
# the descriptive labels below.
optimizer_grid = [(i, j, w) for i in learn_rates for j in mom for w in wdecays]

# Human-readable labels, used only to build the keys of `accur`. The objects
# themselves are constructed directly in the loop instead of via eval().
dloaders = ["torch.utils.data.DataLoader(dataset, batch_size="+str(nb)+", shuffle=True, num_workers=4, drop_last=True)" for nb in num_batches]
optimizers = ["torch.optim.SGD(parameters, lr="+str(i)+",momentum="+str(j)+",weight_decay =" + str(w)+ ")" for (i, j, w) in optimizer_grid]

accur = {}
tot_epochs = num_epochs*len(loss_types)*len(optimizers)*len(dloaders)*len(floss_alpha)*len(floss_gamma)
print("About to tune {} epochs...".format(tot_epochs))
for lt in loss_types:
    for dl, nb in zip(dloaders, num_batches):
        for o, (lr, momentum, wd) in zip(optimizers, optimizer_grid):
            for fa in floss_alpha:
                for fg in floss_gamma:
                    # Create the model FIRST, then collect ITS parameters.
                    # The old order read `model.parameters()` before the new
                    # model existed: NameError on a fresh kernel, and later
                    # runs optimized the previous model's weights.
                    model = BB_model().to(device)
                    parameters = [p for p in model.parameters() if p.requires_grad]
                    optimizer = torch.optim.SGD(parameters, lr=lr, momentum=momentum,
                                                weight_decay=wd)
                    train_dl = torch.utils.data.DataLoader(dataset, batch_size=nb, shuffle=True,
                                                           num_workers=4, drop_last=True)

                    params = "{dl}, {o}, lt = {lt}, fa = {fa}, fg = {fg}".format(dl = dl,o = o,lt = lt, fa = fa, fg = fg)
                    print(params)
                    accur[params] = train_epocs_hyper(model, optimizer, train_dl, dataloaders['val'],
                                                      epochs=num_epochs, loss_type=lt,
                                                      floss_a=fa, floss_g=fg)

------------------------------------------------

Running best hyperparameters

In [None]:
# Loaders for the final run. Training batches are shuffled; evaluation
# batches are not. `drop_last=True` discards a trailing incomplete batch in
# every split, so a few evaluation samples may be skipped.
num_batch = 16

_eval_loader_options = dict(batch_size=4, shuffle=False, num_workers=4, drop_last=True)

dataloaders = {
    'train': DataLoader(dataset, batch_size=num_batch, shuffle=True,
                        num_workers=4, drop_last=True),
    'val': DataLoader(dataset_val, **_eval_loader_options),
    'test': DataLoader(dataset_test, **_eval_loader_options),
}

In [None]:
# Pick the GPU when one is available and put the model on that same device.
# The model used to be moved to 'cuda' unconditionally, which ignored
# `device` and crashed on CPU-only machines.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = BB_model().to(device)
# Only hand trainable parameters to the optimizer.
parameters = filter(lambda p: p.requires_grad, model.parameters())
# NOTE(review): momentum=0.09 is unusually small (and differs from the 0.02
# used in the search) - confirm it is not a typo for 0.9.
optimizer = torch.optim.SGD(parameters, lr=0.001, momentum= 0.09)


In [None]:
# Train the final model. Despite its name, `acc` receives the first value
# returned by train_epocs, i.e. the mean training loss of the last epoch.
acc, train_losses, val_losses = train_epocs(
    model,
    optimizer,
    dataloaders['train'],
    dataloaders['val'],
    floss_a=0.6,
    floss_g=1,
    epochs=10,
    loss_type='foc',
)

In [None]:
# Plot training and validation loss per epoch. The x-axis follows the number
# of recorded epochs instead of a hard-coded range(10), so the plot stays
# valid when the number of training epochs changes.
fig, ax = plt.subplots()
x = range(len(train_losses))
ax.plot(x, train_losses, label="train")
ax.plot(x, val_losses, label="valid")

ax.set_xlabel('epochs')
ax.set_ylabel('loss')
ax.set_title('Loss per each epoch')
ax.legend()
plt.show()

In [None]:
# Accumulate TP/FP/FN counters over the whole test loader.
# `res` maps str(threshold) -> str(class id) -> np.array([TP, FP, FN]).
tresholds = np.arange(0.5, 1, 0.05)
res = {str(th): {str(i): np.array([0, 0, 0]) for i in range(5)} for th in tresholds}

model.eval()
with torch.no_grad():
    for x, y_bb, y_class in dataloaders['test']:
        x, y_class, y_bb = x.to(device), y_class.to(device), y_bb.to(device)
        out_bb, out_class = model(x)

        # Counters for this batch only; keys match those of `res`.
        prov_res = metric(out_bb, out_class, y_bb, y_class, batch_size=y_class.shape[0])

        for th_key, per_class in prov_res.items():
            for class_key, batch_counts in per_class.items():
                res[th_key][class_key] += batch_counts

In [None]:
# One column per IoU threshold, one row per class id; every cell holds the
# [TP, FP, FN] counter array. The raw column keys are str() of floats, so
# they are replaced by clean labels.
_threshold_labels = ['0.5', '0.55', '0.6', '0.65', '0.70',
                     '0.75', '0.8', '0.85', '0.90', '0.95']
df = pd.DataFrame.from_dict(res)
df.columns = _threshold_labels
df

In [None]:
# Replace every [TP, FP, FN] cell by [precision, recall].
# `df.at[row, col]` is used instead of the chained `df[col][row]`: chained
# assignment may write to a temporary copy and leave `df` unchanged.
for i in df.index:
    for j in df.columns:
        counts = df.at[i, j]
        TP = counts[0]; FP = counts[1]; FN = counts[2]

        # Guard both ratios against empty denominators.
        prec = 0 if TP + FP == 0 else TP/(TP+FP)
        rec = 0 if TP + FN == 0 else TP/(TP+FN)

        df.at[i, j] = [round(prec, 5), round(rec, 5)]

In [None]:
# Display the table; at this point each cell holds a [precision, recall] pair.
df

In [None]:
# Turn every [precision, recall] cell into an F-measure.
# Work on a copy: `f1 = df` was only an alias, so the loop used to overwrite
# `df` as well and the cell could not be re-run. `.at` replaces the chained
# `f1[col][row]` assignment, which may write to a temporary copy.
f1 = df.copy()
for i in f1.index:
    for j in f1.columns:
        P, R = df.at[i, j]
        if P+R == 0:
            F_mes = 0
        else:
            F_mes = 2*((P*R)/(P+R))

        f1.at[i, j] = round(F_mes, 5)

In [None]:
# Give the F-measure table readable labels: columns name the IoU threshold,
# rows name the class.
_column_labels = {
    '0.5': 'IoU_th=0.5',
    '0.55': 'IoU_th=0.55',
    '0.6': 'IoU_th=0.6',
    '0.65': 'IoU_th=0.65',
    '0.70': 'IoU_th=0.7',
    '0.75': 'IoU_th=0.75',
    '0.8': 'IoU_th=0.8',
    '0.85': 'IoU_th=0.85',
    '0.90': 'IoU_th=0.9',
    '0.95': 'IoU_th=0.95',
}
_row_labels = {
    '0': 'natural_image',
    '1': 'table',
    '2': 'signature',
    '3': 'figure',
    '4': 'logo',
}
f1 = f1.rename(columns=_column_labels, index=_row_labels)

In [None]:
# Display the F-measure table (rows: classes, columns: IoU thresholds).
f1

In [None]:
# Per-class classification accuracy on the test loader.
classes = ['natural_image','table','signature','figure','logo']
class_correct = [0.0] * len(classes)
class_total = [0.0] * len(classes)
model.eval()
with torch.no_grad():
    for x, y_bb, y_class in dataloaders['test']:
        x = x.to(device)
        labels = y_class.to(device)
        out_bb, out_class = model(x)
        _, predicted = torch.max(out_class, 1)
        # Flatten to plain Python lists so any batch size works; the old
        # `range(4)` loop assumed exactly four samples and `.squeeze()`
        # broke on a batch of one.
        hits = (predicted == labels).view(-1).tolist()
        for label, hit in zip(labels.view(-1).tolist(), hits):
            class_correct[label] += float(hit)
            class_total[label] += 1


for i in range(len(classes)):
    if class_total[i] == 0:
        # A class with no test samples has no defined accuracy.
        print('Accuracy of ' + classes[i] + ': n/a (no test samples)')
    else:
        print('Accuracy of ' + classes[i] + ': ' + str(round(class_correct[i] / class_total[i], 5)))