In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Traverse the Kaggle input directory. The per-file listing is disabled, so this
# walk produces no output; re-enable the print below to list every input file.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        # print(os.path.join(dirname, filename))
        continue

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# README

This is the implementation code for the first model. The preprocessed dataset we used to train this model has been made public. 

Each cell is self-explanatory.

To reproduce the results in our paper, click on the **Run** tab and choose *Run all*.

The training step is found at cell 16.

The validation accuracy is zero for the first few epochs because the model has not yet learned to predict a region of interest whose IoU with the ground truth exceeds 0.8 for any image. After a few more epochs the accuracy becomes non-zero.

In [None]:
import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np

import cv2
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models
from scipy import stats 

In [None]:
# preprocess and loading 
def normalize(im):
    """Standardize an image with the ImageNet per-channel mean and std.

    Args:
        im: Array whose last axis broadcasts against three channel values
            (presumably an RGB image scaled to [0, 1] -- TODO confirm).

    Returns:
        ``(im - mean) / std`` computed channel-wise.
    """
    # Row 0 holds the channel means, row 1 the channel standard deviations.
    mean, std = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    return (im - mean) / std

class GAICDataset(Dataset):
    """Dataset yielding ``(normalized image, bounding box)`` pairs read from disk.

    Args:
        paths: Object exposing ``.values`` (e.g. a pandas Series) holding one
            image path per sample.
        bb: Object exposing ``.values`` holding one bounding box (a sequence of
            numbers) per sample, aligned with ``paths``.
        transforms: Accepted for backward compatibility; currently unused.
    """

    def __init__(self, paths, bb, transforms=True):
        self.paths = paths.values
        self.bb = bb.values

    def __len__(self):
        return len(self.paths)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(x, y_bb)`` where ``x`` is the ImageNet-normalized RGB image
            with the channel axis moved to the front and ``y_bb`` is the
            bounding box as a NumPy array.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image at the stored path.
        """
        path = self.paths[idx]
        image = cv2.imread(str(path))
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {path}')
        # OpenCV loads BGR; convert to RGB and scale pixel values by 1/255.
        x = cv2.cvtColor(image.astype(np.float32), cv2.COLOR_BGR2RGB) / 255
        y_bb = np.array(self.bb[idx])
        x = normalize(x)
        # Move the channel axis (last) to the front: (H, W, C) -> (C, H, W).
        x = np.transpose(x, (2, 0, 1))
        return x, y_bb

In [None]:
def get_dataset(path, main_path='../input/processeddataset/Processed2/'):
    """Load one split CSV and return its image paths and bounding boxes.

    Args:
        path: CSV file name, appended to ``main_path``. The CSV must contain
            the columns ``new_path`` and ``new_bb``.
        main_path: Dataset root prefix (including the trailing separator). It
            is prepended both to ``path`` and to every ``new_path`` entry.

    Returns:
        Tuple ``(X, Y)`` of pandas Series: ``X`` holds the prefixed image
        paths, ``Y`` holds each bounding box as a list of floats.
    """
    def _parse_bb(text):
        # Boxes are stored as a bracketed string such as "[1. 2. 3. 4.]";
        # comma separators are tolerated as well.
        return [float(v) for v in text.strip().strip('[]').replace(',', ' ').split()]

    df = pd.read_csv(main_path + path)
    image_paths = df['new_path'].apply(lambda rel: f'{main_path}{rel}')
    boxes = df['new_bb'].apply(_parse_bb)
    print(image_paths.shape, boxes.shape)
    return image_paths, boxes

# Load the image paths and bounding boxes of each split.
X_train, Y_train = get_dataset('df_train.csv')
X_val, Y_val = get_dataset('df_val.csv')
X_test, Y_test = get_dataset('df_test.csv')

train_ds = GAICDataset(X_train, Y_train)
# Validation images must be paired with the validation boxes (Y_val), not the
# training boxes, otherwise the validation metrics are meaningless.
valid_ds = GAICDataset(X_val, Y_val)
test_ds = GAICDataset(X_test, Y_test)

print(f'size of train data {len(X_train)}, size of test {len(X_test)}, size of val {len(X_val)}')

In [None]:
# Mini-batch size shared by all three loaders.
batch_size = 64

# Only the training loader shuffles; validation and test keep dataset order.
train_dl = DataLoader(dataset=train_ds, batch_size=batch_size, shuffle=True)
valid_dl = DataLoader(dataset=valid_ds, batch_size=batch_size)
test_dl = DataLoader(dataset=test_ds, batch_size=batch_size)

In [None]:
class BB_model(nn.Module):
    """Pretrained ResNet-34 feature extractor with a 4-value regression head."""

    def __init__(self):
        super().__init__()
        backbone = models.resnet34(pretrained=True)
        # Keep only the first eight child modules of the ResNet and split them
        # into two sequential stages (children 0-5 and 6-7).
        stages = list(backbone.children())[:8]
        self.features1 = nn.Sequential(*stages[:6])
        self.features2 = nn.Sequential(*stages[6:])
        # Head: batch-norm over the 512 pooled features, then 4 box values.
        self.bb = nn.Sequential(nn.BatchNorm1d(512), nn.Linear(512, 4))

    def forward(self, x):
        """Return the 4 predicted box values for each image in the batch ``x``."""
        feats = self.features2(self.features1(x))
        # ReLU, then global average pooling down to a 1x1 map per channel.
        pooled = F.adaptive_avg_pool2d(F.relu(feats), (1, 1))
        flat = pooled.view(pooled.shape[0], -1)
        return self.bb(flat)

In [None]:
def update_optimizer(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``."""
    for group in optimizer.param_groups:
        group["lr"] = lr

In [None]:
def train_epocs(model, optimizer, train_dl, val_dl, epochs=10,C=1000):
    """Train ``model`` on bounding-box regression and report metrics per epoch.

    Args:
        model: Network mapping an image batch to 4 box values per image.
        optimizer: Optimizer stepping the model's parameters.
        train_dl: Iterable of ``(images, boxes)`` training batches.
        val_dl: Iterable of ``(images, boxes)`` validation batches, evaluated
            with ``val_metrics`` after every epoch.
        epochs: Number of passes over ``train_dl``.
        C: Constant the summed L1 loss is divided by.

    Returns:
        The mean training loss per sample of the last epoch, or ``nan`` when
        ``epochs`` is 0.
    """
    # Run on whatever device the model already lives on.
    device = next(model.parameters()).device
    train_loss = float("nan")
    for _ in range(epochs):
        model.train()
        total = 0
        sum_loss = 0
        for x, y_bb in train_dl:
            batch = x.shape[0]
            x = x.to(device).float()
            y_bb = y_bb.to(device).float()
            out_bb = model(x)
            # Summed (not averaged) L1 distance over coordinates and samples.
            loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
            loss = loss_bb.sum() / C
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total += batch
            sum_loss += loss.item()
        train_loss = sum_loss/total
        # Validate on the loader passed by the caller, not on a global.
        val_loss, val_acc = val_metrics(model, val_dl, C)
        print("train_loss %.3f val_loss %.3f val_acc %.3f " % (train_loss, val_loss, val_acc))
    return train_loss

In [None]:
def bb_intersection_over_union(boxA, boxB):
    """Intersection-over-union of two boxes given as four corner coordinates.

    Each box is indexed as ``[0], [1]`` (minimum corner) and ``[2], [3]``
    (maximum corner). Extents are computed as ``max - min + 1``.

    Returns:
        The IoU as a float.
    """
    def _area(box):
        return (box[2] - box[0] + 1) * (box[3] - box[1] + 1)

    # Corners of the overlapping rectangle.
    left, top = max(boxA[0], boxB[0]), max(boxA[1], boxB[1])
    right, bottom = min(boxA[2], boxB[2]), min(boxA[3], boxB[3])

    # A negative extent means the boxes do not overlap along that axis.
    overlap_w = max(0, right - left + 1)
    overlap_h = max(0, bottom - top + 1)
    overlap = overlap_w * overlap_h

    union = _area(boxA) + _area(boxB) - overlap
    return overlap / float(union)

def run(bboxes1, bboxes2):
    """Row-wise IoU between two ``(N, 4)`` arrays of boxes.

    Row ``k`` of ``bboxes1`` is compared with row ``k`` of ``bboxes2``; the
    four columns are the minimum corner followed by the maximum corner.
    Extents are computed as ``max - min + 1``.

    Returns:
        Array of shape ``(N, 1)`` with one IoU per row.
    """
    a_x1, a_y1, a_x2, a_y2 = np.split(bboxes1, 4, axis=1)
    b_x1, b_y1, b_x2, b_y2 = np.split(bboxes2, 4, axis=1)

    # Overlap extents, clamped at zero for non-overlapping boxes.
    overlap_w = np.maximum(np.minimum(a_x2, b_x2) - np.maximum(a_x1, b_x1) + 1, 0)
    overlap_h = np.maximum(np.minimum(a_y2, b_y2) - np.maximum(a_y1, b_y1) + 1, 0)
    overlap = overlap_w * overlap_h

    area_a = (a_x2 - a_x1 + 1) * (a_y2 - a_y1 + 1)
    area_b = (b_x2 - b_x1 + 1) * (b_y2 - b_y1 + 1)
    return overlap / (area_a + area_b - overlap)

def pears(a, b):
    """Sum of the Pearson correlation coefficients of paired rows of ``a`` and ``b``."""
    return sum(stats.pearsonr(row_a, row_b)[0] for row_a, row_b in zip(a, b))
    
def val_metrics(model, valid_dl, C=1000):
    """Evaluate ``model`` on every batch of ``valid_dl``.

    Args:
        model: Network mapping an image batch to 4 box values per image.
        valid_dl: Iterable of ``(images, boxes)`` batches.
        C: Constant the summed L1 loss is divided by.

    Returns:
        Tuple ``(loss, accuracy)``: the scaled L1 loss per sample, and the
        fraction of samples whose predicted box has IoU > 0.8 with the target.
    """
    model.eval()
    # Run on whatever device the model already lives on.
    device = next(model.parameters()).device
    total = 0
    sum_loss = 0
    correct = 0
    for x, y_bb in valid_dl:
        batch = x.shape[0]
        x = x.to(device).float()
        y_bb = y_bb.to(device).float()
        with torch.no_grad():
            out_bb = model(x)
        # Summed (not averaged) L1 distance over coordinates and samples.
        loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
        loss = loss_bb.sum() / C
        # run() returns one IoU per row as an (N, 1) array; flatten it to (N,).
        iou = run(y_bb.cpu().numpy(), out_bb.cpu().numpy()).sum(axis=1)
        # A prediction counts as correct when its IoU exceeds 0.8.
        correct += int((iou > 0.8).sum())
        sum_loss += loss.item()
        total += batch
    return sum_loss/total, correct/total

In [None]:
# Request the GPU; the message below reports whether CUDA is actually available.
USE_GPU = True

gpu_ready = USE_GPU and torch.cuda.is_available()
print('using device: cuda' if gpu_ready else 'using device: cpu')

In [None]:
# Instantiate the network on the GPU.
model = BB_model().cuda()
# Optimize only parameters that require gradients, using Adam's default settings.
parameters = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(parameters)

In [None]:
# Train for 25 epochs; train_epocs prints the train loss, validation loss and
# validation accuracy after each epoch and returns the final training loss.
train_epocs(model, optimizer, train_dl, valid_dl, epochs=25)

In [None]:
def _denormalize_batch(images):
    """Invert the ImageNet normalization on channel-last image arrays."""
    imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    return images * imagenet_stats[1] + imagenet_stats[0]


def test_metrics(model, valid_dl, C=1000):
    """Evaluate ``model`` on the FIRST batch of ``valid_dl`` and keep it for plotting.

    Only one batch is processed, so the returned loss and accuracy describe
    that batch alone.

    Args:
        model: Network mapping an image batch to 4 box values per image.
        valid_dl: Iterable of ``(images, boxes)`` batches.
        C: Constant the summed L1 loss is divided by.

    Returns:
        Tuple ``(loss, accuracy, mydict)``. ``mydict`` has the keys ``'x'``
        (denormalized channel-last images), ``'y_bb'`` (target boxes),
        ``'out_bb'`` (predicted boxes) and ``'ious'`` (one IoU per sample).

    Raises:
        ValueError: If ``valid_dl`` yields no batches.
    """
    model.eval()
    # Run on whatever device the model already lives on.
    device = next(model.parameters()).device
    total = 0
    sum_loss = 0
    correct = 0
    mydict = None
    for x, y_bb in valid_dl:
        batch = x.shape[0]
        x = x.to(device).float()
        y_bb = y_bb.to(device).float()
        with torch.no_grad():
            out_bb = model(x)
        # Summed (not averaged) L1 distance over coordinates and samples.
        loss_bb = F.l1_loss(out_bb, y_bb, reduction="none").sum(1)
        loss = loss_bb.sum() / C
        y_bb = y_bb.cpu().numpy()
        out_bb = out_bb.cpu().numpy()
        # run() returns one IoU per row as an (N, 1) array; flatten it to (N,).
        iou = run(y_bb, out_bb).sum(axis=1)
        # A prediction counts as correct when its IoU exceeds 0.8.
        correct += int((iou > 0.8).sum())
        sum_loss += loss.item()
        total += batch
        mydict = {
            # (N, C, H, W) -> (N, H, W, C) so the images can be shown directly.
            # A local helper is used because the notebook defines
            # `denormalize` only in a later cell.
            'x': _denormalize_batch(x.cpu().numpy().transpose((0, 2, 3, 1))),
            'y_bb': y_bb,
            'out_bb': out_bb,
            'ious': iou
        }
        # Intentionally stop after the first batch.
        break
    if mydict is None:
        raise ValueError('valid_dl yielded no batches')
    return sum_loss/total, correct/total, mydict

In [None]:
# Evaluate on the test loader. test_metrics stops after the first batch, so the
# loss/accuracy and the arrays in `mydict` all describe that single batch.
test_lossp, test_accp, mydict = test_metrics(model, test_dl, C=1000)

In [None]:
# random visualization
# Show one image (index 3) from the batch returned by test_metrics.
sample_idx = 3
plt.imshow(mydict['x'][sample_idx])
plt.show()

In [None]:
# Pull the image batch out of the test results and alias its first image.
x = mydict['x']
xs = xr = x[0]

In [None]:
def denormalize(im):
    """Invert the ImageNet normalization applied by ``normalize``.

    Args:
        im: Array whose last axis broadcasts against three channel values.

    Returns:
        ``im * std + mean`` computed channel-wise.
    """
    imagenet_stats = np.array([[0.485, 0.456, 0.406], [0.229, 0.224, 0.225]])
    return im*imagenet_stats[1] + imagenet_stats[0]

# `xr` is taken from mydict['x'], which test_metrics has already denormalized,
# so it is deliberately not denormalized a second time here.

In [None]:
def create_corner_rect(bb, color='red'):
    """Build an unfilled matplotlib rectangle outlining the box ``bb``.

    Args:
        bb: Four numbers; indices 1 and 0 give the anchor corner ``(x, y)``,
            ``bb[3] - bb[1]`` the width and ``bb[2] - bb[0]`` the height
            (i.e. presumably ``[y_min, x_min, y_max, x_max]`` -- TODO confirm).
        color: Edge color of the rectangle.
    """
    box = np.array(bb, dtype=np.float32)
    anchor = (box[1], box[0])
    width = box[3] - box[1]
    height = box[2] - box[0]
    return plt.Rectangle(anchor, width, height, color=color, fill=False, lw=1.6)
    
# Grid of the first nrows*ncols test samples: predicted box in red, target box
# in green-yellow, with the sample's IoU as the panel title.
nrows=4
ncols=3
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(12, 12))
# axes.flat walks the grid row by row, so `num` runs 0..nrows*ncols-1 without
# gaps (the row stride is ncols, not nrows).
for num, ax in enumerate(axes.flat):
    ax.imshow(mydict['x'][num])
    ax.add_patch(create_corner_rect(mydict['out_bb'][num], 'red'))
    ax.add_patch(create_corner_rect(mydict['y_bb'][num], 'greenyellow'))
    ax.set(title=mydict['ious'][num])
    ax.axis('off')
plt.subplots_adjust(left=0.125,
                    bottom=0.1, 
                    right=0.9, 
                    top=0.9, 
                    wspace=0.05, 
                    hspace=0.05)
plt.show()

In [None]:
# test_loss, test_acc = val_metrics(model, test_dl, C=1000)

In [None]:
# test_loss, test_acc

In [None]:
# #Reading an image
# def read_image(path):
#     return cv2.cvtColor(cv2.imread(str(path)), cv2.COLOR_BGR2RGB)

# def resize_image_bb(read_path,write_path,sz=300):
#     """Resize an image and its bounding box and write image to new path"""
#     read_path = Path(read_path)
#     im = read_image(read_path)
#     im_resized = cv2.resize(im, (int(1.49*sz), sz))
# #     Y_resized = cv2.resize(create_mask(bb, im), (int(1.49*sz), sz))
#     print(write_path)
#     new_path = f'{write_path}/{read_path.parts[-1]}'
#     print(new_path)
#     cv2.imwrite(new_path, cv2.cvtColor(im_resized, cv2.COLOR_RGB2BGR))
#     return new_path

In [None]:
# resize_path = resize_image_bb('../input/d/kal1224/random/tsinghua.jpg', '/kaggle/working')

In [None]:
# def create_bb_array(x):
#     """Generates bounding box array from a train_df row"""
#     return np.array([x[5],x[4],x[7],x[6]])

# #Reading an image
# def read_image(path):
#     return cv2.cvtColor(cv2.imread(str(path)), cv2.COLOR_BGR2RGB)

# def create_corner_rect(bb, color='red'):
#     bb = np.array(bb, dtype=np.float32)
#     return plt.Rectangle((bb[1], bb[0]), bb[3]-bb[1], bb[2]-bb[0], color=color,
#                          fill=False, lw=3)
# def show_corner_bb(im, bb):
#     plt.imshow(im)
#     plt.gca().add_patch(create_corner_rect(bb))
    
# im = cv2.imread('/kaggle/working/tsinghua.jpg')
# # bb = create_bb_array(df_train.values[42])
# bb = bb.cpu().detach().numpy()
# # print(bb.cpu())
# # print(bb)
# show_corner_bb(im, bb[0])

In [None]:
# paths = []
# bbs = [1, 2, 3, 4]
# df_trial = pd.DataFrame([])

In [None]:
# df_trial['path'] = '/kaggle/working/tsinghua.jpg'
# df_trial['bb'] = bbs
# df_trial['class'] = 1

In [None]:
# trial_ds = GAICDataset(df_trial['path'], df_trial['bb'], df_trial['class'])

In [None]:
# trial_dl = DataLoader(valid_ds, batch_size=1)

In [None]:
# im = im.reshape((1,)+im.shape)
# x = torch.from_numpy(im)
# x = x.cuda().float()
# for x, y in trial_dl:
#     print(x.shape, y)
#     bb = model(x.cuda().float())
#     break
# #     