In [None]:
import os
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader
from albumentations.pytorch.transforms import ToTensorV2
from tqdm.notebook import tqdm
import albumentations as A

# Pick the compute device: use CUDA when torch reports it, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

In [None]:
# Side length, in pixels, that box coordinates are rescaled to
# (the dataset name suggests the PNGs are 1024x1024 — confirm against the data).
IMAGE_SIZE = 1024

# Directory holding the training images.
TRAIN_PATH = '../input/vinbigdata-chest-xray-resized-png-1024x1024/train/'

# Bounding-box annotations, and the per-image metadata (dim0 / dim1) used to rescale them.
df_train = pd.read_csv('../input/vinbigdata-chest-xray-abnormalities-detection/train.csv')
df_train_meta = pd.read_csv('../input/vinbigdata-chest-xray-resized-png-1024x1024/train_meta.csv')

In [None]:
# Replace missing values (e.g. absent box coordinates) with 0.
df_train = df_train.fillna(0)
# Shift every class id up by one, then move the id that lands on 15 to 0.
# NOTE: this cell is not idempotent — running it twice shifts the ids again.
df_train['class_id'] = df_train['class_id'] + 1
df_train.loc[df_train['class_id'] == 15, 'class_id'] = 0

In [None]:
# Show the annotation table's (rows, columns) and its first rows.
n_rows, n_cols = df_train.shape
print((n_rows, n_cols))
df_train.head()

In [None]:
# Show the metadata table's (rows, columns) and its first rows.
n_rows, n_cols = df_train_meta.shape
print((n_rows, n_cols))
df_train_meta.head()

In [None]:
def update_bboxes(df_train, df_train_meta, image_size = None):
    """Rescale bounding boxes from original image coordinates to a square image.

    The annotations are joined with the per-image metadata on ``image_id``
    (inner join, so rows without metadata are dropped). The x coordinates are
    divided by ``dim1`` and the y coordinates by ``dim0`` (presumably the
    original width and height — confirm against the metadata file), multiplied
    by ``image_size`` and truncated to integers.

    Args:
        df_train: DataFrame with ``image_id``, ``x_min``, ``y_min``, ``x_max``
            and ``y_max`` columns.
        df_train_meta: DataFrame with ``image_id``, ``dim0`` and ``dim1``.
        image_size: Target side length in pixels. Defaults to the module-level
            ``IMAGE_SIZE`` when omitted.

    Returns:
        A new DataFrame with rescaled integer box columns and without the
        ``dim0`` / ``dim1`` columns. The inputs are not modified.
    """
    if image_size is None:
        image_size = IMAGE_SIZE

    df_temp = pd.merge(df_train, df_train_meta, on = "image_id")

    # Each box column paired with the original dimension it is normalised by.
    scale_by = (
        ('x_min', 'dim1'),
        ('y_min', 'dim0'),
        ('x_max', 'dim1'),
        ('y_max', 'dim0'),
    )
    for column, dim in scale_by:
        df_temp[column] = ((df_temp[column] / df_temp[dim]) * image_size).astype('int')

    return df_temp.drop(['dim0', 'dim1'], axis = 1)

In [None]:
# Rescale all boxes to the resized-image coordinate system.
df_train = update_bboxes(df_train, df_train_meta)
# Rows with class_id 0 get x_max = y_max = 1
# (presumably a 1x1 placeholder box, given x_min / y_min are 0 for those rows — confirm).
df_train.loc[df_train['class_id'].eq(0), ['x_max', 'y_max']] = 1
df_train.head()

In [None]:
# Temporarily attach each box's area so degenerate (zero-area) boxes can be found.
box_width = df_train['x_max'] - df_train['x_min']
box_height = df_train['y_max'] - df_train['y_min']
df_train['area'] = box_width * box_height

# Report how many zero-area rows there are, then keep only the others.
zero_area = df_train['area'] == 0
print(df_train[zero_area].shape)
df_train = df_train[~zero_area].reset_index(drop = True)

# The helper column is no longer needed.
df_train = df_train.drop(columns = ['area'])
df_train

In [None]:
# Map class_id -> class_name from de-duplicated (id, name) rows. Taking the pairs
# row-wise keeps every id attached to its own name; zipping two independent
# .unique() arrays is only correct when ids and names first appear in the same order.
_class_pairs = df_train[['class_id', 'class_name']].drop_duplicates()
class_dict = dict(zip(_class_pairs['class_id'].values, _class_pairs['class_name'].values))
len(class_dict)

In [None]:
class VinBigData(torch.utils.data.Dataset):
    """Map-style detection dataset over a directory of images.

    Every item is an ``(image, target)`` pair. ``target`` is a dict with the
    keys ``boxes`` (float32, N x 4, ``x_min, y_min, x_max, y_max``),
    ``labels`` (int64, N), ``image_id`` (the dataset index), ``area``
    (float32, N) and ``iscrowd`` (int64 zeros, N). All per-box entries are
    built from the same final set of boxes so their lengths always agree.

    Args:
        root: Directory of image files. Every entry of ``os.listdir(root)`` is
            treated as an image; entries are visited in sorted order.
        df: DataFrame with an ``image_id`` column (file name without its
            extension), a ``class_id`` column and the box columns ``x_min``,
            ``y_min``, ``x_max``, ``y_max``.
        transforms: Optional callable invoked as
            ``transforms(image=..., bboxes=..., labels=...)`` returning a
            mapping with ``image`` and ``bboxes`` (and ``labels`` when it can
            drop boxes), e.g. an albumentations ``Compose``.
    """

    # Placeholder annotation used when an image ends up with no boxes at all:
    # a 1x1 box in the top-left corner labelled with class 0.
    _PLACEHOLDER_BOX = (0.0, 0.0, 1.0, 1.0)
    _PLACEHOLDER_LABEL = 0

    def __init__(self, root, df, transforms = None):
        self.root = root
        self.transforms = transforms
        self.imgs = list(sorted(os.listdir(root)))
        self.df = df

    def __getitem__(self, index):
        """Load the image at ``index`` and build its detection target.

        Raises:
            OSError: If the image file cannot be read.
        """
        file_name = self.imgs[index]
        file_path = os.path.join(self.root, file_name)

        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports failure by returning None rather than raising.
            raise OSError("Could not read image file: {}".format(file_path))
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = image / 255
        image = image.astype('float32')

        image_id = os.path.splitext(file_name)[0]
        records = self.df.loc[self.df.image_id == image_id, :].reset_index(drop = True)

        # An image whose first annotation is class 0 keeps only that single row.
        if len(records) > 0 and records.loc[0, "class_id"] == 0:
            records = records.loc[[0], :]

        box_columns = ['x_min', 'y_min', 'x_max', 'y_max']
        # The detection model expects floating-point boxes, not integers.
        boxes = records[box_columns].values.astype('float32').reshape(-1, 4)
        labels = records['class_id'].values.astype('int64')

        # Compare against None: a transform object may define its own truthiness.
        if self.transforms is not None:
            sample = self.transforms(
                image=image,
                bboxes=boxes.tolist(),
                labels=labels.tolist(),
            )
            image = sample['image']
            # The transform may have dropped boxes, so take boxes AND labels
            # from its output to keep them aligned.
            boxes = np.asarray(sample['bboxes'], dtype='float32').reshape(-1, 4)
            labels = np.asarray(sample.get('labels', labels), dtype='int64').reshape(-1)

        if boxes.shape[0] == 0:
            # Nothing left to annotate (no rows for this image, or the transform
            # removed every box): fall back to the placeholder annotation.
            boxes = np.array([self._PLACEHOLDER_BOX], dtype='float32')
            labels = np.array([self._PLACEHOLDER_LABEL], dtype='int64')

        area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])

        target = {}
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
        target['labels'] = torch.as_tensor(labels, dtype=torch.int64)
        target['image_id'] = torch.tensor([index])
        target['area'] = torch.as_tensor(area, dtype=torch.float32)
        target['iscrowd'] = torch.zeros(labels.shape[0], dtype=torch.int64)

        return image, target

    def __len__(self):
        """Return the number of files found in ``root``."""
        return len(self.imgs)

In [None]:
def get_valid_transform():
    """Return the albumentations pipeline applied to each sample.

    The pipeline only converts the image to a tensor (``ToTensorV2``). Boxes
    are declared in ``pascal_voc`` format with their class ids carried in the
    ``labels`` field.
    """
    bbox_settings = A.BboxParams(format='pascal_voc', label_fields=['labels'])
    steps = [ToTensorV2(p=1.0)]
    return A.Compose(steps, bbox_params=bbox_settings)

In [None]:
# Training dataset: images from TRAIN_PATH, annotations from df_train,
# converted to tensors by the transform pipeline.
train_data = VinBigData(
    root=TRAIN_PATH,
    df=df_train,
    transforms=get_valid_transform(),
)

In [None]:
# Fetch the first sample to inspect the (image, target) pair the dataset returns.
train_data[0]

In [None]:
def plot_image(index, dataset=None):
    """Display one sample with its bounding boxes and class names drawn on it.

    Args:
        index: Position of the sample in the dataset.
        dataset: Dataset to read from; defaults to the module-level
            ``train_data``. Items must be ``(image, target)`` pairs where
            ``image`` is a channels-first tensor and ``target`` has ``boxes``
            and ``labels`` tensors.
    """
    if dataset is None:
        dataset = train_data
    image, target = dataset[index]

    # permute() yields a non-contiguous view; OpenCV draws in place and needs
    # a contiguous array, so materialise one.
    image = np.ascontiguousarray(image.permute(1,2,0).numpy())

    # The image is float-valued in [0, 1], so pure red is 1.0 (not 255).
    red = (1.0, 0.0, 0.0)

    for box, label in zip(target['boxes'], target['labels']):
        # OpenCV expects plain Python ints for pixel coordinates.
        x_min, y_min, x_max, y_max = (int(v) for v in box.tolist())
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), red, 1)
        caption = class_dict[int(label)].upper()
        cv2.putText(image,
                    caption,
                    (x_min, y_min),
                    fontFace = cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale = 0.5,
                    color = red,
                    thickness = 1,
                    lineType = cv2.LINE_AA
                   )

    plt.figure(figsize=(10,10))
    plt.imshow(image)

In [None]:
# Draw sample 717 with its boxes and labels as a visual check of the annotations.
plot_image(717)

In [None]:
def collate_fn(batch):
    """Transpose a batch of per-sample tuples into per-field tuples.

    A list such as ``[(img0, tgt0), (img1, tgt1)]`` becomes
    ``((img0, img1), (tgt0, tgt1))``; nothing is stacked, so samples may
    differ in size.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Shuffle the training data so every epoch sees the images in a new order;
# collate_fn keeps variable-sized samples as tuples instead of stacking them.
train_dataloader = DataLoader(train_data, batch_size = 8, shuffle = True, collate_fn = collate_fn)

In [None]:
# Number of outputs for the new box classifier: one per entry in class_dict.
num_classes = len(class_dict)

# Faster R-CNN (ResNet-50 FPN backbone) pre-trained on COCO.
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)

# Swap the pre-trained box predictor for a fresh head with num_classes outputs,
# keeping the input width of the existing classification layer.
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

# Move the model to the selected device and display its structure.
model = model.to(device)
model

In [None]:
# Optimise only the parameters that require gradients.
params = list(filter(lambda p: p.requires_grad, model.parameters()))
optimizer = torch.optim.SGD(
    params,
    lr=0.005,
    momentum=0.9,
    weight_decay=0.0005,
)
# StepLR multiplies the learning rate by gamma (0.1) after every
# step_size (3) calls to lr_scheduler.step().
lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.1,
)

In [None]:
# Number of passes over the training data (a single epoch here; raise for a longer run).
num_epochs = 1
error_batch = []

# The detection model only returns its loss dict in training mode.
model.train()

for epoch in tqdm(range(num_epochs)):
    # Running loss total and batch count for this epoch.
    total_loss = 0
    itr = 0
    for images, targets in train_dataloader:
        images = [image.to(device) for image in images]
        targets = [{k: v.to(device) for k,v in t.items()} for t in targets]

        # In training mode the model returns a dict of losses; optimise their sum.
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())
        loss_value = losses.item()

        total_loss += loss_value
        itr += 1

        # Standard optimisation step: clear old gradients, backpropagate, update weights.
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if itr%500 == 0:
            print("Itr : {} loss: {}".format(itr, total_loss/itr))

    # Advance the schedule once per EPOCH. Stepping it per batch would shrink
    # the learning rate by 10x every 3 iterations and stall training almost at once.
    lr_scheduler.step()
    # max(..., 1) avoids a ZeroDivisionError if the loader yielded no batches.
    print("Epoch : {} loss: {}".format(epoch, total_loss/max(itr, 1)))

# Destination file for the trained weights.
PATH = "./VinBigDataFasterRCNN.pth"
# Persist only the model's state dict (parameters and buffers), not the whole module.
torch.save(obj=model.state_dict(), f=PATH)