In [1]:
%matplotlib inline
import cv2, os, math, time, ast
from datetime import datetime
import time
from PIL import Image
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from typing import List
from torchvision.ops import box_iou

import torch
import torchvision
from torch.utils.data import DataLoader, Dataset
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN_ResNet50_FPN_Weights
from torchvision.models.detection import FasterRCNN
from sklearn.model_selection import train_test_split
from torchinfo import summary
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone
from torchvision.models import ResNet101_Weights

In [2]:
# Optional Google Colab setup (disabled): mount Drive and change into the
# dataset directory. Left commented out for local runs.
#from google.colab import drive
#drive.mount('/content/gdrive', force_remount=True)
#% cd /content/gdrive/MyDrive/datasets/gbr_cots/
# Record the kernel's working directory at start-up.
# NOTE(review): `cwd` is not referenced by any other visible cell - confirm it is still needed.
cwd = os.getcwd()

In [3]:
# --- Load the frame-level metadata and derive per-image / per-box tables ---

# Dataset root. Defaults to the original local path; set the COTS_BASE_DIR
# environment variable to run the notebook on another machine.
base_dir = os.environ.get('COTS_BASE_DIR', 'h:/Python/ob')
train_csv = os.path.join(base_dir, "train.csv")
train_df = pd.read_csv(train_csv)
train_df["img_path"] = os.path.join(base_dir, "train_images") + "/video_" + train_df.video_id.astype(str) + "/" + train_df.video_frame.astype(str) + ".jpg"
# The annotations column stores Python-literal strings (a list of dicts).
# literal_eval parses them without executing arbitrary code from the CSV.
train_df["annotations"] = train_df["annotations"].apply(ast.literal_eval)
train_df["a_count"] = train_df["annotations"].apply(len)
train_df = train_df.drop(columns=['video_id', 'sequence', 'video_frame', 'sequence_frame'])

# Keep only frames that have at least one annotated box.
train_df_positive = train_df[train_df['a_count'] != 0].reset_index(drop=True)
print('shape of train data with annotations:', train_df_positive.shape)

# One row per bounding box: image_id plus the keys of each annotation dict.
# Building the frame from the list of dicts in one go avoids a row-wise
# apply(pd.Series), which is very slow on large tables.
_boxes = train_df_positive.set_index('image_id')['annotations'].explode()
train_df_ratio = pd.DataFrame(_boxes.tolist(), index=_boxes.index).reset_index()
train_df_ratio['aspect_ratio'] = train_df_ratio['height'] / train_df_ratio['width']

train_df_p, val_df_p = train_test_split(train_df_positive, test_size=0.1, random_state=0)
print('shape of training data:', train_df_p.shape)
print('shape of validation data:', val_df_p.shape)
print('Min number of bboxs per image:', train_df_positive.a_count.min())
print('Max number of bboxs per image:', train_df_positive.a_count.max())
print('Max width of all bboxs:', train_df_ratio.width.max())
print('Max height of all bboxs:', train_df_ratio.height.max())
print('Min width of all bboxs:', train_df_ratio.width.min())
print('Min height of all bboxs:', train_df_ratio.height.min())

# Share of boxes with an extreme height/width ratio. The thresholds are
# formatted into the message so the printed text cannot drift from the
# values actually tested (the old text said 2 / 0.25 while testing 1.5 / 0.5).
TALL_RATIO = 1.5
WIDE_RATIO = 0.5
_n_boxes = train_df_ratio.shape[0]

print('{:.2%} of all aspect ratios greater than {}'.
      format((train_df_ratio['aspect_ratio'] > TALL_RATIO).sum() / _n_boxes, TALL_RATIO))

print('{:.2%} of all aspect ratios less than {}'.
      format((train_df_ratio['aspect_ratio'] < WIDE_RATIO).sum() / _n_boxes, WIDE_RATIO))

shape of train data with annotations: (4919, 4)
shape of training data: (4427, 4)
shape of validation data: (492, 4)
Min number of bboxs per image: 1
Max number of bboxs per image: 18
Max width of all bboxs: 243
Max height of all bboxs: 222
Min width of all bboxs: 17
Min height of all bboxs: 13
1.47% of all aspect ratios greater than 2
1.15% of all aspect ratios less than 0.25


In [4]:
class COTS_Dataset(torch.utils.data.Dataset):
    """Detection dataset yielding ``(image, target)`` pairs for torchvision Faster R-CNN.

    Parameters
    ----------
    df_img : DataFrame with one row per image; must provide ``img_path`` and
        ``image_id`` columns.
    df_bbox : DataFrame with one row per bounding box; must provide
        ``image_id``, ``x``, ``y``, ``width`` and ``height`` columns.
    original_size, resize_size : stored on the instance only. No resizing is
        performed by this class.
    device : optional torch device the image tensor is moved to. When omitted,
        a notebook-level ``device`` global is used if one exists (the
        behaviour of earlier versions); otherwise the image stays on the CPU.
    """

    def __init__(self, df_img, df_bbox, original_size=(1280, 720), resize_size=(1280, 720), device=None):
        self.df_img = df_img
        self.df_bbox = df_bbox
        self.original_size = original_size
        # Misspelled attribute kept as an alias so existing code keeps working.
        self.orginal_size = original_size
        self.resize_size = resize_size
        self.device = device

    @staticmethod
    def _xywh_to_xyxy(xywh):
        """Convert ``[x, y, width, height]`` rows to ``[x1, y1, x2, y2]`` (float32, shape (N, 4))."""
        xywh = np.asarray(xywh, dtype=np.float32).reshape(-1, 4)
        xyxy = xywh.copy()
        xyxy[:, 2:] += xywh[:, :2]
        return xyxy

    def _target_device(self):
        """Return the device images are moved to, or None to leave them in place."""
        if self.device is not None:
            return self.device
        # Legacy fallback: earlier versions read a module-level `device` name.
        return globals().get('device')

    def __getitem__(self, idx):
        """Load image ``idx`` and build its target dict.

        Returns a float32 CHW image tensor scaled to [0, 1] and a dict with
        ``boxes`` (N, 4), ``labels`` (N,), ``image_id`` (1,), ``area`` (N,)
        and ``iscrowd`` (N,) tensors. Every box gets label 1.
        """
        row = self.df_img.iloc[idx]
        # Close the file handle as soon as the pixels are decoded.
        with Image.open(row['img_path']) as pil_img:
            img = np.array(pil_img.convert('RGB')) / 255.

        rows = self.df_bbox[self.df_bbox['image_id'] == row['image_id']]
        # float32 throughout: avoids the wrap-around a uint32 cast gives for
        # negative coordinates, and keeps an empty selection at shape (0, 4).
        xywh = rows[['x', 'y', 'width', 'height']].to_numpy(dtype=np.float32)
        n_boxes = xywh.shape[0]

        # torchvision detection models take ground truth as a dict of tensors.
        target = {}
        target["boxes"] = torch.from_numpy(self._xywh_to_xyxy(xywh))
        target["labels"] = torch.ones((n_boxes,), dtype=torch.int64)
        target["image_id"] = torch.tensor([idx])
        target["area"] = torch.from_numpy(xywh[:, 2] * xywh[:, 3])
        target["iscrowd"] = torch.zeros((n_boxes,), dtype=torch.int64)

        img = torch.from_numpy(img).permute(2, 0, 1)  # HWC -> CHW
        target_device = self._target_device()
        if target_device is not None:
            img = img.to(target_device)
        return img.float(), target

    def collate_fn(self, batch):
        """Transpose a list of ``(image, target)`` pairs into ``(images, targets)`` tuples."""
        return tuple(zip(*batch))

    def __len__(self):
        """Number of images in ``df_img``."""
        return self.df_img.shape[0]

In [5]:
# Datasets share the per-box table; each one filters it by image_id on access.
train_ds = COTS_Dataset(train_df_p, train_df_ratio)
test_ds = COTS_Dataset(val_df_p, train_df_ratio)

BATCH_SIZE = 4

# Reshuffle the training set every epoch so SGD does not see the same batch
# composition and order each time. Validation order is left fixed.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True,
                          collate_fn=train_ds.collate_fn, drop_last=False)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False,
                         collate_fn=test_ds.collate_fn, drop_last=False)

# Quick sanity check (uncomment to inspect one validation batch):
# img, tgt = next(iter(test_loader))
# print(img[0].shape)
# print(tgt)

In [6]:
from torchvision.models.detection.anchor_utils import AnchorGenerator

# --- Candidate model hyper-parameters -------------------------------------
# NOTE(review): min_size / max_size, image_mean / image_std and
# anchor_generator are computed here but are NOT passed to FasterRCNN below,
# so the model is built with torchvision's default transform and anchors.
# Confirm whether that is intentional before wiring them in, since doing so
# changes the model that gets trained.
min_size = 810
max_size = 1440
df = pd.read_csv(os.path.join(base_dir, 'train_models', 'cots_mean_std.csv'), header=0)
image_mean = df['total_mean'].tolist()
image_std = df['total_std'].tolist()

# One anchor size per FPN level, three aspect ratios at every level.
anchor_sizes = ((8,), (16,), (32,), (64,), (128,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

# ResNet-101 + FPN backbone with ImageNet weights, requested with trainable_layers=2.
backbone = resnet_fpn_backbone('resnet101', weights=ResNet101_Weights.IMAGENET1K_V2, trainable_layers=2)

DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# Lower-case alias kept for cells that refer to `device`; single source of truth.
device = DEVICE

# Alternative that was tried: COCO-pretrained ResNet-50 detector with a new box head.
#model = fasterrcnn_resnet50_fpn(weights=FasterRCNN_ResNet50_FPN_Weights.DEFAULT)
#image_mean=image_mean, image_std =image_std, anchor_generator=anchor_generator,

#in_features = model.roi_heads.box_predictor.cls_score.in_features
#model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)

# Two classes: background + crown-of-thorns starfish.
model = FasterRCNN(backbone, num_classes=2)
no_of_epochs = 12

# Hand only the trainable parameters to the optimizer; frozen backbone
# weights never receive gradients, so there is no reason to register them.
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# Divide the learning rate by 10 every 3 epochs.
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
model.to(device)

#summary(model, input_size=(1, 3, 720, 1280))



FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d(64, eps=1e-05)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d(64, eps=1e-05)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d(64, eps=1e-05)
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d(256, eps=1e-05)
          (relu

In [None]:
time_start = datetime.now()

# Per-epoch histories of the mean training losses and the learning rate.
loss = []
loss_box_reg = []
loss_clf = []
loss_rpn_box = []
loss_obj = []
lr = []

# Per-epoch histories of the mean validation losses.
loss_val = []
loss_box_reg_val = []
loss_clf_val = []
loss_rpn_box_val = []
loss_obj_val = []

n_batches, n_batches_val = len(train_loader), len(test_loader)

# Keys read from the loss dict the model returns for (images, targets).
LOSS_KEYS = ('loss_box_reg', 'loss_classifier', 'loss_rpn_box_reg', 'loss_objectness')


def _new_totals():
    """Return a zeroed accumulator for the total loss and each component."""
    return dict.fromkeys(('total',) + LOSS_KEYS, 0.0)


def _accumulate(totals, loss_dict):
    """Add one batch's losses to ``totals`` and return the summed loss tensor."""
    batch_loss = sum(loss_dict.values())
    totals['total'] += batch_loss.item()
    for key in LOSS_KEYS:
        totals[key] += loss_dict[key].item()
    return batch_loss


def _to_device(images, targets):
    """Move a batch of images and target dicts onto DEVICE."""
    images = [image.to(DEVICE) for image in images]
    targets = [{k: v.to(DEVICE) for k, v in t.items()} for t in targets]
    return images, targets


for epoch in range(no_of_epochs):
    time_begin = time.time()
    train_totals = _new_totals()
    val_totals = _new_totals()

    # Make the mode explicit so re-running this cell after an eval() call
    # still trains (and still yields loss dicts rather than detections).
    model.train()
    # Learning rate in effect for this epoch, read before the scheduler steps.
    lr_epoch = optimizer.param_groups[0]['lr']

    for images, targets in train_loader:
        images, targets = _to_device(images, targets)

        # Predict
        loss_dict = model(images, targets)
        losses = _accumulate(train_totals, loss_dict)

        # Back-prop
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

    # update the learning rate
    if lr_scheduler is not None:
        lr_scheduler.step()

    # Validation losses: the model is deliberately left in train mode because
    # torchvision detection models only return losses in that mode; no_grad
    # prevents any gradient bookkeeping.
    with torch.no_grad():
        for images, targets in test_loader:
            images, targets = _to_device(images, targets)
            val_loss_dict = model(images, targets)
            # Every component is taken from val_loss_dict (the previous code
            # added the last *training* batch's RPN/objectness losses here).
            _accumulate(val_totals, val_loss_dict)

    train_loss = train_totals['total'] / n_batches
    val_loss = val_totals['total'] / n_batches_val
    loss.append(train_loss)
    loss_val.append(val_loss)
    lr.append(lr_epoch)

    loss_box_reg.append(train_totals['loss_box_reg'] / n_batches)
    loss_clf.append(train_totals['loss_classifier'] / n_batches)
    loss_rpn_box.append(train_totals['loss_rpn_box_reg'] / n_batches)
    loss_obj.append(train_totals['loss_objectness'] / n_batches)

    loss_box_reg_val.append(val_totals['loss_box_reg'] / n_batches_val)
    loss_clf_val.append(val_totals['loss_classifier'] / n_batches_val)
    loss_rpn_box_val.append(val_totals['loss_rpn_box_reg'] / n_batches_val)
    loss_obj_val.append(val_totals['loss_objectness'] / n_batches_val)

    # Save model
    chk_name = f'./fasterrcnn_resnet101_fpn_free_last2_e{epoch+1}.bin'
    torch.save(model.state_dict(), chk_name)

    elapsed = time.time() - time_begin
    dict_01 = {'Epoch': epoch+1, 'no_of_epochs': no_of_epochs,
               'train_loss': train_loss, 'val_loss': val_loss,
               'chk_name': chk_name, 'time_used': elapsed}
    print('Epoch {Epoch: 2d}/{no_of_epochs:2d}_Train loss: {train_loss:.3f}, Val loss: {val_loss:.3f} --> {chk_name} time_used: {time_used:.2f} seconds'.format(**dict_01))

print('Training Completed and time used: ', datetime.now() - time_start)
    

Epoch  1/12_Train loss: 0.285, Val loss: 0.246 --> ./fasterrcnn_resnet101_fpn_free_last2_e1.bin time_used: 1541.92 seconds
Epoch  2/12_Train loss: 0.228, Val loss: 0.234 --> ./fasterrcnn_resnet101_fpn_free_last2_e2.bin time_used: 1528.98 seconds
Epoch  3/12_Train loss: 0.191, Val loss: 0.205 --> ./fasterrcnn_resnet101_fpn_free_last2_e3.bin time_used: 1545.39 seconds
Epoch  4/12_Train loss: 0.154, Val loss: 0.183 --> ./fasterrcnn_resnet101_fpn_free_last2_e4.bin time_used: 1545.08 seconds


In [None]:
# Collect the per-epoch histories into one table (one column per metric,
# one row per epoch) and persist it next to the notebook.
dict_01 = dict(
    loss=loss,
    loss_box_reg=loss_box_reg,
    loss_classifier=loss_clf,
    lr=lr,
    loss_val=loss_val,
    loss_box_reg_val=loss_box_reg_val,
    loss_classifier_val=loss_clf_val,
    loss_rpn_box=loss_rpn_box,
    loss_obj=loss_obj,
    loss_rpn_box_val=loss_rpn_box_val,
    loss_obj_val=loss_obj_val,
)
df_training_log = pd.DataFrame(dict_01)
df_training_log.to_csv(
    './training_log_FasterRCNN_resnet101_fpn_free_last2.csv',
    index=False,
)


In [None]:
# Reload the saved per-epoch training log; the first CSV row is the header.
df_training_log = pd.read_csv(
    './training_log_FasterRCNN_resnet101_fpn_free_last2.csv',
    header=0,
)

In [None]:
from matplotlib.ticker import MaxNLocator

# Derive the x axis from the log itself so a run that stopped early (fewer
# rows than the planned number of epochs) still plots instead of raising a
# length mismatch.
n_epochs_logged = len(df_training_log)
epoch = np.arange(1, n_epochs_logged + 1)
# A single logged epoch would give identical x limits; keep the axis non-degenerate.
x_max = max(n_epochs_logged, 2)

fig, ax = plt.subplots(2, 2, figsize=(12, 12))

# Top-left: total loss.
ax[0, 0].plot(epoch, df_training_log['loss'], label='Train Loss')
ax[0, 0].plot(epoch, df_training_log['loss_val'], label='Validation Loss')
ax[0, 0].set_title('Loss', fontsize=15)
ax[0, 0].set_xlabel('Total Epoch', fontsize=12)

# Top-right: ROI-head losses (solid = box regression, dash-dot = classifier).
ax[0, 1].plot(epoch, df_training_log['loss_box_reg'], color='c',
              label='Train Bounding Box Regression Loss')
ax[0, 1].plot(epoch, df_training_log['loss_box_reg_val'], color='m',
              label='Validation Bounding Box Regression Loss')
ax[0, 1].plot(epoch, df_training_log['loss_classifier'], color='c',
              linestyle='dashdot', label='Train Classifier Loss')
ax[0, 1].plot(epoch, df_training_log['loss_classifier_val'], color='m',
              linestyle='dashdot', label='Validation Classifier Loss')
ax[0, 1].set_title('Bounding Box Regression Loss and Classifier Loss', fontsize=15)
ax[0, 1].set_xlabel('Epoch', fontsize=12)

# Bottom-left: RPN losses (solid = box regression, dash-dot = objectness).
ax[1, 0].plot(epoch, df_training_log['loss_rpn_box'], color='c',
              label='Train RPN Box Regression Loss')
ax[1, 0].plot(epoch, df_training_log['loss_rpn_box_val'], color='m',
              label='Validation RPN Box Regression Loss')
ax[1, 0].plot(epoch, df_training_log['loss_obj'], color='c',
              linestyle='dashdot', label='Train Objectness Loss')
ax[1, 0].plot(epoch, df_training_log['loss_obj_val'], color='m',
              linestyle='dashdot', label='Validation Objectness Loss')
ax[1, 0].set_title('RPN Loss and Objectness Loss', fontsize=15)
ax[1, 0].set_xlabel('Epoch', fontsize=12)

# Bottom-right: learning-rate schedule.
ax[1, 1].plot(epoch, df_training_log['lr'], label='Learning Rate')
ax[1, 1].set_title('Learning Rate', fontsize=15)
ax[1, 1].set_xlabel('Epoch', fontsize=12)

# Shared axis treatment: epochs are integers, so force integer ticks.
for panel in ax.flat:
    panel.set_xlim(1, x_max)
    panel.xaxis.set_major_locator(MaxNLocator(integer=True))
    panel.legend()

fig.suptitle('Training Curves of Faster RCNN', fontsize=18)
fig.tight_layout()