In [1]:
# Mount Google Drive at /content/drive; later cells read the test video from it
# and copy the annotated result back to it.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%%capture
# Install the pinned torch-snippets release; %%capture keeps pip's log out of the notebook.
!pip install torch-snippets==0.499.16

In [3]:
import os
if not os.path.exists('images'):
    from google.colab import files
    files.upload() # upload kaggle.json
    !mkdir -p ~/.kaggle
    !mv kaggle.json ~/.kaggle/
    !ls ~/.kaggle
    !chmod 600 /root/.kaggle/kaggle.json
    !kaggle datasets download -d sshikamaru/car-object-detection
    !unzip -qq car-object-detection.zip
    !rm car-object-detection.zip
    !cp '/content/drive/MyDrive/object detection/Cars-1900-Short-small.m4v' .


Saving kaggle.json to kaggle.json
kaggle.json
Downloading car-object-detection.zip to /content
 93% 104M/112M [00:01<00:00, 82.3MB/s] 
100% 112M/112M [00:01<00:00, 86.5MB/s]


In [4]:
from torch_snippets import *
from PIL import Image
import cv2
import numpy as np
import glob
# Directory holding the training frames extracted from the Kaggle archive.
IMAGE_ROOT = '/content/data/training_images'
# DF_RAW keeps the annotations exactly as read from disk; `df` is an independent
# working copy, so in-place edits to `df` can never leak back into the raw frame.
DF_RAW = pd.read_csv('/content/data/train_solution_bounding_boxes (1).csv')
df = DF_RAW.copy()
# Native frame size, taken from one sample image (PIL reports size as (width, height)).
# W and H are used to normalise the pixel-space boxes and to size the output video.
with Image.open(f'{IMAGE_ROOT}/vid_4_28320.jpg') as sample_image:
    W, H = sample_image.size

In [5]:
# Preview the first annotation rows: one box per row, corners given in pixels.
df.head()

Unnamed: 0,image,xmin,ymin,xmax,ymax
0,vid_4_1000.jpg,281.259045,187.035071,327.727931,223.225547
1,vid_4_10000.jpg,15.163531,187.035071,120.329957,236.43018
2,vid_4_10040.jpg,239.192475,176.764801,361.968162,236.43018
3,vid_4_10020.jpg,496.483358,172.363256,630.02026,231.539575
4,vid_4_10060.jpg,16.63097,186.54601,132.558611,238.386422


In [6]:
# Express every box corner as a fraction of the native frame size (x by W, y by H)
# so the dataset can rescale boxes to whatever resolution it resizes images to.
# The frame is rebuilt from DF_RAW each time, which keeps this cell idempotent:
# re-running it never divides already-normalised values a second time, and the
# raw annotations are left untouched.
BOX_COLUMNS = ['xmin', 'ymin', 'xmax', 'ymax']
df = DF_RAW.rename(columns={'image' : 'ImageID'})
df[BOX_COLUMNS] = DF_RAW[BOX_COLUMNS] / [W , H , W , H]

In [7]:
# Class-name -> integer-id lookup; id 0 is reserved for the background class.
label2target = {'car': 1, 'background': 0}
# Reverse lookup (integer id -> class name), used when decoding predictions.
target2label = dict((target, label) for label, target in label2target.items())
background_class = label2target['background']
# Number of classes handed to the detector head (background included).
num_classes = len(label2target)

In [8]:
# Display the class-name -> id mapping.
label2target

{'car': 1, 'background': 0}

In [9]:
# Display the id -> class-name mapping.
target2label

{1: 'car', 0: 'background'}

In [10]:
# Display the id assigned to the background class.
background_class

0

In [11]:
def preprocess_image(img):
  """Convert a channels-last image array into a float tensor on `device`.

  Args:
    img: array-like with three axes; the last axis is moved to the front
      (the dataset passes a resized RGB image scaled to [0, 1]).

  Returns:
    A float32 tensor with the axes reordered as (2, 0, 1), placed on the
    module-level `device`.
  """
  channels_first = torch.tensor(img).permute(2, 0, 1)
  on_device = channels_first.to(device)
  return on_device.float()

In [12]:
class OpenDataset(torch.utils.data.Dataset):
    """Detection dataset yielding (image tensor, target dict) pairs, one per image.

    `df` must provide an `ImageID` column plus `xmin`, `ymin`, `xmax`, `ymax`
    columns holding box corners normalised to [0, 1]; every row is labelled 'car'.
    Images are resized to `w` x `h`, and boxes are rescaled to that same size.
    """
    # Resolution every image is resized to before being handed to the model.
    w, h = 224, 224
    def __init__(self, df, image_dir=IMAGE_ROOT):
        """Index the image files in `image_dir` and the unique image ids in `df`."""
        self.image_dir = image_dir
        self.files = glob.glob(self.image_dir+'/*')
        self.df = df
        self.image_infos = df.ImageID.unique()
    def __getitem__(self, ix):
        """Return the preprocessed image and its detection target for index `ix`.

        The target is a dict with `boxes` (float tensor, shape (N, 4), absolute
        coordinates in the resized image) and `labels` (int64 tensor, shape (N,)),
        which is the ground-truth format the torchvision Faster R-CNN expects.
        """
        image_id = self.image_infos[ix]
        img_path = find(image_id, self.files)
        img = Image.open(img_path).convert("RGB")
        img = np.array(img.resize((self.w, self.h), resample=Image.BILINEAR))/255.
        # Look the boxes up in this dataset's own frame (not the notebook-level
        # `df`), so a train/validation subset only ever sees its own rows.
        rows = self.df[self.df['ImageID'] == image_id]
        labels = ['car'] * len(rows)
        # Scale normalised corners to absolute pixel positions in the resized image.
        # A fresh array is produced rather than multiplying in place, because
        # the array pandas hands back may be read-only.
        scale = np.array([self.w, self.h, self.w, self.h], dtype=np.float64)
        boxes = rows[['xmin','ymin','xmax','ymax']].to_numpy(dtype=np.float64) * scale
        boxes = boxes.astype(np.uint32).tolist() # truncate to whole pixels
        target = {}
        # reshape keeps the (N, 4) layout even if there happen to be no boxes
        target["boxes"] = torch.tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        target["labels"] = torch.tensor([label2target[i] for i in labels], dtype=torch.int64)
        img = preprocess_image(img)
        return img, target
    def collate_fn(self, batch):
        """Regroup a list of (image, target) pairs into (images, targets) tuples."""
        return tuple(zip(*batch))

    def __len__(self):
        """Number of distinct images in this dataset."""
        return len(self.image_infos)
     

In [13]:
from sklearn.model_selection import train_test_split

# Split on image ids rather than rows, so all boxes of one image stay in one subset.
trn_ids, val_ids = train_test_split(df.ImageID.unique(), test_size=0.1, random_state=99)
trn_df, val_df = df[df['ImageID'].isin(trn_ids)], df[df['ImageID'].isin(val_ids)]

train_ds = OpenDataset(trn_df)
test_ds = OpenDataset(val_df)

# Shuffle the training images each epoch (seeded, so runs stay reproducible);
# without it every epoch replays the same batches in annotation-file order.
# drop_last avoids a short final training batch.
train_loader = DataLoader(train_ds, batch_size=4, shuffle=True,
                          generator=torch.Generator().manual_seed(99),
                          collate_fn=train_ds.collate_fn, drop_last=True)
# Validation keeps every image: dropping the last partial batch would silently
# exclude up to three images from the reported validation losses.
test_loader = DataLoader(test_ds, batch_size=4, collate_fn=test_ds.collate_fn, drop_last=False)

In [14]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [15]:
def get_model():
  """Build a pretrained Faster R-CNN (ResNet-50 FPN) with a new box predictor.

  The stock box predictor is swapped for a FastRCNNPredictor with
  `num_classes` outputs, reusing the input width of the existing
  classification layer.

  Returns:
    The modified detection model.
  """
  detector = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained = True)
  roi_heads = detector.roi_heads
  # Number of input features feeding the existing classification layer.
  head_in_features = roi_heads.box_predictor.cls_score.in_features
  roi_heads.box_predictor = FastRCNNPredictor(head_in_features, num_classes)
  return detector

In [16]:
# Build the model once (this fetches the pretrained checkpoint, as the output below
# shows). Note the training-setup cell further down calls get_model() again and
# rebinds `model`.
model = get_model()

Downloading: "https://download.pytorch.org/models/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth" to /root/.cache/torch/hub/checkpoints/fasterrcnn_resnet50_fpn_coco-258fb6c6.pth


  0%|          | 0.00/160M [00:00<?, ?B/s]

In [17]:
def train_batch(inputs, model, optimizer):
    """Run one optimisation step on a single batch.

    Args:
        inputs: pair `(images, targets)`; `images` is an iterable of tensors and
            `targets` an iterable of dicts of tensors.
        model: callable as `model(images, targets)` returning a dict of losses.
        optimizer: optimizer whose gradients are reset and which is stepped.

    Returns:
        `(loss, losses)`: the summed loss and the per-component loss dict.
    """
    model.train()
    images, annotations = inputs
    # Move the whole batch onto the module-level `device`.
    images = [img.to(device) for img in images]
    annotations = [
        {name: tensor.to(device) for name, tensor in annotation.items()}
        for annotation in annotations
    ]
    optimizer.zero_grad()
    losses = model(images, annotations)
    loss = sum(losses.values())
    loss.backward()
    optimizer.step()
    return loss, losses

@torch.no_grad() # gradient tracking is disabled for everything inside this function
def validate_batch(inputs, model):
    """Compute the losses for one validation batch without updating the model.

    Args:
        inputs: pair `(images, targets)`; `images` is an iterable of tensors and
            `targets` an iterable of dicts of tensors.
        model: callable as `model(images, targets)` returning a dict of losses.

    Returns:
        `(loss, losses)`: the summed loss and the per-component loss dict.
    """
    # The model's own forward method only returns the loss dictionary in train
    # mode, so train mode is required here even though nothing is being trained.
    model.train()
    images, targets = inputs
    images = [image.to(device) for image in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

    # No optimizer is touched: under no_grad no gradients are produced, and this
    # function must not depend on a notebook-level `optimizer` it was never given.
    losses = model(images, targets)
    loss = sum(losses.values())
    return loss, losses

In [18]:
# Fresh detector, moved to the selected device.
model = get_model()
model = model.to(device)
# SGD hyper-parameters for the run.
sgd_settings = dict(lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer = torch.optim.SGD(model.parameters(), **sgd_settings)
# Number of passes over the training set; the Report logger is sized to match.
n_epochs = 14
log = Report(n_epochs)

In [19]:
# Loss components returned by the detector, in the order they are unpacked below.
LOSS_KEYS = ['loss_classifier','loss_box_reg','loss_objectness','loss_rpn_box_reg']
# Print averaged metrics about five times per run; max(1, ...) avoids a
# modulo-by-zero when n_epochs is smaller than 5.
report_every = max(1, n_epochs//5)

for epoch in range(n_epochs):
  # ---- training pass ----
  _n = len(train_loader)
  for ix , inputs in enumerate(train_loader):
    loss , losses = train_batch(inputs , model , optimizer)
    loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
            [losses[k] for k in LOSS_KEYS]
    # Fractional epoch position of this batch, used as the x-coordinate in the log.
    pos = (epoch + (ix+1)/_n)
    log.record(pos, trn_loss=loss.item(), trn_loc_loss=loc_loss.item(),
                   trn_regr_loss=regr_loss.item(), trn_objectness_loss=loss_objectness.item(),
                   trn_rpn_box_reg_loss=loss_rpn_box_reg.item(), end='\r')

  # ---- validation pass ----
  # The position must be relative to the validation loader's own length;
  # reusing the training length would squeeze every validation record into
  # the first fraction of the epoch.
  _n = len(test_loader)
  for ix , inputs in enumerate(test_loader):
    loss , losses = validate_batch(inputs , model)
    loc_loss, regr_loss, loss_objectness, loss_rpn_box_reg = \
            [losses[k] for k in LOSS_KEYS]
    pos = (epoch + (ix+1)/_n)
    log.record(pos, val_loss=loss.item(), val_loc_loss=loc_loss.item(),
                   val_regr_loss=regr_loss.item(), val_objectness_loss=loss_objectness.item(),
                   val_rpn_box_reg_loss=loss_rpn_box_reg.item(), end='\r')
  if (epoch+1)%report_every==0: log.report_avgs(epoch+1)

EPOCH: 2.000  trn_objectness_loss: 0.001  trn_loss: 0.149  val_regr_loss: 0.122  trn_rpn_box_reg_loss: 0.002  trn_regr_loss: 0.109  trn_loc_loss: 0.037  val_loc_loss: 0.042  val_loss: 0.167  val_rpn_box_reg_loss: 0.002  val_objectness_loss: 0.001  (142.00s - 851.98s remaining)
EPOCH: 4.000  trn_objectness_loss: 0.000  trn_loss: 0.107  val_regr_loss: 0.108  trn_rpn_box_reg_loss: 0.001  trn_regr_loss: 0.078  trn_loc_loss: 0.028  val_loc_loss: 0.044  val_loss: 0.155  val_rpn_box_reg_loss: 0.002  val_objectness_loss: 0.001  (277.34s - 693.35s remaining)
EPOCH: 6.000  trn_objectness_loss: 0.000  trn_loss: 0.107  val_regr_loss: 0.115  trn_rpn_box_reg_loss: 0.001  trn_regr_loss: 0.081  trn_loc_loss: 0.025  val_loc_loss: 0.043  val_loss: 0.161  val_rpn_box_reg_loss: 0.003  val_objectness_loss: 0.001  (412.59s - 550.13s remaining)
EPOCH: 8.000  trn_objectness_loss: 0.000  trn_loss: 0.095  val_regr_loss: 0.123  trn_rpn_box_reg_loss: 0.001  trn_regr_loss: 0.069  trn_loc_loss: 0.024  val_loc_loss:

In [20]:
def video_prediction(detection_threshold,
                     input_path='/content/Cars-1900-Short-small.m4v',
                     output_path='/content/Cars-1900-Short-small-output.mp4'):
  """Run the detector over a video and write an annotated copy.

  Each frame is resized to 224x224, passed through the notebook-level `model`,
  and every detection scoring at least `detection_threshold` is drawn (box plus
  class name) on the frame, which is then resized to (W, H) and written out.

  Args:
    detection_threshold: minimum score a detection needs in order to be drawn.
    input_path: video file to read.
    output_path: annotated video file to write (mp4v codec).

  Raises:
    IOError: if the input video cannot be opened.
  """
  RESIZE_TO = (224 , 224)
  RESIZE_OUT = (W , H)

  model.eval()
  videoCapture = cv2.VideoCapture(input_path)
  if not videoCapture.isOpened():
    videoCapture.release()
    raise IOError(f'Could not open input video: {input_path}')

  # Write at the source frame rate so playback speed matches the input;
  # fall back to 30 when the container does not report a usable value.
  fps = videoCapture.get(cv2.CAP_PROP_FPS)
  if not fps > 0:
    fps = 30

  videoWriter = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                      RESIZE_OUT)
  try:
    success, frame = videoCapture.read()

    while success:
      orig_frame = cv2.resize(frame , RESIZE_TO).copy()
      frame = cv2.cvtColor(orig_frame, cv2.COLOR_BGR2RGB).astype(np.float32)
      frame = frame/255.
      frame = torch.tensor(np.expand_dims(frame , 0)).permute(0 , 3 , 1 ,2).float().to(device)
      # Inference only: skip autograd bookkeeping for every frame.
      with torch.no_grad():
        outputs = model(frame)
      outputs = [{k: v.to('cpu') for k, v in t.items()} for t in outputs]

      if len(outputs[0]['boxes']) != 0:
        boxes = outputs[0]['boxes'].detach().numpy()
        scores = outputs[0]['scores'].detach().numpy()
        # keep only the detections that reach `detection_threshold`
        keep = scores >= detection_threshold
        draw_boxes = boxes[keep].astype(np.int32)
        classes = outputs[0]['labels'].numpy()[keep]
        # names of the predicted classes, aligned with `draw_boxes`
        pred_classes = [target2label[i] for i in classes]
        # draw each bounding box with its class name just above it
        for box, class_name in zip(draw_boxes, pred_classes):
          cv2.rectangle(orig_frame,
                      (int(box[0]), int(box[1])),
                      (int(box[2]), int(box[3])),
                      color = [0 , 0 , 0] , thickness=1)
          cv2.putText(orig_frame , class_name ,
                      (int(box[0]) , int(box[1]-5)),
                      cv2.FONT_HERSHEY_SIMPLEX ,
                      0.3 ,
                      color =[0 , 0 , 0],
                      thickness=1,
                      lineType=cv2.LINE_AA)

      videoWriter.write(cv2.resize(orig_frame,RESIZE_OUT))
      success, frame = videoCapture.read()
  finally:
    # Releasing the writer is what finalises the output file; do it (and free
    # the capture) even if a frame fails part-way through.
    videoCapture.release()
    videoWriter.release()

  print('TEST PREDICTIONS COMPLETE')

In [21]:
# Annotate the video, drawing only detections whose score is at least 0.8.
video_prediction(0.8)

In [22]:
# Copy the annotated video to Google Drive.
!cp '/content/Cars-1900-Short-small-output.mp4' '/content/drive/MyDrive/object detection/'