<a href="https://colab.research.google.com/github/stephenjkaplan/snow-grooming-object-detection/blob/master/Snow_Grooming_Object_Detection_with_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup

##### Download additional utility files.

Torchvision

In [1]:
%%shell

# Fetch the TorchVision repo so the helper modules under
# references/detection can be copied next to the notebook.
# The clone is skipped when ./vision already exists, which keeps this cell
# re-runnable without the "destination path 'vision' already exists" error.
[ -d vision ] || git clone https://github.com/pytorch/vision.git
cd vision
# Pin the reference scripts to a fixed release.
git checkout v0.3.0

# Copy the detection helpers one level up so they can be imported directly.
cp references/detection/utils.py ../
cp references/detection/transforms.py ../
cp references/detection/coco_eval.py ../
cp references/detection/engine.py ../
cp references/detection/coco_utils.py ../

fatal: destination path 'vision' already exists and is not an empty directory.
HEAD is now at be37608 version check against PyTorch's CUDA version




Get YouTube Downloader

In [2]:
!git clone https://github.com/ankandrew/YT-Downloader-Trimmer.git ytdownloader    # for downloading and trimming videos
!pip install -r /content/ytdownloader/requirements.txt            

fatal: destination path 'ytdownloader' already exists and is not an empty directory.


##### Imports 

In [3]:
# Project folder on Google Drive. It is appended to sys.path so the custom
# modules (utilities, dataset) can be imported, and saved models are written
# to its models/ subfolder. Note the trailing slash: paths are built from it
# by plain string concatenation.
root_dir = '/content/drive/My Drive/Colab Notebooks/snow_grooming/'

In [4]:
import os
import sys
import time
from datetime import datetime
from google.colab import drive

import numpy as np
from matplotlib import style
import matplotlib.pyplot as plt
style.use('fivethirtyeight')

from PIL import Image
from ytdownloader.downloader import Downloader

import cv2
import utils
import torch
import torchvision

# custom modules
# The project modules live in the Google Drive folder, so Drive has to be
# mounted before they can be imported; on a fresh runtime the imports below
# would otherwise fail. Mounting an already-mounted drive only prints a notice.
drive.mount("/content/drive")

# root_dir is defined in the previous cell and `sys` is imported at the top of
# this cell; the guard avoids adding the same entry again when the cell is re-run.
if root_dir not in sys.path:
    sys.path.append(root_dir)

from utilities import get_object_detection_model, train_one_epoch, \
  get_validation_loss, evaluate
from dataset import GoogleOpenImageDataset

##### Make my Google Drive files available.

In [5]:
# Mount Google Drive at /content/drive so that everything under root_dir
# (custom modules, saved models) is reachable from this runtime.
drive.mount("/content/drive")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##### Useful global variables.

In [6]:
# Object classes to detect. The order matters: class_lookup_table (defined in
# the visualization section) maps label ids 1, 2, 3 to these entries by position.
obj_class_labels = ['tree', 'person', 'street light']

In [7]:
# Un-split dataset object (max_images_per_class=2000). It is used for the
# spot-check below and, later, to look up image paths through dataset.imgs.
dataset = GoogleOpenImageDataset(root_dir, obj_class_labels, max_images_per_class=2000)

In [8]:
# Spot-check one arbitrary sample (index 40); the displayed tuple starts with
# the image tensor.
dataset[40]

(tensor([[[0.0078, 0.0078, 0.0039,  ..., 0.1490, 0.1529, 0.1608],
          [0.0157, 0.0118, 0.0118,  ..., 0.1451, 0.1569, 0.1647],
          [0.0078, 0.0078, 0.0078,  ..., 0.1529, 0.1608, 0.1686],
          ...,
          [0.5882, 0.5686, 0.5765,  ..., 0.6431, 0.6431, 0.6392],
          [0.5686, 0.5725, 0.5882,  ..., 0.6392, 0.6353, 0.6314],
          [0.5765, 0.5804, 0.5961,  ..., 0.6235, 0.6196, 0.6157]],
 
         [[0.3294, 0.3294, 0.3255,  ..., 0.4039, 0.4039, 0.4039],
          [0.3373, 0.3333, 0.3333,  ..., 0.4000, 0.4078, 0.4078],
          [0.3294, 0.3294, 0.3294,  ..., 0.4078, 0.4118, 0.4118],
          ...,
          [0.6118, 0.5922, 0.6000,  ..., 0.6549, 0.6549, 0.6510],
          [0.5922, 0.5961, 0.6118,  ..., 0.6510, 0.6471, 0.6431],
          [0.6000, 0.6039, 0.6196,  ..., 0.6353, 0.6314, 0.6275]],
 
         [[0.4784, 0.4784, 0.4824,  ..., 0.5529, 0.5451, 0.5490],
          [0.4863, 0.4824, 0.4902,  ..., 0.5490, 0.5490, 0.5529],
          [0.4784, 0.4863, 0.4863,  ...,

### Modeling & Training

##### Create training/validation/test datasets.

In [9]:
# Build one dataset object per split. All three are created with the same
# arguments except that the training one passes train=True
# (presumably this enables training-time transforms inside
# GoogleOpenImageDataset — TODO confirm against dataset.py).
dataset_train = GoogleOpenImageDataset(root_dir, obj_class_labels, max_images_per_class=2000, train=True)
dataset_val = GoogleOpenImageDataset(root_dir, obj_class_labels, max_images_per_class=2000)
dataset_test = GoogleOpenImageDataset(root_dir, obj_class_labels, max_images_per_class=2000)

In [10]:
# Split fractions: 80/20 (train+val)/test, then 80/20 train/val within the
# first part, i.e. 64% / 16% / 20% of all images.
train_percent, val_percent, test_percent = 0.64, 0.16, 0.20

# Turn the fractions into absolute sample counts.
total_size = len(dataset_train)
train_size = int(train_percent * total_size)
val_size = int(val_percent * total_size)
# The test split takes whatever is left, so the three sizes always add up to
# total_size even after the truncation above.
test_size = total_size - (train_size + val_size)

splits = [train_size, val_size, test_size]

In [11]:
# split the dataset in train, val and test set
# split the dataset in train, val and test set
# A fixed seed makes the random split reproducible across runs.
torch.manual_seed(1)
indices = torch.randperm(total_size).tolist()
# random_split returns three Subset objects over `indices`; indexing one of
# them (e.g. test_idx[k]) yields an index into the full dataset.
train_idx, val_idx, test_idx = torch.utils.data.random_split(indices, splits)

# make subsets based on train/val/test splits
# NOTE: dataset_train/val/test are rebound to Subset objects here, so re-running
# the split-size cell after this one would measure the training subset instead
# of the full dataset.
dataset_train = torch.utils.data.Subset(dataset_train, train_idx)
dataset_val = torch.utils.data.Subset(dataset_val, val_idx)
dataset_test = torch.utils.data.Subset(dataset_test, test_idx)

# TODO IMPORT UTILS USING STUFF IN OTHER NOTEBOOK, LOOKUP WHAT DATALOADER DOES
# define training, validation and test data loaders
# (utils is the references/detection/utils.py file copied during setup)
# training: batches of 2, reshuffled on every pass
data_loader_train = torch.utils.data.DataLoader(
    dataset_train, batch_size=2, shuffle=True, num_workers=4,
    collate_fn=utils.collate_fn)

# validation: one image at a time, fixed order
data_loader_val = torch.utils.data.DataLoader(
    dataset_val, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

# test: one image at a time, fixed order
data_loader_test = torch.utils.data.DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=utils.collate_fn)

##### Define hyperparameters.


In [12]:
# optimizer (passed to torch.optim.SGD in the training cell)
learning_rate = 0.005
momentum = 0.9
weight_decay = 0.0005

# learning rate schedule (passed to StepLR in the training cell)
step_size = 3   # learning rate will step every 3 epochs
gamma = 0.1    # learning rate will be multiplied by gamma at every step

# number of passes over the training loader
num_epochs = 10

# forwarded as trainable_backbone_layers to get_object_detection_model
trainable_layers = 3

##### Train Neural Network

In [13]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
# Load Faster R-CNN Model pretrained on COCO and replace its classifier with a new one that has `num_classes`.
model = get_object_detection_model(len(obj_class_labels) + 1, trainable_backbone_layers=trainable_layers) # extra class for background

# move model to the right device
model.to(device)

# create optimizer over the parameters that are left trainable (requires_grad=True)
optimizer = torch.optim.SGD(
    params=[p for p in model.parameters() if p.requires_grad], 
    lr=learning_rate, 
    momentum=momentum,
    weight_decay=weight_decay
)

# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size,
                                               gamma=gamma)

# one averaged loss value per epoch, collected for the loss curve below
training_losses = []
validation_losses = []
for epoch in range(num_epochs):
    # train for one epoch, printing every 100 iterations (print_freq=100)
    metric_logger, loss_tracker = train_one_epoch(model, optimizer, 
                                                  data_loader_train, device, 
                                                  epoch, print_freq=100)
    
    # save losses for plotting later
    training_losses.append(np.mean(loss_tracker))
    validation_loss = get_validation_loss(model, data_loader_val, device)
    validation_losses.append(np.mean(validation_loss))
    
    # update the learning rate
    lr_scheduler.step()
    # evaluate on the validation set (the test set is evaluated in a separate cell after training)
    evaluate(model, data_loader_val, device=device)

# plot loss curve: one point per epoch for training and validation loss
plt.plot(training_losses)
plt.plot(validation_losses)
plt.title(f'Loss Curve (Learning Rate = {learning_rate})')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend(['Training Loss', 'Validation Loss'])

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /pytorch/torch/csrc/utils/python_arg_parser.cpp:766.)
  keep = keep.nonzero().squeeze(1)


Epoch: [0]  [   0/1920]  eta: 0:27:31  lr: 0.000010  loss: 1.5411 (1.5411)  loss_classifier: 1.1804 (1.1804)  loss_box_reg: 0.2661 (0.2661)  loss_objectness: 0.0758 (0.0758)  loss_rpn_box_reg: 0.0188 (0.0188)  time: 0.8603  data: 0.3723  max mem: 3104
Epoch: [0]  [ 100/1920]  eta: 0:06:41  lr: 0.000509  loss: 0.4545 (0.8029)  loss_classifier: 0.1045 (0.3267)  loss_box_reg: 0.0608 (0.0831)  loss_objectness: 0.1504 (0.3311)  loss_rpn_box_reg: 0.0326 (0.0619)  time: 0.2137  data: 0.0073  max mem: 4037
Epoch: [0]  [ 200/1920]  eta: 0:06:12  lr: 0.001009  loss: 0.2497 (0.5981)  loss_classifier: 0.0748 (0.2181)  loss_box_reg: 0.0469 (0.0784)  loss_objectness: 0.0919 (0.2480)  loss_rpn_box_reg: 0.0245 (0.0537)  time: 0.2106  data: 0.0068  max mem: 4037
Epoch: [0]  [ 300/1920]  eta: 0:05:48  lr: 0.001508  loss: 0.2666 (0.5138)  loss_classifier: 0.0805 (0.1806)  loss_box_reg: 0.0557 (0.0761)  loss_objectness: 0.0658 (0.2054)  loss_rpn_box_reg: 0.0290 (0.0517)  time: 0.2160  data: 0.0068  max me

##### Evaluate on Test Set.

In [None]:
# Final evaluation on the held-out test split, run after training has finished.
evaluate(model, data_loader_test, device=device)

In [None]:
 # NOTE(review): this always raises AssertionError. It looks like a deliberate
 # stop so that "Run all" halts here, before the cells below save a model and
 # load a previously saved one — confirm the intent.
 assert False

#### Persist model.

In [None]:
# Save the whole model object under a timestamped name such as
# "model_09-12-2020 1801"; the run log in the "Prediction & Visualization"
# section refers to saved models by this timestamp.
now = datetime.now()
datetime_str = now.strftime("%m-%d-%Y %H%M")
# Create the models/ folder on first use; torch.save does not create missing
# parent directories.
os.makedirs(f'{root_dir}models', exist_ok=True)
torch.save(model, f'{root_dir}models/model_{datetime_str}')

### Prediction & Visualization

In [None]:
# Log of saved models (timestamp used in the file name | notes on that run):
# 09-06-2020 0502 | training batch size = 2, learning rate = 0.001, min loss ~ 0.13, AP < 0.1
# 09-08-2020 2041 | more data/slightly changed classes, learning rate = 0.01
# 09-09-2020 1658 | just tree, lr = 0.001
# (not saved)     | lr = 0.01, did not improve performance
# 09-09-2020 2305 | pretty much the same as the previous saved one; the model had not been evaluated properly, so it was redone
# 09-10-2020 0437 | tree and person
# 09-10-2020 1725 | tree, person, building, street light, 1000 images each
# 09-11-2020 0039 | tree, person, street light, ~4000 images each, SGD optimizer, 60/20/20 split
# 09-12-2020 1801 | tree, person, street light, ~2000 images each, 64/16/20 split, added all bounding boxes

# NOTE(review): the model loaded here is the 09-11 run, which per the log used a
# different split (60/20/20, ~4000 images per class) from the one built in this
# notebook, so images in this notebook's test subset may have been seen by that
# model during training — confirm before quoting test results for it.
# NOTE: torch.load unpickles arbitrary Python objects; only load model files you
# created yourself. Recent PyTorch releases may additionally require
# weights_only=False to load a whole pickled model.
# map_location lets a model that was saved on a GPU be loaded on a CPU-only
# runtime, consistent with the CPU fallback used when `device` is chosen.
model = torch.load(f'{root_dir}models/model_09-11-2020 0039', map_location=device)

##### Pick an image from the test set.

In [None]:
# Position of the example image within dataset_test (not an index into the
# full dataset; that one is looked up later through test_idx).
test_img_idx = 400

In [None]:
# pick one image from the test set (its target is not needed here)
img, _ = dataset_test[test_img_idx]

# Display it: scale by 255, move the first axis last and cast to uint8 for PIL.
# assumes img is a (C, H, W) float tensor with values in [0, 1], as the sample
# printed earlier suggests — TODO confirm
Image.fromarray(img.mul(255).permute(1, 2, 0).byte().numpy())

##### Make a bounding box prediction.

In [None]:
def make_boundary_box_prediction(image_no_box):
    """Run the global ``model`` on a single image and return its raw output.

    Args:
        image_no_box: the image to run detection on; it is moved to the global
            ``device`` and passed to the model as a one-element list.

    Returns:
        Whatever ``model`` returns for that one-image batch.
    """
    # Inference only: switch the model to evaluation mode and disable
    # gradient tracking for the forward pass.
    model.eval()
    with torch.no_grad():
        single_image_batch = [image_no_box.to(device)]
        return model(single_image_batch)

In [None]:
# Run the detector on the example image picked above and display the raw
# prediction; the drawing function below reads prediction[0]['boxes'],
# ['scores'] and ['labels'] from it.
predict_example = make_boundary_box_prediction(img)
predict_example

##### Define a function for drawing bounding boxes.

In [None]:
# Maps a predicted label id to (class name, box colour). Ids start at 1; the
# model is built with one extra class for background (see the
# get_object_detection_model call), which gets no entry here.
# The colour tuples are handed unchanged to cv2 drawing calls on an image
# loaded with cv2.imread, so OpenCV reads them in BGR order.
class_lookup_table = {
    1: (obj_class_labels[0], (255, 0, 0)),
    2: (obj_class_labels[1], (0, 255, 0)),
    3: (obj_class_labels[2], (255, 255, 0)),
}

def draw_all_boundary_boxes(image_path, prediction, threshold=0.5):
    """Draw every sufficiently confident predicted box on an image file.

    Args:
        image_path: path of the image to annotate; it is read with cv2.imread.
        prediction: model output as returned by make_boundary_box_prediction.
            Only prediction[0] is used; its 'boxes', 'scores' and 'labels'
            entries must support .tolist().
        threshold: predictions with a score below this value are skipped.

    Returns:
        The annotated image array as loaded by OpenCV (BGR channel order).

    Raises:
        FileNotFoundError: if the image cannot be read.
        KeyError: if a predicted label has no entry in class_lookup_table.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than deep inside a drawing call.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # get boundary boxes, scores, and labels from prediction
    detections = prediction[0]
    boxes = detections['boxes'].tolist()
    scores = detections['scores'].tolist()
    class_labels = detections['labels'].tolist()

    for box, score, label in zip(boxes, scores, class_labels):
        if score < threshold:
            continue

        # assumes each box is [x_min, y_min, x_max, y_max], the layout
        # torchvision detection models return — TODO confirm for this model
        x_min, y_min, x_max, y_max = (int(coord) for coord in box)
        class_name, color = class_lookup_table[label]

        # upper-left and bottom-right corners of the rectangle
        cv2.rectangle(image, (x_min, y_min), (x_max, y_max), color, 3)

        # 'street light' detections are shown as 'pole' in the overlay
        obj_label = 'pole' if class_name == 'street light' else class_name
        # Put the label just above the box, but keep its baseline far enough
        # from the top edge that the text stays visible for boxes touching it.
        text_y = max(y_min - 10, 20)
        cv2.putText(image, obj_label, (x_min, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.8, color, 2)

    return image


In [None]:
# Translate the position within the test subset into the corresponding index
# of the full dataset.
test_img_idx_abs = test_idx[test_img_idx]
# presumably dataset.imgs holds the image file paths in dataset order (the
# value is passed to cv2.imread in the next cell) — verify against dataset.py
path = dataset.imgs[test_img_idx_abs]

In [None]:
# Draw the predicted boxes on the image file that belongs to the example prediction.
image = draw_all_boundary_boxes(path, predict_example)
# The array comes from cv2.imread and is therefore in BGR order, while PIL
# expects RGB; convert before displaying so the photo's colours are not swapped.
Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

### Demo

Demo on ski resort video footage.

##### Download video from YouTube.



In [None]:
# Source video and download options used by the Downloader cell below.
video_url = "https://www.youtube.com/watch?v=3tg_DOaUZ4Y"
#video_url = "https://www.youtube.com/watch?v=LKBQ0J-RUF8"
video_quality = 1080  # passed to Downloader as quality=
only_video = True  # passed to Downloader as only_vid=
do_trim = False  # when True, the download is trimmed to [start, end]
start = "00:28:16"  # trim window start, only used when do_trim is True
end = "00:28:46"  # trim window end, only used when do_trim is True
# file name handed to the Downloader
output_file = 'pb600_winchcat_full.mp4'
#output_file = 'groomer.mp4'

In [None]:
# Download the video using the settings from the previous cell.
yt_d = Downloader(video_url, output_file, quality=video_quality, only_vid=only_video)
yt_d.download()

# Optionally cut the download to the [start, end] window.
if do_trim:
  yt_d.trim(start, end, delete_original=False)
  # NOTE(review): these file names are hard-coded and do not reference
  # output_file, which is the name the frame-splitting cell opens — confirm
  # which files Downloader.trim actually writes.
  os.rename('downloaded_vid_trimmed.mp4', 'downloaded_vid.mp4')

In [None]:
# Remove frame images left over from a previous run: every entry of the
# current working directory whose name contains 'frame' is deleted.
leftover_frames = [entry for entry in os.listdir() if 'frame' in entry]
for entry in leftover_frames:
    os.remove(entry)

##### Split video into frames.

In [None]:
# Decode the downloaded video and write every frame to frame<N>.jpg in the
# working directory. After the loop `i` holds the number of frames written;
# later cells rely on it.
cap = cv2.VideoCapture(output_file)
if not cap.isOpened():
    # Fail loudly instead of silently producing zero frames.
    raise RuntimeError(f'Could not open video file: {output_file}')

i = 0
while True:
    ret, frame = cap.read()
    if not ret:
        # no more frames could be read
        break
    cv2.imwrite(f'frame{i}.jpg', frame)
    i += 1

# No OpenCV windows are opened here, so only the capture needs releasing.
cap.release()
# `i` was incremented once per written frame, so it already is the frame count.
print(f'{i} Frames Created.')

##### Draw bounding boxes on each frame.

In [None]:
# Run the detector on every extracted frame and overwrite the frame file with
# its annotated version.
# NOTE: frames are overwritten in place, so running this cell a second time
# draws boxes on frames that already carry boxes.
for idx in range(i):
  # progress message every 100 frames
  if idx % 100 == 0:
    print(f'Making boundary box predictions ({idx}/{i})...')

  # load image
  img_frame = Image.open(f'frame{idx}.jpg').convert("RGB")
  # NOTE(review): get_transform is neither defined nor imported anywhere in the
  # visible notebook; on a fresh kernel this line raises NameError unless the
  # name is provided elsewhere — confirm where it should be imported from.
  transforms = get_transform(train=False)
  # make prediction
  prediction = make_boundary_box_prediction(transforms(img_frame))

  # draw box (the frame is re-read from disk inside draw_all_boundary_boxes)
  img_frame = draw_all_boundary_boxes(f'frame{idx}.jpg', prediction) 

  # resave image
  cv2.imwrite(f'frame{idx}.jpg', img_frame)

print('Done!')

In [None]:
# Spot-check one annotated frame.
# NOTE: the frame number is hard-coded, so this fails when the video has
# fewer than 6101 frames.
Image.open('frame6100.jpg').convert("RGB")

In [None]:
def convert_frames_to_video(num_frames, path_out, fps):
    """Assemble frame0.jpg ... frame<num_frames-1>.jpg into a video file.

    Frames are read from the current working directory and written to the
    output one at a time, so memory use does not grow with the number of
    frames. Files that cannot be read are skipped.

    Args:
        num_frames: number of frame files to look for, starting at frame0.jpg.
        path_out: path of the video file to write.
        fps: frame rate of the output video.

    Raises:
        ValueError: if none of the frame files could be read, in which case no
            output is written.
    """
    writer = None
    try:
        for idx in range(num_frames):
            if idx % 100 == 0:
                print(f'Processing frame ({idx}/{num_frames})...')

            frame = cv2.imread(f'frame{idx}.jpg')
            if frame is None:
                # cv2.imread returns None for a missing or unreadable file
                continue

            if writer is None:
                # The output size is taken from the first readable frame;
                # the writer expects it as (width, height).
                height, width = frame.shape[:2]
                writer = cv2.VideoWriter(path_out, cv2.VideoWriter_fourcc(*'MJPG'), fps, (width, height))

            writer.write(frame)
    finally:
        # Release the writer even if reading or writing fails part-way.
        if writer is not None:
            writer.release()

    if writer is None:
        raise ValueError(f'No readable frames found (looked for {num_frames} frame files).')

# `i` already equals the number of frames written by the frame-splitting cell
# (frame0.jpg up to frame<i-1>.jpg), so it is passed unchanged.
# os.path.join avoids a doubled slash, since root_dir ends with '/'.
convert_frames_to_video(i, os.path.join(root_dir, 'groomer_boxes.mp4'), fps=30)