# Install detectron2

In [0]:
# install dependencies: (use cu101 because colab has CUDA 10.1)
# torch / torchvision are pinned and resolved against the PyTorch cu101 wheel index passed via -f.
!pip install -U torch==1.4 torchvision==0.5 -f https://download.pytorch.org/whl/cu101/torch_stable.html 
!pip install cython pyyaml==5.1
# pycocotools is built from the PythonAPI subdirectory of the cocoapi repository.
!pip install -U 'git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI'
# Sanity check: print the torch version that is actually importable and whether CUDA is usable,
# then print the compiler version.
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab

# install detectron2:
# NOTE(review): detectron2 is not version-pinned here — confirm the wheel index still serves a
# build that matches the pinned torch 1.4 / cu101 combination.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html

Looking in links: https://download.pytorch.org/whl/cu101/torch_stable.html
Requirement already up-to-date: torch==1.4 in /usr/local/lib/python3.6/dist-packages (1.4.0)
Requirement already up-to-date: torchvision==0.5 in /usr/local/lib/python3.6/dist-packages (0.5.0)
Collecting git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI
  Cloning https://github.com/cocodataset/cocoapi.git to /tmp/pip-req-build-zmbmg296
  Running command git clone -q https://github.com/cocodataset/cocoapi.git /tmp/pip-req-build-zmbmg296
Building wheels for collected packages: pycocotools
  Building wheel for pycocotools (setup.py) ... [?25l[?25hdone
  Created wheel for pycocotools: filename=pycocotools-2.0-cp36-cp36m-linux_x86_64.whl size=275269 sha256=4b772492c888105851cb6476fe84d84c675e49195d8b3b107fc30f73d7f65c15
  Stored in directory: /tmp/pip-ephem-wheel-cache-oknpkk_7/wheels/90/51/41/646daf401c3bc408ff10de34ec76587a9b3ebfac8d21ca5c3a
Successfully built pycocotools
Installing collected p

1.4.0 True
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/index.html


# download the data

In [0]:
# Remove any previous checkout first so this cell can be re-run: git clone refuses to
# clone into an existing non-empty directory.
!rm -rf greenthumbs
# The annotation files and images used below are read from greenthumbs/data (see BASE_DIR).
!git clone https://github.com/kierangilliam/greenthumbs.git

Cloning into 'greenthumbs'...
remote: Enumerating objects: 250, done.[K
remote: Counting objects: 100% (250/250), done.[K
remote: Compressing objects: 100% (124/124), done.[K
remote: Total 3116 (delta 126), reused 245 (delta 125), pack-reused 2866[K
Receiving objects: 100% (3116/3116), 497.29 MiB | 39.87 MiB/s, done.
Resolving deltas: 100% (1457/1457), done.
Checking out files: 100% (2759/2759), done.


In [0]:
"""
Overall goal
1. Hyper parameter selection - Pick best model hyperparameters from train test 
  splits of 65%, 75%, 85% on two versions of data: augmented and not augmented
2. Find the best data augmentation method with those given hyperparameters
  that boosts model accuracy
3. Test if false positives are removed by adding an additional check after
  our detectron model gives us better results
"""

'\nOverall goal\n1. Hyper parameter selection - Pick best model hyperparameters from train test \n  splits of 65%, 75%, 85% with the following \n2. Find the best data augmentation method with those given hyperparameters\n  that boosts model accuracry\n3. Test if false positives are removed by adding an additional check after\n  our detectron model gives us better results\n'

In [0]:
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                                      Imports
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
import detectron2
# detectron2 utilities
from detectron2.utils.logger import setup_logger
setup_logger()
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
from detectron2.engine import DefaultTrainer
from detectron2.data.datasets import register_coco_instances, load_coco_json
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# common libs
import numpy as np
import cv2
import random
import os
import json

# colab
from google.colab.patches import cv2_imshow
from google.colab import drive



"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                                      Constants 
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Root of the cloned dataset repository; every version lives in BASE_DIR/<version>/.
BASE_DIR = 'greenthumbs/data'
# Google Drive folder that receives result JSON files and rendered prediction images.
DRIVE = '/content/drive/My Drive/Green Thumbs'
# Category names handed to detectron2 as `thing_classes`. Order matters: the position
# in this list is the class index, so do not reorder.
CLASSES = [
  'tomato_fruit_unripe',
  'tomato_fruit',
  'tomato_seedling',
  'tomato_young_plant',
  'tomato_flower',
  'bell_pepper_fruit',
  'bell_pepper_young_plant',
  'bell_pepper_flower',
  'bell_pepper_fruit_unripe',
  'bell_pepper_seedling',
  'cucumber_flower',
  'cucumber_plant',
  'cucumber_seedling',
  'cucumber_fruit',
  'cucumber_fruit_unripe',
]
# Derived from CLASSES (currently 15) so the ROI-head size can never drift from the label list.
NUM_CLASSES = len(CLASSES)


"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                               Hyper parameter selection
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
# Search grid for the sweep. Every combination of the lists below is trained and evaluated.
#
# A "version" is one snapshot of the dataset: versions differ in the amount of data
# or in the data augmentation method that was applied.
versions = ["v01", "v02"]
# Share of the data used for training; the string is also part of the annotation file name.
test_train_splits = [f"{percent}%" for percent in (65, 75, 85)]
# Solver iterations per run.
iters = [4001]
# Base learning rates.
lrs = [0.002, 0.001, 0.0005]
# Values tried for ROI_HEADS.BATCH_SIZE_PER_IMAGE.
batch_sizes_per_img = [64, 128]
# Config files from the COCO-Detection folder of the detectron2 model zoo.
models = [
  f"faster_rcnn_{backbone}_3x.yaml"
  for backbone in ("R_101_FPN", "R_50_C4", "X_101_32x8d_FPN")
]



"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
                               Lib
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
"""
ds: 65%, 75%, 85%... Describes the dataset split
"""
def get_train_test_coco_files(v, ds):
  """Build the paths of the train and test COCO annotation files.

  Args:
    v: dataset version folder under BASE_DIR (e.g. "v01").
    ds: dataset split label as used in the file name (e.g. "65%", "75%", "85%").

  Returns:
    Tuple `(train_coco_file, test_coco_file)` of path strings. The files are not
    opened or checked for existence here.
  """
  def annotation_path(subset):
    # Layout: BASE_DIR/<version>/<subset>_<split>_coco.json
    return BASE_DIR + '/{}/{}_{}_coco.json'.format(v, subset, ds)

  return annotation_path('train'), annotation_path('test')


def save_results(name, result):
  """Write `result` as JSON to `<DRIVE>/<name>.json`, overwriting any existing file.

  Args:
    name: file name without extension.
    result: JSON-serialisable object (the sweep passes nested dicts of metrics).
  """
  print('Saving results...')
  target_path = '{}/{}.json'.format(DRIVE, name)
  # Serialise before opening the file so a serialisation error cannot leave a
  # truncated file behind.
  payload = json.dumps(result)

  with open(target_path, 'w') as out_file:
    out_file.write(payload)

  print('Saved {}'.format(target_path))


def save_test_imgs(instance, cfg, version, ds, predictor, num_samples=2):
  """Render predictions for a few random test images and save them as PNGs.

  Images are written to `<DRIVE>/out_imgs/<version>/<instance>___<i>.png`.

  Args:
    instance: run identifier, used as the output file-name prefix.
    cfg: detectron2 config; `cfg.DATASETS.TRAIN[0]` selects the metadata
      (class names) used for drawing.
    version: dataset version folder under BASE_DIR.
    ds: dataset split label (e.g. "65%"), selects the test annotation file.
    predictor: callable that takes a BGR image array and returns a dict
      with an "instances" entry.
    num_samples: how many test images to render (default 2). Capped at the
      number of images listed in the annotation file.
  """
  print('Saving images...')

  img_dir = BASE_DIR + f'/{version}/ds/'
  out_dir = f'{DRIVE}/out_imgs/{version}'
  _, test_coco_file = get_train_test_coco_files(version, ds)

  with open(test_coco_file, 'r') as f:
    data = json.load(f)

  # cv2.imwrite does not raise when the target directory is missing, it just
  # returns False, so make sure the directory exists up front.
  os.makedirs(out_dir, exist_ok=True)

  images = data['images']
  # random.sample raises ValueError if asked for more items than available.
  sampled = random.sample(images, min(num_samples, len(images)))
  metadata = MetadataCatalog.get(cfg.DATASETS.TRAIN[0])

  for i, d in enumerate(sampled, start=1):
    img_path = img_dir + d["file_name"]
    img = cv2.imread(img_path)
    if img is None:
      # cv2.imread signals a missing/unreadable file by returning None.
      print(f'Skipping unreadable image {img_path}')
      continue

    outputs = predictor(img)

    # Visualizer expects RGB, OpenCV delivers BGR: flip the channel axis going
    # in and flip it back before writing.
    v = Visualizer(img[:, :, ::-1], metadata, scale=0.5)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    output = f'{out_dir}/{instance}___{i}.png'
    if cv2.imwrite(output, v.get_image()[:, :, ::-1]):
      print(f'Saved {i} image {output}')
    else:
      print(f'Failed to save image {output}')

# Train & Test


In [0]:
def train(instance, v, ds, model, iterations, lr, batch_size, dry_run=False):
  """Register the datasets for one run, build its config and trainer, and train.

  Args:
    instance: unique run identifier; used for the dataset names
      (`train/<instance>`, `test/<instance>`) and the output directory.
    v: dataset version folder under BASE_DIR.
    ds: dataset split label (e.g. "65%").
    model: config file name inside the model zoo's `COCO-Detection/` folder.
    iterations: value for `SOLVER.MAX_ITER`.
    lr: value for `SOLVER.BASE_LR`.
    batch_size: value for `MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE`.
    dry_run: when True, everything is set up but `trainer.train()` is skipped.

  Returns:
    Tuple `(cfg, trainer, train_instance, test_instance)`.
  """
  # Local import: only needed for the re-registration guard below.
  from detectron2.data import DatasetCatalog

  img_dir  = BASE_DIR + f'/{v}/ds/'
  model_path = f'COCO-Detection/{model}'

  train_instance = f'train/{instance}'
  test_instance  = f'test/{instance}'

  train_coco_file, test_coco_file = get_train_test_coco_files(v, ds)

  # Registering a name twice trips an assertion inside detectron2, which makes
  # re-running the sweep cell (or repeating a run) fail. Only register names
  # that are not known yet.
  already_registered = set(DatasetCatalog.list())
  for dataset_name, coco_file in (
      (train_instance, train_coco_file),
      (test_instance, test_coco_file),
  ):
    if dataset_name not in already_registered:
      register_coco_instances(dataset_name, {}, coco_file, img_dir)
    MetadataCatalog.get(dataset_name).set(thing_classes=CLASSES)

  cfg = get_cfg()
  cfg.merge_from_file(model_zoo.get_config_file(model_path))
  cfg.DATASETS.TRAIN = (train_instance,)
  # No evaluation during training; the test set is attached later in test().
  cfg.DATASETS.TEST = ()
  cfg.MODEL.ROI_HEADS.NUM_CLASSES = NUM_CLASSES
  cfg.DATALOADER.NUM_WORKERS = 2
  cfg.SOLVER.IMS_PER_BATCH = 2
  cfg.OUTPUT_DIR = f'./outputs/{instance}'

  # Hyperparameter selection
  # The checkpoint URL points at the model zoo's pre-trained weights for this
  # config, i.e. training starts from those weights (fine-tuning).
  cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_path)
  cfg.SOLVER.MAX_ITER = iterations
  cfg.SOLVER.BASE_LR = lr
  cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = batch_size

  os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
  trainer = DefaultTrainer(cfg)
  # resume=False: always start from cfg.MODEL.WEIGHTS, never from a previous
  # checkpoint in OUTPUT_DIR.
  trainer.resume_or_load(resume=False)

  if not dry_run:
    print(f'**********************************************')
    print(f'\t\t Begin train {instance}'                 )
    print(f'**********************************************')
    trainer.train()

  return cfg, trainer, train_instance, test_instance



def test(instance, cfg, trainer, test_instance):
  """Evaluate a trained run on its test dataset with the COCO evaluator.

  Mutates `cfg` in place: weights path, score threshold and test dataset are
  overwritten.

  Args:
    instance: run identifier (only used for the log banner).
    cfg: the config returned by `train`.
    trainer: the trainer returned by `train`; its model is the one evaluated.
    test_instance: name of the registered test dataset.

  Returns:
    Tuple `(result, predictor)`: the evaluation result and a predictor built
    from the updated `cfg`.
  """
  banner = '**********************************************'
  print(banner)
  print(f'\t\t Test {instance}')
  print(banner)

  final_weights = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
  cfg.MODEL.WEIGHTS = final_weights
  # Predictions scoring below 75% are discarded by models built from this cfg.
  cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.75
  cfg.DATASETS.TEST = (test_instance, )

  predictor = DefaultPredictor(cfg)
  coco_evaluator = COCOEvaluator(test_instance, cfg, False, output_dir=cfg.OUTPUT_DIR)
  loader = build_detection_test_loader(cfg, test_instance)

  # NOTE(review): the metrics come from `trainer.model`, which was built before
  # the threshold/weights above were set, while `predictor` is built after.
  # Presumably the 0.75 threshold therefore only affects the predictor, not
  # the reported metrics — confirm this is intended.
  return inference_on_dataset(trainer.model, loader, coco_evaluator), predictor

from datetime import datetime
import gc

# One timestamp for the whole sweep, so all result files of this run share it.
now = datetime.now()
date_time = now.strftime("%m-%d-%Y_%H:%M")

# Fail fast: results and images are written below DRIVE. Without a mounted Drive
# and existing output folders, the first configuration would train to completion
# and only then fail (or silently drop its images).
DRIVE_MOUNT_POINT = '/content/drive'
if not os.path.ismount(DRIVE_MOUNT_POINT):
  drive.mount(DRIVE_MOUNT_POINT)
for v in versions:
  os.makedirs(f'{DRIVE}/out_imgs/{v}', exist_ok=True)

# Placeholders so the first iteration can "release" the previous run uniformly.
trainer = predictor = None

for v in versions:
  for ds in test_train_splits:
    for model in models:

      # One result file per (version, split, model); nested as
      # results[iterations][lr][batch_size].
      results = {}

      for iterations in iters:

        results[iterations] = {}

        for lr in lrs:

          results[iterations][lr] = {}

          for batch_size in batch_sizes_per_img:

            # Drop the previous run's models before building the next one,
            # otherwise they stay referenced (and on the GPU) while the new
            # model is created.
            trainer = predictor = None
            gc.collect()
            torch.cuda.empty_cache()

            instance = f'{v}_{ds}_{model}_{iterations}_{lr}_{batch_size}'

            cfg, trainer, train_instance, test_instance = train(
                instance, v, ds, model, iterations, lr, batch_size
            )

            result, predictor = test(instance, cfg, trainer, test_instance)

            save_test_imgs(instance, cfg, v, ds, predictor)

            results[iterations][lr][batch_size] = result

          # Checkpoint after every learning rate so a crash late in the sweep
          # does not lose the metrics collected so far.
          save_results(f'{v}_{ds}_{model}_{date_time}', results)

                       

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
      (1): BottleneckBlock(
        (conv1): Conv2d(
          256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv3): Conv2d(
          64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
        )
      )
      (2): BottleneckBlock(
        (conv1): Conv2d(
          256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv2): Conv2d(
          64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
        (conv3): Con