In [2]:
import os
import numpy as np
import json
import csv
import pandas as pd
import random

In [3]:
# You may need to restart your runtime prior to this, to let your installation take effect
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import random
# from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2.structures import BoxMode
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import DatasetCatalog, MetadataCatalog

In [4]:
def get_animals_dicts(img_dir):
        
    csv_file = os.path.join(img_dir, "2020-02-14.csv")
    imgs_anns = pd.read_csv(csv_file, sep= ",", header=0)
    classes = list(set(imgs_anns['species']))
    
    dataset_dicts = []

    imgs_anns = imgs_anns.sample(frac=1)
    files = list(set(imgs_anns['file']))
    random.shuffle(files)
    dataset_dicts = []
    
    idx = 0
    
    for file in files:
        data = imgs_anns[imgs_anns['file'] == file]

        record = {}

        record["file_name"] = file
        record["image_id"] = idx
        idx += 1
        record["height"] = int(data['height'][:1])
        record["width"] = int(data['width'][:1])


        objs = []
        for index, row in data.iterrows():
            obj = {
                "bbox": [int(row['x']), int(row['y']), int(row['w']), int(row['h'])],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": classes.index(row['species']),
                "iscrowd": 0
            }
            objs.append(obj)
        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

In [5]:
for d in ["training", "val"]:
    DatasetCatalog.register("animals_" + d, lambda d=d: get_animals_dicts("animals_" + d + "/"))
    csv_file = os.path.join("animals_" + d, "2020-02-14.csv")
    imgs_anns = pd.read_csv(csv_file, sep= ",", header=0)
    classes = list(set(imgs_anns['species']))
    MetadataCatalog.get("animals_" + d).set(thing_classes=classes)
animal_metadata = MetadataCatalog.get("animals_training")

In [6]:
import random
from detectron2.utils.visualizer import Visualizer

dataset_dicts = get_animals_dicts('animals_training')

# Draw the ground-truth boxes on up to three random training images.
# random.sample raises ValueError when asked for more items than exist, so cap the count.
for d in random.sample(dataset_dicts, min(3, len(dataset_dicts))):
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising; skip it rather than crash on the slice below.
        print("Could not read image:", d["file_name"])
        continue
    # OpenCV loads BGR; the [:, :, ::-1] slices reverse the channel order for
    # the Visualizer and back again for cv2.imshow.
    visualizer = Visualizer(img[:, :, ::-1], metadata=animal_metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    cv2.imshow('', vis.get_image()[:, :, ::-1])
    cv2.waitKey(0) # waits until a key is pressed
    cv2.destroyAllWindows() # destroys the window showing image

In [7]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

# A single model-zoo entry provides both the architecture config and the
# checkpoint that training is initialised from.
zoo_config = "COCO-Detection/faster_rcnn_X_101_32x8d_FPN_3x.yaml"
num_animal_classes = len(MetadataCatalog.get("animals_training").thing_classes)

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(zoo_config))

# Model: start from the model-zoo weights, one output class per registered species.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(zoo_config)
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = num_animal_classes

# Data: train on the registered training split; no test set is configured here.
cfg.DATASETS.TRAIN = ("animals_training",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 1

# Solver: 2 images per batch for 1000 iterations.
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 1000

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # resume=False: do not resume from a previous run
trainer.train()

[32m[02/22 16:21:07 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[02/22 16:21:08 d2.data.build]: [0mRemoved 0 images with no usable annotations. 998 images left.
[32m[02/22 16:21:08 d2.data.build]: [0mDistribution of instances among all 2 categories:
[36m|  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|
|  elephant  | 500          | chimpanzee | 500          |
|            |              |            |              |
|   total    | 1000         |            |              |[0m
[32m[02/22 16:21:08 d2.data.detection_utils]: [0mTransformGens used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[02/22 16:21:08 d2.data.build]: [0mUsing training sampler TrainingSampler


  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
'roi_heads.box_predictor.cls_score.weight' has shape (81, 1024) in the checkpoint but (3, 1024) in the model! Skipped.
'roi_heads.box_predictor.cls_score.bias' has shape (81,) in the checkpoint but (3,) in the model! Skipped.
'roi_heads.box_predictor.bbox_pred.weight' has shape (320, 1024) in the checkpoint but (8, 1024) in the model! Skipped.
'roi_heads.box_predictor.bbox_pred.bias' has shape (320,) in the checkpoint but (8,) in the model! Skipped.


[32m[02/22 16:21:09 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[02/22 16:21:27 d2.utils.events]: [0meta: 0:04:06  iter: 19  total_loss: 1.570  loss_cls: 1.006  loss_box_reg: 0.544  loss_rpn_cls: 0.002  loss_rpn_loc: 0.003  time: 0.8802  data_time: 0.0122  lr: 0.000005  max_mem: 6051M
[32m[02/22 16:21:46 d2.utils.events]: [0meta: 0:03:55  iter: 39  total_loss: 1.387  loss_cls: 0.903  loss_box_reg: 0.492  loss_rpn_cls: 0.009  loss_rpn_loc: 0.008  time: 0.9130  data_time: 0.0039  lr: 0.000010  max_mem: 6051M
[32m[02/22 16:22:04 d2.utils.events]: [0meta: 0:03:38  iter: 59  total_loss: 1.253  loss_cls: 0.722  loss_box_reg: 0.535  loss_rpn_cls: 0.006  loss_rpn_loc: 0.006  time: 0.9085  data_time: 0.0035  lr: 0.000015  max_mem: 6051M
[32m[02/22 16:22:21 d2.utils.events]: [0meta: 0:03:18  iter: 79  total_loss: 1.158  loss_cls: 0.583  loss_box_reg: 0.528  loss_rpn_cls: 0.004  loss_rpn_loc: 0.005  time: 0.8991  data_time: 0.0034  lr: 0.000020  max_mem: 6051M
[32m

In [8]:
# Look at training curves in tensorboard:
# NOTE(review): the log directory is hardcoded as "output", while the training
# cell creates cfg.OUTPUT_DIR — confirm both point at the same directory.
%load_ext tensorboard
%tensorboard --logdir output

In [11]:
# Re-point the training config (mutated in place) at the final checkpoint and
# the validation split, then build the predictor from it.
final_weights = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")

cfg.DATASETS.TEST = ("animals_val", )
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3   # score threshold applied at test time
cfg.MODEL.WEIGHTS = final_weights
predictor = DefaultPredictor(cfg)

In [12]:
from detectron2.utils.visualizer import ColorMode
dataset_dicts = get_animals_dicts("animals_val")

# Show the model's predictions on up to 20 random validation images.
# random.sample raises ValueError when asked for more items than exist, so cap the count.
for d in random.sample(dataset_dicts, min(20, len(dataset_dicts))):
    im = cv2.imread(d["file_name"])
    if im is None:
        # cv2.imread reports a missing or unreadable file by returning None
        # instead of raising; skip it rather than pass None to the predictor.
        print("Could not read image:", d["file_name"])
        continue
    outputs = predictor(im)
    # The predictor is fed the image as loaded by OpenCV; the Visualizer gets
    # the channel-reversed view.
    v = Visualizer(im[:, :, ::-1],
                   metadata=animal_metadata,
                   scale=0.8   # display scale passed to the Visualizer
    )
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2.imshow("",v.get_image()[:, :, ::-1])
    cv2.waitKey(0) # waits until a key is pressed
    cv2.destroyAllWindows() # destroys the window showing image

In [13]:
# Scratch cell: repeats the first steps of get_animals_dicts at module level so
# the intermediate values can be inspected in the cells below.
img_dir = "animals_training/"
csv_file = os.path.join(img_dir, "2020-02-14.csv")
imgs_anns = pd.read_csv(csv_file, sep= ",", header=0)
# NOTE(review): set order is not stable across interpreter sessions, so this
# list may be ordered differently from one kernel run to the next.
classes = list(set(imgs_anns['species']))

# NOTE(review): this rebinds the module-level dataset_dicts that the preceding
# visualisation cell filled with the validation records.
dataset_dicts = []

# Shuffle the rows, then collect the distinct file paths.
imgs_anns = imgs_anns.sample(frac=1)
files = list(set(imgs_anns['file']))

In [19]:
# Shuffles the module-level `files` list in place; re-running this cell
# reshuffles it again (no seed is set in the visible cells).
random.shuffle(files)

In [20]:
# Displays the entire list as the cell output.
# NOTE(review): consider files[:10] to keep the saved notebook output small.
files

['animals_training/ACP000dn0j_110.0.jpg',
 'animals_training/ACP000dsmz_230.0.jpg',
 'animals_training/ACP000d9yj_270.0.jpg',
 'animals_training/ACP000dt6u_350.0.jpg',
 'animals_training/ACP000d2pg_110.0.jpg',
 'animals_training/ACP000cygq_100.0.jpg',
 'animals_training/ACP000d3ne_140.0.jpg',
 'animals_training/ACP000dlsg_300.0.jpg',
 'animals_training/ACP000ceuw_300.0.jpg',
 'animals_training/ACP00028ye_120.0.jpg',
 'animals_training/ACP000drtb_270.0.jpg',
 'animals_training/ACP000bmhx_130.0.jpg',
 'animals_training/ACP000ccry_120.0.jpg',
 'animals_training/ACP000dls4_150.0.jpg',
 'animals_training/ACP000cnpq_330.0.jpg',
 'animals_training/ACP000dkrq_240.0.jpg',
 'animals_training/ACP000dl1t_270.0.jpg',
 'animals_training/ACP000d72t_90.0.jpg',
 'animals_training/ACP000dlhn_190.0.jpg',
 'animals_training/ACP000dad0_100.0.jpg',
 'animals_training/ACP000dt75_170.0.jpg',
 'animals_training/ACP000dpfm_300.0.jpg',
 'animals_training/ACP000djr7_50.0.jpg',
 'animals_training/ACP000cggc_270.0.