In [None]:
# install dependencies: 
!pip install pyyaml==5.1 'pycocotools>=2.0.1'
# !pip install imgaug==0.2.5
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.6/index.html
!gcc --version

import torch, torchvision, random, os, cv2, glob, json, pylab, time
import numpy as np
import pandas as pd
from google.colab.patches import cv2_imshow
# report the installed torch version and whether CUDA is usable
print(torch.__version__, torch.cuda.is_available())
# NOTE(review): this requires torch 1.7.x, while the detectron2 wheel index in the
# install step is the cu101/torch1.6 one -- confirm which version is intended
assert torch.__version__.startswith("1.7")

# opencv is pre-installed on colab
import matplotlib.pyplot as plt
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import skimage.io as io

# %reload_ext autoreload
# %autoreload

# seed the torch, torch.cuda, NumPy and Python `random` generators with the same
# fixed value so that repeated runs of the notebook draw the same random numbers
torch.manual_seed(123)
torch.cuda.manual_seed(123)
np.random.seed(123)
random.seed(123)

In [None]:
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.config import get_cfg
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
from detectron2.structures import BoxMode
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.visualizer import ColorMode

In [None]:
def extract_labelme(json_file):
    """Read one LabelMe annotation file and return the fields used downstream.

    :params
        - json_file: str, path to a single LabelMe .json annotation file

    :returns
        - filename: str, last '/'-separated component of the file's "imagePath"
        - width: int, value of "imageWidth"
        - height: int, value of "imageHeight"
        - obj_list: list[dict], the raw "shapes" entries of the annotation file
    """
    # the context manager closes the handle even when JSON parsing fails
    with open(json_file, "r") as f:
        data = json.load(f)

    # keep only the image file name, dropping any directory prefix
    filename = data["imagePath"].split('/')[-1]
    width = int(data["imageWidth"])
    height = int(data["imageHeight"])
    obj_list = data['shapes']  # list of dicts

    return filename, width, height, obj_list


def check_annotations(dataset_name, d):
    """Display one dataset record with its annotations drawn on top of the image.

    :params
        - dataset_name: str, name whose MetadataCatalog entry is handed to the Visualizer
        - d: dict, a single dataset record; "image_id" and "file_name" are read here and
             the whole dict is passed to Visualizer.draw_dataset_dict

    :raises
        - FileNotFoundError: if the image at d["file_name"] cannot be read
    """
    metadata = MetadataCatalog.get(dataset_name)

    print(d["image_id"])
    img = cv2.imread(d["file_name"])
    if img is None:
        # cv2.imread reports failure by returning None instead of raising
        raise FileNotFoundError('could not read image: {}'.format(d["file_name"]))

    # reverse the channel order for the Visualizer, then reverse it back for display
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.5)
    out = visualizer.draw_dataset_dict(d)
    cv2_imshow(out.get_image()[:, :, ::-1])


def labelme_to_detectron(json_dir, label_type):
    """Convert a folder of LabelMe annotation files into Detectron2 dataset dicts.

    Every '*.json' file in json_dir becomes one record. Each labelled shape becomes one
    annotation whose box is the min/max extent of its points and whose segmentation
    is the flattened point list.

    :params
        - json_dir: str, folder holding the .json files and the images they refer to;
                    for 'pos'/'behavior' it must also hold '<folder name>.csv'
        - label_type: str, one of
            'id'       -> every object gets category 0
            'pos'      -> category is the argmax over the csv columns STD, SIT
            'behavior' -> category is the argmax over the csv columns EAT, DRK, PRE, FOR

    :returns
        - dataset_dicts: list[dict], one record per annotation file

    :raises
        - ValueError: if label_type is not one of the three supported values
    """
    label_columns = {
        'pos': ['STD', 'SIT'],
        'behavior': ['EAT', 'DRK', 'PRE', 'FOR'],
    }
    if label_type != 'id' and label_type not in label_columns:
        # fail early instead of hitting an undefined csv table inside the loop
        raise ValueError(
            "label_type must be 'id', 'pos' or 'behavior', got {!r}".format(label_type))

    dataset_dicts = []  # list[dict]
    json_files = sorted(glob.glob(os.path.join(json_dir, '*.json')))

    df = None
    if label_type != 'id':
        # the csv is named after the folder; rstrip keeps a trailing '/' from emptying the name
        folder_name = json_dir.rstrip('/').split('/')[-1]
        df = pd.read_csv('{}/{}.csv'.format(json_dir, folder_name))

    for json_file in json_files:
        filename, width, height, shapes = extract_labelme(json_file)

        # label -> array of points; shapes sharing a label collapse to the last one
        shapes_by_label = {shape["label"]: np.array(shape["points"]) for shape in shapes}

        record = {
            "file_name": os.path.join(json_dir, filename),  # full path to the image
            "height": height,
            "width": width,
            # the file stem must be an integer, e.g. 55555555.jpg -> 55555555
            "image_id": int(filename.split('.')[0]),
        }

        if record["image_id"] % 100 == 0:
            print(filename)

        objs = []  # list[dict]

        for label, points in shapes_by_label.items():
            bird_id = int(label)  # labels are 1-based integers stored as text
            xs, ys = points[:, 0], points[:, 1]

            obj = {
                "bbox": [float(xs.min()), float(ys.min()), float(xs.max()), float(ys.max())],
                "bbox_mode": BoxMode.XYXY_ABS,
                "category_id": bird_id - 1,
                # row-major flatten yields [x0, y0, x1, y1, ...] as plain Python numbers
                "segmentation": [points.flatten().tolist()],
            }

            if label_type == 'id':
                obj['category_id'] = 0
                objs.append(obj)
                continue

            # find the csv row for this bird in this image
            row = df[(df['Image'] == record['image_id']) & (df['Bird #'] == bird_id)]
            if row.empty:
                print('DataFrame is empty!')
                continue

            # one-hot label vector of the first matching row
            y_behavior = row.loc[:, label_columns[label_type]].to_numpy()[0]

            # objects whose label vector is all zero are left out of the record
            if np.sum(y_behavior) != 0:
                obj['category_id'] = int(np.argmax(y_behavior))
                objs.append(obj)

        record["annotations"] = objs
        dataset_dicts.append(record)

    print("CONVERTED TO DETECTRON2 FORMAT!")
    return dataset_dicts


def register_datasets(dataset_name, data_dir, classes, label_type):
    """Register a LabelMe-annotated folder in the Detectron2 catalogs.

    :params
        - dataset_name: str, name to register in DatasetCatalog / MetadataCatalog
        - data_dir: str, folder passed to labelme_to_detectron when the dataset is loaded
        - classes: list[str], stored as the dataset's `thing_classes` metadata
        - label_type: str, labelling task passed to labelme_to_detectron
    """

    # drop an earlier registration under the same name so it can be registered again
    already_registered = dataset_name in DatasetCatalog.list()
    if already_registered:
        DatasetCatalog.remove(dataset_name)
        MetadataCatalog.remove(dataset_name)

    def _load_dataset():
        # the conversion only runs when the catalog calls this loader
        return labelme_to_detectron(data_dir, label_type)

    DatasetCatalog.register(dataset_name, _load_dataset)

    meta = MetadataCatalog.get(dataset_name)
    meta.thing_classes = classes

    print('{} registered successfully'.format(dataset_name))


def train_detector(dataset_name, model_file, output_path, max_iter, classes, lr,
                   train_suffixes=('286', '287', '288', '289', '290', '213', '214', '215')):
    """Fine-tune a model-zoo detector on registered datasets and write the outputs to output_path.

    returns:
        - trainer: the DefaultTrainer after training has finished
        - cfg: the config the trainer was built from

    params:
        - dataset_name: common prefix of the registered training datasets
        - model_file: model zoo config path, used for both the config and the initial weights
        - output_path: directory for the training outputs (created if missing)
        - max_iter: number of training iterations (cfg.SOLVER.MAX_ITER)
        - classes: list of classes; its length sets the number of ROI-head classes
        - lr: base learning rate (cfg.SOLVER.BASE_LR)
        - train_suffixes: suffixes appended to dataset_name to form the training dataset
                          names; the default reproduces the previously hard-coded list
    """
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_file))  # select model of your choice
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_file)  # start from the model zoo checkpoint
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)  # number of classes

    # every '<dataset_name><suffix>' must already be registered in DatasetCatalog
    cfg.DATASETS.TRAIN = tuple('{}{}'.format(dataset_name, suffix) for suffix in train_suffixes)
    cfg.OUTPUT_DIR = output_path

    # set up hyperparameters
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 32  # faster than the default of 512
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = lr
    cfg.SOLVER.MAX_ITER = max_iter

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)  # make the output directory if it doesn't exist

    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)  # always start from cfg.MODEL.WEIGHTS
    trainer.train()

    return trainer, cfg


def get_detector(config_file, nms_thresh, data_type, classes, weight_dir=None):
    """Build a detector from a model zoo config, as a raw model or as a DefaultPredictor.

    returns:
        - model: a torch.nn.Module in eval mode when data_type == 'model',
                 a DefaultPredictor when data_type == 'predictor'

    params:
        - config_file: model zoo config path, also used for the default weights
        - nms_thresh: value assigned to cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST
        - data_type: 'model' or 'predictor', selects what is returned
        - classes: list of classes; its length sets the number of ROI-head classes
        - weight_dir: directory containing 'model_final.pth'; when None the model zoo
                      checkpoint for config_file is used instead

    raises:
        - ValueError: if data_type is neither 'model' nor 'predictor'
    """
    if data_type not in ('model', 'predictor'):
        # previously an unknown value fell through and hit an unbound `model`
        raise ValueError(
            "data_type must be 'model' or 'predictor', got {!r}".format(data_type))

    cfg = get_cfg()

    # add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
    cfg.merge_from_file(model_zoo.get_config_file(config_file))

    # set threshold for this model
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = nms_thresh
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)  # number of classes

    # pick the model zoo weights or the custom trained ones
    if weight_dir is None:
        cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(config_file)
    else:
        cfg.MODEL.WEIGHTS = os.path.join(weight_dir, 'model_final.pth')

    if data_type == 'predictor':
        return DefaultPredictor(cfg)

    # imported here because the notebook's import cell does not bring these names in
    from detectron2.checkpoint import DetectionCheckpointer
    from detectron2.modeling import build_model

    model = build_model(cfg)  # returns a torch.nn.Module
    DetectionCheckpointer(model).load(cfg.MODEL.WEIGHTS)
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = model.to(device)
    model.eval()

    return model


def predict_video(dataset_name, model, model_weight_path):
    """Run the detector over every frame of a registered dataset and save the result as a video.

    Frames are written to the video as they are produced, so only one rendered frame is
    held in memory at a time. Every 100th frame the prediction and the dataset
    annotations are also displayed.

    params:
        - dataset_name: str, the dataset to test on (must be registered via func 'register_datasets')
        - model: callable taking an image as loaded by cv2.imread and returning a dict
                 with an "instances" entry (presumably a DefaultPredictor -- confirm)
        - model_weight_path: str, directory the .avi file is written to; its last path
                             component is also used in the video file name

    raises:
        - FileNotFoundError: if a frame image cannot be read
    """
    dataset_dicts = DatasetCatalog.get(dataset_name)  # load dataset that you want to test
    metadata = MetadataCatalog.get(dataset_name)  # get metadata to use on Visualizer
    start_ts = time.time()

    if not dataset_dicts:
        # there is no frame to take the video size from
        print('No frames found in dataset {}; no video written'.format(dataset_name))
        return

    # rstrip keeps a trailing '/' from producing an empty run name
    run_name = model_weight_path.rstrip('/').split('/')[-1]
    pathOut = os.path.join(model_weight_path, '{}_{}.avi'.format(dataset_name, run_name))
    fourcc = cv2.VideoWriter_fourcc(*'XVID')  # a 4-byte code used to specify the video codec
    fps = 5
    writer = None

    try:
        for idx, d in enumerate(dataset_dicts):
            im = cv2.imread(d["file_name"])
            if im is None:
                # cv2.imread reports failure by returning None instead of raising
                raise FileNotFoundError('could not read image: {}'.format(d["file_name"]))

            # make a prediction on each image and draw predictions on the image
            outputs = model(im)
            v = Visualizer(im[:, :, ::-1], metadata=metadata, scale=0.5)
            drawn = v.draw_instance_predictions(outputs["instances"].to("cpu"))
            frame = drawn.get_image()[:, :, ::-1]

            if writer is None:
                # the video size (width, height) is taken from the first rendered frame
                size = (frame.shape[1], frame.shape[0])
                writer = cv2.VideoWriter(pathOut, fourcc, fps, size)
            writer.write(frame)

            if idx % 100 == 0:
                cv2_imshow(frame)
                check_annotations(dataset_name, d)
    finally:
        # release the writer even if a frame fails part-way through
        if writer is not None:
            writer.release()

    print("time elapsed: {}".format((time.time() - start_ts)/60))
    print('Prediction Video demo saved in {}'.format(pathOut))

def run_inference(img, model):
    """Run the model on a single image and print how long the call took.

    params:
        - img: the input handed to model unchanged
        - model: callable applied to img

    returns:
        - outputs: whatever model(img) returns
    """
    start_ts = time.time()
    outputs = model(img)  # was `im`, a name that is not defined in this function
    # elapsed seconds divided by 60, i.e. the printed figure is in minutes
    print("time elapsed: {}".format((time.time() - start_ts)/60))

    return outputs