# Steps to Follow

1. Install Dependencies

2. Loading and pre-processing data

3. Defining and training the model

4. Evaluating model performance

### 1. Install Dependencies

In [None]:
!pip install!python -m pip install pyyaml==5.1
# Detectron2 has not released pre-built binaries for the latest pytorch (https://github.com/facebookresearch/detectron2/issues/4053)
# so we install from source instead. This takes a few minutes.
!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

# Install pre-built detectron2 that matches pytorch version, if released:
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
#!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/{CUDA_VERSION}/{TORCH_VERSION}/index.html

# exit(0)  # After installation, you may need to "restart runtime" in Colab. This line can also restart runtime pyyaml==5.1

ERROR: unknown command "install!python" - maybe you meant "install"
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-pn9l5hye
  Running command git clone -q https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-pn9l5hye
Collecting yacs>=0.1.8
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting fvcore<0.1.6,>=0.1.5
  Downloading fvcore-0.1.5.post20220512.tar.gz (50 kB)
[K     |████████████████████████████████| 50 kB 4.0 MB/s 
[?25hCollecting iopath<0.1.10,>=0.1.7
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting omegaconf>=2.1
  Downloading omegaconf-2.2.2-py3-none-any.whl (79 kB)
[K     |████████████████████████████████| 79 kB 4.5 MB/s 
[?25hCollecting hydra-core>=1.1
  Downloading hydra_core-1.2.0-py3-none-any.whl (151 kB)
[K     |████████████████

In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2020 NVIDIA Corporation
Built on Mon_Oct_12_20:09:46_PDT_2020
Cuda compilation tools, release 11.1, V11.1.105
Build cuda_11.1.TC455_06.29190527_0
torch:  1.12 ; cuda:  cu113
detectron2: 0.6


### 2. Loading and pre-processing data

In [None]:
from google.colab import drive

# Mount Google Drive so the dataset archive stored there can be read.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
!unzip '/content/drive/MyDrive/pose_dataset-200715-173328.zip'

Archive:  /content/drive/MyDrive/pose_dataset-200715-173328.zip
   creating: pose_dataset/
 extracting: pose_dataset/validation_images.zip  
  inflating: pose_dataset/person_keypoints_val2017.json  
  inflating: pose_dataset/person_keypoints_train2017.json  
 extracting: pose_dataset/training_images.zip  


In [None]:
# extract files
!unzip '/content/pose_dataset/training_images.zip'
!unzip '/content/pose_dataset/validation_images.zip'

Archive:  /content/pose_dataset/training_images.zip
   creating: content/train2017/
  inflating: content/train2017/000000206893.jpg  
  inflating: content/train2017/000000267408.jpg  
  inflating: content/train2017/000000282603.jpg  
  inflating: content/train2017/000000494811.jpg  
  inflating: content/train2017/000000509855.jpg  
  inflating: content/train2017/000000435673.jpg  
  inflating: content/train2017/000000042404.jpg  
  inflating: content/train2017/000000437481.jpg  
  inflating: content/train2017/000000541251.jpg  
  inflating: content/train2017/000000415499.jpg  
  inflating: content/train2017/000000542674.jpg  
  inflating: content/train2017/000000451213.jpg  
  inflating: content/train2017/000000477774.jpg  
  inflating: content/train2017/000000461957.jpg  
  inflating: content/train2017/000000564602.jpg  
  inflating: content/train2017/000000159370.jpg  
  inflating: content/train2017/000000317176.jpg  
  inflating: content/train2017/000000231325.jpg  
  inflating: con

In [None]:
from glob import glob

# for dealing with images
import cv2


def load_images(pattern):
    """Read every image matching a glob pattern.

    Args:
        pattern: glob pattern of the image files to read.

    Returns:
        List of images as returned by ``cv2.imread``, in sorted path order.
        Files that OpenCV cannot decode (``cv2.imread`` returns ``None``)
        are skipped so later cells never receive ``None``.
    """
    images = []
    # sorted() makes the list order independent of the filesystem listing order
    for path in sorted(glob(pattern)):
        img = cv2.imread(path)
        if img is None:
            print("skipping unreadable image:", path)
            continue
        images.append(img)
    return images


# training and validation images, loaded into memory
train_images = load_images('content/train2017/*.jpg')
val_images = load_images('content/val2017/*.jpg')

### 3. Defining and training the model

In [None]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("train_data", {}, 'drive/My Drive/pose_dataset/person_keypoints_train2017.json', "content/train2017/")

In [None]:
from detectron2.data import MetadataCatalog, DatasetCatalog
pose_metadata = MetadataCatalog.get("train_data").set(thing_classes=["person"])

In [None]:
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg

# define configure instance
cfg = get_cfg()

cfg.DATASETS.TRAIN = ("train_data",)

cfg.DATASETS.TEST = ()

# Get a model specified by relative path under Detectron2’s official configs/ directory.
cfg.merge_from_file(model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_X_101_32x8d_FPN_3x.yaml"))

# set threshold for this model
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7

In [None]:
import os
# Create cfg.OUTPUT_DIR if it does not exist yet; the final weights
# (model_final.pth) are later read back from this directory.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

In [None]:
# --- model head ---
# single foreground class: person
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

# --- optimisation schedule ---
# base learning rate
cfg.SOLVER.BASE_LR = 0.001
# total number of training iterations
cfg.SOLVER.MAX_ITER = 2000
# images per training batch
cfg.SOLVER.IMS_PER_BATCH = 2

In [None]:
!mkdir datasets
!mkdir datasets/coco
!mkdir datasets/coco/annotations

In [None]:
!cp '/content/pose_dataset/person_keypoints_train2017.json' 'datasets/coco/annotations/'
!cp '/content/pose_dataset/person_keypoints_val2017.json' 'datasets/coco/annotations/'

In [None]:
!cp -avr content/train2017 datasets/coco/

'content/train2017' -> 'datasets/coco/train2017'
'content/train2017/000000206893.jpg' -> 'datasets/coco/train2017/000000206893.jpg'
'content/train2017/000000267408.jpg' -> 'datasets/coco/train2017/000000267408.jpg'
'content/train2017/000000282603.jpg' -> 'datasets/coco/train2017/000000282603.jpg'
'content/train2017/000000494811.jpg' -> 'datasets/coco/train2017/000000494811.jpg'
'content/train2017/000000509855.jpg' -> 'datasets/coco/train2017/000000509855.jpg'
'content/train2017/000000435673.jpg' -> 'datasets/coco/train2017/000000435673.jpg'
'content/train2017/000000042404.jpg' -> 'datasets/coco/train2017/000000042404.jpg'
'content/train2017/000000437481.jpg' -> 'datasets/coco/train2017/000000437481.jpg'
'content/train2017/000000541251.jpg' -> 'datasets/coco/train2017/000000541251.jpg'
'content/train2017/000000415499.jpg' -> 'datasets/coco/train2017/000000415499.jpg'
'content/train2017/000000542674.jpg' -> 'datasets/coco/train2017/000000542674.jpg'
'content/train2017/000000451213.jpg' -

In [None]:
!cp -avr content/val2017 datasets/coco/

'content/val2017' -> 'datasets/coco/val2017'
'content/val2017/000000388215.jpg' -> 'datasets/coco/val2017/000000388215.jpg'
'content/val2017/000000184978.jpg' -> 'datasets/coco/val2017/000000184978.jpg'
'content/val2017/000000008277.jpg' -> 'datasets/coco/val2017/000000008277.jpg'
'content/val2017/000000272049.jpg' -> 'datasets/coco/val2017/000000272049.jpg'
'content/val2017/000000321333.jpg' -> 'datasets/coco/val2017/000000321333.jpg'
'content/val2017/000000289659.jpg' -> 'datasets/coco/val2017/000000289659.jpg'
'content/val2017/000000010583.jpg' -> 'datasets/coco/val2017/000000010583.jpg'
'content/val2017/000000353051.jpg' -> 'datasets/coco/val2017/000000353051.jpg'
'content/val2017/000000001425.jpg' -> 'datasets/coco/val2017/000000001425.jpg'
'content/val2017/000000318080.jpg' -> 'datasets/coco/val2017/000000318080.jpg'
'content/val2017/000000360951.jpg' -> 'datasets/coco/val2017/000000360951.jpg'
'content/val2017/000000442323.jpg' -> 'datasets/coco/val2017/000000442323.jpg'
'conten

In [None]:
# Create the trainer from the configuration assembled in the cells above
# (model config, dataset names, solver settings, output directory).
trainer = DefaultTrainer(cfg)

In [None]:
# resume=False: do not continue from a previous checkpoint in the output
# directory; the training log below reports "Starting training from iteration 0".
trainer.resume_or_load(resume=False)

# train the model (runs for cfg.SOLVER.MAX_ITER iterations)
trainer.train()

X-101-32x8d.pkl: 356MB [00:07, 47.5MB/s]                           

[32m[07/27 12:33:29 d2.checkpoint.c2_model_loading]: [0mRenaming Caffe2 weights ......





[32m[07/27 12:33:29 d2.checkpoint.c2_model_loading]: [0mFollowing weights matched with submodule backbone.bottom_up:
| Names in Model    | Names in Checkpoint                         | Shapes                                          |
|:------------------|:--------------------------------------------|:------------------------------------------------|
| res2.0.conv1.*    | res2_0_branch2a_{bn_b,bn_rm,bn_riv,bn_s,w}  | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| res2.0.conv2.*    | res2_0_branch2b_{bn_b,bn_rm,bn_riv,bn_s,w}  | (256,) (256,) (256,) (256,) (256,8,3,3)         |
| res2.0.conv3.*    | res2_0_branch2c_{bn_b,bn_rm,bn_riv,bn_s,w}  | (256,) (256,) (256,) (256,) (256,256,1,1)       |
| res2.0.shortcut.* | res2_0_branch1_{bn_b,bn_rm,bn_riv,bn_s,w}   | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| res2.1.conv1.*    | res2_1_branch2a_{bn_b,bn_rm,bn_riv,bn_s,w}  | (256,) (256,) (256,) (256,) (256,256,1,1)       |
| res2.1.conv2.*    | res2_1_branch2b_{bn_b,bn_rm,bn_ri

Some model parameters or buffers are not found in the checkpoint:
[34mbackbone.fpn_lateral2.{bias, weight}[0m
[34mbackbone.fpn_lateral3.{bias, weight}[0m
[34mbackbone.fpn_lateral4.{bias, weight}[0m
[34mbackbone.fpn_lateral5.{bias, weight}[0m
[34mbackbone.fpn_output2.{bias, weight}[0m
[34mbackbone.fpn_output3.{bias, weight}[0m
[34mbackbone.fpn_output4.{bias, weight}[0m
[34mbackbone.fpn_output5.{bias, weight}[0m
[34mproposal_generator.rpn_head.anchor_deltas.{bias, weight}[0m
[34mproposal_generator.rpn_head.conv.{bias, weight}[0m
[34mproposal_generator.rpn_head.objectness_logits.{bias, weight}[0m
[34mroi_heads.box_head.fc1.{bias, weight}[0m
[34mroi_heads.box_head.fc2.{bias, weight}[0m
[34mroi_heads.box_predictor.bbox_pred.{bias, weight}[0m
[34mroi_heads.box_predictor.cls_score.{bias, weight}[0m
[34mroi_heads.keypoint_head.conv_fcn1.{bias, weight}[0m
[34mroi_heads.keypoint_head.conv_fcn2.{bias, weight}[0m
[34mroi_heads.keypoint_head.conv_fcn3.{bias, weigh

[32m[07/27 12:33:31 d2.engine.train_loop]: [0mStarting training from iteration 0


  cpuset_checked))
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  keypoints = torch.as_tensor(keypoints, dtype=torch.float32, device=device)
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[07/27 12:34:23 d2.utils.events]: [0m eta: 1:12:04  iter: 19  total_loss: 9.322  loss_cls: 0.4812  loss_box_reg: 0.002413  loss_keypoint: 8.103  loss_rpn_cls: 0.7081  loss_rpn_loc: 0.04232  time: 2.1375  data_time: 0.0338  lr: 1.9981e-05  max_mem: 5978M
[32m[07/27 12:35:05 d2.utils.events]: [0m eta: 1:07:08  iter: 39  total_loss: 8.95  loss_cls: 0.1188  loss_box_reg: 0.005309  loss_keypoint: 8.072  loss_rpn_cls: 0.692  loss_rpn_loc: 0.0404  time: 2.1177  data_time: 0.0119  lr: 3.9961e-05  max_mem: 6249M
[32m[07/27 12:35:50 d2.utils.events]: [0m eta: 1:09:08  iter: 59  total_loss: 8.911  loss_cls: 0.08114  loss_box_reg: 0.01267  loss_keypoint: 8.061  loss_rpn_cls: 0.6551  loss_rpn_loc: 0.06305  time: 2.1547  data_time: 0.0130  lr: 5.9941e-05  max_mem: 6249M
[32m[07/27 12:36:37 d2.utils.events]: [0m eta: 1:09:43  iter: 79  total_loss: 8.828  loss_cls: 0.08002  loss_box_reg: 0.01769  loss_keypoint: 8.017  loss_rpn_cls: 0.6169  loss_rpn_loc: 0.07239  time: 2.2050  data_time: 0.

### 4. Evaluating model performance

In [None]:
# Register the validation split against the annotation file extracted from the
# dataset archive (/content/pose_dataset/...), which this notebook is known to
# have created. The guard keeps the cell re-runnable, because registering an
# existing name raises.
if "validation_data" not in DatasetCatalog.list():
    register_coco_instances("validation_data", {}, '/content/pose_dataset/person_keypoints_val2017.json', "content/val2017/")

# Reuse the COCO person keypoint metadata from detectron2's built-in dataset
# so the Visualizer has keypoint names and connection rules to draw with.
coco_person_metadata = MetadataCatalog.get("keypoints_coco_2017_val")
pose_metadata = MetadataCatalog.get("validation_data").set(
    thing_classes=["person"],
    keypoint_names=coco_person_metadata.keypoint_names,
    keypoint_flip_map=coco_person_metadata.keypoint_flip_map,
    keypoint_connection_rules=coco_person_metadata.keypoint_connection_rules,
)

In [None]:
# Datasets used for validation; each name must be registered in DatasetCatalog
cfg.DATASETS.TEST = ("validation_data", )

# confidence threshold applied to detections at test time
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

# point the model at the weights written at the end of training
final_weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
cfg.MODEL.WEIGHTS = final_weights_path

In [None]:
# set up predictor
from detectron2.engine import DefaultPredictor

# Create a simple end-to-end predictor with the given config that runs on single device for a single input image.
# It is built from the current cfg, i.e. with MODEL.WEIGHTS pointing at
# model_final.pth and the test-time score threshold set in the previous cell.
predictor = DefaultPredictor(cfg)

In [None]:
import random

#for drawing predictions on images
from detectron2.utils.visualizer import Visualizer

#to display an image
from google.colab.patches import cv2_imshow

# how many images to preview
NUM_PREVIEW_IMAGES = 5

# Preview on the validation images: this section evaluates the model and
# pose_metadata describes the validation dataset. Drop entries that
# cv2.imread failed to decode (None).
preview_pool = [img for img in val_images if img is not None]

# random.sample raises ValueError when asked for more items than exist,
# so clamp the sample size to the pool size
for img in random.sample(preview_pool, min(NUM_PREVIEW_IMAGES, len(preview_pool))):

    #make predictions (the predictor takes the BGR image as read by cv2)
    outputs = predictor(img)

    # Use `Visualizer` to draw the predictions on the image.
    # [:, :, ::-1] reverses the channel order (BGR -> RGB) for the Visualizer
    v = Visualizer(img[:, :, ::-1], metadata = pose_metadata, scale=1)

    #draw prediction on image
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))

    #display image (reverse the channels again for cv2_imshow)
    cv2_imshow(v.get_image()[:, :, ::-1])

In [None]:
# test evaluation
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Predictor built from cfg, i.e. from the saved model_final.pth weights
predictor = DefaultPredictor(cfg)
evaluator = COCOEvaluator("validation_data", cfg, False, output_dir="./output/")
val_loader = build_detection_test_loader(cfg, "validation_data")

# Evaluate the model held by the predictor (the checkpoint loaded from disk).
# The original evaluated trainer.model, which left this predictor unused and
# made the cell depend on the in-memory trainer from the training cell.
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
eval_results