In [1]:
# Install dependencies (Colab environment).
# pyyaml is pinned to 5.1 before anything else is imported.
!pip install pyyaml==5.1
import torch, torchvision
# Show the installed torch version and whether a CUDA device is visible.
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab
# The detectron2 wheel index used below is the cu101/torch1.7 one, so stop
# early if the runtime ships a different torch release.
assert torch.__version__.startswith("1.7")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html

1.7.0+cu101 True
gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
Copyright (C) 2017 Free Software Foundation, Inc.
This is free software; see the source for copying conditions.  There is NO
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

Looking in links: https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html


In [2]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
#from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog


# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow
import sys
import time
import pickle
import glob
import matplotlib.pyplot as plt
import tqdm

In [3]:
## Initialize class for predictor functions
class ReturnPredictions(object):
    """Thin wrapper that runs a ``DefaultPredictor`` over single images or video frames."""

    def __init__(self,cfg):
        """Build the predictor from ``cfg`` and fetch metadata for its first test dataset.

        When ``cfg.DATASETS.TEST`` is empty, the ``"__unused"`` metadata entry
        is requested instead.
        """
        self.predictor = DefaultPredictor(cfg)
        if len(cfg.DATASETS.TEST):
            dataset_name = cfg.DATASETS.TEST[0]
        else:
            dataset_name = "__unused"
        self.metadata = MetadataCatalog.get(dataset_name)

    def _frame_from_video(self,video):
        """Yield frames from ``video`` until a read fails or the capture is not open.

        ``video`` must provide ``isOpened()`` and ``read()``, the latter
        returning a ``(success, frame)`` pair.
        """
        print(video)
        while video.isOpened():
            grabbed, image = video.read()
            if not grabbed:
                # First failed read ends the stream.
                return
            yield image

    def run_on_video(self,video):
        """Yield a ``(prediction, frame)`` pair for every frame read from ``video``."""
        for image in self._frame_from_video(video):
            prediction = self.predictor(image)
            yield prediction, image

    def run_on_image(self,image):
        """Generator that yields the single prediction for ``image``.

        The image is handed to the predictor unchanged; no colour conversion
        is done here. Because this is a generator, the predictor only runs
        once the result is iterated.
        """
        prediction = self.predictor(image)
        yield prediction


## Load and setup model and predictor
# The architecture config and the pretrained checkpoint are both looked up
# from the same model-zoo entry, so it is named once here.
KEYPOINT_MODEL_ZOO_ENTRY = "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(KEYPOINT_MODEL_ZOO_ENTRY))

# Overrides applied on top of the merged config file.
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(KEYPOINT_MODEL_ZOO_ENTRY)
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model

predictor = ReturnPredictions(cfg)


In [4]:
# Mount Google Drive at /content/drive so the project folder is reachable.
from google.colab import drive
drive.mount('/content/drive')
# Project folder on the mounted drive; later cells build their paths from it.
rootPath = '/content/drive/My Drive/DL_CV_FinalProject/'

## Check for GPU
# Prints True when torch can see a CUDA device.
gpu_check = torch.cuda.is_available()
print(gpu_check)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
True


In [16]:
# Settings for the video processed by the following cells.
videoID = 'Kinjal_Fall'   # base name of the video file, without extension
save = False              # when True, the save cell writes the arrays to disk
personThresh = 0.5        # a detection counts as a person only if its score exceeds this

# Full path of the input video on the mounted drive.
videoPath = ''.join(('/content/drive/My Drive/DL_CV_FinalProject/', videoID, '.mov'))
print(videoPath)

/content/drive/My Drive/DL_CV_FinalProject/Kinjal_Fall.mov


In [17]:
# Keypoint extraction: run the predictor on every frame of the video, keep the
# keypoints of one person per frame, and show each frame with its detections.

# Minimum per-keypoint confidence for a keypoint to be drawn on the preview.
KEYPOINT_DRAW_THRESH = 0.09


def _to_pixel(*coords):
  """Round float image coordinates to the tuple of ints OpenCV drawing calls expect."""
  return tuple(int(round(float(c))) for c in coords)


storeKeypoints = []               # one (17, 3) keypoint array per frame
storeFrames = []                  # the frames themselves, in read order
noPointsArray = np.zeros((17,3))  # placeholder stored when no person is found

vid = cv2.VideoCapture(videoPath)
if not vid.isOpened():
  # Fail here with a clear message instead of later inside np.stack.
  raise IOError('Could not open video: ' + videoPath)
output_generator = predictor.run_on_video(vid)

imgID = 1

try:
  for output, frame in output_generator:
    print('---------------- Image Number: ',imgID)
    storeFrames.append(frame)
    instances = output['instances']
    imgH,imgW = instances.image_size
    keypoints_all = instances.pred_keypoints.cpu().numpy()
    person_scores = instances.scores.cpu().numpy()

    bound_box = instances.pred_boxes.tensor.cpu().numpy()
    idx_scores_valid = np.argwhere(person_scores>personThresh)

    imgID += 1

    if len(idx_scores_valid) == 0:
      print('Could not detect person!')
      storeKeypoints.append(noPointsArray)
    else:
      if len(idx_scores_valid) > 1:
        print('ERROR: More than one person detected! This script is only valid for videos with 1 person so selecting person with max score. ')
      # At least one score passed the threshold, so the arg-max is guaranteed
      # to be a valid detection (unlike blindly taking index 0).
      storeKeypoints.append(keypoints_all[np.argmax(person_scores)])

    print("All Scores:",person_scores)
    # Created outside the try blocks so a drawing failure can never leave a
    # stale (or undefined) preview image for the calls below.
    imgCopy = cv2.cvtColor(frame.copy(),cv2.COLOR_BGR2RGB)

    # Drawing is best-effort: report the failure and keep processing frames.
    # `except Exception` (not a bare except) lets a kernel interrupt through.
    try:
      for key in keypoints_all:
        for pt in key:
          print('Confidence = ',pt[2])
          if pt[2] > KEYPOINT_DRAW_THRESH:
            cv2.circle(imgCopy, _to_pixel(pt[0],pt[1]), 10, (0,255,0), -1)
    except Exception as err:
      print('Could not draw points:', err)

    try:
      for box in bound_box:
        cv2.rectangle(imgCopy, _to_pixel(box[0],box[1]), _to_pixel(box[2],box[3]), (255,0,0),5)
    except Exception as err:
      print('Could not draw bounding box:', err)

    plt.imshow(imgCopy)
    plt.show()
finally:
  # Always hand the capture back, even if the loop is interrupted.
  vid.release()

if not storeKeypoints:
  raise ValueError('No frames could be read from ' + videoPath)
storeKeypoints_ = np.stack(storeKeypoints,axis=0)
  
#### From detectron2 Code ####
# COCO_PERSON_KEYPOINT_NAMES = (
#     "nose",
#     "left_eye", "right_eye",
#     "left_ear", "right_ear",
#     "left_shoulder", "right_shoulder",
#     "left_elbow", "right_elbow",
#     "left_wrist", "right_wrist",
#     "left_hip", "right_hip",
#     "left_knee", "right_knee",
#     "left_ankle", "right_ankle",
# )
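# storeKeypoints_ has shape Frames x 17 x 3: 17 keypoints per frame in the order
# listed above; the first two values are used as the pixel position and the
# third as the keypoint confidence.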

Output hidden; open in https://colab.research.google.com to view.

In [9]:
# Onset label stored next to the keypoints.
# NOTE(review): 97 is presumably the hand-labelled onset frame for this
# particular video -- confirm and update it whenever videoID changes.
onsetIdx = np.array([97])
if save:
  savePath = rootPath + 'keypoints/'
  # np.save does not create missing folders, so make sure the target exists.
  os.makedirs(savePath, exist_ok=True)
  np.save(savePath + videoID + '.npy',storeKeypoints_)
  np.save(savePath + videoID + 'onsetLabel.npy',onsetIdx)
  

In [8]:
# Combine the collected frames into one array, stacked along a new leading axis
# (np.stack's default axis is 0).
storeFrames_ = np.stack(storeFrames)