In [2]:
# --- Run configuration: SET VIDEO ID HERE ---
# File stem of the clip to analyse; '.MOV' is appended when the path is built.
videoID = 'Maia_Fall'
# Detection-score cutoff later used to decide whether a person was detected in a frame.
personThresh = 0.8
# Full path of the clip inside the mounted Google Drive folder.
videoPath = ''.join(['/content/drive/My Drive/DL_CV_FinalProject/Datasets/our_fall_dataset/',
                     videoID,
                     '.MOV'])
print(videoPath)

# Mount Google Drive so the paths above resolve, then record the project root.
from google.colab import drive
drive.mount('/content/drive')
rootPath = '/content/drive/My Drive/DL_CV_FinalProject/'


/content/drive/My Drive/DL_CV_FinalProject/Datasets/our_fall_dataset/Maia_Fall.MOV
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# install dependencies: 
!pip install pyyaml==5.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
!gcc --version
# opencv is pre-installed on colab
assert torch.__version__.startswith("1.7")
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/cu101/torch1.7/index.html

In [10]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
#from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog


# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow
import sys
import time
import pickle
import glob
import matplotlib.pyplot as plt
import tqdm

In [None]:
## Check for GPU 
# Ask PyTorch whether CUDA is available and print the boolean result.
# NOTE: relies on `torch` having been imported by the dependency-install cell.
gpu_check = torch.cuda.is_available()
print(gpu_check)

In [11]:
## Initialize class for predictor functions
class ReturnPredictions(object):
    """Wrapper that runs a detectron2 ``DefaultPredictor`` on video frames or single images."""

    def __init__(self, cfg):
        """Build the predictor from ``cfg`` and look up dataset metadata.

        The metadata entry is taken from the first name in ``cfg.DATASETS.TEST``;
        when that list is empty the "__unused" entry is requested instead.
        """
        self.predictor = DefaultPredictor(cfg)
        test_sets = cfg.DATASETS.TEST
        dataset_name = test_sets[0] if len(test_sets) else "__unused"
        self.metadata = MetadataCatalog.get(dataset_name)

    def _frame_from_video(self, video):
        """Yield frames from ``video`` until a read fails or the capture is no longer open.

        ``video`` must provide ``isOpened()`` and ``read()`` returning ``(success, frame)``.
        The capture object itself is printed once when iteration starts.
        """
        print(video)
        while video.isOpened():
            ok, frame = video.read()
            if not ok:
                return
            yield frame

    def run_on_video(self, video):
        """Yield ``(prediction, frame)`` for every frame that can be read from ``video``."""
        for frame in self._frame_from_video(video):
            prediction = self.predictor(frame)
            yield prediction, frame

    def run_on_image(self, image):
        """Generator that yields the single prediction for ``image``.

        The image is handed to the predictor unchanged; no colour conversion is done here.
        """
        yield self.predictor(image)


## Load and setup model and predictor
# Model-zoo entry used for both the config file and the pretrained weights.
KEYPOINT_MODEL_CONFIG = "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(KEYPOINT_MODEL_CONFIG))

# Score threshold applied by the model's ROI heads at test time.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# Weights come from detectron2's model zoo; a direct https://dl.fbaipublicfiles... url works too.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(KEYPOINT_MODEL_CONFIG)

# Wrap the configured model in the helper class defined above.
predictor = ReturnPredictions(cfg)


In [12]:
def resizeImg(orig_frame, scale):
  """Return ``orig_frame`` resized by ``scale`` along both axes.

  The target width and height are the original ones multiplied by ``scale``
  and truncated to integers; resampling uses ``cv2.INTER_AREA``.
  """
  src_height, src_width = orig_frame.shape[0], orig_frame.shape[1]
  # cv2.resize takes the target size as (width, height)
  target_size = (int(src_width * scale), int(src_height * scale))
  return cv2.resize(orig_frame, target_size, interpolation=cv2.INTER_AREA)

def resizeKPB(keyPoints,bbox, scale):
  """Rescale keypoints and a bounding box to match an image resized by ``scale``.

  Args:
    keyPoints: 2-D array whose last column is left untouched and whose other
      columns are multiplied by ``scale`` (the caller passes rows of x, y, score).
    bbox: array of box coordinates; every entry is multiplied by ``scale``.
    scale: multiplicative factor applied to the coordinates.

  Returns:
    ``(scaled_keypoints, scaled_bbox)`` as new arrays. The inputs are not
    modified, so calling this twice on the same array cannot double-scale it.
  """
  # Work on a copy: the original wrote the scaled coordinates back into the caller's array.
  scaled_keypoints = np.array(keyPoints, copy=True)
  scaled_keypoints[:,:-1] = scaled_keypoints[:,:-1]*scale
  scaled_bbox = np.asarray(bbox)*scale
  return scaled_keypoints,scaled_bbox

def removeOutliers(signal,scale_lower=1,scale_upper=3):
  """Keep only the samples of ``signal`` that lie close to its mean.

  A sample is kept when it is strictly greater than ``mean - scale_lower*std``
  and strictly less than ``mean + scale_upper*std``.

  Args:
    signal: array of values (any shape).
    scale_lower: number of standard deviations allowed below the mean.
    scale_upper: number of standard deviations allowed above the mean.

  Returns:
    1-D array with the retained samples. An empty input gives an empty result,
    and a constant input (zero standard deviation) is returned whole.
  """
  signal = np.asarray(signal)

  # Nothing to filter; also avoids numpy's mean-of-empty warnings.
  if signal.size == 0:
    return signal.reshape(-1)

  avg = np.mean(signal)
  std = np.std(signal)

  # With zero spread both bounds equal the mean, so the strict comparisons below
  # would reject every sample and the caller's np.mean would produce NaN.
  if std == 0:
    return signal.reshape(-1)

  upper = avg+scale_upper*std # upper is scale_upper std from the mean
  lower = avg-scale_lower*std # lower is scale_lower std from the mean

  # only return values where the input is greater than the lower threshold and less than the upper
  out = signal[np.logical_and(signal>lower,signal<upper)]

  return out

# def removeOutliers_bbox(signal,scale_lower=1,scale_upper=1):
#   avg = np.mean(signal)
#   std = np.std(signal)
#   upper = avg+scale_upper*std
#   lower = avg-scale_lower*std

#   out = signal[np.logical_and(signal>lower,signal<upper)]
#   #out = signal[signal>lower]
  
#   return out

In [None]:
# Per-frame results, stacked into arrays once the whole video has been processed.
# NOTE: frame 0 has no previous frame, so no flow value is stored for it and
# storeFlow ends up one entry shorter than the other three lists.
storeKeypoints = [] 
storeFrames = [] # store grayscale frames
storeBBox = []
storeFlow = []

# arrays of 0s to append when no person present 
noPointsArray = np.zeros((17,3))
noBoxArray = np.zeros(4)

# scale factor = how much to decrease dimensionality --> helps with OF compute needed 
# (constant for the whole run, so it is set once instead of on every iteration)
scale_factor = 0.3

# create detectron points and bounding box generator 
vid = cv2.VideoCapture(videoPath)
if not vid.isOpened():
  # fail with a clear message instead of an empty loop followed by an np.stack error
  raise IOError('Could not open video: ' + videoPath)
output_generator = predictor.run_on_video(vid) 

imgID = 0

try:
  # iterate through frames 
  for output_frame in output_generator:

    print('---------------- Image Number: ',imgID)
    output = output_frame[0]
    frame = output_frame[1]

    # scale image down 
    g_frame = resizeImg(cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY),scale_factor)
    storeFrames.append(g_frame)

    # extract values from output 
    instances = output['instances']
    keypoints_all = instances.pred_keypoints.cpu().numpy()
    person_scores = instances.scores.cpu().numpy()
    bound_box = instances.pred_boxes.tensor.cpu().numpy()

    # extract indices of cases where person is detected 
    idx_scores_valid = np.argwhere(person_scores>personThresh)

    # skip the first frame (there is no previous frame to compute flow against)
    if imgID == 0:
      storeKeypoints.append(noPointsArray)
      storeBBox.append(noBoxArray)
      imgID += 1
      continue

    imgID += 1

    # calculate Dense Optical Flow between current image and previous image 
    flow = cv2.calcOpticalFlowFarneback(storeFrames[-2], storeFrames[-1], None, 0.5, 3, 25, 3, 5, 1.2, 0)

    if len(idx_scores_valid) == 0:
      # if no person detected 

      print('Could not detect person!')
      storeKeypoints.append(noPointsArray)
      storeBBox.append(noBoxArray)

      # magnitude based on all pixels in image
      u = flow[:,:,0]
      v = flow[:,:,1]

      # get magnitude, remove outliers, then take mean 
      flow_mag = np.mean(removeOutliers(np.sqrt(u ** 2 + v ** 2),scale_lower=1,scale_upper=3))

      storeFlow.append(flow_mag)
    else:    
      print("All Scores:",person_scores)

      # resize RGB image for showing figure 
      imgCopy = resizeImg(cv2.cvtColor(frame.copy(),cv2.COLOR_BGR2RGB),scale_factor)

      # use the detection with the highest score; resize its kps and bbox 
      best = int(np.argmax(person_scores))
      key,box = resizeKPB(keypoints_all[best],
                          bound_box[best], 
                          scale_factor)

      # integer pixel coordinates of the box, used for both slicing and drawing
      x1, y1, x2, y2 = (int(c) for c in box)

      # extract pixels that are within the bounding box of the current frame 
      u = flow[y1:y2,x1:x2,0]
      v = flow[y1:y2,x1:x2,1]

      # get magnitude, remove outliers, then take mean 
      flow_mag = np.mean(removeOutliers(np.sqrt(u ** 2 + v ** 2),scale_lower=2,scale_upper=2))
      storeFlow.append(flow_mag)

      storeKeypoints.append(key)
      storeBBox.append(box)

      # draw bounding boxes and key points 
      # (OpenCV drawing functions need integer pixel coordinates, not numpy floats)
      cv2.rectangle(imgCopy, (x1,y1), (x2,y2), (255,0,0),2)
      for pt in key:
        # only draw keypoints whose confidence (third column) exceeds 0.1
        if pt[2] > 0.1:
          cv2.circle(imgCopy, (int(pt[0]),int(pt[1])), 2, (0,255,0), -1)

      plt.imshow(imgCopy,cmap='gray')
      plt.show()
finally:
  # always free the capture handle, even if processing stops with an error
  vid.release()

storeKeypoints_ = np.stack(storeKeypoints,axis=0)
storeBBox_ = np.stack(storeBBox,axis=0)
storeFrames_ = np.stack(storeFrames,axis=0)
storeFlow_ = np.array(storeFlow)

#### From detectron2 Code ####
# COCO_PERSON_KEYPOINT_NAMES = (
#     "nose",
#     "left_eye", "right_eye",
#     "left_ear", "right_ear",
#     "left_shoulder", "right_shoulder",
#     "left_elbow", "right_elbow",
#     "left_wrist", "right_wrist",
#     "left_hip", "right_hip",
#     "left_knee", "right_knee",
#     "left_ankle", "right_ankle",
# )
# Frames x 17 x 3 

In [None]:
# Plot the stored per-frame optical-flow magnitudes against their index.
f_plot = np.arange(storeFlow_.shape[0])
plt.plot(f_plot, storeFlow_, 'k')
plt.xlim(0, f_plot.size - 1)
plt.ylabel('Average Optical Flow Magnitude')
plt.xlabel('Frame Number')
