In [None]:
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities.
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
fatal: destination path 'detectron2' already exists and is not an empty directory.
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
torch:  2.0 ; cuda:  cu118
detectron2: 0.6


In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
# NOTE(review): json, random, Image, cv2_imshow, Visualizer, MetadataCatalog,
# DatasetCatalog and copy are not referenced in the cells visible here —
# confirm they are unused before removing them.
import numpy as np
import os, json, cv2, random

# Pairwise IoU between two sets of boxes; video_cropping uses it to match
# fresh detections to the boxes it is already tracking.
from torchvision.ops import box_iou

from PIL import Image
from google.colab.patches import cv2_imshow
# `files` provides files.download(), used to fetch the rendered videos.
from google.colab import files

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from google.colab import drive
from copy import copy
# Mount Google Drive at /content/drive; the input videos are read from a
# folder below this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Build the inference config and the predictor used by the cells below.
# One model-zoo key selects both the architecture config and the weights, so it
# is spelled once and reused for the two lookups.
model_zoo_key = "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"
score_thresh_test = 0.75

cfg = get_cfg()
# add project-specific config (e.g., TensorMask) here if you're not running a model in detectron2's core library
cfg.merge_from_file(model_zoo.get_config_file(model_zoo_key))
# set threshold for this model
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh_test
# Find a model from detectron2's model zoo. You can use the https://dl.fbaipublicfiles... url as well
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_zoo_key)
predictor = DefaultPredictor(cfg)

[04/22 14:44:07 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from https://dl.fbaipublicfiles.com/detectron2/COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x/138205316/model_final_a3ec72.pkl ...


In [None]:
def expand_bboxes(bboxes, height, width):
  """Grow each box along its shorter side so that it approaches a square.

  The shorter dimension is extended by half of the height/width difference on
  each side and then clamped to the frame. A box that hits a frame border is
  only clamped, not shifted, so it can come out non-square.

  Args:
    bboxes: iterable of 1-D tensors laid out as (x1, y1, x2, y2). The input is
      left untouched: every box is cloned before it is modified.
    height: frame height, upper limit for y2.
    width: frame width, upper limit for x2.

  Returns:
    A list with one torch.int tensor (x1, y1, x2, y2) per input box; an empty
    list for empty input.
  """
  square_bboxes = []
  for bbox in bboxes:
    # Iterating over a 2-D tensor yields views, so writing into `bbox` directly
    # would silently modify the caller's tensor. Work on a private copy.
    bbox = bbox.clone()
    w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
    if h >= w:
      # Taller than wide: widen symmetrically, clamped to [0, width].
      # With integer boxes the fractional half pixel is truncated on assignment.
      bbox[2] = min(bbox[2] + (h - w)/2, width)
      bbox[0] = max(bbox[0] - (h - w)/2, 0)
    else:
      # Wider than tall: heighten symmetrically, clamped to [0, height].
      bbox[3] = min(bbox[3] + (w - h)/2, height)
      bbox[1] = max(bbox[1] - (w - h)/2, 0)
    square_bboxes.append(bbox.to(torch.int))
  return square_bboxes

In [None]:
def ema(new_bbox, ema_before, alpha=0.3):
  """Return one exponential-moving-average update step.

  Args:
    new_bbox: the newest observation (number or tensor).
    ema_before: the running average from the previous step.
    alpha: weight given to the new observation; the previous average gets
      (1 - alpha). Defaults to 0.3, the value that used to be hard-coded.

  Returns:
    new_bbox * alpha + ema_before * (1 - alpha)
  """
  return new_bbox * alpha + ema_before * (1 - alpha)

In [None]:
def video_cropping(video, data_dir='/content/drive/MyDrive/Данные'):
  """Track detected people through a video and export annotated/cropped clips.

  Every frame is passed to the notebook-level `predictor`. Detections with
  class index 0 and score > 0.7 are kept (treated as people here). The first
  frame with at least one such detection fixes the number of tracked boxes;
  frames before it are skipped. On later frames:

  * when there are at least as many detections as tracked boxes, each tracked
    box takes the detection with the highest IoU, unless that IoU is below 0.1,
    in which case the previous box is kept; a used detection is removed from
    the candidates;
  * otherwise each detection overwrites the tracked box it overlaps most and
    the remaining tracked boxes keep their previous position.

  From the sixth processed frame on, the boxes are replaced by their
  exponential moving average (`ema`).

  Outputs, written to the working directory and then passed to
  `files.download`: `<name>.mpeg` (full frames with the boxes drawn) and one
  `<name>-man-<i>.mpeg` (512x512 crops) per tracked box, where `<name>` is the
  part of `video` before the first '-'.

  Args:
    video: file name of the video inside `data_dir`.
    data_dir: folder that holds the input videos.

  Returns:
    0, always.

  Raises:
    IOError: if the video cannot be opened or reports a non-positive fps.
  """
  video_path = f'{data_dir}/{video}'
  cap = cv2.VideoCapture(video_path)
  fps = cap.get(cv2.CAP_PROP_FPS)
  if not cap.isOpened() or fps <= 0:
    # Fail early with a clear message; v_time below would otherwise divide by zero.
    cap.release()
    raise IOError(f'Cannot read video: {video_path}')

  name = video.split('-')[0]
  frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
  frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
  output_bboxes = torch.tensor([])
  frame_number = 0
  out = None  # created on the first frame that contains a detection
  writer = []
  font = cv2.FONT_HERSHEY_SIMPLEX
  fourcc = cv2.VideoWriter_fourcc(*'MPEG')
  frames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
  v_time = round(frames/fps)
  # Number of frames in ~10 s of video. Rounded to an integer so the modulo
  # test below can also fire for non-integer frame rates such as 29.97.
  progress_step = max(int(round(10 * fps)), 1)

  try:
    while cap.isOpened():
      ret, frame = cap.read()

      if not ret:
        break
      outputs = predictor(frame)

      instances = outputs["instances"]
      classes = instances.pred_classes
      bboxes = instances.pred_boxes
      scores = instances.scores

      human_bboxes = bboxes[(classes == 0) & (scores > 0.7)].tensor.to(torch.int).cpu().numpy()
      if len(human_bboxes) == 0 and len(output_bboxes) == 0:
        # Nothing is tracked yet and nothing was detected: skip the frame
        # (it is neither counted nor written).
        print('First frame don\'t have human!!!')
        continue

      idx_max_iou = 0
      best_bboxes = []

      # update bboxes for new video frame
      if len(output_bboxes) == 0:
        output_bboxes = torch.from_numpy(human_bboxes)
      elif len(output_bboxes) <= len(human_bboxes):
        for bbox in output_bboxes:
          iou = box_iou(bbox[None,], torch.from_numpy(human_bboxes))
          # check jumps of bbox
          if iou.max(dim = 1).values < 0.1:
            # No detection overlaps enough: keep the previous position.
            best_bboxes.append(bbox.cpu().numpy())
          else:
            idx_max_iou = iou.argmax(dim = 1).cpu().numpy()
            best_bboxes.extend(human_bboxes[idx_max_iou])
            # A detection can be assigned to one tracked box only.
            human_bboxes = np.delete(human_bboxes, idx_max_iou, 0)

        output_bboxes = torch.from_numpy(np.array(best_bboxes))
      else:
        # Fewer detections than tracked boxes: update only the best-overlapping ones.
        iou = box_iou(output_bboxes, torch.from_numpy(human_bboxes))
        idx_max_iou = iou.argmax(dim = 0).cpu().numpy()
        output_bboxes[idx_max_iou] = torch.from_numpy(human_bboxes)

      frame_number += 1
      if frame_number % progress_step == 0:
        v_time -= 10
        print('Осталось (сек):', v_time)
      # files to write + EMA for bboxes
      if frame_number == 1:
        ema_before = output_bboxes.clone()
        out = cv2.VideoWriter(f'{name}.mpeg', fourcc, fps, (frame_width, frame_height))
        # One crop writer per tracked box.
        for i in range(len(output_bboxes)):
          writer.append(cv2.VideoWriter(f'{name}-man-{i}.mpeg', fourcc, fps, (512, 512)))
      else:
        ema_before = ema(output_bboxes, ema_before)
        if frame_number > 5:
          # The first five frames only feed the average; afterwards the
          # smoothed boxes replace the raw ones.
          output_bboxes = ema_before.to(torch.int)

      # data collection for model
      # A copy is passed so that the tracked boxes keep their original extent.
      square_bboxes = expand_bboxes(output_bboxes.clone(), frame_height, frame_width)
      for i, square in enumerate(square_bboxes):
        x0, y0, x1, y1 = (int(v) for v in square)
        cropping_frame = frame[y0:y1, x0:x1]
        cropping_frame = cv2.resize(cropping_frame, (512,512), interpolation = cv2.INTER_LANCZOS4)
        writer[i].write(cropping_frame)

      # visualization of bboxes (drawn after cropping, so the crops stay clean)
      for i, bbox in enumerate(output_bboxes.cpu().numpy()):
        color = [0, 0, 0]
        # Cycle through the three channels so a fourth track no longer
        # indexes past the end of the colour triple.
        color[i % 3] = 255
        top_left = (int(bbox[0]), int(bbox[1]))
        bottom_right = (int(bbox[2]), int(bbox[3]))
        cv2.rectangle(frame, top_left, bottom_right, color, 2)
        cv2.putText(frame, f'ID_{i}', (top_left[0], top_left[1] - 5), font, 0.5, color, 2)

      out.write(frame)
  finally:
    # Release the capture and every writer even when a frame fails to process.
    cap.release()
    if out is not None:
      out.release()
    for item in writer:
      item.release()
    cv2.destroyAllWindows()

  if out is None:
    # No frame ever contained a detection, so no output file exists.
    print(f'No output written for {video}: no person was detected in any frame.')
    return 0

  # download output videos
  files.download(f'{name}.mpeg')
  for i in range(len(writer)):
    files.download(f'{name}-man-{i}.mpeg')

  return 0

In [None]:
# List the entries of the Drive data folder; the bare name on the last line
# displays the list as the cell output.
# NOTE(review): `all` shadows Python's built-in all() for the rest of the
# session. The name is kept because a later cell slices this list by it.
# os.listdir returns entries in arbitrary order, so positions in this list are
# not guaranteed to be stable between runs.
all = os.listdir('/content/drive/MyDrive/Данные')
all

['Example11-farm6-camera1-2cows.mp4',
 'Example4-farm2-camera1-3cows.mp4',
 'Example3-farm1-camera2-6cows.mp4',
 'Example7-farm3-camera2-3cows.mp4',
 'Example9-farm4-camera2-2cows.mp4',
 'Example6-farm3-camera1-3cows.mp4',
 'Example5-farm2-camera2-4cows.mp4',
 'Example122-a.mp4',
 'Example123-a.mp4',
 'Example124-a.mp4',
 'Example111.1-a.mp4',
 'Example111.2-a.mp4']

In [None]:
# Run the cropping pipeline on the two entries at positions -5 and -4 of the
# listing, printing each file name before it is processed.
selected_videos = all[-5:-3]
for video_name in selected_videos:
  print(video_name)
  video_cropping(video_name)

Example122-a.mp4


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


Осталось (сек): 31
Осталось (сек): 21
Осталось (сек): 11
Осталось (сек): 1


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Example123-a.mp4
Осталось (сек): 30
Осталось (сек): 20
Осталось (сек): 10
Осталось (сек): 0


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>