In [2]:
# Mount Google Drive at /content/drive; the detection model, wheel template and
# videos used further down are all read from paths below this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
import os
import pathlib

# Clone the tensorflow models repository if it doesn't already exist
# If the working directory is somewhere inside a path with a "models"
# component, walk up until it no longer is (no clone is attempted in that case).
if "models" in pathlib.Path.cwd().parts:
  while "models" in pathlib.Path.cwd().parts:
    os.chdir('..')
# Otherwise fetch a shallow copy (--depth 1) unless a `models` entry already
# exists in the current directory.
elif not pathlib.Path('models').exists():
  !git clone --depth 1 https://github.com/tensorflow/models

Cloning into 'models'...
remote: Enumerating objects: 4107, done.[K
remote: Counting objects: 100% (4107/4107), done.[K
remote: Compressing objects: 100% (3108/3108), done.[K
remote: Total 4107 (delta 1188), reused 2033 (delta 937), pack-reused 0[K
Receiving objects: 100% (4107/4107), 45.35 MiB | 21.61 MiB/s, done.
Resolving deltas: 100% (1188/1188), done.


In [4]:
%%bash
# Install the Object Detection API
# NOTE: the %%bash cell magic has to be the very first line of the cell, so the
# descriptive comment lives below it.
# Stop at the first failing step instead of running `pip install .` from the
# wrong directory or with missing generated protobuf modules.
set -e
cd models/research/
# Generate the Python protobuf modules the package imports.
protoc object_detection/protos/*.proto --python_out=.
# Use the TF2 flavour of the packaging script.
cp object_detection/packages/tf2/setup.py .
python -m pip install .

Processing /content/models/research
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting avro-python3 (from object-detection==0.1)
  Downloading avro-python3-1.10.2.tar.gz (38 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting apache-beam (from object-detection==0.1)
  Downloading apache_beam-2.56.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (14.5 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 14.5/14.5 MB 33.3 MB/s eta 0:00:00
Collecting lvis (from object-detection==0.1)
  Downloading lvis-0.5.3-py3-none-any.whl (14 kB)
Collecting tf-models-official>=2.5.1 (from object-detection==0.1)
  Downloading tf_models_official-2.16.0-py2.py3-none-any.whl (2.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.7/2.7 MB 52.6 MB/s eta 0:00:00
Collecting tensorflow_io (from object-detection==0.1)
  Downloading tensorflow_io-0.37.0-cp310-cp310-manylinux_2_

In [5]:
!pip install tensorflow=="2.15.0"

Collecting tensorflow==2.15.0
  Downloading tensorflow-2.15.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (475.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m475.2/475.2 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting ml-dtypes~=0.2.0 (from tensorflow==2.15.0)
  Downloading ml_dtypes-0.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m47.7 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.16,>=2.15 (from tensorflow==2.15.0)
  Downloading tensorboard-2.15.2-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m31.7 MB/s[0m eta [36m0:00:00[0m
Collecting keras<2.16,>=2.15.0 (from tensorflow==2.15.0)
  Downloading keras-2.15.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m25.7 MB/s[0m eta [36m0:00:00[0m
Install

In [6]:
import tensorflow as tf
# Load the exported detector from Drive. The loaded object is called directly
# on a batched uint8 image tensor in `infer_boxes`.
detection_model = tf.saved_model.load('/content/drive/MyDrive/IACV/Models/resnet_exported/saved_model')

In [7]:
from google.colab.patches import cv2_imshow
import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import data, color, img_as_ubyte
from skimage.feature import canny
from skimage.transform import hough_ellipse
from skimage.draw import ellipse_perimeter
import math

# Tunable parameters of the wheel / ellipse detection pipeline.
# (The "TRESHOLD" spelling is kept because the functions below reference these names.)

# Detections whose score is not above this value are discarded in `infer_boxes`.
SCORE_TRESHOLD = 0.5
# NOTE(review): not referenced by any function in the visible cells — confirm whether it is still needed.
EXPANSION_FACTOR = 0.01
# Gray level used to binarize the histogram-equalized wheel patch.
BINARY_IMAGE_TRESHOLD = 160
# Hysteresis thresholds passed to cv2.Canny on the binarized patch.
CANNY_LOW_TRESHOLD = 180
CANNY_HIGH_TRESHOLD = 200
# First channel of the wheel template image. It is only referenced from
# commented-out template-matching code in `detect_wheels_ellipses`.
# NOTE(review): cv2.imread presumably returns None when the file cannot be read,
# in which case the subscript on this line raises — confirm the path exists.
WHEEL_TEMPLATE = cv2.imread("/content/drive/My Drive/IACV/wheel_template.png")[:,:,0]

def convert_bb_to_opencv(bb, frame):
  """Scale a normalized box to integer pixel corners of `frame`.

  Args:
    bb: indexable with four entries; entries 0 and 2 are scaled by the frame
      height (`frame.shape[0]`), entries 1 and 3 by the frame width
      (`frame.shape[1]`).
    frame: object exposing `shape` as (height, width, ...).

  Returns:
    `((x_start, y_start), (x_end, y_end))`, each value truncated with `int`.
  """
  rows, cols = frame.shape[0], frame.shape[1]
  top_left = (int(bb[1] * cols), int(bb[0] * rows))
  bottom_right = (int(bb[3] * cols), int(bb[2] * rows))
  return top_left, bottom_right

def convert_bbox_to_xywh(bbox, frame):
  """Express a normalized box as `(x, y, width, height)` in pixels.

  The box is first converted to pixel corners with `convert_bb_to_opencv`;
  `x`/`y` are the first corner and the size is the corner difference.
  """
  (left, top), (right, bottom) = convert_bb_to_opencv(bbox, frame)
  return (left, top, right - left, bottom - top)

def add_bbox_to_frame(detection, frame):
  """Draw one detection box on `frame` as a green, 2-px rectangle.

  Returns whatever `cv2.rectangle` returns for the given frame.
  """
  top_left, bottom_right = convert_bb_to_opencv(detection, frame)
  return cv2.rectangle(frame, top_left, bottom_right, (0, 255, 0), 2)

def infer_boxes(frame, model):
    """Run `model` on a single frame and keep the confident boxes.

    The frame is wrapped into a batch of one uint8 tensor. Every entry of
    `detection_boxes` (first batch element) whose matching `detection_scores`
    entry is greater than `SCORE_TRESHOLD` is returned, in model order.
    """
    detections = model(tf.convert_to_tensor([frame], dtype=tf.uint8))
    return [
        detections['detection_boxes'][0][idx]
        for idx, score in enumerate(detections['detection_scores'][0])
        if score > SCORE_TRESHOLD
    ]

def add_boxes_to_frame(boxes, frame):
    """Draw every box in `boxes` on `frame` via `add_bbox_to_frame`.

    Returns the result of the last drawing call, or `frame` itself when
    `boxes` is empty.
    """
    annotated = frame
    for detection in boxes:
      annotated = add_bbox_to_frame(detection, annotated)
    return annotated

def _closest_ellipse_in_box(patch, x, y, w, h):
  """Fit ellipses to the contours of one box patch and pick the most central one.

  Args:
    patch: BGR pixels of the box (what `cv2.cvtColor(..., COLOR_BGR2GRAY)` accepts).
    x, y, w, h: position and size of the box in frame pixels.

  Returns:
    `(center, axes, angle)` with `center` as integer frame coordinates, for the
    accepted ellipse whose center is nearest to the box center, or `None` when
    no contour yields an acceptable ellipse.
  """
  gray_image = cv2.equalizeHist(cv2.cvtColor(patch, cv2.COLOR_BGR2GRAY))
  _, binary_image = cv2.threshold(gray_image, BINARY_IMAGE_TRESHOLD, 255, cv2.THRESH_BINARY)
  binary_image = cv2.medianBlur(binary_image, ksize=5)

  edges = cv2.Canny(binary_image, CANNY_LOW_TRESHOLD, CANNY_HIGH_TRESHOLD)
  contours, _ = cv2.findContours(edges, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)

  bbc_x, bbc_y = x + (w // 2), y + (h // 2)
  best_dist = None
  best_ellipse = None
  for contour in contours:
    # cv2.fitEllipse needs at least 5 points.
    if len(contour) < 5:
      continue
    center, axes, angle = cv2.fitEllipse(contour)

    # Degenerate fits can contain NaN/inf; int() on those would raise below.
    if not all(math.isfinite(v) for v in (*center, *axes, angle)):
      continue

    # Reject ellipses spanning less than half of the box in either axis.
    e_w, e_h = axes
    if e_w < w / 2 or e_h < h / 2:
      continue

    # Patch coordinates -> frame coordinates.
    new_center = (int(center[0] + x), int(center[1] + y))

    # The sampled outline has to stay inside the box.
    outline = cv2.ellipse2Poly(new_center, (int(e_w / 2), int(e_h / 2)), int(angle), 0, 360, 10)
    if not all(x <= px <= x + w and y <= py <= y + h for px, py in outline):
      continue

    # Keep the candidate closest to the box center. The running best value has
    # to be updated together with the ellipse, otherwise every candidate wins.
    dist = math.hypot(bbc_x - new_center[0], bbc_y - new_center[1])
    if best_dist is None or dist < best_dist:
      best_dist = dist
      best_ellipse = (new_center, axes, angle)

  return best_ellipse


def detect_wheels_ellipses(boxes, image):
  """Find one wheel ellipse per detection box and draw it on `image`.

  For every box the patch is equalized, binarized, blurred and edge-detected;
  ellipses are fitted to the resulting contours and the acceptable one whose
  center is closest to the box center is kept. Boxes without an acceptable
  ellipse (or with an empty patch) are skipped.

  Args:
    boxes: normalized boxes as accepted by `convert_bbox_to_xywh`.
    image: BGR frame; the selected ellipses (green) and the box centers
      (magenta dots) are drawn onto it.

  Returns:
    `(image, best_matches)` where `best_matches` holds one
    `(center, axes, angle)` tuple in frame coordinates per matched box.
  """
  # Patches are cut from a snapshot so that ellipses/dots drawn for earlier
  # boxes cannot leak into the edge detection of later, overlapping boxes.
  source = image.copy()

  best_matches = []
  for box in boxes:
    x, y, w, h = convert_bbox_to_xywh(box, image)
    patch = source[y:y+h, x:x+w]
    # A zero-area box would make cv2.cvtColor fail.
    if w <= 0 or h <= 0 or patch.size == 0:
      continue

    best_match_ellipse = _closest_ellipse_in_box(patch, x, y, w, h)
    if best_match_ellipse is None:
      continue

    best_matches.append(best_match_ellipse)
    image = cv2.ellipse(image, best_match_ellipse, (0, 255, 0), 2)
    image = cv2.circle(image, (x + (w // 2), y + (h // 2)), 5, (255, 0, 255), -1)
  return image, best_matches

def detect_contact_points(ellipses, frame):
  """Mark the lowest sampled point of every ellipse on `frame`.

  Each ellipse outline is sampled with `cv2.ellipse2Poly` (10-degree steps);
  the sample with the largest y coordinate is taken as the contact point and
  drawn as a filled red dot.

  Returns:
    `(frame, contact_points)` with one point per input ellipse, in order.
  """
  contact_points = []
  for center, (axis_w, axis_h), angle in ellipses:
    half_axes = (int(axis_w / 2), int(axis_h / 2))
    outline = cv2.ellipse2Poly(center, half_axes, int(angle), 0, 360, 10)
    lowest = max(outline, key=lambda pt: pt[1])
    contact_points.append(lowest)
    frame = cv2.circle(frame, (lowest[0], lowest[1]), 5, (0, 0, 255), -1)

  return frame, contact_points

In [8]:
from transformers import AutoImageProcessor, DPTForDepthEstimation
import torch
from PIL import Image

# Pixel offsets of the coordinate label relative to its point: the text is
# placed DX to the left of and DY below the point (see `add_coords_label`).
DX, DY = 100, 35
# Pre-processor and depth-estimation model loaded from the "Intel/dpt-large"
# checkpoint; both are used as globals by `generate_depth_map`.
image_processor = AutoImageProcessor.from_pretrained("Intel/dpt-large")
model = DPTForDepthEstimation.from_pretrained("Intel/dpt-large")

def generate_depth_map(frame):
  """Estimate a per-pixel depth prediction for one frame.

  Args:
    frame: image array handed unchanged to `PIL.Image.fromarray`, so a
      3-channel array is interpreted in PIL's channel order (RGB).

  Returns:
    `(prediction, depth)`:
      prediction: 2-D torch tensor with the model output resized to the frame
        size (raw model values, not normalized).
      depth: 8-bit PIL preview image of the same size, scaled so the maximum
        prediction maps to 255.
  """
  # prepare image for the model
  image = Image.fromarray(frame)
  # Keep the inputs on the same device as the model (no-op for a CPU model).
  inputs = image_processor(images=image, return_tensors="pt").to(model.device)

  with torch.no_grad():
    predicted_depth = model(**inputs).predicted_depth

  # interpolate to original size (PIL size is (width, height), torch wants (height, width))
  prediction = torch.nn.functional.interpolate(
      predicted_depth.unsqueeze(1),
      size=image.size[::-1],
      mode="bicubic",
      align_corners=False,
  )

  output = prediction.squeeze().cpu().numpy()
  peak = float(np.max(output))
  if peak > 0:
    # Bicubic interpolation can overshoot below zero; clip so the uint8 cast
    # cannot wrap around.
    scaled = np.clip(output * 255 / peak, 0, 255)
  else:
    # Nothing to normalize against (all values <= 0 or NaN): avoid dividing by it.
    scaled = np.zeros_like(output)
  depth = Image.fromarray(scaled.astype("uint8"))
  return prediction[0][0], depth

def compute_3D_coordinates(depth_map, pixels_coords):
  """Attach a depth value to every pixel coordinate.

  Args:
    depth_map: 2-D array-like indexed as `depth_map[row][col]` whose elements
      provide `.item()` (e.g. a torch tensor or a numpy array).
    pixels_coords: iterable of `(x, y)` pixel positions.

  Returns:
    List of `(x, y, z)` tuples in input order. `x` and `y` are returned as
    given; `z` is read at the nearest pixel inside the map, so a point lying
    just outside the image (e.g. on the bottom/right edge of a bounding box)
    neither raises nor wraps around through negative indexing.
  """
  height = len(depth_map)
  width = len(depth_map[0]) if height else 0

  coords_3D = []
  for x, y in pixels_coords:
    row = min(max(int(y), 0), height - 1)
    col = min(max(int(x), 0), width - 1)
    z = depth_map[row][col].item()
    coords_3D.append((x, y, z))

  return coords_3D

def add_coords_label(frame, coordinates):
  """Write an "X: .., Y: .., Z: .." label next to every 3-D point.

  Each label is drawn in red with `cv2.putText`, anchored `DX` pixels to the
  left of and `DY` pixels below the point's `(x, y)` position; values are
  rounded to two decimals.

  Returns the frame after the last `cv2.putText` call (or the input frame
  when `coordinates` is empty).
  """
  red = (0, 0, 255)
  stroke = 2
  scale = 0.5
  font = cv2.FONT_HERSHEY_SIMPLEX

  for x, y, z in coordinates:
    anchor = (x - DX, y + DY)
    label_text = f'X: {round(x,2)}, Y: {round(y,2)}, Z: {round(z,2)}'
    frame = cv2.putText(frame, label_text, anchor, font, scale, red, stroke)
  return frame

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


preprocessor_config.json:   0%|          | 0.00/285 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/942 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.37G [00:00<?, ?B/s]

Some weights of DPTForDepthEstimation were not initialized from the model checkpoint at Intel/dpt-large and are newly initialized: ['neck.fusion_stage.layers.0.residual_layer1.convolution1.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution1.weight', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.bias', 'neck.fusion_stage.layers.0.residual_layer1.convolution2.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
from tqdm import tqdm
import pandas as pd
import seaborn as sns

# Input video, annotated output video and CSV of (x, y, depth) contact points.
video_path = '/content/drive/My Drive/IACV/Videos/video_curve_short.mp4'
output_video_path = '/content/drive/My Drive/IACV/Processed_videos/F_RCNN_Deep_video_curve_short.mp4'
output_csv_path = '/content/drive/My Drive/IACV/Point_coluds/F_RCNN_Deep_video_curve_short.csv'

cap = cv2.VideoCapture(video_path)
if not cap.isOpened():
    raise IOError(f'Could not open video: {video_path}')

num_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Write the output at the source frame rate; fall back to 30 when the
# container does not report a usable value.
fps = cap.get(cv2.CAP_PROP_FPS)
if not fps > 0:
    fps = 30

out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
progress_bar = tqdm(total = num_frames, desc='Video processing progress', position=0, leave=True)

pixels_coords = []
world_coords = []
try:
    while True:
        ret, frame = cap.read()
        if not ret or frame is None:
            break

        # NOTE(review): OpenCV decodes frames as BGR and the detector is fed that
        # order as-is — confirm it matches the channel order used for training.
        boxes = infer_boxes(frame, detection_model)

        # The drawing helpers modify their input in place, so annotate a copy
        # and keep `frame` pristine for the depth model.
        processed_frame = add_boxes_to_frame(boxes, frame.copy())
        processed_frame, ellipses = detect_wheels_ellipses(boxes, processed_frame)
        processed_frame, contact_points = detect_contact_points(ellipses, processed_frame)
        pixels_coords += contact_points

        # generate_depth_map goes through PIL, which expects RGB channel order.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        prediction, _ = generate_depth_map(rgb_frame)
        contact_points_3D = compute_3D_coordinates(prediction, contact_points)

        world_coords += contact_points_3D
        processed_frame = add_coords_label(processed_frame, contact_points_3D)

        # Write the annotated frame explicitly rather than relying on aliasing.
        out.write(processed_frame)
        progress_bar.update(1)
finally:
    # Release the capture/writer even if a frame fails or the run is
    # interrupted, so the partially written video is finalized.
    cap.release()
    out.release()
    progress_bar.close()

pd.DataFrame(world_coords).to_csv(output_csv_path, index=False)

Video processing progress:  59%|█████▉    | 165/280 [18:40<12:36,  6.57s/it]