# Waste identification with instance segmentation in TensorFlow

This Colab notebook demonstrates an end-to-end pipeline for object detection, feature extraction, object tracking, and data aggregation using the Mask R-CNN model from TensorFlow Model Garden.
Key Steps in the Notebook:



*   Object Detection and segmentation – Detect objects in a set of images using Mask R-CNN.
*   Feature Extraction & Tracking – Extract object features and track them across multiple frames to eliminate duplicate counts.
*   Color Detection – Identify the color of each detected object.
*   Postprocessing – Aggregate tracking results and apply filtering to reduce false positives and false negatives.
*   Save detection and tracking results.
*   Push the final object count to a BigQuery table, which can be connected to a Looker dashboard for visualization in Google Cloud Platform (GCP).










To run this notebook, you need to provide valid paths to the saved model and to the input images. The labels on which the models were trained are in the `waste_identification_ml` directory of the TensorFlow Model Garden repository.

This notebook outputs 3 folders and 1 CSV file:


*   **prediction_folder** : Will contain prediction results with bbox and masks.
*   **tracking** : Will contain tracking visualization.
*   **cropped_objects** : Will contain category level detected objects.
*   **count.csv** : Will contain the individual counts of each category.






In [None]:
#@title Imports and Setup

!pip install -q trackpy

import sys
import tensorflow as tf
import csv
from typing import Any, TypedDict, Callable
import cv2
import logging
import numpy as np
import matplotlib.pyplot as plt
import glob
import natsort
import tqdm
import os
from PIL import Image
from scipy import ndimage
import pandas as pd
import skimage
import datetime
import trackpy as tp
import shutil

logging.disable(logging.WARNING)

%matplotlib inline

In [None]:
# Connect to Google Drive if your data is stored there.
from google.colab import drive
drive.mount('/content/gdrive')

# Expose the Drive root under the short path /mydrive.
# The link is created with os.symlink (os is imported in the setup cell)
# instead of a `!ln -s` shell escape: a failing shell escape does not raise a
# Python exception, so the except branch below could never run and
# 'Successful' was printed even when the link was not created.
try:
  # Re-running the cell must not fail just because the link already exists.
  if not os.path.lexists('/mydrive'):
    os.symlink('/content/gdrive/My Drive/', '/mydrive')
  print('Successful')
except OSError as e:
  print(e)
  print('Not successful')

In [None]:
# # Connect to a GCP bucket if your data is stored there and copy it locally.
# !gcloud init

To visualize the images with the proper detected boxes and segmentation masks, we will use the TensorFlow Object Detection API. To install it we will clone the repo.



In [None]:
# Clone the TensorFlow Models repository (shallow clone: latest commit only).
# stderr is discarded, so a failed or repeated clone prints no error here.
!git clone --depth 1 https://github.com/tensorflow/models 2>/dev/null

In [None]:
# Make the Object Detection API from the cloned repository importable.
sys.path.append('models/research/')
from object_detection.utils import ops as utils_ops
from object_detection.utils import visualization_utils as viz_utils

# Make the waste-identification inference helpers importable; the color
# extractor module lives in this directory of the cloned repository.
sys.path.append('models/official/projects/waste_identification_ml/model_inference/')
import color_and_property_extractor

In [None]:
#@title Utilities

# Region properties requested from skimage.measure.regionprops_table for every
# mask in extract_properties(). 'centroid' and 'bbox' are the columns that
# extract_properties() renames to x / y and bbox_0..bbox_3.
_PROPERTIES = (
    'area',
    'bbox',
    'convex_area',
    'bbox_area',
    'major_axis_length',
    'minor_axis_length',
    'eccentricity',
    'centroid',
    'label',
    'mean_intensity',
    'max_intensity',
    'min_intensity',
    'perimeter'
)


class ItemDict(TypedDict):
  """One category entry as built by _categories_dictionary()."""

  # Numeric category id (assigned starting at 1 by _categories_dictionary).
  id: int
  # Human-readable category name taken from the labels file.
  name: str
  # Parent grouping; _categories_dictionary always sets this to 'objects'.
  supercategory: str


def load_model(model_path: str) -> Callable:
  """Load a TensorFlow SavedModel and return its default serving signature.

  Args:
    model_path: Path to the TensorFlow SavedModel directory.

  Returns:
    The callable stored under the 'serving_default' signature key.

  Raises:
    OSError, ValueError, KeyError: Re-raised unchanged after the error has
      been printed.
  """
  try:
    print('loading model...')
    saved_model = tf.saved_model.load(model_path)
    print('model loaded!')
    return saved_model.signatures['serving_default']
  except (OSError, ValueError, KeyError) as error:
    # Report the problem, then let the caller see the original exception.
    print(f"Error loading model: {error}")
    raise


def perform_detection(model: Callable, image: np.ndarray) -> dict[str, np.ndarray]:
    """Run the detection model on one image and return NumPy outputs.

    Args:
        model: Callable returning a mapping of output name to a value that
          exposes a `.numpy()` method.
        image: The input passed to `model` unchanged.

    Returns:
        A dict with the same keys as the model output, where every value has
        been converted with `.numpy()`.
    """
    raw_outputs = model(image)
    converted = {}
    for output_name, tensor in raw_outputs.items():
        converted[output_name] = tensor.numpy()
    return converted


def _read_csv_to_list(file_path: str) -> list[str]:
  """Reads the first column of a CSV file into a list.

  Every non-empty row contributes its first field, in file order. The first
  row is NOT skipped, so list positions match row positions in the file.
  Blank lines are ignored instead of raising IndexError.

  Args:
      file_path: The path to the CSV file.

  Returns:
      The first field of each non-empty row, as a list of strings.
  """
  # newline='' lets the csv module do its own line-ending handling, as the
  # csv documentation recommends for files passed to csv.reader.
  with open(file_path, 'r', newline='') as csvfile:
    # csv.reader yields [] for a blank line; `if row` drops those.
    return [row[0] for row in csv.reader(csvfile) if row]


def _categories_dictionary(objects: list[str]) -> dict[int, ItemDict]:
  """Builds a category index keyed by 1-based category id.

  Each entry is a dictionary with the following keys:
    - id: The 1-based position of the name in `objects`.
    - name: The name of the object.
    - supercategory: Always the string 'objects'.

  Args:
    objects: A list of strings, where each string is the name of an
      object.

  Returns:
    A single dictionary mapping each id to its entry, as described above.
  """
  return {
      category_id: {
          'id': category_id,
          'name': label,
          'supercategory': 'objects',
      }
      for category_id, label in enumerate(objects, start=1)
  }


def load_labels(labels_path: str) -> tuple[list[str], dict[int, ItemDict]]:
    """Load label names from a CSV file and build the matching category index.

    Args:
        labels_path: Path to the CSV file containing the label names.

    Returns:
        A `(labels, category_index)` tuple:
        - labels: The label names in file order, as a list of strings.
        - category_index: The dictionary produced by `_categories_dictionary`
          for those names.
    """
    label_names = _read_csv_to_list(labels_path)
    return label_names, _categories_dictionary(label_names)


def preprocess_image(path: str, height: int, width: int) -> tuple[np.ndarray, np.ndarray]:
    """Read an image, convert it to RGB, and build the resized model input.

    Args:
        path: The file path to the image.
        height: Desired height of the resized image.
        width: Desired width of the resized image.

    Returns:
        original_image: The RGB image at its original size.
        resized_image: The RGB image resized to (height, width) with a leading
          batch axis of length 1.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from `path`.
    """
    bgr_image = cv2.imread(path)
    if bgr_image is None:
        raise FileNotFoundError(f"Image not found at path: {path}")

    # cv2.imread returns BGR; everything downstream works on RGB.
    original_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    model_input = cv2.resize(
        original_image, (width, height), interpolation=cv2.INTER_AREA
    )
    # Prepend the batch axis.
    return original_image, model_input[np.newaxis, ...]


def filter_detection(results: dict[str, np.ndarray], valid_indices: np.ndarray) -> dict[str, np.ndarray]:
  """Keep only the selected detections in every known result array.

  Args:
    results: The detection results from the model.
    valid_indices: Either a boolean mask or a sequence of integer indices
      selecting the detections to keep.

  Returns:
    A new dict holding the filtered version of each known key present in
    `results`, plus 'num_detections' set to the number of kept detections.
    Keys that are not in the known list are dropped.
  """
  # A boolean mask keeps as many entries as it has True values; an index
  # sequence keeps one entry per index.
  if np.array(valid_indices).dtype == bool:
    kept_count = int(np.sum(valid_indices))
  else:
    kept_count = len(valid_indices)

  everything = slice(None)
  batched_4d = (everything, valid_indices, everything, everything)
  batched_3d = (everything, valid_indices, everything)
  batched_2d = (everything, valid_indices)
  unbatched_3d = (valid_indices, everything, everything)

  # Index expression per key; the detection axis is axis 1 for batched arrays
  # and axis 0 for the already un-batched mask stacks.
  selectors = {
      'detection_masks': batched_4d,
      'detection_masks_resized': unbatched_3d,
      'detection_masks_reframed': unbatched_3d,
      'detection_classes': batched_2d,
      'detection_boxes': batched_3d,
      'normalized_boxes': batched_3d,
      'detection_scores': batched_2d,
      'detection_classes_names': batched_2d,
  }

  filtered_output = {
      key: results[key][selector]
      for key, selector in selectors.items()
      if key in results
  }
  filtered_output['num_detections'] = np.array([kept_count])
  return filtered_output



def reframe_masks(results: dict[str, np.ndarray], boxes: str, height: int, width: int) -> np.ndarray:
  """Reframe box-relative masks to full-image binary masks.

  Args:
    results: The detection results from the model.
    boxes: The key in `results` under which the boxes to use are stored.
    height: The height of the target image.
    width: The width of the target image.

  Returns:
    The reframed masks as a uint8 NumPy array, thresholded at 0.5.
  """
  # Only the first batch element is used.
  box_masks = results['detection_masks'][0]
  box_coordinates = results[boxes][0]
  image_masks = utils_ops.reframe_box_masks_to_image_masks(
      box_masks, box_coordinates, height, width
  )
  binary_masks = tf.cast(image_masks > 0.5, np.uint8)
  return binary_masks.numpy()


def _calculate_area(mask: np.ndarray) -> int:
  """Return the sum of all mask values.

  Args:
    mask: The mask to measure.

  Returns:
    The sum over every element of `mask`; for a 0/1 mask this is its pixel
    count.
  """
  pixel_total = np.sum(mask)
  return pixel_total


def _calculate_iou(mask1: np.ndarray, mask2: np.ndarray) -> float:
  """Compute the intersection over union (IoU) of two masks.

  Args:
    mask1: The first mask.
    mask2: The second mask.

  Returns:
    The ratio of overlapping pixels to pixels set in either mask, or 0 when
    neither mask has any pixel set.
  """
  union = np.logical_or(mask1, mask2).sum()
  if union == 0:
    return 0
  overlap = np.logical_and(mask1, mask2).sum()
  return overlap / union


def _is_contained(mask1: np.ndarray, mask2: np.ndarray) -> bool:
  """Tell whether mask1 lies entirely inside mask2.

  Args:
    mask1: The candidate inner mask.
    mask2: The candidate outer mask.

  Returns:
    True if the element-wise AND of both masks equals mask1, False otherwise.
  """
  overlap = np.logical_and(mask1, mask2)
  return np.array_equal(overlap, mask1)


def filter_masks(
    masks: np.ndarray,
    iou_threshold: float = 0.8,
    area_threshold=None,
    min_area: float = 4000,
) -> list:
  """Filter out overlapping, too-small, and too-large masks.

  Masks are visited from largest to smallest area. A mask is dropped when its
  area is outside the allowed range, or when it duplicates a larger mask:
  either their IoU exceeds `iou_threshold` or the mask lies entirely inside
  the larger one. Larger masks that were themselves dropped for their area
  still count as "larger masks" for this duplicate check.

  Args:
    masks: The masks to filter, stacked along the first axis.
    iou_threshold: The threshold for the intersection over union (IoU) between
      two masks.
    area_threshold: Optional upper bound on mask area; larger masks are
      dropped. None disables the upper bound.
    min_area: Lower bound on mask area; smaller masks are dropped. Defaults to
      4000, the value that was previously hard-coded.

  Returns:
    A list with the indices (into `masks`) of the masks to keep, ordered from
    largest to smallest area.
  """
  # Calculate the area for each mask.
  areas = np.array([_calculate_area(mask) for mask in masks])

  # Visit the masks from largest to smallest.
  order = np.argsort(areas)[::-1]
  sorted_masks = masks[order]
  sorted_areas = areas[order]

  unique_indices = []
  for position, mask in enumerate(sorted_masks):
    area = sorted_areas[position]
    too_large = area_threshold is not None and area > area_threshold
    if too_large or area < min_area:
      continue

    # Compare only against masks that are at least as large.
    is_duplicate = any(
        _calculate_iou(mask, larger) > iou_threshold
        or _is_contained(mask, larger)
        for larger in sorted_masks[:position]
    )
    if not is_duplicate:
      unique_indices.append(order[position])

  return unique_indices


def adjust_image_size(height: int, width: int, min_size: int) -> tuple[int, int]:
  """Shrink an image size so that its smaller side equals `min_size`.

  The aspect ratio is preserved (up to integer truncation of the longer
  side). Sizes that already have a side shorter than `min_size` are returned
  unchanged; the image is never enlarged.

  Args:
    height: The height of the image.
    width: The width of the image.
    min_size: Minimum size of the image dimension needed.

  Returns:
    The adjusted height and width of the image.
  """
  if height < min_size or width < min_size:
    return height, width

  # Exact integer arithmetic: the shorter side becomes exactly min_size and
  # the longer side is floor(longer * min_size / shorter). A float round trip
  # such as int(height / (height / min_size)) can land just below min_size
  # and truncate to min_size - 1.
  if height <= width:
    return min_size, width * min_size // height
  return height * min_size // width, min_size


def display_bbox_masks_labels(
    result: dict[Any, np.ndarray],
    image: np.ndarray,
    category_index: dict[int, dict[str, str]],
    threshold: float,
) -> np.ndarray:
  """Returns a copy of the image annotated with boxes, labels, and masks.

  The input image is copied, its first and third channels are swapped
  (cv2.COLOR_BGR2RGB), and the detection boxes, classes, and scores are drawn
  on the copy. Segmentation masks are drawn as well when `result` contains
  'detection_masks_reframed'. Nothing is written to disk and the input image
  is not modified.

  Args:
    result: The output from the Mask R-CNN model. Must contain
      'normalized_boxes', 'detection_classes', and 'detection_scores'; may
      contain 'detection_masks_reframed'.
    image: The original image as a numpy array.
    category_index: A dictionary mapping class IDs to class labels.
    threshold: Value between 0 and 1 to filter out the prediction results.

  Returns:
    The annotated copy of the image.
  """
  image_new = image.copy()
  # NOTE(review): presumably meant for images still in OpenCV's BGR order;
  # confirm against the caller, since preprocess_image already returns RGB.
  image_new = cv2.cvtColor(image_new, cv2.COLOR_BGR2RGB)
  viz_utils.visualize_boxes_and_labels_on_image_array(
      image_new,
      result['normalized_boxes'][0],
      result['detection_classes'][0].astype(int),
      result['detection_scores'][0],
      category_index=category_index,
      use_normalized_coordinates=True,
      max_boxes_to_draw=100,
      min_score_thresh=threshold,
      agnostic_mode=False,
      instance_masks=result.get('detection_masks_reframed'),
      line_thickness=4,
  )
  return image_new


def save_bbox_masks_labels(
    result: dict[Any, np.ndarray],
    image: np.ndarray,
    file_name: str,
    folder: str,
    category_index: dict[int, dict[str, str]],
    threshold: float,
) -> None:
  """Writes a side-by-side image: original on the left, annotated on the right.

  A copy of the image is annotated with detection boxes, classes, and scores,
  plus segmentation masks when `result` contains 'detection_masks_reframed'.
  The original and the annotated copy are joined horizontally and saved.

  Args:
    result: The output from the Mask R-CNN model. Must contain
      'normalized_boxes', 'detection_classes', and 'detection_scores'; may
      contain 'detection_masks_reframed'.
    image: The original image as a numpy array.
    file_name: The filename for saving the output image.
    folder: The folder path where the output image will be saved.
    category_index: A dictionary mapping class IDs to class labels.
    threshold: Value between 0 and 1 to filter out the prediction results.
  """
  annotated = image.copy()
  class_ids = result['detection_classes'][0].astype(int)
  reframed_masks = result.get('detection_masks_reframed')

  # Draws in place on `annotated`.
  viz_utils.visualize_boxes_and_labels_on_image_array(
      annotated,
      result['normalized_boxes'][0],
      class_ids,
      result['detection_scores'][0],
      category_index=category_index,
      use_normalized_coordinates=True,
      max_boxes_to_draw=100,
      min_score_thresh=threshold,
      agnostic_mode=False,
      instance_masks=reframed_masks,
      line_thickness=4,
  )

  side_by_side = np.concatenate([image, annotated], axis=1)
  output_path = os.path.join(folder, file_name)
  Image.fromarray(side_by_side).save(output_path)


def dilated_largest_component(mask: np.ndarray) -> np.ndarray:
    """Extracts the largest connected component and fills its holes.

    Components are found with 8-connectivity. Despite the name, no dilation
    is applied.

    Args:
        mask: Input binary mask (2D numpy array).

    Returns:
        Integer 0/1 mask of the largest connected component with holes
        filled. If the input has no foreground pixel, an all-zero mask of the
        same shape is returned.
    """
    binary = mask.astype(np.uint8) * 255
    num_labels, labels, stats, _ = cv2.connectedComponentsWithStats(
        binary, connectivity=8
    )
    # Label 0 is the background. With no foreground there is nothing to pick
    # and np.argmax on the empty stats slice would raise ValueError.
    if num_labels < 2:
        return np.zeros(binary.shape, dtype=int)

    # Skip the background row when looking for the largest area.
    largest_label = 1 + np.argmax(stats[1:, cv2.CC_STAT_AREA])
    largest_component_mask = labels == largest_label
    return ndimage.binary_fill_holes(largest_component_mask).astype(int)


def extract_properties(image, results, masks):
  """Measure region properties for every mask and return them as one table.

  Args:
    image: Corresponding image of the masks, used as the intensity image.
    results: The detection results from the model.
    masks: The key in `results` under which the masks to measure are stored.

  Returns:
    A DataFrame with the measurements of all masks concatenated in mask
    order. The centroid columns are renamed to 'y' and 'x', and the bbox
    columns to 'bbox_0' .. 'bbox_3'.
  """
  per_mask_tables = [
      pd.DataFrame(
          skimage.measure.regionprops_table(
              # Normalize the mask to a 0/1 label image.
              np.where(mask, 1, 0),
              intensity_image=image,
              properties=_PROPERTIES,
          )
      )
      for mask in results[masks]
  ]
  features = pd.concat(per_mask_tables, ignore_index=True)

  column_names = {'centroid-0': 'y', 'centroid-1': 'x'}
  column_names.update({f'bbox-{i}': f'bbox_{i}' for i in range(4)})
  return features.rename(columns=column_names)


def get_image_creation_time(image_path):
  """Retrieves the creation time of an image, trying multiple methods.

  The EXIF "DateTimeOriginal" value is used when the file has one that can be
  read and parsed. Otherwise the file modification time is used.

  Args:
    image_path: The path to the image file.

  Returns:
    A string representing the creation time in the format "%Y-%m-%d %H:%M:%S".
    If the file does not exist, the string "Image not found" is returned; for
    any other failure the string "Error: <message>" is returned.
  """
  try:
    # 1. Try EXIF data (if available).
    exif_time = _read_exif_datetime_original(image_path)
    if exif_time is not None:
      return exif_time.strftime("%Y-%m-%d %H:%M:%S")

    # 2. Try file modification time (less accurate, but better than nothing).
    file_modified_time = os.path.getmtime(image_path)
    modified = datetime.datetime.fromtimestamp(file_modified_time)
    return modified.strftime("%Y-%m-%d %H:%M:%S")

  except FileNotFoundError:
    return "Image not found"
  except Exception as e:
    return f"Error: {e}"


def _read_exif_datetime_original(image_path):
  """Returns the EXIF DateTimeOriginal as a datetime, or None if unavailable."""
  datetime_tag_id = 36867  # Tag ID for "DateTimeOriginal"
  try:
    # The context manager closes the file handle that Image.open keeps open.
    with Image.open(image_path) as image:
      # Not every image plugin provides _getexif.
      read_exif = getattr(image, "_getexif", None)
      exif_data = read_exif() if callable(read_exif) else None
    datetime_str = exif_data.get(datetime_tag_id) if exif_data else None
    if not datetime_str:
      return None
    return datetime.datetime.strptime(datetime_str, "%Y:%m:%d %H:%M:%S")
  except FileNotFoundError:
    # A missing file is reported by the caller, not hidden behind a fallback.
    raise
  except (AttributeError, KeyError, OSError, SyntaxError, TypeError, ValueError):
    # EXIF is optional: unreadable or malformed metadata means "no EXIF
    # time", so the caller falls back to the modification time.
    return None


def apply_tracking(df,
        search_range_x,
        search_range_y,
        memory):
  """Link detections across frames with trackpy.

  Args:
    df: The per-detection feature dataframe to link.
    search_range_x: The search range of pixels for tracking along x axis.
    search_range_y: The search range of pixels for tracking along y axis.
    memory: The `memory` value forwarded to trackpy's linker.

  Returns:
    The linked dataframe: the tracking columns (without 'frame'), whatever
    trackpy adds while linking, and the descriptive columns copied from `df`,
    with a fresh 0..n-1 index.
  """
  # Columns handed to the linker.
  # Additional features that can be used are 'area', 'label', 'color',
  # 'eccentricity', 'convex_area', 'mean_intensity-0', 'mean_intensity-1',
  # 'mean_intensity-2', 'max_intensity-0', 'max_intensity-1', 'max_intensity-2',
  # 'min_intensity-0',  'min_intensity-1', 'min_intensity-2'.
  linking_columns = ['x', 'y', 'frame']
  linking_columns += [f'bbox_{i}' for i in range(4)]
  linking_columns += ['major_axis_length', 'minor_axis_length', 'perimeter']

  # Descriptive columns carried over unchanged from the input.
  carried_columns = [
      'source_name',
      'image_name',
      'detection_scores',
      'detection_classes_names',
      'detection_classes',
      'color',
      'creation_time'
  ]

  # The search range is given per axis, in (y, x) order.
  linked = tp.link_df(
      df[linking_columns],
      search_range=(search_range_y, search_range_x),
      memory=memory,
  )
  linked[carried_columns] = df[carried_columns]

  return linked.drop(columns=['frame']).reset_index(drop=True)


def process_tracking_result(df):
    """Collapse the tracking result to one row per tracked object.

    Args:
      df: Tracking dataframe with one row per detection and a 'particle'
        column identifying the track.

    Returns:
      A dataframe with one row per particle: the first source name, image
      name, creation time, and bounding box of the track, its maximum
      detection score, and the class chosen by `select_class_with_scores`.
    """
    # Column order of this mapping is the column order of the result.
    aggregation = {
        'source_name': 'first',
        'image_name': 'first',
        'detection_scores': 'max',
        'creation_time': 'first',
    }
    aggregation.update({f'bbox_{i}': 'first' for i in range(4)})
    per_particle = df.groupby('particle').agg(aggregation).reset_index()

    # One class decision per particle; include_groups=False keeps the
    # grouping column out of the frames passed to the selector.
    chosen_classes = df.groupby('particle', as_index=False).apply(
        select_class_with_scores,
        include_groups=False
    )

    per_particle['detection_classes'] = chosen_classes['class_id']
    per_particle['detection_classes_names'] = chosen_classes['class_name']
    return per_particle

def select_class_with_scores(group):
    """Pick one class for a group of detections.

    The most frequent class wins. When several classes share the highest
    count, the one with the highest single detection score among them wins.

    Args:
      group: Dataframe with 'detection_classes', 'detection_scores', and
        'detection_classes_names' columns.

    Returns:
      A Series with 'class_id' and 'class_name' for the chosen class.
    """
    classes = group['detection_classes']
    counts = classes.value_counts()

    # value_counts sorts by frequency, so a tie at the top shows up as the
    # first two counts being equal.
    top_is_tied = len(counts) != 1 and counts.iloc[0] <= counts.iloc[1]
    if not top_is_tied:
        winner = classes.mode().iloc[0]
    else:
        best_score = {}
        for candidate in counts[counts == counts.iloc[0]].index:
            best_score[candidate] = group.loc[
                classes == candidate, 'detection_scores'
            ].max()
        winner = max(best_score, key=best_score.get)

    # Name taken from the first row that carries the winning class.
    winner_name = group.loc[classes == winner, 'detection_classes_names'].iloc[0]
    return pd.Series({'class_id': winner, 'class_name': winner_name})


def resize_bbox(y1, x1, y2, x2, old_height, old_width, new_height, new_width):
    """Rescale bounding box coordinates from one image size to another.

    Args:
        y1, x1, y2, x2 (int/float): Original bounding box coordinates.
        old_height, old_width (int): Image dimensions the box refers to.
        new_height, new_width (int): Image dimensions to rescale to.

    Returns:
        (new_y1, new_x1, new_y2, new_x2): Rescaled coordinates, truncated to
        integers.
    """
    scale_y = new_height / old_height
    scale_x = new_width / old_width

    # y coordinates scale with the height ratio, x with the width ratio.
    scaled = (y1 * scale_y, x1 * scale_x, y2 * scale_y, x2 * scale_x)
    return tuple(int(value) for value in scaled)

## Import and load pre-trained models.

In [None]:
%%bash
# Download the model archive quietly (-q). The URL is split over two lines
# with a trailing backslash, so the second line must stay unindented.
wget https://storage.googleapis.com/tf_model_garden/vision/\
waste_identification_ml/Jan2025_ver2_merged_1024_1024.zip -q

# Unpack the archive into the working directory, discarding all output.
unzip Jan2025_ver2_merged_1024_1024.zip > /dev/null 2>&1

In [None]:
# Load the unzipped SavedModel; detection_fn is its 'serving_default' signature.
detection_fn = load_model('Jan2025_ver2_merged_1024_1024/')

## Load label map data

Label maps map index numbers to category names, so that when our convolutional network predicts `5`, we know which category this corresponds to (for example, `airplane` in a general-purpose label map). Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

We will load our labels from the same repository that we loaded the TF Object Detection API from.

In [None]:
# Labels file inside the cloned TensorFlow Models repository.
LABELS_PATH = (
    'models/official/projects/waste_identification_ml/pre_processing/'
    'config/data/45_labels.csv'
)

# labels: list of names in file order; category_index: dict keyed by 1-based id.
labels, category_index = load_labels(LABELS_PATH)

## Load all images

In [None]:
# Folder with the input frames. The "*" pattern matches every entry in the
# folder, so it should contain only images.
images_dir = "/mydrive/circularnet/TestData/input-test-05022025"
images = glob.glob(os.path.join(images_dir, "*"))

# Make sure that the files are sorted.
# Natural sorting keeps numbered names in numeric order; the frame numbers used
# for tracking follow this order.
images = natsort.natsorted(images)
len(images)

In [None]:
# The model is trained on 1024 x 1024 image dimensions.
HEIGHT = 1024
WIDTH = 1024

# Object tracking parameters.
# The distance an object can move along the x axis from one frame to another.
# The parameter was chosen for an image size of 300 x 300.
SEARCH_RANGE_X=150
# The distance an object can move along the y axis from one frame to another.
# The parameter was chosen for an image size of 300 x 300.
SEARCH_RANGE_Y=20
# Passed to trackpy as `memory`: the maximum number of frames an object may go
# undetected and still be linked to the same track.
MEMORY=1

# Dimensions for tracking images.
HEIGHT_TRACKING = 300
WIDTH_TRACKING = 300

# Prediction confidence score.
PREDICTION_THRESHOLD = 0.50
# Optional upper bound on mask area passed to filter_masks; None disables it.
area_threshold = None

# Create a folder for saving prediction results.
os.makedirs('prediction_folder', exist_ok=True)
prediction_folder = os.path.join(os.getcwd(), 'prediction_folder')

# Create a folder to troubleshoot tracking results.
os.makedirs('tracking', exist_ok=True)

# Create a folder to save detected objects from Mask R-CNN
# according to categories.
output_dir = "cropped_objects"
os.makedirs(output_dir, exist_ok=True)

## Perform inference

In [None]:
# Resized RGB frames keyed by file name; used later to draw tracking results.
tracking_images = {}
# One feature table per frame that still has detections after filtering.
features_set = []


# enumerate() has no length, so tqdm needs total= to show real progress.
for frame, image_path in tqdm.tqdm(
    enumerate(images, start=1), total=len(images)
):
  # Preprocess an image.
  original_image, resized_image = preprocess_image(image_path, HEIGHT, WIDTH)
  input_tensor = tf.convert_to_tensor(resized_image, dtype=tf.uint8)

  # Running inference with the model.
  result = perform_detection(detection_fn, input_tensor)

  # Step 1: drop detections below the confidence threshold.
  if result["num_detections"][0]:
    scores = result["detection_scores"][0]
    filtered_indices = scores > PREDICTION_THRESHOLD
    result = filter_detection(result, filtered_indices)

  # Step 2: reframe masks to the plot size and drop overlapping masks.
  if result["num_detections"][0]:
    # Normalize the bounding boxes according to the resized image size.
    result["normalized_boxes"] = result["detection_boxes"].copy()
    result["normalized_boxes"][:, :, [0, 2]] /= HEIGHT
    result["normalized_boxes"][:, :, [1, 3]] /= WIDTH

    result['detection_boxes'] = result["detection_boxes"].round().astype(int)

    # Adjust the image size to ensure both dimensions are at least 1024
    # for saving images with bbx and masks.
    height_plot, width_plot = adjust_image_size(
        original_image.shape[0], original_image.shape[1], 1024
    )
    image_plot = cv2.resize(
        original_image,
        (width_plot, height_plot),
        interpolation=cv2.INTER_AREA,
    )
    # Reframe the masks to the new size.
    result["detection_masks_reframed"] = reframe_masks(
        result, "normalized_boxes", height_plot, width_plot
    )

    # Filter the prediction results based on the area threshold and
    # remove the overlapping masks.
    unique_indices = filter_masks(
        result["detection_masks_reframed"],
        iou_threshold=0.08,
        area_threshold=area_threshold,
    )
    result = filter_detection(result, unique_indices)

  # Step 3: save visualizations and collect tracking features. image_plot is
  # always defined here, because detections can only survive step 2 after it
  # has run.
  if result["num_detections"][0]:
    # Class ids are 1-based while `labels` is 0-based. int() keeps the lookup
    # working if the model returns the ids as floats.
    result['detection_classes_names'] = np.array(
        [[str(labels[int(i) - 1]) for i in result['detection_classes'][0]]]
    )

    # Save the prediction results as an image file with bbx and masks.
    save_bbox_masks_labels(
        result,
        image_plot,
        os.path.basename(image_path),
        prediction_folder,
        category_index,
        PREDICTION_THRESHOLD,
    )

    # Create object tracking data.
    tracking_image = cv2.resize(
        original_image,
        (WIDTH_TRACKING, HEIGHT_TRACKING),
        interpolation=cv2.INTER_AREA,
    )
    tracking_images[os.path.basename(image_path)] = tracking_image

    # Reducing mask sizes in order to keep the memory required for object
    # tracking under a threshold.
    result["detection_masks_tracking"] = np.array([
        cv2.resize(
            i, (WIDTH_TRACKING, HEIGHT_TRACKING), interpolation=cv2.INTER_NEAREST
        )
        for i in result["detection_masks_reframed"]
    ])

    # In case of connected masks, keep the biggest mask and fill the holes
    # in case of incomplete detections by Mask RCNN.
    result["detection_masks_tracking"] = np.array([
        dilated_largest_component(i) for i in result["detection_masks_tracking"]
    ])

    # Crop objects from an image using masks for color detection.
    cropped_objects = [
        np.where(np.expand_dims(i, -1), image_plot, 0)
        for i in result['detection_masks_reframed']
    ]

    # Perform color detection using clustering approach.
    dominant_colors = [
        color_and_property_extractor.find_dominant_color(cropped)
        for cropped in cropped_objects
    ]
    generic_color_names = color_and_property_extractor.get_generic_color_name(
        dominant_colors
    )

    # Extract features.
    features = extract_properties(
        tracking_image, result, "detection_masks_tracking"
    )
    features["source_name"] = os.path.basename(os.path.dirname(image_path))
    features["image_name"] = os.path.basename(image_path)
    features["creation_time"] = get_image_creation_time(image_path)
    features["frame"] = frame
    features["detection_scores"] = result["detection_scores"][0]
    features["detection_classes"] = result["detection_classes"][0]
    features["detection_classes_names"] = result["detection_classes_names"][0]
    features["color"] = generic_color_names
    features_set.append(features)


# Link detections across frames and count unique objects per category.
if features_set:
  features_df = pd.concat(features_set, ignore_index=True)
  tracking_features = apply_tracking(
      features_df,
      search_range_x=SEARCH_RANGE_X,
      search_range_y=SEARCH_RANGE_Y,
      memory=MEMORY
  )
  agg_features = process_tracking_result(tracking_features)
else:
  # Nothing was detected in any frame. Define empty results anyway so the
  # following cells (tracking visualization, crop export, Drive copy) run
  # without NameError, and so count.csv is rewritten instead of being missing
  # or left over from an earlier run.
  tracking_features = pd.DataFrame(columns=["image_name"])
  agg_features = pd.DataFrame(columns=["detection_classes_names"])

counts = agg_features.groupby("detection_classes_names").size()
counts.to_frame().to_csv(os.path.join(os.getcwd(), "count.csv"))
print(counts)

## Visualize Object Tracking  

In [None]:
# Drawing settings for the tracking overlay.
CIRCLE_RADIUS = 7
font = cv2.FONT_HERSHEY_SIMPLEX
fontScale = 1
color = (255, 0, 0)
groups = tracking_features.groupby('image_name')

# For every frame, mark each tracked object with a dot at its centroid and
# its particle id, then save the frame into the 'tracking' folder.
for name, group in groups:
  img = tracking_images[name].copy()
  for row in group.itertuples(index=False):
    center = (int(row.x), int(row.y))
    cv2.circle(img, center, CIRCLE_RADIUS, (255, 133, 233), -1)
    cv2.putText(
        img,
        str(int(row.particle)),
        center,
        font,
        fontScale,
        color,
        2,
        cv2.LINE_AA,
    )

  # The stored frames are RGB (preprocess_image converts BGR to RGB) but
  # cv2.imwrite expects BGR; convert back so red and blue are not swapped in
  # the saved file.
  cv2.imwrite(
      os.path.join('tracking', name), cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
  )

## Visualize Predictions by Categories

In [None]:
# Save one crop per tracked object, grouped into one folder per category.
if not agg_features.empty:
  for group_name, class_rows in tqdm.tqdm(
      agg_features.groupby("detection_classes_names")
  ):
    category_dir = os.path.join(output_dir, str(group_name))
    os.makedirs(category_dir, exist_ok=True)

    for row in class_rows.itertuples(index=False):
      # Keep the image in OpenCV's BGR order: it is written back with
      # cv2.imwrite, which expects BGR. Converting to RGB first would swap
      # red and blue in the saved crop.
      image = cv2.imread(os.path.join(images_dir, row.image_name))
      if image is None:
        print(f'Could not read {row.image_name}; skipping.')
        continue
      new_h, new_w = image.shape[:2]

      # The stored bbox refers to the tracking-size image; rescale it to the
      # size of the original image.
      y1, x1, y2, x2 = resize_bbox(
          row.bbox_0, row.bbox_1, row.bbox_2, row.bbox_3,
          HEIGHT_TRACKING, WIDTH_TRACKING, new_h, new_w,
      )
      crop = image[y1:y2, x1:x2]
      if crop.size == 0:
        # cv2.imwrite cannot write an empty image.
        continue

      # Include the particle id and the score in the filename.
      name = (
          f'{os.path.splitext(row.image_name)[0]}'
          f'_{row.particle}_{row.detection_scores:.2f}.png'
      )

      # Save the cropped image.
      cv2.imwrite(os.path.join(category_dir, name), crop)


## Copying folders to my Google drive

In [None]:
# Google Drive folder (through the /mydrive link) that receives the results.
destination_folder = '/mydrive/circularnet/TestModel'
os.makedirs(destination_folder, exist_ok=True)

# Function to safely copy directory, removing destination first if it exists
def copytree_replace(src: str, dst: str) -> None:
  """Copy the directory tree `src` to `dst`, replacing any existing `dst`.

  Args:
    src: Directory to copy.
    dst: Target path; removed recursively first if it already exists.
  """
  destination_present = os.path.exists(dst)
  if destination_present:
    # shutil.copytree refuses to write into an existing directory.
    shutil.rmtree(dst)
  shutil.copytree(src, dst)

# Function to safely copy file, overwriting if it exists
def copy_replace(src: str, dst: str) -> None:
  """Copy the file `src` to `dst`, removing an existing `dst` first.

  Args:
    src: File to copy.
    dst: Target file path.
  """
  destination_present = os.path.exists(dst)
  if destination_present:
    os.remove(dst)
  shutil.copy(src, dst)

# Copy the three result folders, then the counts file, from the working
# directory to the destination folder, replacing earlier copies.
for folder_name in ("prediction_folder", "cropped_objects", "tracking"):
  copytree_replace(
      os.path.join(os.getcwd(), folder_name),
      os.path.join(destination_folder, folder_name),
  )
copy_replace(
    os.path.join(os.getcwd(), "count.csv"),
    os.path.join(destination_folder, "count.csv"),
)