# Autonomous Object Detection System
Problem Statement
Object detection is crucial for autonomous systems such as self-driving cars and surveillance.
This project builds a real-time object detection system using deep learning.


Imports

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
import cv2
import os
import kagglehub
import numpy as np
import matplotlib.pyplot as plt
import random

GPU Check

In [None]:
# Ask TensorFlow which physical GPU devices it can see and report how many.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

What we will be using
- Framework: TensorFlow
- Model: SSD MobileNet
- Dataset: COCO128

Kaggle Creds

In [None]:
from google.colab import userdata

# Copy the Kaggle credentials stored in Colab's secret manager into the
# environment variables read by the Kaggle client.
# The username must go into KAGGLE_USERNAME: writing it to KAGGLE_KEY would
# overwrite the API key set on the line before and leave the username unset.
os.environ["KAGGLE_KEY"] = userdata.get('KAGGLE_KEY')
os.environ["KAGGLE_USERNAME"] = userdata.get('KAGGLE_USERNAME')

Defining the COCO class names

In [None]:
# Class names indexed by the integer class id found in the label files
# (position in this list == class id). The trailing comments give the id range
# covered by each row.
COCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane",                          # 0-4
    "bus", "train", "truck", "boat", "traffic light",                              # 5-9
    "fire hydrant", "stop sign", "parking meter", "bench", "bird",                 # 10-14
    "cat", "dog", "horse", "sheep", "cow",                                         # 15-19
    "elephant", "bear", "zebra", "giraffe", "backpack",                            # 20-24
    "umbrella", "handbag", "tie", "suitcase", "frisbee",                           # 25-29
    "skis", "snowboard", "sports ball", "kite", "baseball bat",                    # 30-34
    "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle",        # 35-39
    "wine glass", "cup", "fork", "knife", "spoon",                                 # 40-44
    "bowl", "banana", "apple", "sandwich", "orange",                               # 45-49
    "broccoli", "carrot", "hot dog", "pizza", "donut",                             # 50-54
    "cake", "chair", "couch", "potted plant", "bed",                               # 55-59
    "dining table", "toilet", "tv", "laptop", "mouse",                             # 60-64
    "remote", "keyboard", "cell phone", "microwave", "oven",                       # 65-69
    "toaster", "sink", "refrigerator", "book", "clock",                            # 70-74
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush",                  # 75-79
]

Loading the dataset

In [None]:
# Fetch the COCO128 dataset through kagglehub; `path` is the directory it returns.
path = kagglehub.dataset_download("ultralytics/coco128")

# Show the download location, then its top-level entries.
print(path, os.listdir(path), sep="\n")

In [None]:
# Images and labels sit in parallel "train2017" folders under the dataset root.
IMG_DIR, LABEL_DIR = (
    os.path.join(path, "coco128", kind, "train2017")
    for kind in ("images", "labels")
)

In [None]:
# Report how many entries each directory contains.
for tag, folder in (("images: ", IMG_DIR), ("labels: ", LABEL_DIR)):
  print(tag, len(os.listdir(folder)))

Loading the image and label in sorted pairs

In [None]:
# os.listdir gives no ordering guarantee, so sort each listing by filename.
image_files = os.listdir(IMG_DIR)
image_files.sort()
label_files = os.listdir(LABEL_DIR)
label_files.sort()

Loading an image and drawing bounding boxes around its labelled objects

In [None]:
def load_image_label(index):
  """Load the image at `index` in `image_files` together with its label lines.

  The label file is located by the image's base name (``<stem>.txt`` inside
  ``LABEL_DIR``) instead of by position in `label_files`. Pairing two
  independently sorted directory listings by index breaks as soon as one image
  has no label file: every later image would silently receive another image's
  labels. NOTE(review): assumes label files are named after the image stem
  with a ``.txt`` extension -- confirm against the dataset layout.

  Args:
    index: Position of the image in the sorted `image_files` list.

  Returns:
    A 5-tuple ``(image_name, img, labels, height, width)``:
      image_name: File name of the image.
      img: Array returned by ``cv2.imread``.
      labels: List of the non-blank lines of the label file; empty when the
        image has no label file.
      height, width: First two entries of ``img.shape``.
    When the image cannot be read, a message is printed and all five entries
    are ``None``.
  """
  # picking nth image
  image_name = image_files[index]

  # loading the image
  img_path = os.path.join(IMG_DIR, image_name)
  img = cv2.imread(img_path)

  if img is None:
    print(f"no image at {img_path}")
    return None, None, None, None, None

  height, width = img.shape[:2]

  # reading the label file that belongs to this image (matched by name)
  label_path = os.path.join(LABEL_DIR, os.path.splitext(image_name)[0] + ".txt")
  labels = []
  if os.path.isfile(label_path):
    with open(label_path, 'r') as f:
      # drop blank lines so downstream parsers never see an empty record
      labels = [line for line in f if line.strip()]

  return image_name, img, labels, height, width


In [None]:
def label_to_bb(label, width, height):
  """Turn one label line into a class id plus pixel corner coordinates.

  The line is split on whitespace; the first field is read as the class id and
  the next four as a box centre (x, y) and box size (w, h), each multiplied by
  the given image `width` / `height` to obtain pixels.

  Args:
    label: One whitespace-separated label line.
    width: Image width in pixels.
    height: Image height in pixels.

  Returns:
    ``(class_id, x1, y1, x2, y2)`` with (x1, y1) the top-left and (x2, y2) the
    bottom-right corner, all truncated to ``int``.
  """
  fields = [float(token) for token in label.split()]

  # scale the centre and the half-extents to pixels
  cx = fields[1] * width
  cy = fields[2] * height
  half_w = fields[3] * width / 2
  half_h = fields[4] * height / 2

  # centre +/- half-extent gives the two opposite corners
  left, top = cx - half_w, cy - half_h
  right, bottom = cx + half_w, cy + half_h

  return int(fields[0]), int(left), int(top), int(right), int(bottom)

In [None]:
def draw_bb(index):
  """Draw every labelled bounding box of one image and display the result.

  Boxes and captions are drawn on a copy, so the array returned by
  `load_image_label` is left untouched.

  Args:
    index: Position of the image in `image_files`.

  Returns:
    The annotated copy of the image (same channel order as ``cv2.imread``
    output), or ``None`` when the image could not be loaded.
  """
  image_name, img, labels, height, width = load_image_label(index)

  if img is None:
    # load_image_label already reported the problem; nothing to draw
    return None

  img_bb = img.copy()

  # ignore blank lines so an empty record cannot crash the parser
  labels = [label for label in labels if label.strip()]

  for label in labels:
    class_id, x1, y1, x2, y2 = label_to_bb(label, width, height)

    # fall back to the raw id if it is outside the known class list
    if 0 <= class_id < len(COCO_CLASSES):
      class_name = COCO_CLASSES[class_id]
    else:
      class_name = str(class_id)

    # random color for the bb
    color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

    # draw on the copy (img_bb), not on the original image
    cv2.rectangle(img_bb, (x1, y1), (x2, y2), color, 2)

    # keep the caption inside the image when the box touches the top edge
    text_y = max(y1 - 10, 15)
    cv2.putText(img_bb, class_name, (x1, text_y), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

  plt.figure(figsize=(12, 8))
  plt.imshow(cv2.cvtColor(img_bb, cv2.COLOR_BGR2RGB))
  plt.axis('off')
  plt.title(f"{image_name} - {len(labels)} objects")
  plt.show()

  return img_bb


Preprocessing Image and Label

In [None]:
def preprocess_image(img, size=416):
  """Resize an image to a square and scale its pixel values to [0, 1].

  Args:
    img: Image array in OpenCV channel order (BGR), as returned by
      ``cv2.imread``.
    size: Side length in pixels of the square output.

  Returns:
    A ``float32`` array of shape ``(size, size, channels)`` in RGB channel
    order with values in [0, 1].
  """
  resized = cv2.resize(img, (size, size), interpolation=cv2.INTER_LINEAR)

  # cv2 loads images as BGR, while ImageNet-pretrained Keras backbones were
  # trained on RGB input, so swap the channel order before feeding the model.
  rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)

  # float32 halves the memory of the float64 produced by a plain `/ 255.0`
  # and matches the default Keras dtype.
  return rgb.astype(np.float32) / 255.0


In [None]:
def preprocess_labels(labels):
    """Parse raw label lines into lists of floats.

    Args:
        labels: Iterable of whitespace-separated label lines, each laid out as
            ``class_id x_center y_center width height``.

    Returns:
        One list of floats per non-blank line, in input order. Blank or
        whitespace-only lines are skipped instead of raising ``IndexError``.
    """
    processed = []
    for label in labels:
        parts = label.split()
        if not parts:
            # blank line (e.g. trailing newline in the label file)
            continue
        # class_id, x_center, y_center, width, height
        processed.append([float(value) for value in parts])

    return processed


Dataset Creation

In [None]:
def create_dataset(size=416):
  """Load and preprocess every image listed in `image_files` with its labels.

  Images that cannot be read are skipped (together with their labels), so the
  two returned lists always have the same length and stay aligned.

  Args:
    size: Side length passed to `preprocess_image`.

  Returns:
    ``(preprocessed_images, preprocessed_labels)``: two parallel lists holding
    the output of `preprocess_image` and `preprocess_labels` for each image.
  """
  preprocessed_images = []
  preprocessed_labels = []

  print(f"processing {len(image_files)} images...")

  for i in range(len(image_files)):
    image_name, img, labels, height, width = load_image_label(i)

    if img is None:
      # unreadable image: load_image_label already printed a message, and
      # passing None on to cv2.resize would raise
      continue

    preprocessed_img = preprocess_image(img, size)
    processed_labels = preprocess_labels(labels)

    preprocessed_images.append(preprocessed_img)
    preprocessed_labels.append(processed_labels)

  print(f"Total images: {len(preprocessed_images)}")
  print(f"Total labels: {len(preprocessed_labels)}")

  return preprocessed_images, preprocessed_labels

In [None]:
def train_val_split(images,labels,train_ratio=0.8):
  """Split parallel image/label sequences into train and validation parts.

  The split is positional (no shuffling): the first ``train_ratio`` share of
  the samples goes to training and the remainder to validation.

  Args:
    images: Sequence of images.
    labels: Sequence of labels, parallel to `images`.
    train_ratio: Fraction of the samples assigned to the training part.

  Returns:
    ``(train_images, train_labels, val_images, val_labels)``.

  Raises:
    ValueError: If `images` and `labels` differ in length.
  """
  if len(images) != len(labels):
    raise ValueError(
        f"images and labels must have the same length, got {len(images)} and {len(labels)}"
    )

  split_index = int(len(images)*train_ratio)

  # both sequences are cut at the same index so image/label pairs stay aligned
  train_images = images[:split_index]
  train_labels = labels[:split_index]
  val_images = images[split_index:]
  val_labels = labels[split_index:]

  return train_images, train_labels, val_images, val_labels

Building the model

In [None]:
# MobileNetV2 feature extractor: ImageNet weights, classification head removed,
# built for 416x416 three-channel input.
backbone = keras.applications.MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(416, 416, 3),
)

# Freeze the pretrained weights.
backbone.trainable = False

In [None]:
inputs = keras.Input(shape=(416,416,3))

layers = keras.layers

# The pretrained MobileNetV2 weights expect pixel values in [-1, 1].
# NOTE(review): assumes the model is fed images scaled to [0, 1] (what
# preprocess_image produces) -- remove this layer if inputs are already
# scaled to [-1, 1].
x = layers.Rescaling(scale=2.0, offset=-1.0)(inputs)

# training=False keeps the frozen backbone in inference mode
x = backbone(x, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.3)(x)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.3)(x)

# 85 sigmoid outputs per image -- presumably 4 box values + 1 objectness score
# + 80 class scores; TODO confirm against the training targets.
outputs = layers.Dense(85, activation='sigmoid')(x)

model = keras.Model(inputs=inputs, outputs=outputs)

In [None]:
# Print the layer-by-layer summary of the assembled model.
model.summary()