## Download Dataset

In [1]:
# https://drive.google.com/file/d/1kdypEb7Q0NFSg0Ct7PeG0akVlnYxlhZp/view?usp=drive_link
!gdown 1kdypEb7Q0NFSg0Ct7PeG0akVlnYxlhZp
# !gdown 1sX9qmlPY1SXwboz9qOzjyLrkzCwYoT31

Downloading...
From (original): https://drive.google.com/uc?id=1kdypEb7Q0NFSg0Ct7PeG0akVlnYxlhZp
From (redirected): https://drive.google.com/uc?id=1kdypEb7Q0NFSg0Ct7PeG0akVlnYxlhZp&confirm=t&uuid=0e50e7af-d97b-41c6-9fd9-87e24e18893b
To: /content/car_object_detection.zip
100% 118M/118M [00:01<00:00, 71.1MB/s]


In [2]:
# !unzip cars_yolo_data.zip
!unzip car_object_detection.zip

Archive:  car_object_detection.zip
  inflating: data/sample_submission.csv  
  inflating: data/testing_images/vid_5_25100.jpg  
  inflating: data/testing_images/vid_5_25120.jpg  
  inflating: data/testing_images/vid_5_25140.jpg  
  inflating: data/testing_images/vid_5_25160.jpg  
  inflating: data/testing_images/vid_5_25180.jpg  
  inflating: data/testing_images/vid_5_25200.jpg  
  inflating: data/testing_images/vid_5_25220.jpg  
  inflating: data/testing_images/vid_5_25240.jpg  
  inflating: data/testing_images/vid_5_25260.jpg  
  inflating: data/testing_images/vid_5_26320.jpg  
  inflating: data/testing_images/vid_5_26400.jpg  
  inflating: data/testing_images/vid_5_26420.jpg  
  inflating: data/testing_images/vid_5_26560.jpg  
  inflating: data/testing_images/vid_5_26580.jpg  
  inflating: data/testing_images/vid_5_26600.jpg  
  inflating: data/testing_images/vid_5_26620.jpg  
  inflating: data/testing_images/vid_5_26640.jpg  
  inflating: data/testing_images/vid_5_26660.jpg  
  inf

## Import Libraries

In [3]:
%pip install ultralytics
import ultralytics

ultralytics.checks()

Ultralytics YOLOv8.2.2 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 29.2/78.2 GB disk)


In [4]:
import os
import shutil
import yaml
import numpy as np
import cv2
import pandas as pd
import json
import matplotlib.pyplot as plt
from ultralytics import YOLO
from PIL import Image
from sklearn.model_selection import train_test_split

## Prepare YOLOv8 Data

In [5]:
train_data_dir = '/content/data/training_images'
label_filepath = '/content/data/train_solution_bounding_boxes (1).csv'
df = pd.read_csv(label_filepath)
df

Unnamed: 0,image,xmin,ymin,xmax,ymax
0,vid_4_1000.jpg,281.259045,187.035071,327.727931,223.225547
1,vid_4_10000.jpg,15.163531,187.035071,120.329957,236.430180
2,vid_4_10040.jpg,239.192475,176.764801,361.968162,236.430180
3,vid_4_10020.jpg,496.483358,172.363256,630.020260,231.539575
4,vid_4_10060.jpg,16.630970,186.546010,132.558611,238.386422
...,...,...,...,...,...
554,vid_4_9860.jpg,0.000000,198.321729,49.235251,236.223284
555,vid_4_9880.jpg,329.876184,156.482351,536.664239,250.497895
556,vid_4_9900.jpg,0.000000,168.295823,141.797524,239.176652
557,vid_4_9960.jpg,487.428988,172.233646,616.917699,228.839864


In [6]:
pil_im = Image.open('/content/data/testing_images/vid_5_25100.jpg')
im_width, im_height = pil_im.size

df['class'] = 0
df.rename(columns={'image': 'im_name'}, inplace=True)

df['x_center'] = (df['xmin'] + df['xmax']) / 2
df['y_center'] = (df['ymin'] + df['ymax']) / 2
df['bbox_w'] = df['xmax'] - df['xmin']
df['bbox_h'] = df['ymax'] - df['ymin']

df['x_center'] = df['x_center'] / im_width
df['y_center']= df['y_center'] / im_height
df['bbox_w'] = df['bbox_w'] / im_width
df['bbox_h'] = df['bbox_h'] / im_height

class_labels = ['car']
yolo_df = df[['im_name', 'class', 'x_center', 'y_center', 'bbox_w', 'bbox_h']]
yolo_df

Unnamed: 0,im_name,class,x_center,y_center,bbox_w,bbox_h
0,vid_4_1000.jpg,0,0.450434,0.539817,0.068741,0.095238
1,vid_4_10000.jpg,0,0.100217,0.557191,0.155572,0.129987
2,vid_4_10040.jpg,0,0.444645,0.543678,0.181621,0.157014
3,vid_4_10020.jpg,0,0.833213,0.531451,0.197540,0.155727
4,vid_4_10060.jpg,0,0.110347,0.559122,0.171491,0.136422
...,...,...,...,...,...,...
554,vid_4_9860.jpg,0,0.036417,0.571770,0.072833,0.099741
555,vid_4_9880.jpg,0,0.640932,0.535500,0.305899,0.247409
556,vid_4_9900.jpg,0,0.104880,0.536148,0.209760,0.186528
557,vid_4_9960.jpg,0,0.816824,0.527728,0.191551,0.148964


## Split Data

In [7]:
seed = 99

train_data, val_data = train_test_split(
    yolo_df,
    test_size=0.05,
    random_state=seed,
    shuffle=True
)

## Save Data

In [8]:
def save_data(df, src_im_dir, save_dir):
    os.makedirs(save_dir, exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'images'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'labels'), exist_ok=True)

    yolo_labels = []
    for i, row in df.iterrows():
        im_name = row['im_name']
        class_id = row['class']
        x_center = row['x_center']
        y_center = row['y_center']
        bbox_w = row['bbox_w']
        bbox_h = row['bbox_h']

        im_path = os.path.join(src_im_dir, im_name)

        # copy image to images folder
        shutil.copy(
            os.path.join(src_im_dir, im_path),
            os.path.join(save_dir, 'images')
        )

        im_name = os.path.splitext(im_name)[0]
        label = f'{class_id} {x_center} {y_center} {bbox_w} {bbox_h}'
        yolo_labels.append((im_name, label))

    for im_name, label in yolo_labels:
        with open(os.path.join(save_dir, 'labels', f'{im_name}.txt'), 'a') as f:
            f.write(f'{label}\n')


In [9]:
save_yolo_data_dir = 'yolo_data'
os.makedirs(save_yolo_data_dir, exist_ok=True)
save_train_dir = os.path.join(save_yolo_data_dir, 'train')
save_val_dir = os.path.join(save_yolo_data_dir, 'val')

save_data(train_data, train_data_dir, save_train_dir)
save_data(val_data, train_data_dir, save_val_dir)

## Create YAML File

In [10]:
data_yaml = {
    'path': '/content/yolo_data',
    'train': 'train/images',
    'test': 'test/images',
    'val': 'val/images',
    'nc': 1,
    'names': class_labels
}

yolo_yaml_path = os.path.join(
    save_yolo_data_dir,
    'data.yml'
)
with open(yolo_yaml_path, 'w') as f:
    yaml.dump(data_yaml, f, default_flow_style=False)

## Train Model

#### Load model

In [11]:
yolo_yaml_path = 'yolo_data/data.yml'

# Build from YAML and transfer weights
model = YOLO('yolov8s.yaml').load('yolov8s.pt')

Downloading https://github.com/ultralytics/assets/releases/download/v8.2.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 112MB/s]


Transferred 355/355 items from pretrained weights


#### Train model

In [12]:
epochs = 15
imgsz = 640
batch_size = 8
patience = 5
lr = 5e-4

results = model.train(
    data=yolo_yaml_path,
    epochs=epochs,
    imgsz=imgsz,
    batch=batch_size,
    lr0=lr,
    patience=patience,
    project='models',
    name='yolov8/detect/train'
)

Ultralytics YOLOv8.2.2 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.yaml, data=yolo_data/data.yml, epochs=15, time=None, patience=5, batch=8, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=models, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, 

100%|██████████| 755k/755k [00:00<00:00, 19.9MB/s]


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       928  ultralytics.nn.modules.conv.Conv             [3, 32, 3, 2]                 
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     29056  ultralytics.nn.modules.block.C2f             [64, 64, 1, True]             
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  2    788480  ultralytics.nn.modules.block.C2f             [256, 256, 2, True]           
  7                  -1  1   1180672  ultralytics

100%|██████████| 6.23M/6.23M [00:00<00:00, 67.1MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /content/yolo_data/train/labels... 340 images, 0 backgrounds, 0 corrupt: 100%|██████████| 340/340 [00:00<00:00, 2090.54it/s]

[34m[1mtrain: [0mNew cache created: /content/yolo_data/train/labels.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
[34m[1mval: [0mScanning /content/yolo_data/val/labels... 27 images, 0 backgrounds, 0 corrupt: 100%|██████████| 27/27 [00:00<00:00, 1219.48it/s]

[34m[1mval: [0mNew cache created: /content/yolo_data/val/labels.cache





Plotting labels to models/yolov8/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.0005' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mmodels/yolov8/detect/train[0m
Starting training for 15 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/15      2.46G      1.491      1.937      1.172         14        640: 100%|██████████| 43/43 [00:11<00:00,  3.68it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:01<00:00,  1.25it/s]

                   all         27         28        0.4      0.524      0.366      0.244






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/15      2.36G      1.341     0.9758      1.154         12        640: 100%|██████████| 43/43 [00:08<00:00,  4.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  3.68it/s]


                   all         27         28      0.597      0.821      0.694      0.459

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/15      2.35G      1.355      1.023      1.213          8        640: 100%|██████████| 43/43 [00:07<00:00,  5.55it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.65it/s]

                   all         27         28       0.55      0.893      0.579      0.351






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/15      2.37G      1.391     0.8681      1.217          7        640: 100%|██████████| 43/43 [00:08<00:00,  4.80it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  7.86it/s]

                   all         27         28       0.52      0.891        0.6        0.4






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/15      2.37G      1.312      0.761      1.151         12        640: 100%|██████████| 43/43 [00:09<00:00,  4.75it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  6.73it/s]

                   all         27         28      0.581      0.821      0.678       0.42





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01), CLAHE(p=0.01, clip_limit=(1, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/15      2.37G      1.323       0.81      1.198          7        640: 100%|██████████| 43/43 [00:11<00:00,  3.81it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  8.49it/s]

                   all         27         28       0.52      0.929      0.634      0.397






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/15      2.35G      1.284     0.7216      1.189          8        640: 100%|██████████| 43/43 [00:07<00:00,  5.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  4.06it/s]

                   all         27         28      0.511      0.897      0.621      0.435
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 5 epochs. Best results observed at epoch 2, best model saved as best.pt.
To update EarlyStopping(patience=5) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






7 epochs completed in 0.022 hours.
Optimizer stripped from models/yolov8/detect/train/weights/last.pt, 22.5MB
Optimizer stripped from models/yolov8/detect/train/weights/best.pt, 22.5MB

Validating models/yolov8/detect/train/weights/best.pt...
Ultralytics YOLOv8.2.2 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8s summary (fused): 168 layers, 11125971 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:00<00:00,  5.10it/s]


                   all         27         28      0.596      0.821       0.69      0.456
Speed: 0.3ms preprocess, 4.4ms inference, 0.0ms loss, 1.5ms postprocess per image
Results saved to [1mmodels/yolov8/detect/train[0m


## Evaluation


In [13]:
model_path = 'models/yolov8/detect/train/weights/best.pt'
model = YOLO(model_path)

metrics = model.val(
    project='models',
    name='yolov8/detect/val'
)

Ultralytics YOLOv8.2.2 🚀 Python-3.10.12 torch-2.2.1+cu121 CUDA:0 (Tesla T4, 15102MiB)
YOLOv8s summary (fused): 168 layers, 11125971 parameters, 0 gradients, 28.4 GFLOPs


[34m[1mval: [0mScanning /content/yolo_data/val/labels.cache... 27 images, 0 backgrounds, 0 corrupt: 100%|██████████| 27/27 [00:00<?, ?it/s]
  self.pid = os.fork()
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 2/2 [00:01<00:00,  1.41it/s]


                   all         27         28      0.597      0.821      0.695      0.455
Speed: 0.2ms preprocess, 27.1ms inference, 0.0ms loss, 11.4ms postprocess per image
Results saved to [1mmodels/yolov8/detect/val[0m


## Inference


In [15]:
test_im_path = '/content/yolo_data/val/images/vid_4_13880.jpg'
pred = model.predict(test_im_path, save=True)


image 1/1 /content/yolo_data/val/images/vid_4_13880.jpg: 384x640 2 cars, 170.1ms
Speed: 2.6ms preprocess, 170.1ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)
Results saved to [1mruns/detect/predict[0m


In [16]:
json.loads(pred[0].tojson())

[{'name': 'car',
  'class': 0,
  'confidence': 0.81937,
  'box': {'x1': 392.80798, 'y1': 180.61198, 'x2': 506.00787, 'y2': 230.14555}},
 {'name': 'car',
  'class': 0,
  'confidence': 0.81334,
  'box': {'x1': 66.97375, 'y1': 195.61768, 'x2': 218.34196, 'y2': 249.10812}}]

In [17]:
ytb_url = 'https://www.youtube.com/watch?v=Gr_eUDukd-0'
model.predict(ytb_url, save=True)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
0: 384x640 1 car, 17.2ms
0: 384x640 1 car, 10.9ms
0: 384x640 2 cars, 12.5ms
0: 384x640 2 cars, 12.3ms
0: 384x640 2 cars, 10.9ms
0: 384x640 2 cars, 38.9ms
0: 384x640 1 car, 12.1ms
0: 384x640 1 car, 10.9ms
0: 384x640 1 car, 16.0ms
0: 384x640 1 car, 13.5ms
0: 384x640 1 car, 10.8ms
0: 384x640 1 car, 10.8ms
0: 384x640 2 cars, 11.5ms
0: 384x640 1 car, 11.0ms
0: 384x640 1 car, 10.9ms
0: 384x640 1 car, 10.8ms
0: 384x640 1 car, 23.3ms
0: 384x640 1 car, 38.7ms
0: 384x640 2 cars, 10.8ms
0: 384x640 2 cars, 11.2ms
0: 384x640 2 cars, 12.2ms
0: 384x640 1 car, 16.7ms
0: 384x640 1 car, 15.5ms
0: 384x640 1 car, 20.1ms
0: 384x640 1 car, 14.9ms
0: 384x640 2 cars, 12.6ms
0: 384x640 1 car, 10.9ms
0: 384x640 1 car, 10.8ms
0: 384x640 1 car, 10.9ms
0: 384x640 1 car, 11.3ms
0: 384x640 1 car, 10.8ms
0: 384x640 (no detections), 24.8ms
0: 384x640 (no detections), 13.5ms
0: 384x640 (no detections), 12.0ms
0: 384x640 (no detections), 11.3ms
0: 384x640 

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'car'}
 obb: None
 orig_img: array([[[ 30,  56,  34],
         [ 31,  57,  35],
         [ 36,  62,  40],
         ...,
         [ 15,  23,   6],
         [ 15,  23,   6],
         [ 15,  23,   6]],
 
        [[ 30,  56,  34],
         [ 33,  59,  37],
         [ 36,  62,  40],
         ...,
         [ 15,  23,   6],
         [ 15,  23,   6],
         [ 15,  23,   6]],
 
        [[ 31,  57,  35],
         [ 33,  59,  37],
         [ 35,  61,  39],
         ...,
         [ 15,  23,   6],
         [ 15,  23,   6],
         [ 15,  23,   6]],
 
        ...,
 
        [[ 97, 106,  55],
         [106, 115,  64],
         [111, 120,  69],
         ...,
         [  5,   0,   0],
         [  3,   0,   0],
         [  3,   0,   0]],
 
        [[ 63,  72,  21],
         [ 94, 103,  52],
         [111, 120,  69],
         ...,
         [  4,   0,  

In [18]:
def visualize_bbox(im_path, preds, conf_thres=0.8, font=cv2.FONT_HERSHEY_SIMPLEX):
    im = cv2.imread(im_path)
    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    h, w = im.shape[:2]

    for pred in preds:
        conf_score = pred['confidence']
        if conf_score < conf_thres:
            continue

        bbox = pred['box']
        xmin = int(bbox['x1'])
        xmax = int(bbox['x2'])
        ymin = int(bbox['y1'])
        ymax = int(bbox['y2'])

        # draw a green rectangle around the object with thickness of 3
        cv2.rectangle(im, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)

        text = f'{conf_score:.2f}'
        (text_width, text_height), _ = cv2.getTextSize(text, font, 1, 2)

        # draw green rectangle wrapping around the class text
        cv2.rectangle(im, (xmin, ymin - text_height - 5), (xmin + text_width, ymin), (0, 255, 0), -1)
        cv2.putText(im, text, (xmin, ymin - 5), font, 1, (0, 0, 0), 2)

    return im


In [19]:
test_im_dir = '/content/data/testing_images'
conf_thres = 0.75

for im_name in os.listdir(test_im_dir):
    im_path = os.path.join(test_im_dir, im_name)

    results = model(im_path, verbose=False)
    preds = json.loads(results[0].tojson())
    visualized_im = visualize_bbox(im_path, preds, conf_thres)

    %matplotlib inline
    plt.imshow(visualized_im)
    plt.axis('off')
    plt.show()

Output hidden; open in https://colab.research.google.com to view.