# Car & Human Segmentation | Computer Vision Final Project

Segment cars and humans in a given picture.

***Group***
- Laurids Radtke
- Giorgia Iacobellis
- Thiago Costa


In [None]:
!pip install ultralytics==8.0.196

Collecting ultralytics==8.0.196
  Downloading ultralytics-8.0.196-py3-none-any.whl (631 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/631.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/631.1 kB[0m [31m2.1 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m624.6/631.1 kB[0m [31m10.7 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m631.1/631.1 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
Collecting thop>=0.1.1 (from ultralytics==8.0.196)
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: thop, ultralytics
Successfully installed thop-0.1.1.post2209072238 ultralytics-8.0.196


In [None]:
import os
import random
import matplotlib.pyplot as plt
from ultralytics import YOLO
import numpy as np
import cv2
import plotly.express as px
from plotly import subplots
from tqdm.notebook import tqdm
import pandas as pd
from PIL import Image
import tensorflow as tf
import seaborn as sns
from tabulate import tabulate

%load_ext tensorboard

### Introduction

In this project we perform object segmentation on images. The goal is to detect humans and cars in a variety of images. For the segmentation task we use several pretrained models: a simple ... model as the baseline and YOLOv8 as our high-performance model. After inference, we evaluate each model's predictions on the ... dataset. We then compare the performance of the two models to determine how much the high-performance model improves on the baseline.

### Strategy

- download the dataset
- preprocess the dataset (more detail to follow)
- run a baseline model for comparison
- run a high-performance model
- evaluate each model
- compare the models to determine the best one




#### Evaluation

In [None]:
def yolo_evaluation(path):
  """Validate the YOLO weights at ``path`` and return the metrics dictionary.

  Builds a ``YOLO`` model from ``path``, calls ``val()`` on it, prints the
  returned metrics object and hands back its ``results_dict`` attribute.
  """
  validation = YOLO(path).val()
  print(validation)
  return validation.results_dict

def results_table(names, data):
  """Print one table row per training run.

  Args:
    names: indexable collection of row labels; ``names[i]`` labels ``data[i]``.
    data: sequence of dictionaries. The keys of the first dictionary become
      the column headers; the values of every dictionary are placed
      positionally under those headers.
  """
  # First column carries the run label, the rest mirror the first dict's keys.
  header = ['Trainings', *data[0].keys()]
  frame = pd.DataFrame(columns=header)

  # Fill the frame one row at a time, label first and metric values after it.
  for row_idx, metrics in enumerate(data):
    frame.loc[row_idx] = [names[row_idx], *metrics.values()]

  # Render with tabulate for a boxed, human-friendly layout.
  print(tabulate(frame, headers='keys', tablefmt='pretty', showindex=False))

### Preparation

In [None]:

def crop_images(path="/content/data/"):
  """Split every ``.jpg`` file found directly inside ``path``.

  Each matching file is announced on stdout and then handed to ``cropcrop``
  together with its bare file name.

  Args:
    path: directory to scan (not recursive). A trailing slash is optional.
  """
  for file in os.listdir(path):
    if not file.endswith(".jpg"):
      continue
    print(f'saving {file}')
    # os.path.join supplies the separator when ``path`` lacks a trailing
    # slash; plain string concatenation produced a wrong path in that case.
    cropcrop(os.path.join(path, file), file)

def cropcrop(path, filename):
  """Split one side-by-side image into two files.

  Pixel columns 0-255 are written to ``./data/images/<filename>`` and pixel
  columns 256-511 to ``./data/labels/<filename>``.
  NOTE(review): presumably the inputs are 512 px wide photo/label pairs, so
  these are the left and right halves. Confirm against the dataset.

  Args:
    path: location of the combined image on disk.
    filename: file name used for both output files.

  Raises:
    FileNotFoundError: if OpenCV cannot read ``path``.
  """
  img = cv2.imread(path)  # OpenCV returns None instead of raising on failure
  if img is None:
    raise FileNotFoundError(f'could not read image: {path}')

  # Create the output folders up front so the writes below do not depend on
  # them already existing.
  for folder in ('./data/images', './data/labels'):
    os.makedirs(folder, exist_ok=True)

  cv2.imwrite(f'./data/images/{filename}', img[:, :256])
  cv2.imwrite(f'./data/labels/{filename}', img[:, 256:512])

In [None]:
# Split every .jpg under the default input directory ("/content/data/").
crop_images()

saving 5.jpg
saving 2.jpg
saving 3.jpg
saving 11.jpg
saving 9.jpg
saving 6.jpg
saving 7.jpg
saving 8.jpg
saving 4.jpg
saving 10.jpg
saving 1.jpg
saving 12.jpg


### Baseline Model

In [None]:
# baseline = yolo_evaluation('yolov8n-seg.pt')


In [None]:
def path_extraction(path="/content/data/images/"):
  """Recursively collect the paths of all JPEG files below ``path``.

  The ``.jpg`` extension is matched case-insensitively. A missing or empty
  directory yields an empty list.
  """
  return [
      os.path.join(folder, name)
      for folder, _subdirs, names in os.walk(path)
      for name in names
      if name.lower().endswith('.jpg')
  ]

# Paths of all .jpg files under the default image directory.
image_paths = path_extraction()

In [None]:
def prediction(model, images, model_name, pred_args=None):
  """
  Run ``model`` on ``images`` and collect one segmentation result per image.

  Args:
    model: object exposing ``predict``. For ``model_name == "YOLO"`` it is
      called once with the whole ``images`` collection; otherwise once per
      image as ``model.predict(image, args=pred_args)``.
    images: collection of inputs accepted by ``model.predict``.
    model_name: ``"YOLO"`` selects the batched YOLO path, any other value the
      per-image path.
    pred_args: forwarded as ``args=`` on the per-image path; unused for YOLO.

  Returns:
    A list with one entry per image, in input order. On the YOLO path each
    entry is the result's ``masks`` object moved to CPU and converted with
    ``numpy()``, or ``None`` when the result carries no masks. On the other
    path each entry is whatever ``model.predict`` returned.

  Side effects (YOLO path only): every annotated result is shown and saved as
  ``results_<index>.jpg`` in the working directory.
  """
  mask_list = []

  if model_name == "YOLO":
    for idx, result in enumerate(model.predict(images)):
      # Ultralytics leaves ``masks`` as None when nothing was detected; keep a
      # None placeholder so the output stays aligned with ``images``.
      found = result.masks
      # .cpu() first: numpy conversion is not possible for tensors on a GPU.
      mask_list.append(None if found is None else found.cpu().numpy())

      im_array = result.plot()  # plot a BGR numpy array of predictions
      im = Image.fromarray(im_array[..., ::-1])  # RGB PIL image
      im.show()  # show image
      # One file per image; a fixed name would keep only the last result.
      im.save(f'results_{idx}.jpg')
  else:
    # Previously this branch appended to an unbound local and returned an
    # empty list; collect into the list that is actually returned.
    for image in images:
      mask_list.append(model.predict(image, args=pred_args))

  return mask_list

# Baseline run: load the yolov8n-seg weights and predict on every image path.
model = YOLO("yolov8n-seg.pt")
model_name = "YOLO"  # NOTE(review): assigned but unused; the call below passes the literal "YOLO"
masks = prediction(model, image_paths, "YOLO",[])



0: 640x640 3 cars, 1 bus, 1: 640x640 6 cars, 1 potted plant, 2: 640x640 2 persons, 7 cars, 3: 640x640 2 cars, 4: 640x640 1 bicycle, 2 cars, 4 buss, 2 trucks, 1 clock, 5: 640x640 2 cars, 1 bus, 6: 640x640 5 cars, 1 bus, 7: 640x640 2 cars, 3 buss, 1 truck, 1 traffic light, 8: 640x640 4 cars, 1 bus, 1 traffic light, 9: 640x640 1 car, 2 buss, 10: 640x640 1 car, 1 bus, 11: 640x640 1 person, 1 car, 5582.2ms
Speed: 6.9ms preprocess, 465.2ms inference, 21.3ms postprocess per image at shape (1, 3, 640, 640)


In [None]:
# Print the shape of each per-image mask tensor.
# NOTE(review): presumably (instances, height, width) — confirm.
for x in masks:
  print(x.data.shape)

(4, 640, 640)
(7, 640, 640)
(9, 640, 640)
(2, 640, 640)
(10, 640, 640)
(3, 640, 640)
(6, 640, 640)
(7, 640, 640)
(6, 640, 640)
(3, 640, 640)
(2, 640, 640)
(2, 640, 640)


In [None]:
def evaluation(masks, labels):
  """
  Score predicted masks against ground-truth masks, image by image.

  ``masks`` and ``labels`` are walked in lockstep. For every pair the overlap
  score and the MAP at thresholds 0.5 and 0.75 are computed, in that order.
  Returns a NumPy array with one (overlap, MAP@0.5, MAP@0.75) row per pair.
  """
  per_image = [
      (
          compute_overlap(pred, truth),
          compute_MAP(pred, truth, thresh=0.5),
          compute_MAP(pred, truth, thresh=0.75),
      )
      for pred, truth in zip(masks, labels)
  ]
  return np.array(per_image)

In [None]:
def compute_overlap(pred_masks, gt_masks):
  """Placeholder overlap score: ignores both arguments and always returns 1.

  TODO: replace with a real overlap measure between ``pred_masks`` and
  ``gt_masks``; until then every score derived from it is meaningless.
  """
  return 1

In [None]:
def compute_MAP(pred_masks, gt_masks, thresh=0):
  """Placeholder MAP score: ignores all arguments and always returns 1.

  TODO: replace with a real computation that honours ``thresh``; until then
  every score derived from it is meaningless.
  """
  return 1

In [None]:
def table(model_scores):
  """
  Build, print and return an overview table of per-model scores.

  Args:
    model_scores: mapping of model name to a sequence of per-image score
      tuples, e.g. ``{model1: [(1,2,3), (3,2,1)], model2: [(1,2,2), (3,2,5)]}``.
      Each tuple is expected in the order produced by ``evaluation``:
      (overlap, MAP at thresh 0.5, MAP at thresh 0.75).

  Returns:
    The rendered table as a string (it is also printed). Every row holds one
    model and the mean of each score over that model's images; a model with
    no scores gets NaN in every score column.

  Raises:
    ValueError: if a model's score tuples do not have exactly three entries.
  """
  score_columns = ['overlap', 'mAP@0.5', 'mAP@0.75']

  rows = []
  for model_label, per_image in model_scores.items():
    scores = np.asarray(per_image, dtype=float)
    if scores.size == 0:
      means = [float('nan')] * len(score_columns)
    else:
      scores = np.atleast_2d(scores)  # also accept a single score tuple
      if scores.ndim != 2 or scores.shape[1] != len(score_columns):
        raise ValueError(
            f'expected {len(score_columns)} scores per image for '
            f'{model_label!r}, got shape {scores.shape}')
      # Averaging over images gives one summary row per model.
      means = scores.mean(axis=0).tolist()
    rows.append([model_label, *means])

  df = pd.DataFrame(rows, columns=['Trainings'] + score_columns)

  # Display the DataFrame using tabulate for a prettier format
  rendered = tabulate(df, headers='keys', tablefmt='pretty', showindex=False)

  print(rendered)
  return rendered

....

### High Performance Model - YOLOv8

In [None]:
def segment_img_yolo(path):
  """Segment one image with the yolov8m-seg weights and display the result.

  Every detected instance with confidence of at least 0.5 is filled with the
  colour of its class on top of the image, which is then shown with
  matplotlib. Classes 0 and 2 use fixed colours; all other classes get a
  random colour on each call.

  Args:
    path: location of the image on disk.

  Raises:
    FileNotFoundError: if OpenCV cannot read ``path``.
  """
  model = YOLO("yolov8m-seg.pt")
  img = cv2.imread(path)  # OpenCV returns None instead of raising on failure
  if img is None:
    raise FileNotFoundError(f'could not read image: {path}')

  conf = 0.5

  results = model.predict(img, conf=conf)

  # One colour per class, indexed directly by the class id.
  colors = [random.choices(range(256), k=3) for _ in model.names]

  # NOTE(review): these tuples are drawn onto a BGR image (it is converted to
  # RGB only for display below) — confirm the channel order is as intended.
  colors[0] = (0,  0, 142) # person
  colors[2] = (220, 20, 60) # car

  for result in results:
    # Ultralytics leaves ``masks`` as None when nothing was detected.
    if result.masks is None:
      continue
    for mask, box in zip(result.masks.xy, result.boxes):
      if len(mask) == 0:
        continue  # no polygon points to fill
      points = np.int32([mask])
      cv2.fillPoly(img, points, colors[int(box.cls[0])])

  plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
  plt.axis('off')
  plt.show()


...

### Validation & Comparison

In [None]:
# NOTE(review): --logdir is given without a directory; supply the folder that
# holds the training logs (presumably the YOLO run directory — confirm).
%tensorboard --logdir

....