<a href="https://colab.research.google.com/github/tessamitchell/SchoolBusStopSignDetection/blob/main/StopSignDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports and Data Setup

All imports used in the code are here

In [None]:
!pip install roboflow
from roboflow import Roboflow
from google.colab import userdata

from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics
from skimage.feature import local_binary_pattern
from skimage.feature import haar_like_feature
from skimage.feature import hog

import numpy as np
import cv2 as cv
from matplotlib import pyplot as plt
import xml.etree.ElementTree as ET
import joblib
import os
from google.colab.patches import cv2_imshow # cv.imshow doesn't work in colab

Download/Import Dataset
Code copied from Roboflow's dataset download instructions

[Link to Dataset](https://app.roboflow.com/myworkspace-hr4qa/stop-signs-custom-umjoj/8)

In [None]:
# authenticate with the API key that userdata.get() returns for the name 'roboflow'
rf = Roboflow(api_key=userdata.get('roboflow'))
# select the workspace and project that hold the stop sign dataset
project = rf.workspace("myworkspace-hr4qa").project("stop-signs-custom-umjoj")
# version 8 of the dataset
version = project.version(8)
# download in "voc" format; the loader below expects an .xml annotation next to each .jpg
dataset = version.download("voc")

Function to load positive and negative windows.
The base code for the XML parsing came from [ChatGPT](https://chatgpt.com/share/692615ef-8958-8010-adfd-24ddd028c3e9)

In [None]:

def load_positive_windows(xml_path):
    """Crop every annotated object out of the images in a folder of XML annotations.

    Despite its name, the function collects both kinds of window: objects
    named "stop sign" are positives, every other object is a negative.

    Args:
        xml_path: directory holding the ``.xml`` annotation files and, next to
            each one, a ``.jpg`` image with the same base name.

    Returns:
        ``(pos_windows, neg_windows)``: two lists of 64x64 crops as returned by
        ``cv.resize``. Annotations without a readable image, and boxes that
        cover no pixels, are skipped.
    """
    pos_windows = []
    neg_windows = []

    for xml_name in os.listdir(xml_path):
        if not xml_name.endswith(".xml"):
            continue
        # swap only the trailing extension, so a ".xml" inside the name is left alone
        img_path = os.path.join(xml_path, os.path.splitext(xml_name)[0] + ".jpg")
        if not os.path.exists(img_path):
            continue  # img file for xml file doesn't exist
        img = cv.imread(img_path)
        if img is None:
            continue  # cv.imread returns None when the file cannot be decoded

        root = ET.parse(os.path.join(xml_path, xml_name)).getroot()
        for obj in root.findall("object"):
            bbox = obj.find("bndbox")
            # widen the box by one pixel on the left and top (windows were cutting
            # off that part of the img); clamp at 0 because a negative start index
            # would count from the far edge of the image instead
            xmin = max(int(bbox.find("xmin").text) - 1, 0)
            ymin = max(int(bbox.find("ymin").text) - 1, 0)
            xmax = int(bbox.find("xmax").text)
            ymax = int(bbox.find("ymax").text)
            # slice image to get window
            window = img[ymin:ymax, xmin:xmax]
            if window.size == 0:
                continue  # degenerate box; cv.resize fails on an empty crop
            window = cv.resize(window, (64, 64))
            if obj.find("name").text == "stop sign":
                pos_windows.append(window)
            else:  # any other label is treated as a negative window
                neg_windows.append(window)

    return pos_windows, neg_windows

Call function to load the windows

In [None]:
# folder of the training split; expected to hold the .jpg images next to their .xml annotations
xml_path = "/content/Stop-Signs-Custom-8/train/"
# despite the function's name, it returns both the positive (stop sign) and the negative windows
pos_images,neg_images=load_positive_windows(xml_path)

# Feature extraction

Functions for extracting features

Recommended values taken from examples in documentation

[local_binary_pattern() documentation](https://scikit-image.org/docs/0.25.x/api/skimage.feature.html#skimage.feature.local_binary_pattern)

[local_binary_pattern() example](https://scikit-image.org/docs/0.25.x/auto_examples/features_detection/plot_local_binary_pattern.html)

[hog() documentation](https://scikit-image.org/docs/0.25.x/api/skimage.feature.html#skimage.feature.hog)


In [None]:
# local binary pattern (LBP) settings shared by every call to extract_lbp
radius=3
n_points=8*radius
METHOD = 'ror'
def extract_lbp(img):
  """Return the LBP code of every pixel of img as one flat vector.

  The pattern is computed with the module-level n_points, radius and METHOD
  settings; the resulting per-pixel array is flattened to 1-D.
  """
  lbp_codes = local_binary_pattern(img, P=n_points, R=radius, method=METHOD)
  flat_codes = lbp_codes.flatten()
  return flat_codes

# histogram of oriented gradients (HOG) with fixed settings
def extract_hog(img):
  """Return the HOG descriptor of img.

  Uses 9 orientation bins over 16x16-pixel cells, with blocks of a single
  cell, and asks only for the feature vector (no visualisation image).
  """
  return hog(
      img,
      orientations=9,
      pixels_per_cell=(16, 16),
      cells_per_block=(1, 1),
      visualize=False,
  )

# combined feature vector: LBP codes followed by the HOG descriptor
def extract_features(img):
  """Return the LBP and HOG features of img joined into one 1-D array."""
  lbp_part = extract_lbp(img)
  hog_part = extract_hog(img)
  return np.concatenate((lbp_part, hog_part))

Actual extraction step (and some preprocessing)

In [None]:
# Take the red colour channel (index 2) of every window.
# The channels go into new lists so that pos_images / neg_images keep their
# colour windows: the testing code further down indexes channel 2 of those
# images itself, and overwriting them here would also make this cell fail with
# an IndexError when it is run a second time.
pos_red = [img[:, :, 2] for img in pos_images]
neg_red = [img[:, :, 2] for img in neg_images]

# get features from all windows and put in array (positives first, then negatives)
X_features = [extract_features(w) for w in pos_red] + \
             [extract_features(w) for w in neg_red]

# store labels in the same order: 1 = stop sign window, 0 = negative window
y_labels   = [1] * len(pos_red) + [0] * len(neg_red)


In [None]:
# run this cell to save extracted features and labels (to train model)
# both files are written to the current working directory
joblib.dump(X_features, "adaboost_features5.pkl")
joblib.dump(y_labels,"adaboost_labels5.pkl")

# Training (Adaboost Cascade)


In [None]:
# run this cell to load previously extracted features (if not in same runtime)
# the file names must match the ones written by the save cell ("...5.pkl")
# NOTE: joblib.load unpickles the file, so only load files you created yourself
X_features=joblib.load("adaboost_features5.pkl")
y_labels=joblib.load("adaboost_labels5.pkl")

The model is trained on 70% of the data; the remaining 30% is held out for testing.

[AdaBoost Documentation](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.AdaBoostClassifier.html)

[AdaBoost Example/Tutorial](https://www.datacamp.com/tutorial/adaboost-classifier-python) -- format of code copied from here


In [None]:
from sklearn.metrics import accuracy_score

# hold out 30% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_features,
    y_labels,
    test_size=0.3,
    random_state=42,
)

# AdaBoost classifier with n_estimators=50 and a learning rate of 1
abc = AdaBoostClassifier(n_estimators=50, learning_rate=1)

# train on the 70% split; the value returned by fit() is what the rest of the notebook uses as `model`
model = abc.fit(X_train, y_train)

# predicted labels for the held-out samples
y_pred = model.predict(X_test)

# fraction of held-out samples that were classified correctly
print("Accuracy:", accuracy_score(y_test, y_pred))
# Accuracy for Model 5: 0.9655172413793104

In [None]:
# run this cell to store the trained model
# the file is written to the current working directory
joblib.dump(model, "adaboost_model5.pkl")

In [None]:
# run this cell to load a previously trained model
# NOTE: joblib.load unpickles the file, which can run arbitrary code -- only load model files you saved yourself
model=joblib.load("adaboost_model5.pkl")

# Light Detection

Methods used for detecting the lights in the image: first the preprocessing, then the circle detection and the pruning of its results.

[medianBlur documentation](https://docs.opencv.org/4.x/dc/dd3/tutorial_gausian_median_blur_bilateral_filter.html)

[normalize documentation](https://docs.opencv.org/4.x/d2/de8/group__core__array.html#ga87eef7ee3970f86906d69a92cbf064bd)

[HoughCircles documentation](https://docs.opencv.org/4.x/dd/d1a/group__imgproc__feature.html\#ga47849c3be0d0406ad3ca45db65a25d2d)


In [None]:
def preprocess(img):
  """Smooth img and stretch its brighter-than-average pixels to the full range.

  Steps: median blur with a kernel size of 7, subtract the image mean
  (truncated to an int), set everything below zero to zero, then min-max
  normalise the result to 0..255.

  Returns the processed image as uint8.
  """
  # hough works better on smoothed image
  smoothed = cv.medianBlur(img, 7)
  mean_level = int(smoothed.mean())
  # subtract in float so below-average pixels become negative instead of wrapping around
  shifted = smoothed.astype(np.float32) - mean_level
  # clipping at 0 removes the negative (below-average) values
  above_mean = np.clip(shifted, 0, 255).astype(np.uint8)
  # spread the remaining values over the whole 0..255 range
  return cv.normalize(above_mean, None, 0, 255, cv.NORM_MINMAX)

# documentation https://docs.opencv.org/4.x/dd/d1a/group__imgproc__feature.html#ga47849c3be0d0406ad3ca45db65a25d2d
def houghAndValidation(img,orig):
  """Detect circles in img and keep the ones positioned where lights are expected.

  Args:
    img: single-channel image to search; it is run through preprocess() first.
    orig: image the accepted circles are drawn on. Only a copy is modified.

  Returns:
    (annotated, count): the copy of orig with every accepted circle drawn on
    it, and the number of accepted circles. When HoughCircles finds no circle
    at all, ([], 0) is returned instead.
  """
  # work on copies so neither argument is modified
  annotated = orig.copy()
  processed = preprocess(img.copy())
  h, w = processed.shape
  # circle radius needs to be at least 1/10 and at most 1/4 of the image width
  minR = int(w / 10)
  maxR = int(w / 4)
  # get circles
  circles = cv.HoughCircles(processed, cv.HOUGH_GRADIENT, 1.5, 30,
                            param1=50, param2=20, minRadius=minR, maxRadius=maxR)
  if circles is None:
    return [],0
  # Round to whole pixels as *signed* ints: with unsigned values, centre - radius
  # wraps around to a huge number for circles that cross the left or top border,
  # and such circles would then pass the position checks below.
  rounded = np.around(circles[0]).astype(int)
  count = 0
  # prune results
  for c in rounded:
    x, y, r = int(c[0]), int(c[1]), int(c[2])
    # get edge pixels
    right_edge = x + r
    left_edge = x - r
    top_edge = y - r
    bottom_edge = y + r
    # horizontally: entirely inside the centre half of the image
    in_centre_half = right_edge < (w * (3/4)) and left_edge > (w//4)
    # vertically: entirely in the top half or entirely in the bottom half
    in_one_half = bottom_edge < (h/2) or top_edge > (h/2)
    if in_centre_half and in_one_half:
      # draw circle and increase count
      cv.circle(annotated, (x, y), r, (0, 255, 0), 1)
      count += 1
  return annotated,count

# Testing

Calls the methods above on an array of images: each image is first checked for a stop sign and, if one is found, then checked for lights.

In [None]:
# takes array of images and sorts them into images with stop signs and without stop signs, and also returns an array with all the images with lights in them with the lights circled
def test(imgs):
  """Classify each image as stop sign / no stop sign and look for lights in the positives.

  Args:
    imgs: iterable of images. Each one is resized to 64x64. For an image with
      colour channels, channel 2 (red) is used for classification and light
      detection; an image that is already single-channel is used as it is.

  Returns:
    (pos_images, lights, neg_images):
      pos_images: images the model classified as 1; when lights were found
        the entry is the copy with the circles drawn on it.
      lights: only the circled copies of images in which lights were found.
      neg_images: the resized images the model did not classify as 1.
  """
  pos_images=[]
  lights=[]
  neg_images=[]
  for img in imgs:
    # process image
    img = cv.resize(img, (64, 64))
    # a 2-D image has no channel axis to index, so it is taken as the channel itself
    red = img if img.ndim == 2 else img[:, :, 2]
    # extract features (the model expects one row per sample)
    feat = extract_features(red).reshape(1, -1)
    if model.predict(feat)[0] != 1:
      # add img to negatives
      neg_images.append(img)
      continue
    print("found")
    # get lights
    hw, count = houghAndValidation(red, img)
    if count > 0:  # lights were found
      print("STOP")
      lights.append(hw)
      pos_images.append(hw)
    else:  # no lights: keep the plain resized image as a positive
      pos_images.append(img)
  return pos_images,lights,neg_images

Calling testing method on array of all windows extracted from dataset.

In [None]:
# run the detector over every window extracted from the dataset (positives followed by negatives)
test_images = np.concatenate([pos_images, neg_images])

pos, lights, neg = test(test_images)
# one count per line: stop sign images, non-stop-sign images, images with lights
print(len(pos), len(neg), len(lights), sep="\n")