<a href="https://colab.research.google.com/github/ngminh124/traffic-sign-detection/blob/main/TrafficSignDetection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [56]:
from google.colab import files
from google.colab import drive
import os

# Gắn kết Google Drive trước để kiểm tra dữ liệu
try:
    drive.mount('/content/drive', force_remount=True)
except Exception as e:
    print(f"Lỗi khi gắn kết Google Drive: {e}")
    raise

# Kiểm tra xem dữ liệu có tồn tại trong Drive hay không
dataset_path = '/content/drive/MyDrive/road-sign-detection'

if not os.path.exists(dataset_path):
    print("Không tìm thấy dữ liệu trong Drive. Đang tải và giải nén...")

    # Tải lên khóa API Kaggle
    try:
        uploaded = files.upload()
        if 'kaggle.json' not in uploaded:
            raise FileNotFoundError("Vui lòng tải lên tệp kaggle.json.")
    except Exception as e:
        print(f"Lỗi khi tải lên khóa API Kaggle: {e}")
        raise

    # Thiết lập Kaggle
    !mkdir -p ~/.kaggle
    !cp kaggle.json ~/.kaggle/
    !chmod 600 ~/.kaggle/kaggle.json
    !kaggle --version

    # Tải dữ liệu từ Kaggle
    !kaggle datasets download -d andrewmvd/road-sign-detection -p /content/

    # Xóa các thư mục annotations và images nếu đã tồn tại
    !rm -rf /content/annotations /content/images

    # Giải nén dữ liệu
    !unzip /content/road-sign-detection.zip -d /content/

    # Sao chép dữ liệu vào Drive
    !mkdir -p /content/drive/MyDrive/road-sign-detection/
else:
    print("Dữ liệu đã có sẵn trong Drive. Không cần tải lại.")
if not os.path.exists('/content/images') or not os.path.exists('/content/annotations'):
    print("Không tìm thấy thư mục images hoặc annotations trong /content/. Đang sao chép từ Drive...")
    if os.path.exists(os.path.join(dataset_path, 'images')) and os.path.exists(os.path.join(dataset_path, 'annotations')):
        !cp -r /content/drive/MyDrive/road-sign-detection/images /content/
        !cp -r /content/drive/MyDrive/road-sign-detection/annotations /content/
        print("Đã sao chép dữ liệu từ Drive sang /content/.")
    else:
        print("Lỗi: Không tìm thấy thư mục images hoặc annotations trong Drive. Vui lòng kiểm tra lại.")
else:
    print("Thư mục images và annotations đã tồn tại trong /content/. Bỏ qua bước sao chép.")

Mounted at /content/drive
Dữ liệu đã có sẵn trong Drive. Không cần tải lại.
Thư mục images và annotations đã tồn tại trong /content/. Bỏ qua bước sao chép.


In [57]:
import time
import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xml.etree.ElementTree as ET

from skimage.transform import resize
from skimage import feature
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [58]:
# đọc 1 file xml cụ thể sử dụng ElementTree

xml_file = '/content/annotations/road0.xml'
tree = ET.parse(xml_file)
root = tree.getroot()
folder_name = root.find('folder').text
filename = root.find('filename').text
print (f'Folder: {folder_name}')
print(f'Filename: {filename}')

# đọc tất cả các object có trong thẻ object của file xml đó

for obj in root.findall('object'):
  classname=obj.find('name').text
  xmin=int(obj.find('bndbox/xmin').text)
  ymin=int(obj.find('bndbox/ymin').text)
  xmax=int(obj.find('bndbox/xmax').text)
  ymax=int(obj.find('bndbox/ymax').text)
print(f'Class name: {classname}')
print(f'Bounding box: {(xmin, ymin, xmax, ymax)}')

Folder: images
Filename: road0.png
Class name: trafficlight
Bounding box: (98, 62, 208, 232)


In [59]:
# đọc toàn bộ datasets
# đọc qua các file annotations bằng hàm listdr của module os
# đọc ảnh thông qua thư viện opencv
annotations_dir = '/content/annotations'
img_dir = '/content/images'
img_lst = []
label_lst = []
for xml_file in os.listdir (annotations_dir):

  xml_filepath = os.path.join(annotations_dir, xml_file)

  tree=ET.parse(xml_filepath)

  root=tree.getroot()

  folder = root.find('folder').text

  img_filename = root.find('filename').text

  img_filepath = os.path.join(img_dir, img_filename)

  img = cv2.imread(img_filepath)
  for obj in root.findall('object'):
      classname = obj.find('name').text
      if classname == 'trafficlight':
          continue
      xmin = int(obj.find('bndbox/xmin').text)
      ymin= int(obj.find('bndbox/ymin').text)
      xmax = int(obj.find('bndbox/xmax').text)
      ymax = int(obj.find('bndbox/ymax').text)
      object_img = img[ymin:ymax, xmin: xmax]
      img_lst.append(object_img)
      label_lst.append(classname)
print(f'Number of images in datasets: {len(img_lst)}')
print(f'Classes names: {list(set(label_lst))}')

Number of images in datasets: 1074
Classes names: ['crosswalk', 'stop', 'speedlimit']


In [60]:
print(f'original shape: {img_lst[0].shape}')

original shape: (38, 38, 3)


In [61]:
# chuyển shape sử dụng flatten của ảnh gốc ban đầu từ vector 3D thành vector 1D để phục vụ cho việc train theo model SVM
flattened_img =img_lst[0].flatten()
print(f'flattend shape: {flattened_img.shape}')

flattend shape: (4332,)


In [62]:
# sử dụng HOG thay vì flatten
def preprocess_img(img):
  if len(img.shape) > 2:
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
  img=img.astype(np.float32)
  resized_img = resize(
      img,
      output_shape=(32, 32),
      anti_aliasing=True
  )
  hog_feature = feature.hog(
      resized_img,
      orientations=9,
      pixels_per_cell=(8, 8),
      cells_per_block=(2, 2),
      transform_sqrt=True,
      block_norm="L2",
      feature_vector=True
  )
  return hog_feature.reshape(1,-1)

In [63]:
img_features_lst =[]
for img in img_lst:
  hog_feature = preprocess_img(img)
  img_features_lst.append(hog_feature)
img_features= np.array(img_features_lst)
print("X shape: ")
print(img_features.shape)

X shape: 
(1074, 1, 324)


In [64]:
# encode label thành dạng số để phục vụ cho việc train
label_encoder= LabelEncoder()
encoded_labels= label_encoder.fit_transform(label_lst)
print(label_encoder.classes_)
encoded_labels

['crosswalk' 'speedlimit' 'stop']


array([1, 0, 1, ..., 1, 1, 1])

In [65]:
random_state=0
test_size=0.3
is_shuffle= True
X_train, X_val, y_train, y_val = train_test_split(
    img_features, encoded_labels,
    test_size=test_size,
    random_state=random_state,
    shuffle=is_shuffle
)

In [68]:

# Sửa lỗi bằng cách flatten mỗi sample
X_train_fixed = []
for sample in X_train:
    if len(sample.shape) == 2:  # Nếu sample là 2D
        X_train_fixed.append(sample.flatten())  # Flatten thành 1D
    else:
        X_train_fixed.append(sample)

X_train_fixed = np.array(X_train_fixed)

# Tương tự cho X_val
X_val_fixed = []
for sample in X_val:
    if len(sample.shape) == 2:
        X_val_fixed.append(sample.flatten())
    else:
        X_val_fixed.append(sample)

X_val_fixed = np.array(X_val_fixed)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_fixed)
X_val = scaler.transform(X_val_fixed)

In [69]:
# train và đánh giá model
clf= SVC(
    kernel='rbf',
    random_state=random_state,
    probability=True,
    C=0.5

)
clf.fit(X_train, y_train)
y_pred=clf.predict(X_val)
score=accuracy_score(y_pred, y_val)
print(f'Evaluation results on val set')
print(f'Accurancy: {score}')

Evaluation results on val set
Accurancy: 0.9721362229102167


In [71]:
input_img = img_lst[1]
normalized_img = preprocess_img(input_img)

# Vì preprocess_img() đã trả về shape (1, n_features), không cần đặt trong []
y_pred = clf.predict(normalized_img)[0]
print(f'Normal prediction: {y_pred}')

y_pred_prob = clf.predict_proba(normalized_img)
prediction = np.argmax(y_pred_prob)
y_pred_prob = [f'{p:.10f}' for p in y_pred_prob[0]]
print(f'Probability of each class: {y_pred_prob}')
print(f'Class with highest probability: {prediction}')

Normal prediction: 1
Probability of each class: ['0.0000000076', '0.9999047308', '0.0000952616']
Class with highest probability: 1


In [72]:
# lấy danh sách các cửa sổ cắt được
def sliding_window(img, window_sizes, stride, scale_factor):
    img_height, img_width = img.shape[:2]
    windows = []
    for window_size in window_sizes:
        window_width, window_height = window_size
        for ymin in range(0, img_height - window_height + 1, stride):
            for xmin in range(0, img_width-window_width + 1, stride):
                xmax = xmin + window_width
                ymax = ymin + window_height
                windows.append([xmin, ymin, xmax, ymax])
    return windows

In [None]:
# scale ảnh để bắt được các object nhỏ so với kích thước ảnh
def pyramid(img, scale=0.8, min_size=(32, 32)):
    acc_scale = 1.0
    pyramid_imgs = [(img, acc_scale)]
    i = 0
    while True:
        acc_scale = acc_scale * scale
        h = int(img.shape[0] * acc_scale)
        w = int(img.shape[1] * acc_scale)
        if h<min_size[1] or w < min_size[0]:
            break
        img = cv2.resize(img, (w, h))
        pyramid_imgs.append((img, acc_scale * (scale ** i)))
        i += 1
    return pyramid_imgs

In [107]:
def visualize_bbox(img, bboxes, label_encoder):
    # Tạo bản copy để không thay đổi ảnh gốc
    img_viz = img.copy()
    # Chuyển sang RGB để vẽ (nếu ảnh đầu vào là BGR)
    img_viz = cv2.cvtColor(img_viz, cv2.COLOR_BGR2RGB)

    for box in bboxes:
        xmin, ymin, xmax, ymax, predict_id, conf_score = box
        # Vẽ bounding box
        cv2.rectangle(img_viz, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

        # Lấy tên class
        try:
            classname = label_encoder.inverse_transform([predict_id])[0]
        except:
            classname = str(predict_id)

        # Tạo label text
        label = f"{classname} {conf_score:.2f}"

        # Tính toán kích thước text
        (w, h), _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.6, 1)

        # Vẽ nền cho text
        cv2.rectangle(img_viz, (xmin, ymin - 20), (xmin + w, ymin), (0, 255, 0), -1)

        # Vẽ text
        cv2.putText(img_viz, label, (xmin, ymin - 5),
                   cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 0), 1)

    return img_viz

In [108]:
preprocessed_img = preprocess_img(object_img)
print(f"Shape of preprocessed_img: {preprocessed_img.shape}")

Shape of preprocessed_img: (1, 324)


In [109]:
# sử dụng pyramid images để có thể bắt được các biển báo với kích thước nhỏ so với ảnh gốc ban đầu
for pyramid_img_info in pyramid_imgs:
    pyramid_img, scale_factor = pyramid_img_info
    window_lst = sliding_window(
        pyramid_img,
        window_sizes=window_sizes,
        stride=stride,
        scale_factor=scale_factor
    )
    for window in window_lst:
      xmin, ymin, xmax, ymax = window
      object_img = pyramid_img[ymin:ymax, xmin:xmax]
      preprocessed_img = preprocess_img(object_img)  # Trả về (1, n_features)

      # Không cần đặt trong [] vì đã là 2D
      normalized_img = scaler.transform(preprocessed_img)[0]
      decision = clf.predict_proba(preprocessed_img)[0]

      if np.all(decision < conf_threshold):
          continue
      else:
          predict_id = np.argmax(decision)
          conf_score = decision[predict_id]
          xmin = int(xmin / scale_factor)
          ymin = int(ymin / scale_factor)
          xmax = int(xmax / scale_factor)
          ymax = int(ymax / scale_factor)
          bboxes.append([xmin, ymin, xmax, ymax, predict_id, conf_score])

In [110]:
# tính chỉ số iou để loại bỏ các bounding box bị trùng lặp với 1 biển báo
def iou_bbox(box1, box2):
    b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3]
    b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3]
#Step 1: Tính diện tích phần giao nhau
    x1 = max(b1_x1, b2_x1)
    y1 = max(b1_y1, b2_y1)
    x2 = min(b1_x2, b2_x2)
    y2 = min(b1_y2, b2_y2)
    inter = max((x2-x1), 0)*max((y2-y1), 0)
#Step 2: Tính diện tích phần hợp nhau
    box1_area = abs((b1_x2 - b1_x1) * (b1_y2 - b1_y1))
    box2_area = abs((b2_x2 - b2_x1) * (b2_y2 - b2_y1))
    union = float(box1_area + box2_area - inter)
    iou=inter / union
    return iou

In [111]:
def compute_iou(box, boxes, area, areas):
    xx1 = np.maximum(box[0], boxes[:, 0])
    yy1 = np.maximum(box[1], boxes[:, 1])
    xx2 = np.minimum(box[2], boxes[:, 2])
    yy2 = np.minimum(box[3], boxes[:, 3])

    w = np.maximum(0, xx2 - xx1)
    h = np.maximum(0, yy2 - yy1)

    inter = w * h
    union = area + areas - inter

    return inter / union

In [112]:
# sử dụng thuật toán non maximun suppression nhằm loại bỏ các bounding box có độ tin cậy thấp và hoặc bị trùng lặp dựa trên ngương iou đã được tính
def nms (bboxes, iou_threshold):
  if not bboxes:
    return []
  scores = np.array([bbox[5] for bbox in bboxes])
  sorted_indices = np.argsort(scores) [::-1]
  xmin = np.array([bbox[0] for bbox in bboxes])
  ymin = np.array([bbox[1] for bbox in bboxes])
  xmax = np.array([bbox[2] for bbox in bboxes])
  ymax = np.array([bbox[3] for bbox in bboxes])
  areas=(xmax - xmin + 1) * (ymax - ymin + 1)
  keep = []
  while sorted_indices.size > 0:
    i=sorted_indices[0]
    keep.append(i)
    iou=compute_iou(
      [xmin[i], ymin[i], xmax[i], ymax[i]],
      np.array(
        [
          xmin [sorted_indices [1:]],
          ymin [sorted_indices [1:]],
          xmax [sorted_indices [1:]],
          ymax [sorted_indices [1:]]]
      ).T,
      areas[i],
      areas[sorted_indices[1:]]
    )
    idx_to_keep=np.where(iou <= iou_threshold)[0]
    sorted_indices=sorted_indices [idx_to_keep + 1]
  return [bboxes[i] for i in keep]

In [119]:
import os
import time
import cv2
import numpy as np

# Tạo thư mục output nếu chưa có
output_dir = '/content/output'
os.makedirs(output_dir, exist_ok=True)

# Configuration
img_dir = '/content/images'
img_filename_lst = os.listdir(img_dir)[:20]
conf_threshold = 0.95
iou_threshold = 0.5
stride = 12
window_sizes = [
    (32, 32),
    (64, 64),
    (128, 128)
]

# Process each image
for img_filename in img_filename_lst:
    start_time = time.time()
    img_filepath = os.path.join(img_dir, img_filename)
    print(f"Processing: {img_filepath}")

    bboxes = []
    img = cv2.imread(img_filepath)
    if img is None:
        print(f"Could not read image: {img_filename}")
        continue

    pyramid_imgs = pyramid(img)

    for pyramid_img_info in pyramid_imgs:
        pyramid_img, scale_factor = pyramid_img_info
        window_lst = sliding_window(
            pyramid_img,
            window_sizes=window_sizes,
            stride=stride,
            scale_factor=scale_factor
        )

        for window in window_lst:
            xmin, ymin, xmax, ymax = window
            object_img = pyramid_img[ymin:ymax, xmin:xmax]

            if object_img.size == 0:
                continue  # Bỏ qua nếu ảnh nhỏ hơn window

            preprocessed_img = preprocess_img(object_img)
            normalized_img = scaler.transform(preprocessed_img)
            decision = clf.predict_proba(normalized_img)[0]

            if np.all(decision < conf_threshold):
                continue
            else:
                predict_id = np.argmax(decision)
                conf_score = decision[predict_id]

                # Scale coordinates back to original image
                xmin = int(xmin / scale_factor)
                ymin = int(ymin / scale_factor)
                xmax = int(xmax / scale_factor)
                ymax = int(ymax / scale_factor)

                bboxes.append(
                    (xmin, ymin, xmax, ymax, predict_id, conf_score)
                )

    # Thông báo số bbox trước khi NMS
    print(f"[INFO] Before NMS: {len(bboxes)} bboxes found in {img_filename}")

    # Apply Non-Maximum Suppression
    bboxes = nms(bboxes, iou_threshold)
    print(f"[INFO] After NMS: {len(bboxes)} bboxes kept")

    # Visualize và lưu ảnh nếu có bbox
    if len(bboxes) > 0:
        img_with_bbox = visualize_bbox(img, bboxes, label_encoder)
        output_path = os.path.join(output_dir, img_filename)
        # Chuyển màu về BGR trước khi lưu
        cv2.imwrite(output_path, cv2.cvtColor(img_with_bbox, cv2.COLOR_RGB2BGR))
        print(f"[SAVED] Result saved to {output_path}")
    else:
        print(f"[SKIPPED] No object detected in {img_filename}")

    print('Time process:', time.time() - start_time)


Processing: /content/images/road176.png
[INFO] Before NMS: 0 bboxes found in road176.png
[INFO] After NMS: 0 bboxes kept
[SKIPPED] No object detected in road176.png
Time process: 2.5587360858917236
Processing: /content/images/road263.png
[INFO] Before NMS: 2 bboxes found in road263.png
[INFO] After NMS: 1 bboxes kept
[SAVED] Result saved to /content/output/road263.png
Time process: 2.602519989013672
Processing: /content/images/road753.png
[INFO] Before NMS: 2 bboxes found in road753.png
[INFO] After NMS: 2 bboxes kept
[SAVED] Result saved to /content/output/road753.png
Time process: 3.285290002822876
Processing: /content/images/road373.png
[INFO] Before NMS: 0 bboxes found in road373.png
[INFO] After NMS: 0 bboxes kept
[SKIPPED] No object detected in road373.png
Time process: 2.7092678546905518
Processing: /content/images/road45.png
[INFO] Before NMS: 0 bboxes found in road45.png
[INFO] After NMS: 0 bboxes kept
[SKIPPED] No object detected in road45.png
Time process: 2.273534059524536
