# ***圖片準確率測試集 貨櫃編號偵測***

In [7]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from package.function import *
import easyocr,cv2,re

# 初始化 easyOCR
reader = easyocr.Reader(['en'], gpu=False)

def remove_non_alphanumeric_and_uppercase(s):
    """Return ``s`` upper-cased with every character outside A-Z, a-z and 0-9 removed.

    Args:
        s: Text to normalise (for example one OCR text segment).

    Returns:
        The upper-cased string containing only ASCII letters and digits.
    """
    return re.sub(r'[^a-zA-Z0-9]', '', s).upper()

def visualize_detections(results, filepath):
    """Draw every detection box with its OCR text and save one annotated image per result.

    Args:
        results: Iterable of detection results. Each item must expose ``orig_img``
            (converted here with ``cv2.COLOR_BGR2RGB``) and ``boxes.data``, whose
            rows start with ``x1, y1, x2, y2``.
        filepath: Sequence of output image paths, indexed in step with ``results``.

    Notes:
        * Only the first OCR segment of each crop is shown, cut to 11 characters
          (presumably the container-number length -- confirm).
        * An OCR failure is printed and rendered as ``'OCR Error'`` so that one bad
          crop does not abort the whole batch.
        * The directory of each output path is created when missing; previously
          ``savefig`` raised if the folder did not exist.
    """
    import os  # local import so this block does not depend on the notebook's import cell

    for i, result in enumerate(results):
        img_rgb = cv2.cvtColor(result.orig_img, cv2.COLOR_BGR2RGB)
        # Explicit figure/axes handles instead of the implicit pyplot "current figure".
        fig, ax = plt.subplots(figsize=(12, 8))
        try:
            ax.imshow(img_rgb)

            for box in result.boxes.data.cpu():
                # Plain Python floats keep matplotlib away from tensor scalars.
                x1, y1, x2, y2 = (float(v) for v in box[:4])

                ax.add_patch(
                    Rectangle((x1, y1), x2 - x1, y2 - y1,
                              linewidth=2, edgecolor='red', facecolor='none')
                )

                crop_img = img_rgb[int(y1):int(y2), int(x1):int(x2)]

                try:
                    segments = [
                        remove_non_alphanumeric_and_uppercase(res[1])
                        for res in reader.readtext(crop_img)
                    ]
                    # Slicing is a no-op for strings of 11 characters or fewer.
                    recognized_text = segments[0][:11] if segments else 'No Text'
                except Exception as e:
                    print("OCR failed:", e)
                    recognized_text = 'OCR Error'

                ax.text(x1, y2, f'{recognized_text}', color='yellow', fontsize=12,
                        bbox=dict(facecolor='blue', alpha=0.5))

            ax.axis('off')

            out_dir = os.path.dirname(filepath[i])
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            fig.savefig(filepath[i])
        finally:
            # Always release the figure, even when drawing or saving fails.
            plt.close(fig)


Using CPU. Note: This module is much faster with a GPU.


In [8]:
# Image prediction
from ultralytics import YOLO
# Load the best checkpoint and run detection on every image of the accuracy test set.
model = YOLO("best_model/YOLO.pt")

folderpath = '資料集/圖片準確率測試集/'
# find_filename comes from package.function; element [0] of its return value is
# used as the list of file names.
filenames = find_filename(folder_path=folderpath,File_extension='jpg')[0]
file = [f'{folderpath}{filename}' for filename in filenames]
results = model.predict(source=file, mode='predict')

# Annotated copies are written to the object_yolo/ subfolder under the same file names.
output_paths = [f'{folderpath}object_yolo/{filename}' for filename in filenames]
visualize_detections(results, output_paths)


0: 416x416 1 container, 4.8ms
1: 416x416 2 containers, 4.8ms
2: 416x416 (no detections), 4.8ms
3: 416x416 1 container, 4.8ms
4: 416x416 1 container, 4.8ms
5: 416x416 1 container, 4.8ms
6: 416x416 1 container, 4.8ms
7: 416x416 1 container, 4.8ms
8: 416x416 1 container, 4.8ms
9: 416x416 1 container, 4.8ms
10: 416x416 1 container, 4.8ms
11: 416x416 1 container, 4.8ms
12: 416x416 1 container, 4.8ms
13: 416x416 1 container, 4.8ms
14: 416x416 1 container, 4.8ms
15: 416x416 2 containers, 4.8ms
16: 416x416 1 container, 4.8ms
17: 416x416 (no detections), 4.8ms
18: 416x416 1 container, 4.8ms
19: 416x416 1 container, 4.8ms
20: 416x416 1 container, 4.8ms
21: 416x416 1 container, 4.8ms
22: 416x416 2 containers, 4.8ms
23: 416x416 1 container, 4.8ms
24: 416x416 1 container, 4.8ms
25: 416x416 2 containers, 4.8ms
26: 416x416 1 container, 4.8ms
27: 416x416 1 container, 4.8ms
28: 416x416 2 containers, 4.8ms
29: 416x416 2 containers, 4.8ms
30: 416x416 1 container, 4.8ms
31: 416x416 1 container, 4.8ms
32:

# ***影片 貨櫃編號偵測***

In [9]:
import cv2,re
import easyocr

reader = easyocr.Reader(['en'], gpu=False)

def remove_non_alphanumeric_and_uppercase(s):
    """Strip everything except ASCII letters and digits from ``s`` and upper-case the rest.

    Args:
        s: Text to normalise.

    Returns:
        The cleaned, upper-cased string.
    """
    # Drop every character that is not an English letter or a digit.
    strip_non_alnum = re.compile(r'[^a-zA-Z0-9]').sub
    alnum_only = strip_non_alnum('', s)
    return alnum_only.upper()

def process_video(video_path,save_path, model):
    """Run detection and OCR on every frame of a video and write the annotated copy.

    Args:
        video_path: Path of the input video, opened with ``cv2.VideoCapture``.
        save_path: Path of the XVID-encoded output video. Its directory is created
            when missing (``cv2.VideoWriter`` does not create it).
        model: Detector exposing ``predict``; its result is handed to
            ``draw_detections`` together with the frame.

    Notes:
        * Frames are passed to ``model.predict`` exactly as decoded by OpenCV (BGR).
          Ultralytics documents numpy inputs as HWC/BGR, so the former BGR->RGB
          conversion fed the model channel-swapped images.
        * The frame rate is kept as a float; truncating e.g. 29.97 to 29 changed
          the playback speed of the output.
        * If the video cannot be opened a message is printed and nothing is written.
    """
    import os  # local import so this block does not depend on the notebook's import cell

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        print("Cannot open video:", video_path)
        cap.release()
        return

    out = None
    try:
        fps = cap.get(cv2.CAP_PROP_FPS)
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

        fourcc = cv2.VideoWriter_fourcc(*'XVID')
        out = cv2.VideoWriter(save_path, fourcc, fps, (width, height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model.predict([frame], mode='predict')

            output_frame = draw_detections(frame, results)
            out.write(output_frame)
    finally:
        # Release capture and writer even if prediction or drawing raises, so the
        # output file is finalised and the handles are not leaked.
        cap.release()
        if out is not None:
            out.release()

    cv2.destroyAllWindows()

def draw_detections(frame, results):
    """Draw each detected box and its OCR text onto ``frame`` and return the frame.

    Args:
        frame: Image the boxes are drawn on (modified in place). It is sliced as
            ``frame[y1:y2, x1:x2]`` to obtain the OCR crop.
        results: Sequence of detection results; only ``results[0].boxes.data`` is
            read, and the first four values of each row are used as ``x1, y1, x2, y2``.
            A falsy value leaves the frame untouched.

    Returns:
        The same ``frame`` object, annotated.

    Notes:
        Each OCR segment is cleaned individually and the non-empty ones are joined
        with a space. Previously the whole ``readtext`` result list was passed to the
        string-cleaning helper, which raised inside the ``try`` and labelled every
        box ``'OCR Error'``.
    """
    if not results:
        return frame

    for box in results[0].boxes.data:
        x1, y1, x2, y2 = box[:4].int().tolist()

        # Run OCR before drawing, so the red border painted below is not part of
        # the pixels handed to the OCR reader.
        crop_img = frame[y1:y2, x1:x2]

        try:
            segments = [
                remove_non_alphanumeric_and_uppercase(res[1])
                for res in reader.readtext(crop_img)
            ]
            recognized_text = ' '.join(seg for seg in segments if seg)
        except Exception as e:
            print("OCR failed:", e)
            recognized_text = 'OCR Error'

        cv2.rectangle(frame, (x1, y1), (x2, y2), (0, 0, 255), 2)
        cv2.putText(frame, recognized_text, (x1, y2 + 20), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 255, 0), 2)

    return frame

Using CPU. Note: This module is much faster with a GPU.


In [10]:
# Initialise the YOLO model from the best checkpoint.
model = YOLO("best_model/YOLO.pt")
# Element [0] of find_filename's return value is used as the list of .avi file names.
filenames = find_filename(folder_path='資料集/影片資料集/',File_extension='avi')[0]
for filename in filenames:
    # Source video, and the annotated copy written to output_yolo/ under the same name.
    video_path = f'資料集/影片資料集/{filename}'
    save_path = f'資料集/影片資料集/output_yolo/{filename}'
    process_video(video_path=video_path, save_path=save_path, model=model)


0: 256x416 (no detections), 96.2ms
Speed: 2.0ms preprocess, 96.2ms inference, 2.3ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 16.4ms
Speed: 2.5ms preprocess, 16.4ms inference, 0.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 15.3ms
Speed: 1.0ms preprocess, 15.3ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 13.4ms
Speed: 1.2ms preprocess, 13.4ms inference, 2.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 14.7ms
Speed: 2.0ms preprocess, 14.7ms inference, 1.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 13.0ms
Speed: 1.0ms preprocess, 13.0ms inference, 2.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 13.3ms
Speed: 1.0ms preprocess, 13.3ms inference, 2.0ms postprocess per image at shape (1, 3, 256, 416)

0: 256x416 (no detections), 14.6ms
Speed: 2.5ms preprocess, 14.6ms i

# ***評估 測試集性能***

In [11]:
# Model performance evaluation
from ultralytics import YOLO
from package.function import *
# Run the trained checkpoint over every image of the test set.
# NOTE(review): this cell loads runs/detect/train/weights/best.pt while the earlier
# prediction cells load best_model/YOLO.pt -- confirm the two are meant to differ.
model = YOLO("runs/detect/train/weights/best.pt")

folderpath = '資料集/貨櫃資料集/測試集/'
# Element [0] of find_filename's return value is used as the list of .jpg file names.
filenames = find_filename(folder_path=folderpath,File_extension='jpg')[0]
file = [f'{folderpath}{filename}' for filename in filenames]
results = model.predict(source=file, mode='predict')




  return F.conv2d(input, weight, bias, self.stride,


0: 416x416 1 container, 2.6ms
1: 416x416 2 containers, 2.6ms
2: 416x416 2 containers, 2.6ms
3: 416x416 2 containers, 2.6ms
4: 416x416 1 container, 2.6ms
5: 416x416 (no detections), 2.6ms
6: 416x416 (no detections), 2.6ms
7: 416x416 (no detections), 2.6ms
8: 416x416 (no detections), 2.6ms
9: 416x416 1 container, 2.6ms
10: 416x416 1 container, 2.6ms
11: 416x416 1 container, 2.6ms
12: 416x416 1 container, 2.6ms
13: 416x416 1 container, 2.6ms
14: 416x416 1 container, 2.6ms
15: 416x416 1 container, 2.6ms
16: 416x416 1 container, 2.6ms
17: 416x416 1 container, 2.6ms
18: 416x416 1 container, 2.6ms
19: 416x416 1 container, 2.6ms
20: 416x416 1 container, 2.6ms
21: 416x416 1 container, 2.6ms
22: 416x416 1 container, 2.6ms
23: 416x416 (no detections), 2.6ms
24: 416x416 (no detections), 2.6ms
25: 416x416 1 container, 2.6ms
26: 416x416 1 container, 2.6ms
27: 416x416 1 container, 2.6ms
28: 416x416 (no detections), 2.6ms
29: 416x416 1 container, 2.6ms
30: 416x416 1 container, 2.6ms
31: 416x416 1 cont

In [12]:
# Echo the raw prediction list.
# NOTE(review): the recorded repr embeds each result's full orig_img array, so this
# output is very large -- consider displaying a short summary instead.
results

[ultralytics.engine.results.Results object with attributes:
 
 boxes: ultralytics.engine.results.Boxes object
 keypoints: None
 masks: None
 names: {0: 'container'}
 obb: None
 orig_img: array([[[106, 107, 103],
         [106, 107, 103],
         [106, 107, 103],
         ...,
         [ 45,  34,  26],
         [ 65,  56,  47],
         [ 72,  63,  54]],
 
        [[106, 107, 103],
         [106, 107, 103],
         [106, 107, 103],
         ...,
         [ 46,  35,  27],
         [ 65,  56,  47],
         [ 72,  63,  54]],
 
        [[106, 107, 103],
         [106, 107, 103],
         [106, 107, 103],
         ...,
         [ 46,  35,  27],
         [ 66,  57,  48],
         [ 72,  63,  54]],
 
        ...,
 
        [[  0, 231, 253],
         [  0, 231, 253],
         [  0, 231, 253],
         ...,
         [ 72, 169, 197],
         [ 72, 169, 197],
         [ 72, 169, 197]],
 
        [[  0, 231, 252],
         [  0, 231, 252],
         [  0, 231, 252],
         ...,
         [ 71, 

In [13]:
from package.function import *
# Convert the model predictions with the project helper (COCO-style JSON judging by
# the helper name -- TODO confirm the exact structure in package.function).
predictions = predictions_to_coco_json(results)

# Build the ground truth from the folder holding the test-set XML annotations.
xml_path = '資料集/貨櫃資料集/測試集_xml/'
ground_truths = xmls_to_coco_json(xml_path)

In [14]:
# Score the predictions against the ground truth; in the recorded run this call
# prints a COCO bbox evaluation summary and returns the metric dict.
metric = compute_metrics(predictions, ground_truths)

creating index...
index created!
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.10s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.476
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.779
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.572
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.387
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.521
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.536
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.541
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.541
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 

In [15]:
# Display the metric dict returned by compute_metrics.
# NOTE(review): in the recorded output 'Precision' (0.476) equals the printed COCO
# AP@[IoU=0.50:0.95] value and 'Recall' (0.541) equals AR@maxDets=100, while
# 'mAP50-95' (0.235) matches none of the printed lines -- verify how compute_metrics
# maps the COCO statistics to these keys.
metric

{'mAP50': 0.7786285892336484,
 'mAP50-95': 0.23516055877991432,
 'Precision': 0.47648893712884866,
 'Recall': 0.540662251655629,
 'F1-Score': 0.5065512078788267}