## EDA (Exploratory Data Analysis)

In [None]:
import os
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
import cv2
import random
from collections import defaultdict
import pandas as pd
import numpy as np

In [None]:
def show_img(img_path, xml_path):
    """Display an image with its annotated bounding boxes drawn on top.

    Prints the image height and width, then draws one rectangle and one
    class label per ``object`` element of the annotation file and shows the
    result with matplotlib.

    Args:
        img_path: Path of the image file to display.
        xml_path: Path of the annotation XML. Every ``object`` element is
            expected to contain ``name`` and ``bndbox`` (``xmin``, ``ymin``,
            ``xmax``, ``ymax``) children.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image (missing or
            unreadable file).
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Image is missing or unreadable: {img_path}")
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    h, w, _ = img.shape
    print("Image Height:", h)
    print("Image Width:", w)

    root = ET.parse(xml_path).getroot()

    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        # int(float(...)) also accepts coordinates written as "12.0",
        # matching how extract_tiles parses the same files.
        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )

        cv2.rectangle(img, (xmin, ymin), (xmax, ymax), (255, 0, 0), 2)

        # Put the label above the box; when the box touches the top of the
        # image, move the label inside the box so it stays visible.
        text_y = ymin - 10 if ymin >= 35 else ymin + 30
        cv2.putText(img, name, (xmin, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 0, 0), 2)

    plt.figure(figsize=(10, 8))
    plt.imshow(img)
    plt.axis('off')
    plt.show()

In [None]:
# Sanity check: render one sample image together with its ground-truth boxes.
show_img(
    img_path="/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/images/1000.jpg",
    xml_path="/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/annotations/1000.xml",
)

In [None]:

# Dataset locations
IMG_DIR = "/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/images"
ANN_DIR = "/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/annotations"

# Accumulators for the statistics
label_counts = defaultdict(int)   # class name -> number of boxes
object_per_image = []             # number of boxes in each annotation file
box_dims = []                     # (width, height) of every box, in pixels

# Only keep real annotation / image files: a stray non-XML file in the
# annotation folder would otherwise crash ET.parse.
ann_files = sorted(f for f in os.listdir(ANN_DIR) if f.lower().endswith(".xml"))
img_files = sorted(
    f for f in os.listdir(IMG_DIR)
    if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))
)

# Walk through every annotation file
for ann_file in ann_files:
    tree = ET.parse(os.path.join(ANN_DIR, ann_file))
    root = tree.getroot()

    objects = root.findall("object")
    object_per_image.append(len(objects))

    for obj in objects:
        label = obj.find("name").text
        label_counts[label] += 1

        bbox = obj.find("bndbox")
        # int(float(...)) tolerates coordinates stored as "12.0"
        xmin = int(float(bbox.find("xmin").text))
        ymin = int(float(bbox.find("ymin").text))
        xmax = int(float(bbox.find("xmax").text))
        ymax = int(float(bbox.find("ymax").text))

        width = xmax - xmin
        height = ymax - ymin
        box_dims.append((width, height))

# Convert to a DataFrame for easier analysis
box_df = pd.DataFrame(box_dims, columns=["width", "height"])
box_df["area"] = box_df["width"] * box_df["height"]
# Degenerate zero-height boxes yield NaN instead of an infinite ratio
box_df["aspect_ratio"] = box_df["width"] / box_df["height"].replace(0, np.nan)

# Overview (guard against an empty annotation folder)
mean_objects = sum(object_per_image) / len(object_per_image) if object_per_image else 0.0
print(f"Tổng số ảnh: {len(img_files)}")
print(f"Tổng số annotation: {len(ann_files)}")
print(f"Các nhãn: {list(label_counts.keys())}")
print(f"Số object trung bình/ảnh: {mean_objects:.2f}")

# Distribution of the number of objects per image
plt.figure(figsize=(8, 4))
plt.hist(object_per_image, bins=20, color='skyblue', edgecolor='black')
plt.title("Phân phối số object mỗi ảnh")
plt.xlabel("Số object")
plt.ylabel("Số ảnh")
plt.show()

In [None]:

# Pie chart of the class distribution
plt.figure(figsize=(10, 8))

# Order the classes by number of boxes, most frequent first
# (the sort is stable, so ties keep their first-seen order)
sorted_data = sorted(label_counts.items(), key=lambda pair: pair[1], reverse=True)
labels = [pair[0] for pair in sorted_data]
counts = [pair[1] for pair in sorted_data]

# Share of every class, in percent
total = sum(counts)
percentages = [(n / total) * 100 for n in counts]

# One viridis colour per class
colors = plt.cm.viridis(np.linspace(0, 1, len(labels)))

# Slices carry only their percentage; class names go into the legend
plt.pie(
    counts,
    labels=None,
    autopct='%1.1f%%',
    startangle=90,
    colors=colors,
    shadow=False,
    explode=[0.05] * len(labels),
)

plt.title('Phân phối các loại côn trùng (classes)', fontsize=16)

# Legend entries: class name with absolute count and percentage
legend_labels = [
    f'{name} ({n}, {share:.1f}%)'
    for name, n, share in zip(labels, counts, percentages)
]
plt.legend(legend_labels, loc='best', bbox_to_anchor=(1, 0.5), fontsize=10)

plt.axis('equal')   # keep the pie circular
plt.tight_layout()  # avoid clipping the legend
plt.show()

# Text summary of the same numbers
print(f"Tổng số đối tượng: {total}")
print(f"Số lượng classes: {len(labels)}")
print("\nPhân phối chi tiết:")
for name, n, share in zip(labels, counts, percentages):
    print(f"{name}: {n} ({share:.1f}%)")

In [None]:

def _histogram_peak(values, bins=30):
    """Return the centre of the most populated bin of a histogram of ``values``."""
    bin_counts, bin_edges = np.histogram(values, bins=bins)
    top = np.argmax(bin_counts)
    return (bin_edges[top] + bin_edges[top + 1]) / 2


# Min, max and histogram peak of the box widths
widths = box_df["width"]
w_min, w_max = widths.min(), widths.max()
w_peak_value = _histogram_peak(widths)

# Min, max and histogram peak of the box heights
heights = box_df["height"]
h_min, h_max = heights.min(), heights.max()
h_peak_value = _histogram_peak(heights)

# Side-by-side histograms: widths on the left, heights on the right
fig, (ax_w, ax_h) = plt.subplots(1, 2, figsize=(10, 4))

ax_w.hist(widths, bins=30, color='salmon')
ax_w.set_title(f"Chiều rộng bbox\nMin: {w_min:.2f}, Max: {w_max:.2f}, Peak: {w_peak_value:.2f}")

ax_h.hist(heights, bins=30, color='seagreen')
ax_h.set_title(f"Chiều cao bbox\nMin: {h_min:.2f}, Max: {h_max:.2f}, Peak: {h_peak_value:.2f}")

plt.tight_layout()
plt.show()


## Crop Tiling

In [None]:
import os
import json
import glob
import shutil
import xml.etree.ElementTree as ET
from PIL import Image, ImageDraw
import random
import gc
import matplotlib.pyplot as plt

In [None]:
import torch
import numpy as np
from torchvision.ops import nms

In [None]:
# Source dataset and output locations
SRC_IMG_DIR = '/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/images'
SRC_ANN_DIR = '/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/annotations'
OUT_DIR = '/kaggle/working/insects_tiles'
CLASSES = ['WF', 'MR', 'NC']

# Tiling parameters (pixels) and the share of source images used for training
TILE_SIZE = 1280
OVERLAP = 100
TRAIN_SPLIT = 0.9  # 90% for training, 10% for validation

# Output tree: <OUT_DIR>/{images,labels}/{train,val}
for _kind in ('images', 'labels'):
    for _split in ('train', 'val'):
        os.makedirs(f'{OUT_DIR}/{_kind}/{_split}', exist_ok=True)

# tile file name -> metadata about the tile; filled in by extract_tiles
tile_map = {}

# class name -> integer class id, following the order of CLASSES
class_map = dict(zip(CLASSES, range(len(CLASSES))))

def convert_bbox_to_tile(xmin, ymin, xmax, ymax, tile_x, tile_y, img_w, img_h, tile_size=None):
    """Convert a box in image pixels to a normalised YOLO box inside one tile.

    The box is clipped to the part of the tile that lies inside the image,
    so a label can never extend into the padded area of a tile that
    overhangs the image border.

    Args:
        xmin, ymin, xmax, ymax: Box corners in original-image pixels.
        tile_x, tile_y: Top-left corner of the tile in original-image pixels.
        img_w, img_h: Size of the original image in pixels.
        tile_size: Side length of the square tile in pixels. Defaults to the
            module-level ``TILE_SIZE``.

    Returns:
        ``(xc, yc, w, h)`` normalised by ``tile_size``, or ``None`` when the
        box has no positive-area intersection with the visible tile region.
    """
    if tile_size is None:
        tile_size = TILE_SIZE

    # Visible region of the tile = tile rectangle intersected with the image
    region_right = min(tile_x + tile_size, img_w)
    region_bottom = min(tile_y + tile_size, img_h)

    left = max(xmin, tile_x)
    top = max(ymin, tile_y)
    right = min(xmax, region_right)
    bottom = min(ymax, region_bottom)

    # Empty or degenerate intersection: the box is not part of this tile
    if right <= left or bottom <= top:
        return None

    # Shift into tile-local coordinates
    new_xmin = left - tile_x
    new_ymin = top - tile_y
    new_xmax = right - tile_x
    new_ymax = bottom - tile_y

    xc = (new_xmin + new_xmax) / 2 / tile_size
    yc = (new_ymin + new_ymax) / 2 / tile_size
    w = (new_xmax - new_xmin) / tile_size
    h = (new_ymax - new_ymin) / tile_size

    return xc, yc, w, h

def _tile_origins(length, tile_size, stride):
    """Return the sorted, de-duplicated tile start offsets along one image axis."""
    # Offsets past the last full tile are pulled back to it; the set removes
    # the duplicates this clamping produces.
    last_origin = max(0, length - tile_size)
    return sorted({min(pos, last_origin) for pos in range(0, length, stride)})


def extract_tiles(img_path, xml_path, split):
    """Cut one annotated image into tiles and save those that contain objects.

    For every tile that intersects at least one annotated box of a known
    class, the tile image is written to ``{OUT_DIR}/images/{split}``, its
    YOLO label file to ``{OUT_DIR}/labels/{split}``, and an entry is added to
    the module-level ``tile_map``.

    Args:
        img_path: Path of the source image.
        xml_path: Path of the annotation XML belonging to the image.
        split: Name of the output sub-folder (``'train'`` or ``'val'``).

    Returns:
        Number of distinct tiles written for this image (0 when the
        annotation holds no object of a class listed in ``class_map``).
    """
    tiles_created = 0
    # splitext only strips the real extension; str.replace would also hit a
    # '.jpg' occurring in the middle of the name.
    filename = os.path.splitext(os.path.basename(img_path))[0]

    # Parse the annotation first so the image is opened only when needed
    root = ET.parse(xml_path).getroot()
    objects = []

    for obj in root.findall('object'):
        name_el = obj.find('name')
        bndbox = obj.find('bndbox')
        # Skip malformed entries instead of crashing on a missing child
        if name_el is None or name_el.text is None or bndbox is None:
            continue

        cls = name_el.text.strip()
        if cls not in class_map:
            continue

        xmin, ymin, xmax, ymax = (
            int(float(bndbox.find(tag).text))
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        objects.append((class_map[cls], xmin, ymin, xmax, ymax))

    # Nothing to label: do not even open the image
    if not objects:
        return 0

    with Image.open(img_path) as img:
        img_w, img_h = img.size
        stride = TILE_SIZE - OVERLAP

        # Unique origins: clamping inside the loop could visit the same tile
        # more than once, rewriting its files and over-counting tiles_created.
        x_origins = _tile_origins(img_w, TILE_SIZE, stride)
        y_origins = _tile_origins(img_h, TILE_SIZE, stride)

        for y in y_origins:
            for x in x_origins:
                # Collect the labels of all boxes that intersect this tile
                labels = []
                for class_id, xmin, ymin, xmax, ymax in objects:
                    bbox = convert_bbox_to_tile(xmin, ymin, xmax, ymax, x, y, img_w, img_h)
                    if bbox is not None:
                        labels.append(f"{class_id} {' '.join(f'{v:.6f}' for v in bbox)}")

                # Tiles without any object are not exported
                if not labels:
                    continue

                tile_stem = f'{filename}_tile_{y}_{x}'
                tile_filename = f'{tile_stem}.jpg'
                label_filename = f'{tile_stem}.txt'

                # Save the tile image
                tile = img.crop((x, y, x + TILE_SIZE, y + TILE_SIZE))
                tile.save(f"{OUT_DIR}/images/{split}/{tile_filename}")

                # Save its labels, one box per line
                with open(f"{OUT_DIR}/labels/{split}/{label_filename}", 'w') as f:
                    f.write('\n'.join(labels))

                # Remember where the tile came from
                tile_map[tile_filename] = {
                    'original_image': os.path.basename(img_path),
                    'tile_coord': [x, y, x + TILE_SIZE, y + TILE_SIZE],
                    'split': split
                }

                tiles_created += 1

    return tiles_created

def _process_split(images, split, label, batch_size):
    """Tile all images of one split in batches and return the number of tiles created."""
    total_tiles = 0
    skipped = 0
    # Ceiling division: an exact multiple of batch_size must not add a batch
    n_batches = (len(images) + batch_size - 1) // batch_size

    for batch_no, start in enumerate(range(0, len(images), batch_size), start=1):
        batch_tiles = 0

        for img_path in images[start:start + batch_size]:
            stem = os.path.splitext(os.path.basename(img_path))[0]
            xml_path = os.path.join(SRC_ANN_DIR, stem + '.xml')
            if not os.path.exists(xml_path):
                skipped += 1
                continue
            batch_tiles += extract_tiles(img_path, xml_path, split)

        total_tiles += batch_tiles
        print(f"Processed {label} batch {batch_no}/{n_batches}: {batch_tiles} tiles created")
        gc.collect()  # Force garbage collection between batches

    if skipped:
        print(f"Skipped {skipped} {label} images without an annotation file")

    return total_tiles


def process_images_by_split(image_files, batch_size=5):
    """Split the images into train/val sets, then tile each set.

    The split is made on whole source images (before tiling) so that tiles
    of one image never end up in both sets. It uses a private random
    generator seeded with 42, which gives the same permutation as seeding
    the global generator but leaves the global state and the caller's list
    untouched.

    Args:
        image_files: Paths of the source images.
        batch_size: Number of images processed between progress messages and
            garbage collections.

    Returns:
        ``(train_tiles, val_tiles)``: number of tiles written per split.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    # Shuffle a copy so the caller's list keeps its order
    shuffled = list(image_files)
    random.Random(42).shuffle(shuffled)

    split_idx = int(TRAIN_SPLIT * len(shuffled))
    train_images = shuffled[:split_idx]
    val_images = shuffled[split_idx:]

    print(f"Split {len(shuffled)} images into {len(train_images)} training and {len(val_images)} validation images")

    train_tiles = _process_split(train_images, 'train', 'training', batch_size)
    val_tiles = _process_split(val_images, 'val', 'validation', batch_size)

    return train_tiles, val_tiles

# Tile every source image into the train/val folders
image_files = sorted(glob.glob(f'{SRC_IMG_DIR}/*.jpg'))
train_count, val_count = process_images_by_split(image_files, batch_size=5)

# Persist the tile -> source image mapping
with open(f'{OUT_DIR}/tile_mapping.json', 'w') as mapping_file:
    json.dump(tile_map, mapping_file, indent=2)

# Dataset description consumed by the YOLO trainer
dataset_yaml = f"""train: {OUT_DIR}/images/train
val: {OUT_DIR}/images/val

nc: {len(CLASSES)}
names: {CLASSES}
"""
with open(f'{OUT_DIR}/insects.yaml', 'w') as yaml_file:
    yaml_file.write(dataset_yaml)

# Release whatever the tiling left behind
gc.collect()

print(f"✅ Done: {train_count} training and {val_count} validation tiles created at {OUT_DIR}")

In [None]:
!pip install ultralytics
from ultralytics import YOLO
from PIL import Image, ImageDraw

In [None]:
# Fine-tune a YOLO11-small checkpoint on the tiled insect dataset.
model = YOLO("yolo11s.pt")

# Training run with data augmentation settings
model.train(
    data="/kaggle/working/insects_tiles/insects.yaml", 
    epochs=30, 
    imgsz=1280,                # Same size as the tiles, so they are not rescaled
    batch=0.8,                 # NOTE(review): a fractional value presumably selects auto-batch at ~80% GPU memory — confirm against the Ultralytics docs
    
    # Basic data augmentation parameters
    augment=True,              # Enable augmentation. NOTE(review): Ultralytics appears to list `augment` as a predict/val (TTA) flag — confirm it has the intended effect in training
    mosaic=1.0,                # Build mosaics from 4 images (probability, 0.0-1.0)
    translate=0.1,             # Shift the image by up to ±10% of its size
    scale=0.5,                 # Rescale the image by up to ±50%
    fliplr=0.5,                # Horizontal flip (probability 0.5)
    hsv_h=0.015,               # Hue variation ±1.5%
    hsv_s=0.8,                 # Saturation variation ±80%
    hsv_v=0.4,                 # Brightness (value) variation ±40%
    
    # Other training parameters
    cos_lr=True,               # Use a cosine learning-rate schedule
    lr0=1e-3,                 # Initial learning rate for AdamW (lower than what would be used with SGD)
    lrf=0.01,                  # Final learning-rate factor: final lr = lr0 * lrf = 1e-5
    optimizer='AdamW',         # Use the AdamW optimizer
    warmup_epochs=3.0,         # Warm-up epochs during which the learning rate ramps up
    warmup_momentum=0.8,       # Initial momentum during warm-up

    # Loss weights, chosen with the class imbalance in mind
    # NOTE(review): 0.5 / 7.5 look like the library defaults — confirm whether any re-weighting is intended
    cls=0.5,
    box=7.5,
    overlap_mask=True,  # Better for overlapping insects. NOTE(review): presumably only affects segmentation training — confirm
    single_cls=False,  # multiple insect types
    
    # Monitoring and checkpointing
    save_period=-1,             # Periodic checkpoint interval. NOTE(review): -1 presumably disables periodic checkpoints (best/last still written) — confirm
    project='/kaggle/working/runs',  # Directory where the results are stored
    name='insect_detector', 
    exist_ok=True,             # Reuse/overwrite the run directory if it exists
    pretrained=True,           # Start from pretrained weights
    verbose=True,              # Print detailed information
)

## Evaluate

In [None]:
def evaluate_model(model_path, yaml_path):
    """Validate the weights at ``model_path`` on the dataset described by ``yaml_path`` and print the metrics."""
    results = YOLO(model_path).val(data=yaml_path)
    print("✅ Evaluation complete")
    print(results)

In [None]:
# Validate the best checkpoint of the training run on the tiled validation split.
evaluate_model(
    model_path='/kaggle/working/runs/insect_detector/weights/best.pt',
    yaml_path='/kaggle/working/insects_tiles/insects.yaml',
)

## Visualize

In [None]:
def predict_on_original_by_tile(image_path, model_path, tile_size=TILE_SIZE, overlap=OVERLAP, conf=0.25, iou_threshold=0.7):
    """Run tiled inference on a full-size image and display the merged detections.

    The image is cut into overlapping square tiles, the model is run on
    every tile, the boxes are shifted back into original-image coordinates
    and duplicates from overlapping tiles are removed with per-class NMS.
    The surviving boxes are drawn on the image and shown with matplotlib.

    Args:
        image_path: Path of the image to analyse.
        model_path: Path of the YOLO weights to load.
        tile_size: Side length of the square tiles in pixels.
        overlap: Overlap between neighbouring tiles in pixels.
        conf: Confidence threshold passed to the model.
        iou_threshold: IoU threshold used by NMS when merging detections.

    Returns:
        None. The result is only displayed.

    Raises:
        ValueError: If ``overlap`` is not smaller than ``tile_size``.
    """
    stride = tile_size - overlap
    if stride <= 0:
        raise ValueError(f"overlap ({overlap}) must be smaller than tile_size ({tile_size})")

    model = YOLO(model_path)

    # convert() returns an independent copy, so the file handle can be closed
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    try:
        img_w, img_h = image.size

        # Unique tile origins, clamped so the last tile ends at the border.
        # max(0, ...) keeps the origin non-negative when the image is smaller
        # than one tile, the same rule extract_tiles uses for training tiles.
        last_x = max(0, img_w - tile_size)
        last_y = max(0, img_h - tile_size)
        x_origins = sorted({min(x, last_x) for x in range(0, img_w, stride)})
        y_origins = sorted({min(y, last_y) for y in range(0, img_h, stride)})

        all_boxes = []
        all_scores = []
        all_classes = []

        for y in y_origins:
            for x in x_origins:
                tile = image.crop((x, y, x + tile_size, y + tile_size))
                result = model(tile, conf=conf)[0]

                boxes = result.boxes.xyxy.cpu().numpy()
                scores = result.boxes.conf.cpu().numpy()
                classes = result.boxes.cls.cpu().numpy()

                for box, score, cls in zip(boxes, scores, classes):
                    bx1, by1, bx2, by2 = (float(v) for v in box[:4])
                    # Shift the box back to its position in the original image
                    all_boxes.append([bx1 + x, by1 + y, bx2 + x, by2 + y])
                    all_scores.append(float(score))
                    all_classes.append(int(cls))
                tile.close()

        # Per-class NMS removes duplicates found in overlapping tiles
        keep_indices = []
        if all_boxes:
            boxes_tensor = torch.tensor(all_boxes, dtype=torch.float32)
            scores_tensor = torch.tensor(all_scores, dtype=torch.float32)
            classes_tensor = torch.tensor(all_classes)

            for class_id in torch.unique(classes_tensor):
                idxs = (classes_tensor == class_id).nonzero(as_tuple=True)[0]
                kept = nms(boxes_tensor[idxs], scores_tensor[idxs], iou_threshold)
                keep_indices.extend(idxs[kept].tolist())

        # Draw only after all tiles were inferred, so the drawings never end
        # up inside a tile that is fed to the model.
        draw = ImageDraw.Draw(image)
        for i in keep_indices:
            box = all_boxes[i]
            label = CLASSES[all_classes[i]]
            draw.rectangle(box, outline="red", width=2)
            draw.text((box[0] + 2, box[1] + 2), label, fill="white")

        gc.collect()
        plt.figure(figsize=(12, 8))
        plt.imshow(image)
        plt.axis('off')
        plt.title(f"Prediction on: {os.path.basename(image_path)} (tile mode with NMS)")
        plt.show()
    finally:
        image.close()
    return

In [None]:
# Ground truth of the sample image, for comparison with the prediction below.
show_img(
    img_path="/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/images/1000.jpg",
    xml_path="/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/annotations/1000.xml",
)

In [None]:
# Tiled prediction on the same sample image. The tiling parameters come from
# the configuration constants so inference always uses the same tile layout
# as the training data.
predict_on_original_by_tile(
    image_path='/kaggle/input/yellow-sticky-traps-vip/yellow-sticky-traps-dataset-main/images/1000.jpg',
    model_path='/kaggle/working/runs/insect_detector/weights/best.pt',
    tile_size=TILE_SIZE,
    overlap=OVERLAP,
    conf=0.25
)