In [3]:
import os
import json
import shutil
import random

def get_output_dirs(image_id, train_ids, train_labels_dir, test_labels_dir, train_images_dir, test_images_dir):
    """根据 image_id 判断属于训练集或测试集，并返回相应的 labels 和 images 目录"""
    if image_id in train_ids:
        return train_labels_dir, train_images_dir
    else:
        return test_labels_dir, test_images_dir

def convert_coco_to_yolo(annotations_file, images_dir, output_dir, split_ratio=0.8):
    with open(annotations_file) as f:
        data = json.load(f)

    # 创建训练和测试集的 labels 和 images 目录
    train_labels_dir = os.path.join(output_dir, 'labels', 'train')
    test_labels_dir = os.path.join(output_dir, 'labels', 'test')
    train_images_dir = os.path.join(output_dir, 'images', 'train')
    test_images_dir = os.path.join(output_dir, 'images', 'test')

    os.makedirs(train_labels_dir, exist_ok=True)
    os.makedirs(test_labels_dir, exist_ok=True)
    os.makedirs(train_images_dir, exist_ok=True)
    os.makedirs(test_images_dir, exist_ok=True)

    # 获取所有图片 id，并按照 split_ratio 随机分为训练和测试集
    image_ids = [img['id'] for img in data['images']]
    random.shuffle(image_ids)
    train_size = int(len(image_ids) * split_ratio)
    train_ids = set(image_ids[:train_size])

    for annotation in data['annotations']:
        image_id = annotation['image_id']
        image_info = next(img for img in data['images'] if img['id'] == image_id)
        width, height = image_info['width'], image_info['height']

        # YOLO 格式的边界框坐标转换
        x_min, y_min, box_width, box_height = annotation['bbox']
        x_center = (x_min + box_width / 2) / width
        y_center = (y_min + box_height / 2) / height
        w = box_width / width
        h = box_height / height
        category_id = annotation['category_id'] - 1  # YOLO 使用 0 开始的类别索引

        # 根据数据集类型选择 labels 和 images 目录
        labels_dir, images_output_dir = get_output_dirs(
            image_id, train_ids, train_labels_dir, test_labels_dir, train_images_dir, test_images_dir
        )

        # 保存标签文件
        img_name = image_info['file_name'].split('.')[0].split('/')[-1]
        label_path = os.path.join(labels_dir, f"{img_name}.txt")
        with open(label_path, 'a') as f:
            f.write(f"{category_id} {x_center} {y_center} {w} {h}\n")

        # 复制图片到相应的文件夹
        src_image_path = os.path.join(images_dir, image_info['file_name'])
        dest_image_path = os.path.join(images_output_dir, f"{img_name}.jpg")
        if not os.path.exists(dest_image_path):  # 避免重复复制
            shutil.copy2(src_image_path, dest_image_path)

# 执行转换并生成文件夹
convert_coco_to_yolo(
    annotations_file='/root/autodl-fs/archive/turtles-data/data/annotations_train.json',
    images_dir='/root/autodl-fs/archive/turtles-data/data',
    output_dir='/root/autodl-fs/archive/turtles-data',
    split_ratio=0.9  
)


In [4]:
!pip install ultralytics



Looking in indexes: http://mirrors.aliyun.com/pypi/simple
Collecting ultralytics
  Downloading http://mirrors.aliyun.com/pypi/packages/13/77/d561ac3a075ba46ef20381b9dcda3af56acbd8af18511dbfcee57413e3f8/ultralytics-8.3.27-py3-none-any.whl (878 kB)
[K     |████████████████████████████████| 878 kB 1.2 MB/s eta 0:00:01
[?25hCollecting ultralytics-thop>=2.0.0
  Downloading http://mirrors.aliyun.com/pypi/packages/59/01/6758cc0854af163f9377e78462ede799b8abe7f0cb6bbb45bf4035088c16/ultralytics_thop-2.0.10-py3-none-any.whl (26 kB)
Collecting py-cpuinfo
  Downloading http://mirrors.aliyun.com/pypi/packages/e0/a9/023730ba63db1e494a271cb018dcd361bd2c917ba7004c3e49d5daf795a2/py_cpuinfo-9.0.0-py3-none-any.whl (22 kB)
Collecting tqdm>=4.64.0
  Downloading http://mirrors.aliyun.com/pypi/packages/41/73/02342de9c2d20922115f787e101527b831c0cffd2105c946c4a4826bcfd4/tqdm-4.66.6-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 1.2 MB/s eta 0:00:01
[?25hCollecting pandas>=1.1.4
  D

In [12]:
from ultralytics import YOLO

# 加载预训练的 YOLOv8 模型
model = YOLO('yolov8s.pt')  # 加载小型 YOLOv8 模型，其他选择有 'yolov8m.pt'、'yolov8l.pt' 等

# 配置训练
model.train(
    data='/root/autodl-fs/archive/turtles-data/data.yaml',  # 数据配置文件路径
    epochs=10,  # 训练轮数
    imgsz=256,  # 输入图片尺寸
    batch=32,  # 批量大小
    name='turtle_detection_yolov8',  # 训练结果保存文件夹名称
    project='/root/autodl-fs/archive/turtles-data/runs',  # 保存结果的路径
    device=0,  # 指定 GPU 设备（0 表示第一个 GPU，1 表示第二个，依此类推）
    workers=4  # 设置 num_workers，具体数量根据系统资源配置
)


Ultralytics 8.3.27 🚀 Python-3.8.10 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4090, 24210MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8s.pt, data=/root/autodl-fs/archive/turtles-data/data.yaml, epochs=10, time=None, patience=100, batch=32, imgsz=256, save=True, save_period=-1, cache=False, device=0, workers=4, project=/root/autodl-fs/archive/turtles-data/runs, name=turtle_detection_yolov83, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=F

[34m[1mtrain: [0mScanning /autodl-fs/data/archive/turtles-data/labels/train... 4771 images, 0 backgrounds, 0 corrupt: 100%|██████████| 4771/4771 [00:11<00:00, 406.08it/s]


[34m[1mtrain: [0mNew cache created: /autodl-fs/data/archive/turtles-data/labels/train.cache


[34m[1mval: [0mScanning /autodl-fs/data/archive/turtles-data/labels/test... 531 images, 0 backgrounds, 0 corrupt: 100%|██████████| 531/531 [00:01<00:00, 448.87it/s]


[34m[1mval: [0mNew cache created: /autodl-fs/data/archive/turtles-data/labels/test.cache
Plotting labels to /root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 256 train, 256 val
Using 4 dataloader workers
Logging results to [1m/root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83[0m
Starting training for 10 epochs...
Closing dataloader mosaic

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      1.84G     0.9793      1.026      1.012         15        256: 100%|██████████| 150/150 [00:18<00:00,  8.29it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:02<00:00,  4.24it/s]


                   all        531       2298      0.735      0.785      0.819      0.555

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      1.79G     0.9061     0.6278     0.9616         12        256: 100%|██████████| 150/150 [00:16<00:00,  9.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.64it/s]


                   all        531       2298      0.932      0.852      0.911      0.702

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      1.82G     0.8756     0.5867     0.9458         12        256: 100%|██████████| 150/150 [00:16<00:00,  9.09it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.64it/s]


                   all        531       2298      0.926      0.854      0.914      0.689

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      1.82G     0.8483     0.5522     0.9416         12        256: 100%|██████████| 150/150 [00:16<00:00,  9.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.44it/s]

                   all        531       2298      0.928      0.878      0.924      0.725






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      1.82G     0.8022     0.5088     0.9219         14        256: 100%|██████████| 150/150 [00:16<00:00,  9.25it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.77it/s]

                   all        531       2298      0.944      0.884       0.93      0.736






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      1.82G     0.7443     0.4643     0.9044         13        256: 100%|██████████| 150/150 [00:16<00:00,  9.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  9.00it/s]

                   all        531       2298      0.947      0.895      0.935      0.761






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      1.83G     0.7107     0.4325     0.8954         13        256: 100%|██████████| 150/150 [00:15<00:00,  9.40it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.62it/s]

                   all        531       2298       0.96      0.908      0.946       0.78






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      1.82G     0.6703     0.4045     0.8822         14        256: 100%|██████████| 150/150 [00:16<00:00,  9.16it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.43it/s]

                   all        531       2298      0.961      0.918      0.951      0.792






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      1.82G     0.6393     0.3773     0.8725         16        256: 100%|██████████| 150/150 [00:16<00:00,  9.15it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.68it/s]

                   all        531       2298      0.972       0.91      0.951      0.803






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      1.82G     0.6002     0.3491     0.8621         13        256: 100%|██████████| 150/150 [00:16<00:00,  9.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:01<00:00,  8.63it/s]

                   all        531       2298      0.966      0.922      0.954      0.816






10 epochs completed in 0.056 hours.
Optimizer stripped from /root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83/weights/last.pt, 22.5MB
Optimizer stripped from /root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83/weights/best.pt, 22.5MB

Validating /root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83/weights/best.pt...
Ultralytics 8.3.27 🚀 Python-3.8.10 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 4090, 24210MiB)
Model summary (fused): 168 layers, 11,126,745 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 9/9 [00:04<00:00,  2.17it/s]


                   all        531       2298      0.965      0.922      0.954      0.816
                 shell        530        531       0.99      0.992      0.995       0.97
                   fin        524       1251      0.929      0.845      0.907      0.686
                  head        516        516      0.978      0.928      0.961      0.793
Speed: 0.1ms preprocess, 0.8ms inference, 0.0ms loss, 4.8ms postprocess per image
Results saved to [1m/root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1, 2])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f3e686ca550>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.04

In [30]:

import json
import numpy as np
from PIL import Image
from segment_anything import sam_model_registry, SamPredictor
from sklearn.metrics import jaccard_score
from pycocotools import mask as coco_mask
from ultralytics import YOLO
from collections import defaultdict
import torch
import matplotlib.pyplot as plt

# Configure paths
annotations_path = '/root/autodl-fs/archive/turtles-data/data/annotations_test.json'
image_folder = '/root/autodl-fs/archive/turtles-data/data/'
sam_checkpoint = "/root/autodl-fs/sam_vit_h_4b8939.pth"
yolo_checkpoint = "/root/autodl-fs/archive/turtles-data/runs/turtle_detection_yolov83/weights/best.pt"

# Load JSON file to get all images and annotations information
with open(annotations_path, 'r') as f:
    data = json.load(f)

# Load YOLOv8 and SAM models on GPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'
yolo_model = YOLO(yolo_checkpoint).to(device)
yolo_model.overrides['verbose'] = False  # 禁用 YOLO 推理详细输出
torch.use_deterministic_algorithms(False)  # 避免非确定性警告
model_type = "vit_h"
sam = sam_model_registry[model_type](checkpoint=sam_checkpoint).to(device)
predictor = SamPredictor(sam)

# Initialize IoU accumulations and counts for each category
iou_scores = defaultdict(float)  # Accumulated IoU per category
iou_counts = defaultdict(int)     # Count of masks per category

# List of category names, matching the names in `data.yaml`
category_names = ['shell', 'fin', 'head']
category_colors = {
    0: [1, 0, 0],     # Red
    1: [0, 1, 0],     # Green
    2: [0, 0, 1],     # Blue
}

import matplotlib.pyplot as plt
import os


import matplotlib.pyplot as plt
import os

def visualize(image_np, pred_colored_mask, gt_colored_mask, image_name, output_dir="visualizations"):
    """
    可视化单张图像的预测遮罩和地面真值遮罩，并将其保存到文件。
    
    参数:
    - image_np: 原始图像的 numpy 数组
    - pred_colored_mask: 预测的彩色遮罩
    - gt_colored_mask: 地面真值彩色遮罩
    - image_name: 图像的文件名（可能包含子目录路径）
    - output_dir: 保存图像的文件夹
    """
    # 构建完整的保存路径，包括子目录
    save_dir = os.path.join(output_dir)
    os.makedirs(save_dir, exist_ok=True)  # 递归创建子目录
    
    # 提取文件名并去掉扩展名
    base_name = os.path.basename(image_name)
    output_path = os.path.join(save_dir, f"{os.path.splitext(base_name)[0]}_visualization.png")
    
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))
    
    # 显示原始图像
    axes[0].imshow(image_np)
    axes[0].set_title(f"Original Image: {base_name}")
    axes[0].axis("off")
    
    # 显示预测的彩色遮罩
    axes[1].imshow(pred_colored_mask)
    axes[1].set_title("Predicted Mask")
    axes[1].axis("off")
    
    # 显示地面真值遮罩
    axes[2].imshow(gt_colored_mask)
    axes[2].set_title("Ground Truth Mask")
    axes[2].axis("off")
    
    # 保存图像
    plt.savefig(output_path)
    plt.close(fig)  # 关闭图像以释放内存
    print(f"Visualization saved to {output_path}")



# Loop through all images in the dataset
for idx, image_info in enumerate(data['images']):
    image_id = image_info['id']
    annotations_info = [ann for ann in data['annotations'] if ann['image_id'] == image_id]
    
    # Load image
    image_path = f"{image_folder}/{image_info['file_name']}"
    image = Image.open(image_path)
    image_np = np.array(image)

    # Get YOLOv8 predicted bounding boxes (ensure GPU processing)
    results = yolo_model.predict(source=image_path, imgsz=256, device=device)
    bboxes = results[0].boxes.xyxy.cpu().numpy()  # YOLOv8 bounding boxes
    class_ids = results[0].boxes.cls.cpu().numpy()  # YOLOv8 class IDs

    # Set SAM model image
    predictor.set_image(image_np)

    # Create blank colored masks
    pred_colored_mask = np.zeros((image_np.shape[0], image_np.shape[1], 3))
    gt_colored_mask = np.zeros((image_np.shape[0], image_np.shape[1], 3))

    # Iterate over each predicted bounding box and calculate IoU for each class
    for i, bbox in enumerate(bboxes):
        x_min, y_min, x_max, y_max = bbox
        box_prompt = np.array([x_min, y_min, x_max, y_max]).reshape(1, -1)
        
        # Predict segmentation mask with SAM model
        masks, scores, _ = predictor.predict(box=box_prompt)
        
        # Get class ID and category ID
        class_id = int(class_ids[i])
        category_id = class_id + 1  # Convert to annotation category_id
        class_name = category_names[class_id]  # Get category name

        # Get all ground truth masks for this category
        ground_truth_masks = []
        for ann in annotations_info:
            if ann['category_id'] == category_id:
                rle_encoded = ann['segmentation']
                gt_mask = coco_mask.decode(coco_mask.frPyObjects(rle_encoded, image_np.shape[0], image_np.shape[1])).astype(bool)
                ground_truth_masks.append(gt_mask)

        # Calculate IoU for each predicted mask against all ground truth masks and select the best one
        best_mask = None
        best_iou = 0
        for mask in masks:
            max_iou = 0
            for gt_mask in ground_truth_masks:
                pred_mask_flat = mask.flatten()
                gt_mask_flat = gt_mask.flatten()
                iou = jaccard_score(gt_mask_flat, pred_mask_flat, average='binary')
                if iou > max_iou:
                    max_iou = iou
            # Record the best IoU mask
            if max_iou > best_iou:
                best_iou = max_iou
                best_mask = mask
        
        # Only accumulate IoU for the best mask and prepare visualization
        if best_mask is not None:
            iou_scores[class_name] += best_iou
            iou_counts[class_name] += 1
            color = category_colors.get(class_id, [1, 1, 1])
            
            # Apply color to best predicted mask
            expanded_best_mask = np.zeros((*best_mask.shape, 3))
            for j in range(3):
                expanded_best_mask[:, :, j] = best_mask * color[j]
            pred_colored_mask = np.where(expanded_best_mask > 0, expanded_best_mask, pred_colored_mask)
            
            # Apply color to ground truth masks
            for gt_mask in ground_truth_masks:
                expanded_gt_mask = np.zeros((*gt_mask.shape, 3))
                for j in range(3):
                    expanded_gt_mask[:, :, j] = gt_mask * color[j]
                gt_colored_mask = np.where(expanded_gt_mask > 0, expanded_gt_mask, gt_colored_mask)

            print(f"IOU for predicted {class_name} in image {image_info['file_name']}: {best_iou:.4f}")
        else:
            print(f"No valid mask found for predicted {class_name} in image {image_info['file_name']}. Skipping.")

    # Visualize every 100 images
    if idx % 10 == 0:
        visualize(image_np, pred_colored_mask, gt_colored_mask, image_info['file_name'])
    if idx == 50:
        break

# Calculate and print the average IoU for each category
for class_name in category_names:
    if iou_counts[class_name] > 0:
        avg_iou = iou_scores[class_name] / iou_counts[class_name]
        print(f"Average IOU for {class_name}: {avg_iou:.4f}")
    else:
        print(f"No predictions for {class_name}")


IOU for predicted shell in image images/t007/BBuJpHMVVi.JPG: 0.9974
IOU for predicted fin in image images/t007/BBuJpHMVVi.JPG: 0.9943
IOU for predicted fin in image images/t007/BBuJpHMVVi.JPG: 0.9975
IOU for predicted head in image images/t007/BBuJpHMVVi.JPG: 0.8694
IOU for predicted fin in image images/t007/BBuJpHMVVi.JPG: 0.9914
Visualization saved to visualizations/BBuJpHMVVi_visualization.png
IOU for predicted shell in image images/t007/CSsLXEILgE.JPG: 0.9976
IOU for predicted head in image images/t007/CSsLXEILgE.JPG: 0.9597
IOU for predicted fin in image images/t007/CSsLXEILgE.JPG: 0.9963
IOU for predicted fin in image images/t007/CSsLXEILgE.JPG: 0.9908
IOU for predicted fin in image images/t007/CSsLXEILgE.JPG: 0.8612
IOU for predicted shell in image images/t007/CjfQKJUyHh.JPG: 0.9973
IOU for predicted fin in image images/t007/CjfQKJUyHh.JPG: 0.9787
IOU for predicted fin in image images/t007/CjfQKJUyHh.JPG: 0.9809
IOU for predicted head in image images/t007/CjfQKJUyHh.JPG: 0.9060
