In [1]:
!pip install fiftyone
!pip install fiftyone-db-ubuntu2204
!pip install ultralytics

Collecting fiftyone
  Downloading fiftyone-1.3.0-py3-none-any.whl.metadata (24 kB)
Collecting argcomplete (from fiftyone)
  Downloading argcomplete-3.5.3-py3-none-any.whl.metadata (16 kB)
Collecting dacite<1.8.0,>=1.6.0 (from fiftyone)
  Downloading dacite-1.7.0-py3-none-any.whl.metadata (14 kB)
Collecting ftfy (from fiftyone)
  Downloading ftfy-6.3.1-py3-none-any.whl.metadata (7.3 kB)
Collecting hypercorn>=0.13.2 (from fiftyone)
  Downloading hypercorn-0.17.3-py3-none-any.whl.metadata (5.4 kB)
Collecting kaleido!=0.2.1.post1 (from fiftyone)
  Downloading kaleido-0.2.1-py2.py3-none-manylinux1_x86_64.whl.metadata (15 kB)
Collecting mongoengine~=0.29.1 (from fiftyone)
  Downloading mongoengine-0.29.1-py3-none-any.whl.metadata (6.7 kB)
Collecting motor~=3.6.0 (from fiftyone)
  Downloading motor-3.6.1-py3-none-any.whl.metadata (21 kB)
Collecting pprintpp (from fiftyone)
  Downloading pprintpp-0.4.0-py2.py3-none-any.whl.metadata (7.9 kB)
Collecting pymongo~=4.9.2 (from fif

In [2]:
import fiftyone as fo
import fiftyone.zoo as foz
import json
from collections import defaultdict
import matplotlib.pyplot as plt
from PIL import Image
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import functional as F
import torch
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torch.utils.data import DataLoader
from tqdm import tqdm
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
from sklearn.metrics import confusion_matrix, accuracy_score
import seaborn as sns
from torchvision.transforms import functional as F

In [3]:
custom_dataset_dir = "/kaggle/working/dataset"
fo.config.dataset_zoo_dir = custom_dataset_dir


def _load_person_split(split, max_samples, dataset_name):
    """Load one COCO-2017 split from the FiftyOne zoo, restricted to 'person'.

    Args:
        split: Zoo split name passed to ``foz.load_zoo_dataset``
            ("train" or "validation" in this notebook).
        max_samples: Upper bound on the number of samples to load.
        dataset_name: Name given to the resulting FiftyOne dataset.

    Returns:
        The dataset object returned by ``foz.load_zoo_dataset``.

    Raises:
        Exception: Whatever ``foz.load_zoo_dataset`` raised. The error is
            printed first and then re-raised so the notebook stops at the
            real cause instead of failing later with a ``NameError`` on a
            dataset variable that was never assigned.
    """
    try:
        dataset = foz.load_zoo_dataset(
            "coco-2017",
            split=split,
            label_types=["detections", "segmentations"],
            classes=["person"],
            max_samples=max_samples,
            dataset_name=dataset_name,
        )
    except Exception as e:
        print(f"Error loading {split} dataset: {e}")
        raise
    print(f"{split.capitalize()} dataset loaded successfully!")
    return dataset


train_dataset = _load_person_split("train", 3000, "coco-2017-train-persons")
val_dataset = _load_person_split("validation", 750, "coco-2017-validation-persons")

Downloading split 'train' to '/kaggle/working/dataset/coco-2017/train' if necessary
Downloading annotations to '/kaggle/working/dataset/coco-2017/tmp-download/annotations_trainval2017.zip'
 100% |██████|    1.9Gb/1.9Gb [6.0s elapsed, 0s remaining, 346.7Mb/s]       
Extracting annotations to '/kaggle/working/dataset/coco-2017/raw/instances_train2017.json'
Downloading 3000 images
 100% |████████████████| 3000/3000 [5.4m elapsed, 0s remaining, 9.2 images/s]       
Writing annotations for 3000 downloaded samples to '/kaggle/working/dataset/coco-2017/train/labels.json'
Dataset info written to '/kaggle/working/dataset/coco-2017/info.json'
Loading 'coco-2017' split 'train'
 100% |███████████████| 3000/3000 [1.0m elapsed, 0s remaining, 50.9 samples/s]      
Dataset 'coco-2017-train-persons' created
Train dataset loaded successfully!
Downloading split 'validation' to '/kaggle/working/dataset/coco-2017/validation' if necessary
Found annotations at '/kaggle/working/dataset/coco-2017/raw/instances

In [4]:
def _size_report(dataset):
    """Return the one-line size message for a loaded dataset."""
    sample_count = len(dataset)
    if sample_count == 0:
        return "Dataset is empty. Please check the dataset or filters."
    return f"Dataset contains {sample_count} samples."


# Train split first, then validation, one line each.
print(_size_report(train_dataset))

print(_size_report(val_dataset))

Dataset contains 3000 samples.
Dataset contains 750 samples.


In [5]:
class COCOParser:
    """Index a COCO annotation file against the images present in a directory.

    Attributes built in ``__init__``:
        imgs_dir: Directory that holds the image files.
        category_id: Category id whose annotations are kept.
        available_images: Set of file names found in ``imgs_dir``.
        filename_to_id / id_to_filename: Two-way mapping between the
            ``file_name`` and ``id`` fields of the COCO ``images`` entries.
        img_info: Image id -> full COCO ``images`` entry.
        annIm_dict: Image id -> list of kept annotations (a ``defaultdict``,
            so ids without annotations yield an empty list).
        cat_dict: Category id -> COCO ``categories`` entry.
    """

    def __init__(self, anns_file, imgs_dir, category_id=1):
        """Read ``anns_file`` (COCO JSON) and index it.

        Args:
            anns_file: Path to the COCO annotation JSON file.
            imgs_dir: Directory containing the image files.
            category_id: Only annotations with this ``category_id`` are kept.
                Defaults to 1, which the rest of this notebook treats as the
                single 'person' class.
        """
        self.imgs_dir = imgs_dir
        self.category_id = category_id
        self.available_images = set(os.listdir(imgs_dir))

        with open(anns_file, 'r') as f:
            coco = json.load(f)
        self.filename_to_id = {img['file_name']: img['id'] for img in coco['images']}
        self.id_to_filename = {v: k for k, v in self.filename_to_id.items()}
        self.img_info = {img['id']: img for img in coco['images']}
        self.annIm_dict = defaultdict(list)
        for ann in coco['annotations']:
            if ann['category_id'] == category_id:
                self.annIm_dict[ann['image_id']].append(ann)
        self.cat_dict = {cat['id']: cat for cat in coco['categories']}

    def get_available_image_ids(self):
        """Return the ids of annotated images that exist in ``imgs_dir``.

        The ids are sorted so the order is the same on every run; iterating
        the underlying set directly gives an arbitrary order.
        """
        return sorted(
            self.filename_to_id[filename]
            for filename in self.available_images
            if filename in self.filename_to_id
        )

    def load_img(self, img_id):
        """Load the image for ``img_id`` as an RGB PIL image.

        Returns:
            The converted image, or ``None`` when the id is unknown or the
            file is not present in ``imgs_dir``.
        """
        # Direct lookup instead of scanning every known file name per call.
        filename = self.id_to_filename.get(img_id)
        if filename is None:
            return None
        img_path = os.path.join(self.imgs_dir, filename)
        if not os.path.exists(img_path):
            return None
        # convert() returns an independent image, so the file handle can be
        # closed as soon as the conversion is done.
        with Image.open(img_path) as img:
            return img.convert("RGB")

class COCODataset(Dataset):
    """Detection dataset backed by a ``COCOParser``.

    Each item is an ``(image, target)`` pair where ``target`` is a dict with:
        "boxes":  float32 tensor of shape (N, 4) in [x1, y1, x2, y2] order,
                  converted from the COCO [x, y, width, height] boxes.
        "labels": int64 tensor of shape (N,) holding each ``category_id``.
    """

    def __init__(self, coco_parser, transforms=None):
        """
        Args:
            coco_parser: Object providing ``get_available_image_ids()``,
                ``load_img(img_id)`` and ``annIm_dict``.
            transforms: Optional callable ``(image, target) -> (image, target)``.
        """
        self.coco_parser = coco_parser
        self.transforms = transforms
        self.image_ids = coco_parser.get_available_image_ids()

    def __len__(self):
        """Number of images available to the dataset."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return the ``(image, target)`` pair at position ``idx``."""
        img_id = self.image_ids[idx]
        img = self.coco_parser.load_img(img_id)
        annotations = self.coco_parser.annIm_dict[img_id]

        boxes = []
        labels = []
        for ann in annotations:
            x, y, w, h = ann['bbox']
            # COCO stores [x, y, width, height]; convert to corner format.
            boxes.append([x, y, x + w, y + h])
            labels.append(ann['category_id'])

        # reshape keeps the (N, 4) layout even when there are no annotations;
        # without it an empty list becomes a 1-D tensor of shape (0,), which
        # breaks column indexing such as boxes[:, [0, 2]].
        boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
        }

        if self.transforms:
            img, target = self.transforms(img, target)

        return img, target

In [6]:
class Compose:
    """Apply a sequence of paired ``(image, target)`` transforms in order."""

    def __init__(self, transforms):
        # Kept as given; each entry is called as step(image, target).
        self.transforms = transforms

    def __call__(self, image, target):
        """Run every transform in turn and return the final (image, target)."""
        current_image, current_target = image, target
        for step in self.transforms:
            current_image, current_target = step(current_image, current_target)
        return current_image, current_target

class ToTensor:
    """Paired transform: convert the image with ``F.to_tensor``; target is untouched."""

    def __call__(self, image, target):
        return F.to_tensor(image), target

class Normalize:
    """Paired transform: normalize the image with ``F.normalize``; target is untouched."""

    def __init__(self, mean, std):
        self.mean, self.std = mean, std

    def __call__(self, image, target):
        normalized = F.normalize(image, mean=self.mean, std=self.std)
        return normalized, target

class Resize:
    """Paired transform: resize a PIL image and rescale its boxes to match.

    ``size`` follows the torchvision convention for ``F.resize``:
    ``(height, width)``. Must run before ``ToTensor`` because it reads the
    original dimensions from the PIL image.
    """

    def __init__(self, size):
        """
        Args:
            size: Target size as a ``(height, width)`` pair.
        """
        self.size = size

    def __call__(self, image, target):
        """Resize ``image`` and scale ``target["boxes"]`` in place.

        Raises:
            TypeError: If ``image`` is not a PIL image.
        """
        if not isinstance(image, Image.Image):
            raise TypeError("Resize harus diterapkan sebelum ToTensor!")

        # PIL reports (width, height); F.resize expects (height, width).
        orig_width, orig_height = image.size
        new_height, new_width = self.size
        image = F.resize(image, self.size)

        boxes = target["boxes"]
        # Skip empty tensors: a box-less target may be 1-D (shape (0,)), and
        # column indexing would raise on it.
        if boxes.numel() > 0:
            boxes[:, [0, 2]] *= new_width / orig_width
            boxes[:, [1, 3]] *= new_height / orig_height

        return image, target

def get_transform(train):
    """Build the preprocessing pipeline: resize, tensor conversion, normalization.

    ``train`` is accepted but not used; the same pipeline is returned for
    both training and evaluation.
    """
    return Compose([
        Resize((224, 224)),
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

In [7]:
# Annotation file and image directory for each split, as written by the
# FiftyOne zoo download above.
coco_annotations_file_train, coco_images_dir_train = (
    "/kaggle/working/dataset/coco-2017/raw/instances_train2017.json",
    "/kaggle/working/dataset/coco-2017/train/data",
)
coco_annotations_file_val, coco_images_dir_val = (
    "/kaggle/working/dataset/coco-2017/raw/instances_val2017.json",
    "/kaggle/working/dataset/coco-2017/validation/data",
)

# One parser per split: training first, then validation.
coco_parser_train = COCOParser(
    coco_annotations_file_train,
    coco_images_dir_train,
)
coco_parser_val = COCOParser(
    coco_annotations_file_val,
    coco_images_dir_val,
)

In [8]:
import os
import shutil
from pathlib import Path

def convert_coco_to_yolo(coco_parser, output_dir, subset):
    """Export the parser's images and boxes in YOLO directory/label layout.

    Images are copied to ``<output_dir>/images/<subset>/`` and one label file
    per image is written to ``<output_dir>/labels/<subset>/`` with the same
    base name and a ``.txt`` extension. Each label line is
    ``0 x_center y_center width height`` with coordinates normalized by the
    image width/height. An image without annotations gets an empty label
    file.

    Args:
        coco_parser: Object providing ``get_available_image_ids()``,
            ``id_to_filename``, ``imgs_dir``, ``annIm_dict`` and ``img_info``.
        output_dir: Root directory of the YOLO dataset (created if missing).
        subset: Sub-directory name for this split, e.g. "train" or "val".
    """
    images_dir = Path(output_dir) / "images" / subset
    labels_dir = Path(output_dir) / "labels" / subset
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    for img_id in coco_parser.get_available_image_ids():
        # Copy the image into the YOLO directory.
        filename = coco_parser.id_to_filename[img_id]
        src_img_path = os.path.join(coco_parser.imgs_dir, filename)
        shutil.copy(src_img_path, images_dir / filename)

        # Convert the annotations.
        annotations = coco_parser.annIm_dict.get(img_id, [])
        label_lines = []
        if annotations:
            # Image dimensions are the same for every box; look them up once.
            img_info = coco_parser.img_info[img_id]
            img_w, img_h = img_info['width'], img_info['height']
            for ann in annotations:
                x, y, w, h = ann['bbox']
                # Zero-area boxes carry no usable label; skip them.
                if w <= 0 or h <= 0:
                    continue

                # COCO top-left [x, y, w, h] -> normalized YOLO center format.
                x_center = (x + w / 2) / img_w
                y_center = (y + h / 2) / img_h
                width = w / img_w
                height = h / img_h

                # Class 0 because only the 'person' class is used.
                label_lines.append(
                    f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}\n"
                )

        # Path.stem strips only the final extension, so names that contain
        # additional dots still map to the matching label file.
        label_file = labels_dir / (Path(filename).stem + ".txt")
        with open(label_file, 'w') as f:
            f.writelines(label_lines)

In [9]:
# Output directory for the YOLO-formatted dataset
output_dir = Path("/kaggle/working/yolo_dataset")

# Convert the training split, then the validation split
convert_coco_to_yolo(coco_parser_train, output_dir, "train")
convert_coco_to_yolo(coco_parser_val, output_dir, "val")

# Create the data.yaml file that points the trainer at both image folders
data_yaml_content = f"""
train: {str(output_dir / 'images/train')}
val: {str(output_dir / 'images/val')}
nc: 1
names: ['person']
"""

data_yaml_path = output_dir / "data.yaml"
data_yaml_path.write_text(data_yaml_content)

In [10]:
from ultralytics import YOLO

# Pre-trained YOLOv8 model weights
model = YOLO('yolov8n.pt')

# Training settings collected in one place, then passed as keyword arguments.
train_settings = dict(
    data=str(output_dir / "data.yaml"),
    epochs=10,
    imgsz=640,
    batch=16,
    workers=16,
    device=0,
)
model.train(**train_settings)

# Run validation with the trained model.
model.val()

# Export to ONNX; as the last expression, its return value is the cell output.
model.export(format='onnx')

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 129MB/s]


Ultralytics 8.3.80 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/kaggle/working/yolo_dataset/data.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=0, workers=16, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labe

100%|██████████| 755k/755k [00:00<00:00, 24.2MB/s]


Overriding model.yaml nc=80 with nc=1

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 128MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /kaggle/working/yolo_dataset/labels/train... 3000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 3000/3000 [00:02<00:00, 1131.39it/s]

[34m[1mtrain: [0mNew cache created: /kaggle/working/yolo_dataset/labels/train.cache





[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  check_for_updates()
[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/labels/val... 750 images, 0 backgrounds, 0 corrupt: 100%|██████████| 750/750 [00:00<00:00, 795.35it/s]


[34m[1mval: [0mNew cache created: /kaggle/working/yolo_dataset/labels/val.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      2.21G      1.435      1.992      1.362         29        640: 100%|██████████| 188/188 [00:37<00:00,  5.02it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:05<00:00,  4.45it/s]


                   all        750       3227        0.5      0.412      0.415      0.215

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      2.16G      1.738      1.932      1.602         20        640: 100%|██████████| 188/188 [00:34<00:00,  5.42it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.15it/s]


                   all        750       3227      0.431      0.385      0.344      0.168

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      2.18G      1.806      1.908      1.648         35        640: 100%|██████████| 188/188 [00:34<00:00,  5.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.19it/s]


                   all        750       3227      0.497      0.345      0.349      0.171

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      2.17G      1.744      1.821      1.632         37        640: 100%|██████████| 188/188 [00:34<00:00,  5.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.87it/s]


                   all        750       3227      0.551      0.405      0.429      0.218

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      2.17G      1.675      1.722      1.565         44        640: 100%|██████████| 188/188 [00:34<00:00,  5.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.26it/s]


                   all        750       3227      0.589      0.467      0.486       0.26

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      2.17G       1.62      1.624      1.514         32        640: 100%|██████████| 188/188 [00:34<00:00,  5.52it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.30it/s]

                   all        750       3227      0.637       0.48      0.537      0.298






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      2.17G      1.552      1.526      1.474         57        640: 100%|██████████| 188/188 [00:33<00:00,  5.53it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.27it/s]


                   all        750       3227      0.654      0.487      0.552      0.313

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      2.17G      1.489      1.452      1.443         36        640: 100%|██████████| 188/188 [00:33<00:00,  5.54it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.08it/s]

                   all        750       3227      0.669      0.503       0.57      0.337






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      2.17G      1.422      1.365      1.392         37        640: 100%|██████████| 188/188 [00:34<00:00,  5.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.37it/s]

                   all        750       3227      0.691      0.501      0.588      0.356






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      2.17G      1.389      1.312      1.363         47        640: 100%|██████████| 188/188 [00:34<00:00,  5.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:03<00:00,  6.19it/s]

                   all        750       3227      0.681       0.54      0.611      0.373






10 epochs completed in 0.114 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.80 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:05<00:00,  4.76it/s]


                   all        750       3227      0.682      0.539      0.611      0.373


  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 0.1ms preprocess, 1.5ms inference, 0.0ms loss, 1.4ms postprocess per image
Results saved to [1mruns/detect/train[0m
Ultralytics 8.3.80 🚀 Python-3.10.12 torch-2.5.1+cu121 CUDA:0 (Tesla P100-PCIE-16GB, 16269MiB)
Model summary (fused): 72 layers, 3,005,843 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /kaggle/working/yolo_dataset/labels/val.cache... 750 images, 0 backgrounds, 0 corrupt: 100%|██████████| 750/750 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:05<00:00,  7.95it/s]


                   all        750       3227      0.681       0.54       0.61      0.374


  xa[xa < 0] = -1
  xa[xa < 0] = -1


Speed: 0.4ms preprocess, 2.7ms inference, 0.0ms loss, 1.1ms postprocess per image
Results saved to [1mruns/detect/train2[0m
Ultralytics 8.3.80 🚀 Python-3.10.12 torch-2.5.1+cu121 CPU (Intel Xeon 2.00GHz)

[34m[1mPyTorch:[0m starting from 'runs/detect/train/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 5, 8400) (5.9 MB)
[31m[1mrequirements:[0m Ultralytics requirements ['onnxslim', 'onnxruntime-gpu'] not found, attempting AutoUpdate...
Collecting onnxslim
  Downloading onnxslim-0.1.48-py3-none-any.whl.metadata (4.6 kB)
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.20.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Collecting coloredlogs (from onnxruntime-gpu)
  Downloading coloredlogs-15.0.1-py2.py3-none-any.whl.metadata (12 kB)
Collecting humanfriendly>=9.1 (from coloredlogs->onnxruntime-gpu)
  Downloading humanfriendly-10.0-py2.py3-none-any.whl.metadata (9.2 kB)
Downloading onnxslim-0.1.48-py3-none-an

'runs/detect/train/weights/best.onnx'