In [1]:
!pip install ultralytics


Collecting ultralytics
  Downloading ultralytics-8.3.157-py3-none-any.whl.metadata (37 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch>=1.8.0->ultralytics)
  Downloading nvidia_cusolver_cu12-11.6

In [2]:
import os
import xml.etree.ElementTree as ET
from PIL import Image

# Paths
image_dir = '/kaggle/input/dog-and-cat-detection/images'
annotation_dir = '/kaggle/input/dog-and-cat-detection/annotations'
output_label_dir = '/kaggle/working/labels'

os.makedirs(output_label_dir, exist_ok=True)

# Class name to ID mapping
class_map = {'cat': 0, 'dog': 1}  # Modify as needed

# Loop over annotation files
for xml_file in os.listdir(annotation_dir):
    if not xml_file.endswith('.xml'):
        continue

    tree = ET.parse(os.path.join(annotation_dir, xml_file))
    root = tree.getroot()

    image_filename = root.find('filename').text
    image_path = os.path.join(image_dir, image_filename)

    img = Image.open(image_path)
    img_w, img_h = img.size

    label_filename = os.path.splitext(image_filename)[0] + '.txt'
    label_path = os.path.join(output_label_dir, label_filename)

    with open(label_path, 'w') as f:
        for obj in root.findall('object'):
            class_name = obj.find('name').text.lower()
            class_id = class_map.get(class_name)
            if class_id is None:
                continue

            bbox = obj.find('bndbox')
            xmin = float(bbox.find('xmin').text)
            ymin = float(bbox.find('ymin').text)
            xmax = float(bbox.find('xmax').text)
            ymax = float(bbox.find('ymax').text)

            x_center = ((xmin + xmax) / 2) / img_w
            y_center = ((ymin + ymax) / 2) / img_h
            width = (xmax - xmin) / img_w
            height = (ymax - ymin) / img_h

            f.write(f"{class_id} {x_center} {y_center} {width} {height}\n")


In [4]:
import shutil
from sklearn.model_selection import train_test_split

# Get all images
image_files = [f for f in os.listdir(image_dir) if f.endswith('.png')]

# Train/val split
train_files, val_files = train_test_split(image_files, test_size=0.2, random_state=42)

# Create folders
for split in ['train', 'val']:
    os.makedirs(f'/kaggle/working/images/{split}', exist_ok=True)
    os.makedirs(f'/kaggle/working/labels/{split}', exist_ok=True)

# Copy files
for split, files in [('train', train_files), ('val', val_files)]:
    for img_file in files:
        base = os.path.splitext(img_file)[0]
        shutil.copy(os.path.join(image_dir, img_file), f'/kaggle/working/images/{split}/{img_file}')
        shutil.copy(os.path.join(output_label_dir, base + '.txt'), f'/kaggle/working/labels/{split}/{base}.txt')


In [5]:
import yaml

data = {
    'train': '/kaggle/working/images/train',
    'val': '/kaggle/working/images/val',
    'nc': 2,
    'names': ['cat', 'dog']
}

with open('data.yaml', 'w') as f:
    yaml.dump(data, f)


In [6]:
from ultralytics import YOLO

model = YOLO('yolov8n.pt')
model.train(data='data.yaml', epochs=20, imgsz=640)


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 79.7MB/s]


Ultralytics 8.3.157 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=16, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=20, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=yolov8n.pt, momentum=0.937, mosaic=1.0, multi_scale=False, name=train, nbs=64, nms=False, opset=None, optimize=False, optimizer=auto, overlap_mask=True, patience=100, perspective=0.0, plots=True, pose=12.0, pretrained=True,

100%|██████████| 755k/755k [00:00<00:00, 16.7MB/s]


Overriding model.yaml nc=80 with nc=2

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 63.8MB/s]


[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 2574.5±814.3 MB/s, size: 228.6 KB)


[34m[1mtrain: [0mScanning /kaggle/working/labels/train... 2948 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2948/2948 [00:03<00:00, 767.16it/s]


[34m[1mtrain: [0mNew cache created: /kaggle/working/labels/train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access ✅ (ping: 0.0±0.0 ms, read: 523.0±41.7 MB/s, size: 254.7 KB)


[34m[1mval: [0mScanning /kaggle/working/labels/val... 738 images, 0 backgrounds, 0 corrupt: 100%|██████████| 738/738 [00:01<00:00, 718.76it/s]


[34m[1mval: [0mNew cache created: /kaggle/working/labels/val.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001667, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20      2.12G     0.9942      1.882      1.287         11        640: 100%|██████████| 185/185 [00:36<00:00,  5.13it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:06<00:00,  3.83it/s]


                   all        738        738      0.805      0.744      0.861      0.619

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20       2.8G      1.039      1.338      1.289          6        640: 100%|██████████| 185/185 [00:32<00:00,  5.73it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.39it/s]


                   all        738        738      0.743      0.733      0.782      0.521

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/20      2.82G      1.087      1.176      1.314         12        640: 100%|██████████| 185/185 [00:32<00:00,  5.72it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.00it/s]

                   all        738        738      0.659      0.664      0.752      0.532






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/20      2.83G      1.053      1.046      1.283          7        640: 100%|██████████| 185/185 [00:33<00:00,  5.51it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.31it/s]

                   all        738        738       0.79      0.767      0.874      0.633






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/20      2.85G      1.038     0.9685      1.281          8        640: 100%|██████████| 185/185 [00:33<00:00,  5.47it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.25it/s]

                   all        738        738      0.871      0.898      0.961       0.74






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/20      2.87G     0.9691     0.8801      1.233          5        640: 100%|██████████| 185/185 [00:34<00:00,  5.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.53it/s]

                   all        738        738      0.925      0.897      0.949      0.742






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/20      2.89G     0.9137     0.8144      1.206          7        640: 100%|██████████| 185/185 [00:34<00:00,  5.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.43it/s]

                   all        738        738       0.94      0.898      0.972      0.774






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/20       2.9G     0.9021     0.7677      1.198         12        640: 100%|██████████| 185/185 [00:33<00:00,  5.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.05it/s]

                   all        738        738      0.956      0.939      0.975      0.791






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/20      2.92G     0.8636     0.7106      1.166         11        640: 100%|██████████| 185/185 [00:33<00:00,  5.48it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.64it/s]

                   all        738        738      0.942      0.879      0.958      0.795






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/20      2.94G     0.8213     0.7013      1.149          9        640: 100%|██████████| 185/185 [00:34<00:00,  5.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.30it/s]

                   all        738        738      0.901      0.908      0.966      0.798





Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      11/20      2.96G     0.7108     0.5177      1.088          4        640: 100%|██████████| 185/185 [00:33<00:00,  5.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.29it/s]

                   all        738        738      0.953      0.939      0.982      0.812






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      12/20      2.96G     0.6846     0.4684      1.071          4        640: 100%|██████████| 185/185 [00:32<00:00,  5.69it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.38it/s]

                   all        738        738      0.962      0.942      0.982      0.833






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      13/20      2.99G     0.6727     0.4449      1.067          4        640: 100%|██████████| 185/185 [00:32<00:00,  5.74it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.60it/s]

                   all        738        738      0.971      0.961      0.987      0.843






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      14/20      3.01G     0.6378     0.4181       1.04          4        640: 100%|██████████| 185/185 [00:32<00:00,  5.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  4.94it/s]

                   all        738        738      0.972      0.967      0.988      0.861






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      15/20      3.02G     0.6193     0.4037      1.027          4        640: 100%|██████████| 185/185 [00:32<00:00,  5.62it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.63it/s]

                   all        738        738      0.974      0.961      0.992      0.868






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      16/20      3.03G     0.5756     0.3683      1.003          4        640: 100%|██████████| 185/185 [00:31<00:00,  5.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.64it/s]

                   all        738        738      0.988       0.97      0.993      0.873






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      17/20      3.06G     0.5561     0.3451     0.9884          4        640: 100%|██████████| 185/185 [00:31<00:00,  5.90it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.58it/s]

                   all        738        738      0.979      0.968      0.991      0.878






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      18/20      3.08G     0.5248     0.3279     0.9667          4        640: 100%|██████████| 185/185 [00:30<00:00,  5.98it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.81it/s]

                   all        738        738      0.972      0.987      0.994       0.89






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      19/20      3.09G     0.5047     0.3067     0.9576          4        640: 100%|██████████| 185/185 [00:31<00:00,  5.92it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.69it/s]

                   all        738        738      0.979      0.989      0.994      0.892






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      20/20       3.1G     0.4798     0.2849     0.9414          4        640: 100%|██████████| 185/185 [00:31<00:00,  5.88it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  5.80it/s]

                   all        738        738      0.973      0.988      0.994        0.9






20 epochs completed in 0.210 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.157 🚀 Python-3.11.11 torch-2.6.0+cu124 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 72 layers, 3,006,038 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:04<00:00,  4.92it/s]
  xa[xa < 0] = -1
  xa[xa < 0] = -1


                   all        738        738      0.974      0.989      0.994        0.9
                   cat        231        231       0.98      0.987      0.995      0.937
                   dog        507        507      0.967       0.99      0.993      0.864
Speed: 0.1ms preprocess, 1.6ms inference, 0.0ms loss, 1.7ms postprocess per image
Results saved to [1mruns/detect/train[0m


ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0, 1])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x7f956ebc3950>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,    0.026026,    0.027027,    0.028028,    0.029029,     0.03003,    0.031031,    0.032032,    0.033033,    0.034034,    0.035035,    0.036036,    0.037037,    0.038038,    0.039039,     0.04004,    0.041041,    0.042042,    0.043043,    0.044044,    0.045045,    0.046046,    0.047047,
          0.04804

In [7]:
from ultralytics import YOLO

model = YOLO('/kaggle/working/runs/detect/train/weights/best.pt')  # adjust path if needed
results = model.predict(source='/kaggle/working/images/val', save=True, imgsz=640)



image 1/738 /kaggle/working/images/val/Cats_Test1009.png: 448x640 1 dog, 36.3ms
image 2/738 /kaggle/working/images/val/Cats_Test1022.png: 480x640 1 dog, 36.3ms
image 3/738 /kaggle/working/images/val/Cats_Test1023.png: 480x640 1 cat, 6.1ms
image 4/738 /kaggle/working/images/val/Cats_Test1025.png: 448x640 1 dog, 6.7ms
image 5/738 /kaggle/working/images/val/Cats_Test1038.png: 640x480 1 cat, 34.6ms
image 6/738 /kaggle/working/images/val/Cats_Test1040.png: 416x640 1 cat, 35.4ms
image 7/738 /kaggle/working/images/val/Cats_Test1046.png: 640x448 1 dog, 34.6ms
image 8/738 /kaggle/working/images/val/Cats_Test1070.png: 448x640 1 cat, 6.9ms
image 9/738 /kaggle/working/images/val/Cats_Test1087.png: 448x640 1 dog, 6.1ms
image 10/738 /kaggle/working/images/val/Cats_Test1089.png: 480x640 1 dog, 6.9ms
image 11/738 /kaggle/working/images/val/Cats_Test109.png: 640x480 1 cat, 1 dog, 6.7ms
image 12/738 /kaggle/working/images/val/Cats_Test1098.png: 640x448 1 cat, 6.6ms
image 13/738 /kaggle/working/images/v

In [8]:
model.export(format='onnx')  # or 'torchscript', 'coreml', etc.


Ultralytics 8.3.157 🚀 Python-3.11.11 torch-2.6.0+cu124 CPU (Intel Xeon 2.00GHz)
💡 ProTip: Export to OpenVINO format for best performance on Intel CPUs. Learn more at https://docs.ultralytics.com/integrations/openvino/

[34m[1mPyTorch:[0m starting from '/kaggle/working/runs/detect/train/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 6, 8400) (5.9 MB)
[31m[1mrequirements:[0m Ultralytics requirements ['onnxslim>=0.1.56', 'onnxruntime-gpu'] not found, attempting AutoUpdate...

[31m[1mrequirements:[0m AutoUpdate success ✅ 4.2s


[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.57...
[34m[1mONNX:[0m export success ✅ 6.2s, saved as '/kaggle/working/runs/detect/train/weights/best.onnx' (11.7 MB)

Export complete (6.5s)
Results saved to [1m/kaggle/working/runs/detect/train/weights[0m
Predict:         yolo predict task=detect model=/kaggle/working/runs/detect/train/weights/best.onnx imgsz=

'/kaggle/working/runs/detect/train/weights/best.onnx'