In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Install YOLOv8
!pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.43-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.12-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.43-py3-none-any.whl (898 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m898.4/898.4 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.12-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.43 ultralytics-thop-2.0.12


In [3]:
import torch

# Set device to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


Using device: cuda


In [4]:
import os
import random

# Paths to image and label directories
image_dir = '/kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/train/images'
label_dir = '/kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/train/labels'

# Get list of image files (make sure they're .jpg or .png)
image_files = [f for f in os.listdir(image_dir) if f.endswith(('.jpeg', '.png'))]


# Shuffle and split dataset into train (90%) and validation (10%)
random.seed(42)
random.shuffle(image_files)
train_size = int(0.9 * len(image_files))
train_dataset = image_files[:train_size]
val_dataset = image_files[train_size:]

# Print dataset sizes
print(f"Total dataset size: {len(image_files)}")
print(f"Training set size: {len(train_dataset)}")
print(f"Validation set size: {len(val_dataset)}")


Total dataset size: 7500
Training set size: 6750
Validation set size: 750


In [5]:
train_images_path = "dataset/train/images"
train_labels_path = "dataset/train/labels"
val_images_path = "dataset/val/images"
val_labels_path = "dataset/val/labels"

os.makedirs(train_images_path,exist_ok=True)
os.makedirs(train_labels_path,exist_ok=True)
os.makedirs(val_images_path,exist_ok=True)
os.makedirs(val_labels_path,exist_ok=True)

In [6]:
import shutil
def copy_files(image_list, src_image_dir, src_label_dir, dest_image_dir, dest_label_dir):
    for image_name in image_list:
        # Copy image
        src_image_path = os.path.join(src_image_dir, image_name)
        dest_image_path = os.path.join(dest_image_dir, image_name)
        shutil.copy2(src_image_path, dest_image_path)

        # Copy corresponding label
        label_name = image_name.replace('.jpeg', '.txt')  # Assuming label files match image names
        src_label_path = os.path.join(src_label_dir, label_name)
        dest_label_path = os.path.join(dest_label_dir, label_name)
        
        shutil.copy2(src_label_path, dest_label_path)

# Copy train files
copy_files(train_dataset, image_dir, label_dir, train_images_path, train_labels_path)

# Copy validation files
copy_files(val_dataset, image_dir, label_dir, val_images_path, val_labels_path)


In [7]:
import yaml

# Define the dataset configuration
data = {
    'train': '/kaggle/working/dataset/train/images',  # Path to training images (relative to 'path')
    'val': '/kaggle/working/dataset/val/images',      # Path to validation images (relative to 'path')
    'nc': 6,                  # Number of classes
    'names': ["aegypti","albopictus","anopheles","culex","culiseta","japonicus/koreicus"]   # Class names
}

# Save to a YAML file
with open('/kaggle/working/dataset.yaml', 'w') as file:
    yaml.dump(data, file, default_flow_style=False)

print("YAML file created at /kaggle/working/dataset.yaml")


YAML file created at /kaggle/working/dataset.yaml


In [8]:
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
results = model.train(data="dataset.yaml",
                     epochs=10,
                     imgsz=864,
                     device=0,
                     batch=16,
                     verbose=True,
                     )

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 103MB/s]


Ultralytics 8.3.43 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=dataset.yaml, epochs=10, time=None, patience=100, batch=16, imgsz=864, save=True, save_period=-1, cache=False, device=0, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, show_boxes=True, lin

100%|██████████| 755k/755k [00:00<00:00, 22.4MB/s]


Overriding model.yaml nc=80 with nc=6

                   from  n    params  module                                       arguments                     
  0                  -1  1       464  ultralytics.nn.modules.conv.Conv             [3, 16, 3, 2]                 
  1                  -1  1      4672  ultralytics.nn.modules.conv.Conv             [16, 32, 3, 2]                
  2                  -1  1      7360  ultralytics.nn.modules.block.C2f             [32, 32, 1, True]             
  3                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  4                  -1  2     49664  ultralytics.nn.modules.block.C2f             [64, 64, 2, True]             
  5                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  6                  -1  2    197632  ultralytics.nn.modules.block.C2f             [128, 128, 2, True]           
  7                  -1  1    295424  ultralytics

100%|██████████| 5.35M/5.35M [00:00<00:00, 90.9MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /kaggle/working/dataset/train/labels... 6750 images, 0 backgrounds, 1 corrupt: 100%|██████████| 6750/6750 [00:05<00:00, 1188.82it/s]






[34m[1mtrain: [0mNew cache created: /kaggle/working/dataset/train/labels.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
[34m[1mval: [0mScanning /kaggle/working/dataset/val/labels... 750 images, 0 backgrounds, 0 corrupt: 100%|██████████| 750/750 [00:00<00:00, 876.15it/s]

[34m[1mval: [0mNew cache created: /kaggle/working/dataset/val/labels.cache





Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 864 train, 864 val
Using 4 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 10 epochs...
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


  self.pid = os.fork()
  self.pid = os.fork()



      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/10      3.92G      1.267      3.735      1.503         13        864: 100%|██████████| 422/422 [03:12<00:00,  2.20it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:16<00:00,  1.46it/s]


                   all        750        750      0.543      0.303      0.277      0.177

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/10      3.75G      1.203      1.885      1.415         13        864: 100%|██████████| 422/422 [03:01<00:00,  2.32it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.68it/s]

                   all        750        750      0.917      0.262      0.313      0.214






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       3/10      3.75G       1.19      1.354       1.39         13        864: 100%|██████████| 422/422 [03:00<00:00,  2.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.67it/s]

                   all        750        750      0.906      0.276      0.311      0.208






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       4/10      3.75G      1.148      1.198      1.357         13        864: 100%|██████████| 422/422 [02:59<00:00,  2.36it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:15<00:00,  1.60it/s]


                   all        750        750      0.578      0.368      0.334      0.242

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       5/10      3.74G      1.096      1.091      1.319         13        864: 100%|██████████| 422/422 [03:03<00:00,  2.30it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.63it/s]

                   all        750        750      0.598      0.377      0.341      0.245






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       6/10      3.75G      1.054      1.012      1.288         13        864: 100%|██████████| 422/422 [03:02<00:00,  2.31it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:15<00:00,  1.52it/s]

                   all        750        750      0.576      0.377      0.334      0.238






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       7/10      3.74G      1.004     0.9385      1.247         13        864: 100%|██████████| 422/422 [02:59<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.68it/s]

                   all        750        750      0.671      0.423       0.38      0.282






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       8/10      3.75G     0.9712     0.8833      1.223         13        864: 100%|██████████| 422/422 [02:59<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.63it/s]

                   all        750        750      0.651       0.43      0.393      0.289






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       9/10      3.74G     0.9391     0.8368        1.2         13        864: 100%|██████████| 422/422 [03:00<00:00,  2.34it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.62it/s]

                   all        750        750      0.645       0.48      0.387      0.291






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      10/10      3.75G     0.9032     0.7904      1.173         13        864: 100%|██████████| 422/422 [02:59<00:00,  2.35it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:14<00:00,  1.65it/s]

                   all        750        750      0.659      0.454      0.422      0.322






10 epochs completed in 0.551 hours.
Optimizer stripped from runs/detect/train/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train/weights/best.pt, 6.2MB

Validating runs/detect/train/weights/best.pt...
Ultralytics 8.3.43 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 168 layers, 3,006,818 parameters, 0 gradients, 8.1 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 24/24 [00:15<00:00,  1.53it/s]


                   all        750        750      0.659      0.454      0.423      0.322
               aegypti          2          2          1          0     0.0829     0.0603
            albopictus        317        317      0.749      0.965      0.936      0.684
             anopheles          6          6          1          0     0.0539     0.0458
                 culex        360        360      0.725       0.95      0.921      0.716
              culiseta         35         35      0.202      0.543      0.285      0.233
    japonicus/koreicus         30         30       0.28      0.267      0.256      0.191
Speed: 0.3ms preprocess, 3.0ms inference, 0.0ms loss, 2.0ms postprocess per image
Results saved to [1mruns/detect/train[0m


In [9]:
val_results = model.val()
print(val_results.box.map)

Ultralytics 8.3.43 🚀 Python-3.10.14 torch-2.4.0 CUDA:0 (Tesla T4, 15095MiB)
Model summary (fused): 168 layers, 3,006,818 parameters, 0 gradients, 8.1 GFLOPs


[34m[1mval: [0mScanning /kaggle/working/dataset/val/labels.cache... 750 images, 0 backgrounds, 0 corrupt: 100%|██████████| 750/750 [00:00<?, ?it/s]
  self.pid = os.fork()
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 47/47 [00:17<00:00,  2.72it/s]


                   all        750        750      0.665      0.459      0.421       0.32
               aegypti          2          2          1          0     0.0737     0.0535
            albopictus        317        317       0.75      0.965      0.936      0.685
             anopheles          6          6          1          0     0.0542     0.0458
                 culex        360        360      0.724      0.947      0.922      0.714
              culiseta         35         35      0.206      0.543      0.275      0.225
    japonicus/koreicus         30         30      0.313        0.3      0.267      0.198
Speed: 0.4ms preprocess, 6.0ms inference, 0.0ms loss, 1.6ms postprocess per image
Results saved to [1mruns/detect/train2[0m
0.3202442457448141


In [10]:
print(val_results.box.map50)

0.4212466998129308


In [11]:
test_results = model.predict("/kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images",
                            save_txt=True,
                            save_conf=True,
                            conf=0.30,
                            iou=0.50)


image 1/525 /kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images/0031063e-716a-4080-934c-77598dc8de72.jpeg: 864x672 1 culex, 37.6ms
image 2/525 /kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images/00fbfad7-9722-4581-831c-79faa576ea7f.jpeg: 864x672 (no detections), 9.6ms
image 3/525 /kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images/02043b0e-3d7d-4ca4-a36f-4bf97c344264.jpeg: 864x512 1 culex, 35.3ms
image 4/525 /kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images/0365513c-8f00-44f3-abd0-1fadde81c602.jpeg: 864x672 1 culex, 10.1ms
image 5/525 /kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images/0365d78b-2064-4d54-b15c-994d1950479a.jpeg: 864x704 1 albopictus, 36.9ms
image 6/525 /kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images/03d6749a-c5b3-45a3-81fa-b9c2f09eb4ba.jpeg: 768x864 1 albopictus, 33.9ms
image 7/525 /kaggle/input/dlp-object-detection

In [12]:
predictions_dir = "/kaggle/working/runs/detect/train3/labels"

files_list = os.listdir(predictions_dir)
print(len(files_list))
with open(os.path.join(predictions_dir,files_list[49]), "r") as f:
    for line_id,line in enumerate(f):
        print(line)

495
1 0.49217 0.427143 0.157357 0.129592 0.487092

3 0.493246 0.427179 0.149657 0.125484 0.39065



In [13]:
class_names = ["aegypti","albopictus","anopheles",
               "culex","culiseta","japonicus/koreicus"]   # Class names


In [14]:
import csv

# Define the output CSV file path
output_csv = "/kaggle/working/21F1000641.csv"
test_path="/kaggle/input/dlp-object-detection/final_dlp_data/final_dlp_data/test/images"

# Initialize the CSV file with the header
with open(output_csv, mode="w", newline="") as file:
    writer = csv.writer(file)
    sno=0
    # Write header
    writer.writerow(["id", "ImageID", "LabelName", "Conf", "xcenter", "ycenter", "bbx_width", "bbx_height"])

    # Loop through all prediction files in the labels directory
    for img_file in os.listdir(test_path):
        txt_file = img_file.replace(".jpeg",".txt")
        
        image_id = img_file
        label_test_file_path = os.path.join(predictions_dir,txt_file)
        if txt_file in os.listdir(predictions_dir):
            
            # Read the predictions from the .txt file
            with open(label_test_file_path, "r") as f:
                lines = f.readlines()
                if lines:  # File is empty
                    max_conf=0
                    for line in lines:
                        #YOLO format: class x_center y_center width height confidence
                        line_parts = line.strip().split()
                        c = float(line_parts[5])
                        if c>=max_conf:
                            label_name = int(line_parts[0])  # Class ID
                            xcenter = float(line_parts[1])
                            ycenter = float(line_parts[2])
                            bbx_width = float(line_parts[3])
                            bbx_height = float(line_parts[4])
                            conf = float(line_parts[5])
                            max_conf = conf
        else:
            label_name = 0  # Placeholder class ID for no predictions
            xcenter = 0.5   # Dummy values for bounding box
            ycenter = 0.5
            bbx_width = 0.2
            bbx_height = 0.2
            conf = 0.5

        # Write row to CSV
        writer.writerow([
            sno,
            image_id,
            class_names[label_name],
            conf,
            xcenter,
            ycenter,
            bbx_width,
            bbx_height
        ])
        sno+=1

print(f"Submission file saved to {output_csv}")


Submission file saved to /kaggle/working/21F1000641.csv


In [15]:
print(lines)

['1 0.634885 0.659639 0.244449 0.231756 0.811765\n']


In [16]:
len(pd.read_csv("/kaggle/working/21F1000641.csv"))

525