In [1]:
# CUDA: Let's check that Nvidia CUDA drivers are already pre-installed and which version is it.
!/usr/local/cuda/bin/nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Tue_Aug_15_22:02:13_PDT_2023
Cuda compilation tools, release 12.2, V12.2.140
Build cuda_12.2.r12.2/compiler.33191640_0


In [2]:
!ls /usr/bin | grep nvidia
!echo $PATH

nvidia-bug-report.sh
nvidia-cuda-mps-control
nvidia-cuda-mps-server
nvidia-debugdump
nvidia-installer
nvidia-modprobe
nvidia-ngx-updater
nvidia-persistenced
nvidia-powerd
nvidia-settings
nvidia-sleep.sh
nvidia-smi
nvidia-uninstall
nvidia-xconfig
/opt/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/tools/node/bin:/tools/google-cloud-sdk/bin


In [3]:
!nvidia-smi

Fri Dec 13 03:24:25 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA A100-SXM4-40GB          Off | 00000000:00:04.0 Off |                    0 |
| N/A   29C    P0              45W / 400W |      2MiB / 40960MiB |      0%      Default |
|                                         |                      |             Disabled |
+-----------------------------------------+----------------------+----------------------+
                                                                    

Add the dataset

In [4]:
!pip install --upgrade dataset-tools

Collecting dataset-tools
  Downloading dataset_tools-0.1.4-py3-none-any.whl.metadata (2.0 kB)
Collecting supervisely>=6.72.28 (from dataset-tools)
  Downloading supervisely-6.73.254-py3-none-any.whl.metadata (33 kB)
Collecting requests-toolbelt<1.0.0,>=0.9.1 (from dataset-tools)
  Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl.metadata (14 kB)
Collecting pandas<=1.5.2,>=1.1.3 (from dataset-tools)
  Downloading pandas-1.5.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting dataframe-image<1.0.0,>=0.1.11 (from dataset-tools)
  Downloading dataframe_image-0.2.6-py3-none-any.whl.metadata (9.1 kB)
Collecting urllib3==1.26.15 (from dataset-tools)
  Downloading urllib3-1.26.15-py2.py3-none-any.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.1/48.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting geojson>=3.0.0 (from dataset-tools)
  Downloading geojson-3.1.0-py3-none-any.whl.metadata (16 kB)


In [1]:
import dataset_tools as dtools
dtools.download(dataset='deepNIR Fruit Detection', dst_dir='/content/dataset-ninja/')

Downloading 'deepNIR Fruit Detection': 100%|██████████| 802M/802M [00:08<00:00, 93.7MB/s]
Unpacking 'deepnir-fruit-detection.tar': 100%|██████████| 8593/8593 [00:04<00:00, 1724.27file/s]


'/content/dataset-ninja/deepnir-fruit-detection'

Check the structure of this dataset

In [None]:
!ls /content/dataset-ninja/deepnir-fruit-detection

In [2]:
import os
import json
import shutil
from sklearn.model_selection import train_test_split

# Define paths
train_path = "/content/dataset-ninja/deepnir-fruit-detection/train"
val_path = "/content/dataset-ninja/deepnir-fruit-detection/valid"
test_path = "/content/dataset-ninja/deepnir-fruit-detection/test"

# List the contents of the img and ann directories
train_img_files = os.listdir(os.path.join(train_path, "img"))
train_ann_files = os.listdir(os.path.join(train_path, "ann"))
val_img_files = os.listdir(os.path.join(val_path, "img"))
val_ann_files = os.listdir(os.path.join(val_path, "ann"))
test_img_files = os.listdir(os.path.join(test_path, "img"))
test_ann_files = os.listdir(os.path.join(test_path, "ann"))

print("Number of train images:", len(train_img_files))
print("Number of train annotations:", len(train_ann_files))
print("Number of val images:", len(val_img_files))
print("Number of val annotations:", len(val_ann_files))
print("Number of test images:", len(test_img_files))
print("Number of test annotations:", len(test_ann_files))

# Create directories if not exist
os.makedirs(os.path.join(train_path, "img"), exist_ok=True)
os.makedirs(os.path.join(train_path, "ann"), exist_ok=True)
os.makedirs(os.path.join(val_path, "img"), exist_ok=True)
os.makedirs(os.path.join(val_path, "ann"), exist_ok=True)
os.makedirs(os.path.join(test_path, "img"), exist_ok=True)
os.makedirs(os.path.join(test_path, "ann"), exist_ok=True)

Number of train images: 3434
Number of train annotations: 3434
Number of val images: 430
Number of val annotations: 430
Number of test images: 431
Number of test annotations: 431


In [3]:
# discover the "img" and "ann"
# Print the first five files in each directory
print("First five files in 'img':", train_img_files[:5])
print("First five files in 'ann':", train_ann_files[:5])

First five files in 'img': ['ca22dfe75_jpg.rf.8ece0f8f36605ef7f5e899f174bb737b.jpg', '04fe56fd3_jpg.rf.7bf4af410c0bcea3e51981d5f406e27e.jpg', 'Skeena_jpg.rf.55503244949ee1d772e6afdcbc50d4db.jpg', 'fe14db6a5_jpg.rf.66694889ad12efd1b357408e5c617071.jpg', '29e44e305_jpg.rf.d12b114bb61063ccdb5ff6c1e2ef7946.jpg']
First five files in 'ann': ['01f37f1d3_jpg.rf.3830ef71f5839ccd76017da415ff05c5.jpg.json', 'f98dd1c09_jpg.rf.b79f3a220964fe4ac80edb40feae6d85.jpg.json', '2cffefd1a_jpg.rf.3f5a4087b910b7b3a5c40fed4029fa3c.jpg.json', '3f8f6b1a1_jpg.rf.27cc61770f40d4e8c809aab9db78e079.jpg.json', '03ad2a35c_jpg.rf.081932fec671f35a33368147970e5a0f.jpg.json']


We found that it's not sorted, so we need to pair them up manually: ann name(.jpg) = file name.jpg(.json)

Also filter to have only bounding boxes for the wheat class

In [4]:
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from PIL import Image

def filter_bounding_boxes(img_dir, ann_dir, visualize=False):
    """
    Filters images and annotations to include only those with bounding boxes
    for the specified class.

    Args:
        img_dir (str): Directory containing image files.
        ann_dir (str): Directory containing annotation files.
        visualize (bool): If True, displays bounding boxes on the image.

    Returns:
        list: Filtered image file paths.
        list: Filtered annotation file paths.
    """
    filtered_imgs = []
    filtered_anns = []

    for img_file in sorted(os.listdir(img_dir)):
        img_path = os.path.join(img_dir, img_file)
        ann_file = img_file + ".json"  # Match image file
        ann_path = os.path.join(ann_dir, ann_file)

        with open(ann_path, 'r') as f:
            annotation = json.load(f)

        # Check if the annotation contains at least one bounding box for the class
        contains_class = any(
            obj["classTitle"] == "wheat" and obj["geometryType"] == "rectangle"
            for obj in annotation.get("objects", [])
        )

        if contains_class:
            filtered_imgs.append(img_path)
            filtered_anns.append(ann_path)

            # Visualization (if enabled)
            if visualize:
                print(f"Visualizing: {img_path}")
                bounding_boxes = [
                    obj["points"]["exterior"]
                    for obj in annotation["objects"]
                    if obj["classTitle"] == "wheat" and obj["geometryType"] == "rectangle"
                ]

                # Open and display the image with bounding boxes
                img = Image.open(img_path).convert("RGB")
                fig, ax = plt.subplots(1, 1, figsize=(10, 10))
                ax.imshow(img)

                # Add bounding boxes
                for bbox in bounding_boxes:
                    x_min, y_min = bbox[0]
                    x_max, y_max = bbox[1]
                    width = x_max - x_min
                    height = y_max - y_min
                    rect = patches.Rectangle((x_min, y_min), width, height, linewidth=2, edgecolor='red', facecolor='none')
                    ax.add_patch(rect)

                ax.set_title(f"Bounding Boxes for {img_path}")
                plt.axis('off')
                plt.show()

            # Extract detected object
            detected_objects = [obj["classTitle"] for obj in annotation.get("objects", [])]
            # print(f"Object Class is: {detected_objects[0]}")

            # Count the number of detected objects
            num_objects = len(detected_objects)
            print(f"{img_file}: Object Class is {detected_objects[0]}; Num detected: {num_objects}")

    print(f"Filtered {len(filtered_imgs)} images and annotations.")

    return filtered_imgs, filtered_anns

In [5]:
train_imgs, train_anns = filter_bounding_boxes(os.path.join(train_path, "img"), os.path.join(train_path, "ann"))
val_imgs, val_anns = filter_bounding_boxes(os.path.join(val_path, "img"), os.path.join(val_path, "ann"))
test_imgs, test_anns = filter_bounding_boxes(os.path.join(test_path, "img"), os.path.join(test_path, "ann"))

00333207f_jpg.rf.0f14c64c1172144c0c6940b55ff42d9c.jpg: Object Class is wheat; Num detected: 55
005b0d8bb_jpg.rf.f49004dbfd800cd7ef4baea199a3a060.jpg: Object Class is wheat; Num detected: 20
00b5fefed_jpg.rf.e0000563d76086104f6da9f777bf3b61.jpg: Object Class is wheat; Num detected: 25
00b70a919_jpg.rf.7b63ac89f96f3fb22287cdb8943753e0.jpg: Object Class is wheat; Num detected: 7
00e903abe_jpg.rf.7c67a46a670d6be82aa8c01f54b36764.jpg: Object Class is wheat; Num detected: 43
00ea5e5ee_jpg.rf.08ced040988457f23f7e1b4a7f52261a.jpg: Object Class is wheat; Num detected: 55
010b216d4_jpg.rf.bb2232affd576643f36c7c480d3e2a12.jpg: Object Class is wheat; Num detected: 36
010c93b99_jpg.rf.464fb93f32b8e779d7da733399ce0e1a.jpg: Object Class is wheat; Num detected: 16
010dbcc8f_jpg.rf.b5aacd2b04982143b55827a1cb728d65.jpg: Object Class is wheat; Num detected: 49
0114c88aa_jpg.rf.9eb5fde924107672969a98440cbfebdb.jpg: Object Class is wheat; Num detected: 21
01189a3c3_jpg.rf.9dbeb4409af966bf66690bf7268534c6.j

In [6]:
def save_filtered_data(img_paths, ann_paths, img_dest, ann_dest):
    """
    Saves filtered images and annotations into specified directories.

    Args:
        img_paths (list): List of image file paths.
        ann_paths (list): List of annotation file paths.
        img_dest (str): Destination directory for images.
        ann_dest (str): Destination directory for annotations.
    """
    os.makedirs(img_dest, exist_ok=True)
    os.makedirs(ann_dest, exist_ok=True)

    for img_path, ann_path in zip(img_paths, ann_paths):
        shutil.copy(img_path, img_dest)
        shutil.copy(ann_path, ann_dest)

# Paths for saving filtered data
filtered_train_path = "/content/dataset-ninja/wheat/train"
filtered_val_path = "/content/dataset-ninja/wheat/val"
filtered_test_path = "/content/dataset-ninja/wheat/test"

# Save filtered datasets
save_filtered_data(train_imgs, train_anns, os.path.join(filtered_train_path, "img"), os.path.join(filtered_train_path, "ann"))
save_filtered_data(val_imgs, val_anns, os.path.join(filtered_val_path, "img"), os.path.join(filtered_val_path, "ann"))
save_filtered_data(test_imgs, test_anns, os.path.join(filtered_test_path, "img"), os.path.join(filtered_test_path, "ann"))

# Final counts
print(f"Filtered Training set: {len(train_imgs)} images, {len(train_anns)} annotations")
print(f"Filtered Validation set: {len(val_imgs)} images, {len(val_anns)} annotations")
print(f"Filtered Test set: {len(test_imgs)} images, {len(test_anns)} annotations")

Filtered Training set: 2699 images, 2699 annotations
Filtered Validation set: 337 images, 337 annotations
Filtered Test set: 337 images, 337 annotations


##YOLO

Download pre-trained YOLOv4 weights

In [7]:
# Install darknet (YOLOv4 framework) and other dependencies
!git clone https://github.com/AlexeyAB/darknet.git
%cd darknet

# Build Darknet with GPU and OpenCV support
!sed -i 's/OPENCV=0/OPENCV=1/' Makefile
!sed -i 's/GPU=0/GPU=1/' Makefile
!sed -i 's/CUDNN=0/CUDNN=1/' Makefile
!sed -i 's/CUDNN_HALF=0/CUDNN_HALF=1/' Makefile

Cloning into 'darknet'...
remote: Enumerating objects: 15873, done.[K
remote: Counting objects: 100% (40/40), done.[K
remote: Compressing objects: 100% (31/31), done.[K
remote: Total 15873 (delta 13), reused 24 (delta 8), pack-reused 15833 (from 1)[K
Receiving objects: 100% (15873/15873), 14.50 MiB | 22.00 MiB/s, done.
Resolving deltas: 100% (10679/10679), done.
/content/darknet


In [8]:
# make darknet (builds darknet so that you can then use the darknet executable file to run or train object detectors)
!make

mkdir -p ./obj/
mkdir -p backup
mkdir -p results
chmod +x *.sh
g++ -std=c++11 -std=c++11 -Iinclude/ -I3rdparty/stb/include -DOPENCV `pkg-config --cflags opencv4 2> /dev/null || pkg-config --cflags opencv` -DGPU -I/usr/local/cuda/include/ -DCUDNN -DCUDNN_HALF -Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic -Ofast -DOPENCV -DGPU -DCUDNN -I/usr/local/cudnn/include -DCUDNN_HALF -c ./src/image_opencv.cpp -o obj/image_opencv.o
[01m[K./src/image_opencv.cpp:[m[K In function ‘[01m[Kvoid draw_detections_cv_v3(void**, detection*, int, float, char**, image**, int, int)[m[K’:
  945 |                 float [01;35m[Krgb[m[K[3];
      |                       [01;35m[K^~~[m[K
[01m[K./src/image_opencv.cpp:[m[K In function ‘[01m[Kvoid cv_draw_object(image, float*, int, int, int*, float*, int*, int, char**)[m[K’:
 1443 |         char [01;35m[Kbuff[m[K[100];
      |              [01;35m[K^~~~[m[K
 1419 |     int [01;35m[Kit_tb_res[m[K = cv::c

In [9]:
# Display dimensions of the first 10 images
import os
from PIL import Image

img_dir = '/content/dataset-ninja/wheat/train/img'
dimensions = [Image.open(os.path.join(img_dir, f)).size for f in os.listdir(img_dir)]
print(dimensions[:10])

[(1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024), (1024, 1024)]


In [13]:
!echo "wheat" > /content/darknet/data/obj.names

In [37]:
with open("/content/darknet/obj.data", "w") as f:
    f.write("""classes=1
train=/content/dataset-ninja/wheat/train/train.txt
valid=/content/dataset-ninja/wheat/val/val.txt
names=/content/darknet/data/obj.names
backup=/content/darknet/backup/
""")

# # test with single file
# with open("/content/darknet/obj.data", "w") as f:
#     f.write("""classes=1
# train=/content/dataset-ninja/wheat/single/train.txt
# valid=/content/dataset-ninja/wheat/single/train.txt
# names=/content/darknet/data/obj.names
# backup=/content/darknet/backup/
# """)

In [38]:
!cat /content/darknet/obj.data

classes=1
train=/content/dataset-ninja/wheat/single/train.txt
valid=/content/dataset-ninja/wheat/single/train.txt
names=/content/darknet/data/obj.names
backup=/content/darknet/backup/


In [15]:
cfg_path = "/content/darknet/cfg/yolov4-custom.cfg"

with open(cfg_path, "r") as f:
    lines = f.readlines()

num_classes = 1
filters = (num_classes + 5) * 3

updated_lines = []
for line in lines:
    if line.strip().startswith("batch="):
        updated_lines.append("batch=64\n")
    elif line.strip().startswith("subdivisions="):
        updated_lines.append("subdivisions=4\n")
    elif line.strip().startswith("max_batches = "):
        updated_lines.append("max_batches=1000\n")
        print(updated_lines)
    elif line.strip().startswith("steps="):
        max_batches = num_classes * 1000
        updated_lines.append(f"steps={int(0.8 * max_batches)},{int(0.9 * max_batches)}\n")
    elif line.strip().startswith("filters="):
        updated_lines.append(f"filters={filters}\n")
    elif line.strip().startswith("classes="):
        updated_lines.append(f"classes={num_classes}\n")
    else:
        updated_lines.append(line)

with open(cfg_path, "w") as f:
    f.writelines(updated_lines)

print(f"Configuration file updated at: {cfg_path}")

['[net]\n', '# Testing\n', '#batch=1\n', '#subdivisions=1\n', '# Training\n', 'batch=64\n', 'subdivisions=4\n', 'width=608\n', 'height=608\n', 'channels=3\n', 'momentum=0.949\n', 'decay=0.0005\n', 'angle=0\n', 'saturation = 1.5\n', 'exposure = 1.5\n', 'hue=.1\n', '\n', 'learning_rate=0.001\n', 'burn_in=1000\n', 'max_batches=1000\n']
Configuration file updated at: /content/darknet/cfg/yolov4-custom.cfg


In [16]:
!cat "/content/darknet/cfg/yolov4-custom.cfg"

[net]
# Testing
#batch=1
#subdivisions=1
# Training
batch=64
subdivisions=4
width=608
height=608
channels=3
momentum=0.949
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
burn_in=1000
max_batches=1000
policy=steps
steps=800,900
scales=.1,.1

#cutmix=1
mosaic=1

#:104x104 54:52x52 85:26x26 104:13x13 for 416

[convolutional]
batch_normalize=1
filters=18
size=3
stride=1
pad=1
activation=mish

# Downsample

[convolutional]
batch_normalize=1
filters=18
size=3
stride=2
pad=1
activation=mish

[convolutional]
batch_normalize=1
filters=18
size=1
stride=1
pad=1
activation=mish

[route]
layers = -2

[convolutional]
batch_normalize=1
filters=18
size=1
stride=1
pad=1
activation=mish

[convolutional]
batch_normalize=1
filters=18
size=1
stride=1
pad=1
activation=mish

[convolutional]
batch_normalize=1
filters=18
size=3
stride=1
pad=1
activation=mish

[shortcut]
from=-3
activation=linear

[convolutional]
batch_normalize=1
filters=18
size=1
stride=1
pad=1
activation=mis

In [17]:
import os

def convert_to_yolo_format(ann_path, img_shape, output_path):
    """
    Convert annotation JSON to YOLO format and save to a .txt file.

    Args:
        ann_path (str): Path to the annotation JSON.
        img_shape (tuple): Shape of the image (width, height).
        output_path (str): Path to save the YOLO formatted annotation.
    """
    with open(ann_path, 'r') as f:
        annotation = json.load(f)

    height, width = img_shape
    with open(output_path, 'w') as f:
        for obj in annotation.get("objects", []):
            if obj["classTitle"] == "wheat"  and obj["geometryType"] == "rectangle":
                # Normalize bounding box coordinates
                x_min, y_min = obj["points"]["exterior"][0]
                x_max, y_max = obj["points"]["exterior"][1]
                x_center = (x_min + x_max) / 2 / width
                y_center = (y_min + y_max) / 2 / height
                box_width = (x_max - x_min) / width
                box_height = (y_max - y_min) / height

                # Write to file in YOLO format: <class_id> <x_center> <y_center> <width> <height>
                f.write(f"0 {x_center} {y_center} {box_width} {box_height}\n")

In [18]:
def convert_dataset(img_dir, ann_dir, output_dir):
    os.makedirs(output_dir, exist_ok=True)
    for img_file in os.listdir(img_dir):
        img_path = os.path.join(img_dir, img_file)
        ann_file = img_file + ".json"
        ann_path = os.path.join(ann_dir, ann_file)
        output_path = os.path.join(output_dir, img_file.replace(".jpg", ".txt"))

        # Get image dimensions
        img = Image.open(img_path)
        img_shape = img.size  # (width, height)

        # Convert annotation
        convert_to_yolo_format(ann_path, img_shape, output_path)

convert_dataset(
    img_dir=os.path.join(filtered_train_path, "img"),
    ann_dir=os.path.join(filtered_train_path, "ann"),
    output_dir=os.path.join(filtered_train_path, "labels")
)
convert_dataset(
    img_dir=os.path.join(filtered_val_path, "img"),
    ann_dir=os.path.join(filtered_val_path, "ann"),
    output_dir=os.path.join(filtered_val_path, "labels")
)
convert_dataset(
    img_dir=os.path.join(filtered_test_path, "img"),
    ann_dir=os.path.join(filtered_test_path, "ann"),
    output_dir=os.path.join(filtered_test_path, "labels")
)

In [19]:
# Generate .txt
def generate_image_list(img_dir, output_path):
    with open(output_path, 'w') as f:
        for img_file in os.listdir(img_dir):
            img_path = os.path.join(img_dir, img_file)
            f.write(f"{img_path}\n")

generate_image_list(
    img_dir=os.path.join(filtered_train_path, "img"),
    output_path=os.path.join(filtered_train_path, "train.txt")
)
generate_image_list(
    img_dir=os.path.join(filtered_val_path, "img"),
    output_path=os.path.join(filtered_val_path, "val.txt")
)
generate_image_list(
    img_dir=os.path.join(filtered_test_path, "img"),
    output_path=os.path.join(filtered_test_path, "test.txt")
)

In [20]:
!wget https://github.com/AlexeyAB/darknet/releases/download/yolov4/yolov4.conv.137 -P /content/darknet/

--2024-12-13 03:35:36--  https://github.com/AlexeyAB/darknet/releases/download/yolov4/yolov4.conv.137
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/75388965/2637cdb1-11b6-4da5-9c07-0e0f9901ce47?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241213%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241213T033536Z&X-Amz-Expires=300&X-Amz-Signature=a0fa7372f2f64849d93617fb481e6207ebe8357a2c6b94b8d319c2b62faf1a79&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dyolov4.conv.137&response-content-type=application%2Foctet-stream [following]
--2024-12-13 03:35:36--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/75388965/2637cdb1-11b6-4da5-9c07-0e0f9901ce47?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credent

In [21]:
!mv /content/dataset-ninja/wheat/train/labels/*.txt /content/dataset-ninja/wheat/train/img/

In [19]:
!cat /content/dataset-ninja/wheat/train/ann/fd8b236e8_jpg.rf.4db2734cd7d8efb6e0295823a7ca3859.jpg.json

{
    "description": "",
    "tags": [],
    "size": {
        "height": 1024,
        "width": 1024
    },
    "objects": [
        {
            "id": 6170255,
            "classId": 8685,
            "description": "",
            "geometryType": "rectangle",
            "labelerLogin": "iw@datasetninja.com",
            "createdAt": "2023-06-13T15:42:04.387Z",
            "updatedAt": "2023-06-13T15:42:04.387Z",
            "tags": [],
            "classTitle": "wheat",
            "points": {
                "exterior": [
                    [
                        19,
                        892
                    ],
                    [
                        152,
                        965
                    ]
                ],
                "interior": []
            }
        },
        {
            "id": 6170254,
            "classId": 8685,
            "description": "",
            "geometryType": "rectangle",
            "labelerLogin": "iw@datasetninja.com",
 

In [20]:
!cat /content/dataset-ninja/wheat/train/img/fd8b236e8_jpg.rf.4db2734cd7d8efb6e0295823a7ca3859.txt

0 0.08349609375 0.90673828125 0.1298828125 0.0712890625
0 0.099609375 0.87353515625 0.111328125 0.0556640625
0 0.1962890625 0.70654296875 0.17578125 0.0693359375
0 0.23583984375 0.5859375 0.0908203125 0.0390625
0 0.33203125 0.5 0.09375 0.07421875
0 0.3466796875 0.43017578125 0.05859375 0.0634765625
0 0.23291015625 0.43310546875 0.0751953125 0.0654296875
0 0.087890625 0.490234375 0.10546875 0.076171875
0 0.015625 0.36474609375 0.03125 0.0712890625
0 0.13720703125 0.37255859375 0.0693359375 0.0517578125
0 0.03515625 0.17578125 0.0546875 0.068359375
0 0.130859375 0.13427734375 0.0859375 0.0791015625
0 0.0751953125 0.060546875 0.0625 0.0625
0 0.13818359375 0.0283203125 0.0751953125 0.056640625
0 0.0810546875 0.00927734375 0.12109375 0.0185546875
0 0.322265625 0.0400390625 0.091796875 0.080078125
0 0.35546875 0.0859375 0.134765625 0.119140625
0 0.39013671875 0.21337890625 0.0654296875 0.0830078125
0 0.314453125 0.24169921875 0.0625 0.1201171875
0 0.2685546875 0.27392578125 0.0703125 0.09472

In [22]:
!mv /content/dataset-ninja/wheat/val/labels/*.txt /content/dataset-ninja/wheat/val/img/

In [26]:
!cat "/content/dataset-ninja/wheat/train/train.txt"

/content/dataset-ninja/wheat/train/img/ca22dfe75_jpg.rf.8ece0f8f36605ef7f5e899f174bb737b.jpg
/content/dataset-ninja/wheat/train/img/04fe56fd3_jpg.rf.7bf4af410c0bcea3e51981d5f406e27e.jpg
/content/dataset-ninja/wheat/train/img/fe14db6a5_jpg.rf.66694889ad12efd1b357408e5c617071.jpg
/content/dataset-ninja/wheat/train/img/29e44e305_jpg.rf.d12b114bb61063ccdb5ff6c1e2ef7946.jpg
/content/dataset-ninja/wheat/train/img/625311180_jpg.rf.0416195fc9949d0837b8e2bdf37e2bd2.jpg
/content/dataset-ninja/wheat/train/img/3a061fb14_jpg.rf.95d2cba113d3061f325c65afa2366c41.jpg
/content/dataset-ninja/wheat/train/img/52c1231da_jpg.rf.67819d51dc5620225b8693b42adec0ea.jpg
/content/dataset-ninja/wheat/train/img/a783f9679_jpg.rf.32b5576543fdb8ec37947b56a6bfcf7d.jpg
/content/dataset-ninja/wheat/train/img/0fd63ed98_jpg.rf.7472b5785babd1fa561ce4d607979e7c.jpg
/content/dataset-ninja/wheat/train/img/233cb8750_jpg.rf.b4ed05bfd72c1212b500747a86c64b7c.jpg
/content/dataset-ninja/wheat/train/img/e3d1d796c_jpg.rf.604aac6a17748d

In [29]:
# Train YOLOv4
!./darknet detector train /content/darknet/obj.data /content/darknet/cfg/yolov4-custom.cfg yolov4.conv.137 -map

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 139 Avg (IOU: 0.495350), count: 173, class_loss = 43.240604, iou_loss = 411.955902, total_loss = 455.196503 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 150 Avg (IOU: 0.005438), count: 297, class_loss = 2392.598145, iou_loss = 5.113281, total_loss = 2397.711426 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 161 Avg (IOU: 0.562780), count: 60, class_loss = 14.769695, iou_loss = 15.788549, total_loss = 30.558245 
 total_bbox = 10721525, rewritten_bbox = 0.273851 % 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 139 Avg (IOU: 0.555535), count: 141, class_loss = 35.218117, iou_loss = 342.780273, total_loss = 377.998383 
v3 (iou loss, Normalizer: (iou: 0.07, obj: 1.00, cls: 1.00) Region 150 Avg (IOU: 0.008590), count: 333, class_loss = 2234.002930, iou_loss = 4.714111, total_loss = 2238.717041 
v3 

In [32]:
!ls /content/darknet/backup/

yolov4-custom_last.weights


In [31]:
# Evaluate mAP
# if completed training:
# !./darknet detector map /content/darknet/obj.data /content/darknet/cfg/yolov4-custom.cfg /content/darknet/backup/yolov4-custom_best.weights

!./darknet detector map /content/darknet/obj.data /content/darknet/cfg/yolov4-custom.cfg /content/darknet/backup/yolov4-custom_last.weights

 CUDA-version: 12020 (12020), cuDNN: 8.9.6, CUDNN_HALF=1, GPU count: 1  
 CUDNN_HALF=1 
 OpenCV version: 4.5.4
 0 : compute_capability = 800, cudnn_half = 1, GPU: NVIDIA A100-SXM4-40GB 
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
   layer   filters  size/strd(dil)      input                output
   0 Create CUDA-stream - 0 
 Create cudnn-handle 0 
conv     18       3 x 3/ 1    608 x 608 x   3 ->  608 x 608 x  18 0.359 BF
   1 conv     18       3 x 3/ 2    608 x 608 x  18 ->  304 x 304 x  18 0.539 BF
   2 conv     18       1 x 1/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.060 BF
   3 route  1 		                           ->  304 x 304 x  18 
   4 conv     18       1 x 1/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.060 BF
   5 conv     18       1 x 1/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.060 BF
   6 conv     18       3 x 3/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.539 BF
   7 Shortcut Layer: 4,  wt = 0, wn = 0, outputs: 304 x 304 x  18 0.002 B

Make Predictions on Test Images, The predictions will be stored in result.json

In [80]:
# Predict on test images
!./darknet detector test /content/darknet/obj.data /content/darknet/cfg/yolov4-custom.cfg /content/darknet/backup/yolov4-custom_last.weights -ext_output -dont_show -thresh 0.1 -out result.json < /content/dataset-ninja/wheat/test/test.txt

 CUDA-version: 12020 (12020), cuDNN: 8.9.6, CUDNN_HALF=1, GPU count: 1  
 CUDNN_HALF=1 
 OpenCV version: 4.5.4
 0 : compute_capability = 800, cudnn_half = 1, GPU: NVIDIA A100-SXM4-40GB 
net.optimized_memory = 0 
mini_batch = 1, batch = 16, time_steps = 1, train = 0 
   layer   filters  size/strd(dil)      input                output
   0 Create CUDA-stream - 0 
 Create cudnn-handle 0 
conv     18       3 x 3/ 1    608 x 608 x   3 ->  608 x 608 x  18 0.359 BF
   1 conv     18       3 x 3/ 2    608 x 608 x  18 ->  304 x 304 x  18 0.539 BF
   2 conv     18       1 x 1/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.060 BF
   3 route  1 		                           ->  304 x 304 x  18 
   4 conv     18       1 x 1/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.060 BF
   5 conv     18       1 x 1/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.060 BF
   6 conv     18       3 x 3/ 1    304 x 304 x  18 ->  304 x 304 x  18 0.539 BF
   7 Shortcut Layer: 4,  wt = 0, wn = 0, outputs: 304 x 304 x  18 0.002 B

In [78]:
!cat result.json

In [26]:
import json
import os

def compute_iou(pred_box, gt_box):
    x_min = max(pred_box[0], gt_box[0])
    y_min = max(pred_box[1], gt_box[1])
    x_max = min(pred_box[2], gt_box[2])
    y_max = min(pred_box[3], gt_box[3])
    inter_area = max(0, x_max - x_min) * max(0, y_max - y_min)
    pred_area = (pred_box[2] - pred_box[0]) * (pred_box[3] - gt_box[1])
    gt_area = (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1])
    return inter_area / (pred_area + gt_area - inter_area + 1e-6)

def load_ground_truths(annotation_dir):
    ground_truths = {}
    for file_name in os.listdir(annotation_dir):
        if file_name.endswith(".json"):
            file_path = os.path.join(annotation_dir, file_name)
            with open(file_path, "r") as f:
                data = json.load(f)
                # Extract bounding boxes in [x_min, y_min, x_max, y_max] format
                boxes = [
                    [obj["x_min"], obj["y_min"], obj["x_max"], obj["y_max"]]
                    for obj in data.get("annotations", [])
                ]
                ground_truths[file_name.replace(".json", ".jpg")] = boxes
    return ground_truths

def calculate_metrics(predictions, ground_truths):
    tp, fp, fn = 0, 0, 0

    for img, preds in predictions.items():
        gt_boxes = ground_truths.get(img, [])
        matched = [False] * len(gt_boxes)  # Track matched ground truths

        for pred in preds:
            iou_scores = [compute_iou(pred, gt) for gt in gt_boxes]
            max_iou = max(iou_scores) if iou_scores else 0
            max_iou_idx = iou_scores.index(max_iou) if iou_scores else -1

            if max_iou >= 0.5 and not matched[max_iou_idx]:
                tp += 1
                matched[max_iou_idx] = True  # Mark this ground truth as matched
            else:
                fp += 1

        fn += sum(1 for m in matched if not m)  # Unmatched ground truths are false negatives

    precision = tp / (tp + fp + 1e-6)
    recall = tp / (tp + fn + 1e-6)
    f1 = 2 * (precision * recall) / (precision + recall + 1e-6)

    return {"precision": precision, "recall": recall, "f1": f1}

# Paths
annotation_dir = "/content/dataset-ninja/wheat/ann/"
results_path = "/content/result.json"

# Load ground truths
ground_truths = load_ground_truths(annotation_dir)

# Load predictions
with open(results_path, "r") as f:
    predictions = json.load(f)

# Calculate metrics
metrics = calculate_metrics(predictions, ground_truths)
print(metrics)

FileNotFoundError: [Errno 2] No such file or directory: '/content/result.json'