In [1]:
from cleanvision import Imagelab
from pathlib import Path

import os
import glob
import json

In [14]:
# Folder of images to audit with CleanVision.
# NOTE(review): absolute, machine-specific Windows path; adjust before running elsewhere.
data_path = Path("D:/Projects/DL/MonumentDetection/augmentedImages")

In [15]:
# Create the CleanVision Imagelab over the images read from data_path.
imagelab = Imagelab(data_path)

Reading images from D:/Projects/DL/MonumentDetection/augmentedImages


In [16]:
# Run the checks with no issue types specified, so the library's default set is used.
imagelab.find_issues()

Checking for dark, light, odd_aspect_ratio, low_information, exact_duplicates, near_duplicates, blurry, grayscale, odd_size images ...


  0%|          | 0/4358 [00:00<?, ?it/s]

  0%|          | 0/4358 [00:00<?, ?it/s]

Issue checks completed. 0 issues found in the dataset. To see a detailed report of issues found, use imagelab.report().


In [17]:
# Show the per-issue-type summary produced by the preceding find_issues() run.
imagelab.report()

Issues found in images in order of severity in the dataset

|    | issue_type       |   num_images |
|---:|:-----------------|-------------:|
|  0 | dark             |            0 |
|  1 | light            |            0 |
|  2 | odd_aspect_ratio |            0 |
|  3 | low_information  |            0 |
|  4 | blurry           |            0 |
|  5 | grayscale        |            0 |
|  6 | odd_size         |            0 |
|  7 | exact_duplicates |            0 |
|  8 | near_duplicates  |            0 | 



## Ground Truth Checking

In [3]:
def normalize_bbox(x_center, y_center, width, height, img_width, img_height):
    """Scale a box given in image units to fractions of the image size.

    Horizontal quantities (x_center, width) are divided by img_width and
    vertical quantities (y_center, height) by img_height.

    Returns:
        list: [x_center, y_center, width, height] after the division.

    Raises:
        ZeroDivisionError: if img_width or img_height is zero.
    """
    scaled_x, scaled_y = x_center / img_width, y_center / img_height
    scaled_w, scaled_h = width / img_width, height / img_height
    return [scaled_x, scaled_y, scaled_w, scaled_h]

In [4]:
def check_and_correct_bboxes(label_path, img_width, img_height):
    """Normalize the boxes of one label file by the image size, in place.

    Each label line is expected to be
    "<class_id> <x_center> <y_center> <width> <height>". The four box values
    are divided by the image dimensions via normalize_bbox. Boxes whose
    normalized values all lie in [0, 1] are kept; any other box is reported
    on stdout and left out. The file at label_path is then overwritten with
    the kept boxes.

    Blank lines are skipped silently. Lines that do not have exactly five
    fields, or whose box values are not numeric, are reported and left out
    instead of aborting the run.

    WARNING: the division is applied unconditionally, so calling this on a
    file that is already normalized divides the values a second time.

    Args:
        label_path: path of the label text file to rewrite.
        img_width: width of the matching image, in the same unit as the boxes.
        img_height: height of the matching image, in the same unit as the boxes.
    """
    with open(label_path, 'r') as file:
        lines = file.readlines()

    corrected_bboxes = []
    for line in lines:
        parts = line.strip().split()
        if not parts:
            # Blank line (e.g. trailing newline padding): nothing to convert.
            continue
        if len(parts) != 5:
            print(f"Malformed label line found and ignored: {line.strip()!r}")
            continue

        # The class id is kept as text so it is written back exactly as read.
        class_id = parts[0]
        try:
            bbox = list(map(float, parts[1:]))
        except ValueError:
            print(f"Malformed label line found and ignored: {line.strip()!r}")
            continue
        normalized_bbox = normalize_bbox(*bbox, img_width, img_height)

        if all(0 <= coord <= 1 for coord in normalized_bbox):
            corrected_bboxes.append(f"{class_id} " + " ".join(map(str, normalized_bbox)))
        else:
            print(f"Out of bounds bbox found and ignored: {bbox}")

    with open(label_path, 'w') as file:
        file.write("\n".join(corrected_bboxes))

In [5]:
# Placeholder locations: replace with the real image and label folders before
# running the conversion loop that reads these two names.
image_dir = 'path/to/images'
label_dir = 'path/to/labels'

In [None]:
# For every .jpg in image_dir, normalize the boxes of the label file that has
# the same base name in label_dir.
# NOTE(review): get_image_dimensions is not defined anywhere in the visible
# notebook; this cell raises NameError until it is supplied.
for img_file in glob.glob(os.path.join(image_dir, '*.jpg')):
    # Swap only the extension; str.replace would also rewrite a '.jpg' that
    # happens to appear earlier in the file name.
    label_file = os.path.join(label_dir, os.path.splitext(os.path.basename(img_file))[0] + '.txt')
    if not os.path.isfile(label_file):
        # An image without a label file should not abort the whole run.
        print(f"No label file found for {img_file}; skipped")
        continue
    img_width, img_height = get_image_dimensions(img_file)  # Replace with actual function to get dimensions
    check_and_correct_bboxes(label_file, img_width, img_height)

## Clamping values within bounds

In [11]:
# Label folders whose files are clamped by the driver loop at the end of this section.
labels_dirs = [
    "D:/Projects/DL/MonumentDetection/DL/ultralytics/dataset2/train/labels",
    "D:/Projects/DL/MonumentDetection/DL/ultralytics/dataset2/val/labels",
]

In [12]:
def clamp(value, min_value=0.0, max_value=1.0):
    """Limit value to the closed range [min_value, max_value].

    Values below min_value come back as min_value and values at or above
    max_value come back as max_value; anything in between is returned as is.
    """
    # Cap from above first, then raise to the lower bound if still too small.
    capped = value if value < max_value else max_value
    return capped if capped > min_value else min_value

In [13]:
def _clip_box(x_center, y_center, width, height):
    """Clip one (center, size) box with values relative to [0, 1] to that range.

    The box edges are derived from the values exactly as given and each edge
    is then clamped to [0, 1]. A box that needs no clipping is returned with
    the very values that were passed in, so callers can detect "unchanged"
    with a plain equality test.
    """
    # A negative extent cannot describe a box; treat it as empty.
    width = max(width, 0.0)
    height = max(height, 0.0)

    edges = (
        x_center - width / 2,
        x_center + width / 2,
        y_center - height / 2,
        y_center + height / 2,
    )
    clipped = tuple(clamp(edge) for edge in edges)
    if clipped == edges:
        # Already inside the bounds: do not recompute, which would only add
        # floating-point rounding noise to otherwise untouched values.
        return x_center, y_center, width, height

    x_min, x_max, y_min, y_max = clipped
    return (x_min + x_max) / 2, (y_min + y_max) / 2, x_max - x_min, y_max - y_min


def normalize_annotations(label_file):
    """Clip every box of one label file to the [0, 1] bounds, in place.

    Each valid line is "<class_id> <x_center> <y_center> <width> <height>".
    The portion of a box that lies outside [0, 1] on either axis is cut off
    and the center/size are recomputed from the clipped edges. Boxes that are
    already inside the bounds keep their parsed values unchanged.

    The file is rewritten only when at least one box actually changed. On a
    rewrite every kept line is emitted as
    "<int class_id> <x_center> <y_center> <width> <height>".

    Lines that do not consist of five numeric fields (including blank lines)
    are skipped; they are consequently left out if the file is rewritten.

    Args:
        label_file: path of the label text file to process.
    """
    with open(label_file, 'r') as file:
        lines = file.readlines()

    modified = False
    normalized_lines = []

    for line in lines:
        parts = line.strip().split()
        if len(parts) != 5:
            continue  # Skip invalid lines
        try:
            class_id = int(float(parts[0]))
            parsed_box = tuple(map(float, parts[1:]))
        except (ValueError, OverflowError):
            continue  # Non-numeric fields: treat like any other invalid line

        x_center, y_center, width, height = _clip_box(*parsed_box)
        normalized_lines.append(f"{class_id} {x_center} {y_center} {width} {height}\n")

        if (x_center, y_center, width, height) != parsed_box:
            modified = True

    if modified:
        with open(label_file, 'w') as file:
            file.writelines(normalized_lines)

In [14]:
def normalize_all_labels(labels_dir):
    """Apply normalize_annotations to every entry of labels_dir named *.txt.

    Only the directory's direct entries are considered; entries whose name
    does not end in '.txt' are ignored.
    """
    txt_names = (name for name in os.listdir(labels_dir) if name.endswith('.txt'))
    for name in txt_names:
        normalize_annotations(os.path.join(labels_dir, name))

In [15]:
# Clamp the label files of every folder listed in labels_dirs.
# The label files are edited in place, so keep a backup if the originals matter.
for labels_dir in labels_dirs:
    normalize_all_labels(labels_dir)

print("Normalization process completed.")

Normalization process completed.
