In [None]:
# Report the NVIDIA GPU(s) visible to this runtime, if any.
!nvidia-smi

In [None]:
!pip install ultralytics

In [35]:
import cv2, os, random, shutil
import numpy as np
from ultralytics import YOLO
from matplotlib import pyplot as plt

In [None]:
import kagglehub

# Download the latest version of the SKU110K annotations dataset.
# `path` is reused by later cells as the dataset root (they append "/SKU110K_fixed/").
path = kagglehub.dataset_download("thedatasith/sku110k-annotations")

print("Path to dataset files:", path)

In [None]:
def subsample_data(source_path, target_path, num_samples):
    """Copy a random subset of images and their label files into a new dataset tree.

    Both trees use the layout ``images/{train,val,test}/`` and
    ``labels/{train,val,test}/``. A label file shares its image's base name
    and carries a ``.txt`` extension.

    Args:
        source_path: Root directory of the full dataset.
        target_path: Root directory of the subset (created if missing).
        num_samples: Three counts, for train, val and test in that order. A
            count larger than the number of available images is capped.

    Returns:
        ``(chosen_image_paths, chosen_label_paths)``: parallel lists holding the
        destination paths of the copied files, train first, then val, then test.
    """
    splits = ["train", "val", "test"]
    chosen_image_paths, chosen_label_paths = [], []

    for i, split in enumerate(splits):
        source_image_dir = os.path.join(source_path, "images", split)
        source_label_dir = os.path.join(source_path, "labels", split)
        target_image_dir = os.path.join(target_path, "images", split)
        target_label_dir = os.path.join(target_path, "labels", split)

        # Create directories for the subset
        os.makedirs(target_image_dir, exist_ok=True)
        os.makedirs(target_label_dir, exist_ok=True)

        # os.listdir order is unspecified; sorting makes the seeded draw reproducible.
        file_names = sorted(os.listdir(source_image_dir))

        # Randomly choose distinct images. A private, seeded generator keeps the
        # selection repeatable per split without reseeding the global RNG, and
        # sample() (unlike choices()) never returns the same file twice.
        sample_count = min(num_samples[i], len(file_names))
        chosen_file_names = random.Random(88).sample(file_names, sample_count)

        for file_name in chosen_file_names:
            # splitext copes with extensions of any length (.jpg, .jpeg, ...).
            label_name = os.path.splitext(file_name)[0] + ".txt"
            target_image = os.path.join(target_image_dir, file_name)
            target_label = os.path.join(target_label_dir, label_name)

            shutil.copy(os.path.join(source_image_dir, file_name), target_image)
            shutil.copy(os.path.join(source_label_dir, label_name), target_label)
            chosen_image_paths.append(target_image)
            chosen_label_paths.append(target_label)

    print(chosen_image_paths)
    print(chosen_label_paths)

    return chosen_image_paths, chosen_label_paths

# Build a small working subset: 40 train, 10 val and 10 test images with their labels.
image_paths, label_paths = subsample_data(path+"/SKU110K_fixed/", "./01_datasets/", [40, 10, 10])

In [None]:
def image_preprocess(image_paths, target_size=(640, 640), pad_color=(0, 0, 0)):
    """Letterbox images to ``target_size`` and save them under ``02_preprocess``.

    Each image is resized with its aspect ratio preserved so that it fits
    inside ``target_size``, centred on a canvas filled with ``pad_color``, and
    written to the input path with ``"01_datasets"`` replaced by
    ``"02_preprocess"``.

    Note:
        A path that does not contain ``"01_datasets"`` maps onto itself, so the
        source image would be overwritten by its padded version.

    Args:
        image_paths: Paths of the images to process.
        target_size: Output size as ``(width, height)`` in pixels.
        pad_color: Fill colour of the border, in OpenCV channel order (BGR).

    Returns:
        ``(image_objects, scaling_parameters)``: the padded images and, for
        each one, the tuple ``(pad_left/target_w, pad_top/target_h,
        scale_w/target_w, scale_h/target_h)``, i.e. the offset and extent of
        the resized picture as fractions of the canvas.

    Raises:
        FileNotFoundError: If an image is missing or cannot be decoded.
        OSError: If a processed image cannot be written.
    """
    target_w, target_h = target_size
    image_objects, scaling_parameters = [], []

    for image_path in image_paths:
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Image is missing or unreadable: {image_path}")

        # Compute the scaling factor to resize the image
        original_h, original_w = image.shape[:2]
        scale = min(target_w/original_w, target_h/original_h)
        # Round instead of truncating (float error can turn 640.0 into 639.99...),
        # and clamp to [1, target] so extreme aspect ratios never give a 0-pixel side.
        scale_w = min(target_w, max(1, round(original_w*scale)))
        scale_h = min(target_h, max(1, round(original_h*scale)))

        # INTER_AREA is meant for shrinking; use bilinear when enlarging.
        interpolation = cv2.INTER_AREA if scale < 1 else cv2.INTER_LINEAR
        image_resized = cv2.resize(image, (scale_w, scale_h), interpolation=interpolation)

        # Create a new image with the target size and pad color
        padded_image = np.full((target_h, target_w, 3), pad_color, dtype=np.uint8)

        # Compute padding offsets that centre the resized image
        pad_top = (target_h - scale_h) // 2
        pad_left = (target_w - scale_w) // 2

        # Place the resized image onto the padded image
        padded_image[pad_top:pad_top+scale_h, pad_left:pad_left+scale_w] = image_resized

        image_objects.append(padded_image)
        scaling_parameters.append((
            pad_left/target_w,
            pad_top/target_h,
            scale_w/target_w,
            scale_h/target_h
        ))

        # Make sure to create the directory if it doesn't exist
        processed_path = image_path.replace("01_datasets", "02_preprocess")
        print(processed_path)
        processed_dir = os.path.dirname(processed_path)
        if processed_dir:
            # dirname is "" for a bare file name, which os.makedirs rejects.
            os.makedirs(processed_dir, exist_ok=True)

        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(processed_path, padded_image):
            raise OSError(f"Could not write processed image: {processed_path}")

    return image_objects, scaling_parameters

def show_images(input_image):
    """Render one OpenCV (BGR) image with matplotlib, axes hidden."""
    # matplotlib expects RGB, so swap the channel order before drawing.
    plt.imshow(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

image_objects, scaling_parameters = image_preprocess(image_paths)

# Preview up to three results; bounding the range avoids an IndexError when the
# subset holds fewer than three images.
for i in range(min(3, len(image_objects))):
    show_images(image_objects[i])
    print(scaling_parameters[i])


In [None]:
def prepare_labels(label_paths, scaling_parameters):
    """Rescale label files to match the letterboxed images and save them.

    Every non-blank line of a label file is read as
    ``class x_center y_center width height``. The centre is scaled and shifted
    by the padding offset, the size is only scaled, and the result is written
    to the input path with ``"01_datasets"`` replaced by ``"02_preprocess"``.

    Note:
        A path that does not contain ``"01_datasets"`` maps onto itself, so the
        source label file would be overwritten with the rescaled values.

    Args:
        label_paths: Paths of the label files to convert.
        scaling_parameters: One ``(w_pad, h_pad, w_ratio, h_ratio)`` tuple per
            label file, in the same order as ``label_paths``.

    Returns:
        A list with one entry per label file; each entry is a list of
        ``[class, x_center, y_center, width, height]`` boxes.
    """
    bbox_objects = []
    for label_path, scale in zip(label_paths, scaling_parameters):
        w_pad, h_pad, w_ratio, h_ratio = scale
        lines = []
        with open(label_path, 'r') as fp:
            for line in fp:
                bbox = line.split()
                if not bbox:
                    # Blank lines (e.g. a trailing empty line) carry no box.
                    continue
                # Keep the class id as-is; map the box into the padded image.
                lines.append([
                    int(bbox[0]),
                    float(bbox[1])*w_ratio+w_pad,
                    float(bbox[2])*h_ratio+h_pad,
                    float(bbox[3])*w_ratio,
                    float(bbox[4])*h_ratio
                ])

        # Make sure to create the directory if it doesn't exist
        processed_path = label_path.replace("01_datasets", "02_preprocess")
        print(processed_path)
        processed_dir = os.path.dirname(processed_path)
        if processed_dir:
            # dirname is "" for a bare file name, which os.makedirs rejects.
            os.makedirs(processed_dir, exist_ok=True)
        with open(processed_path, 'w') as fp:
            fp.writelines(" ".join(map(str, line))+"\n" for line in lines)

        bbox_objects.append(lines)
        print(len(lines))

    return bbox_objects

# Rescale every subset label file with the scaling parameters of its image
# (label_paths and scaling_parameters follow the same order as image_paths).
bbox_objects = prepare_labels(label_paths, scaling_parameters)

In [None]:
def draw_one_image_with_bboxes(image, bounding_boxes):
    """Display an image with its bounding boxes drawn in green.

    The boxes are drawn on a copy, so the caller's array is left untouched and
    re-running the cell does not stack rectangles on the stored image.

    Args:
        image: OpenCV image (BGR) as a NumPy array.
        bounding_boxes: Iterable of ``(class, x_center, y_center, width,
            height)`` with coordinates normalised to the image size.
    """
    print(f"Number of labels = {len(bounding_boxes)}")

    canvas = image.copy()
    # Normalised coordinates are scaled by the image size, which is loop-invariant.
    image_h, image_w = canvas.shape[:2]
    color = (0, 255, 0)  # Green color for the bounding box
    thickness = 2

    # Draw each bounding box on the copy
    for _, x_center, y_center, width, height in bounding_boxes:
        x_min = int((x_center - width / 2) * image_w)
        y_min = int((y_center - height / 2) * image_h)
        x_max = int((x_center + width / 2) * image_w)
        y_max = int((y_center + height / 2) * image_h)
        canvas = cv2.rectangle(canvas, (x_min, y_min), (x_max, y_max), color, thickness)

    # Convert the image from BGR to RGB (OpenCV loads in BGR by default)
    image_rgb = cv2.cvtColor(canvas, cv2.COLOR_BGR2RGB)

    # Display the image with bounding boxes using Matplotlib
    plt.imshow(image_rgb)
    plt.axis('off')  # Hide the axes
    plt.show()

# Overlay labels on up to five images; bounding the range avoids an IndexError
# when the subset holds fewer than five images.
for i in range(min(5, len(image_objects), len(bbox_objects))):
    draw_one_image_with_bboxes (image_objects[i], bbox_objects[i])

In [41]:
def write_train_yaml(yaml_name, full_dataset=False):
    """Write the dataset description YAML that is passed to training.

    Args:
        yaml_name: Path of the YAML file to create (overwritten if present).
        full_dataset: When true, the dataset root is ``SKU110K_fixed`` under
            the notebook-level ``path`` variable; otherwise it is the
            preprocessed subset, ``../02_preprocess/``.
    """
    with open(yaml_name, 'w') as fp:
        dataset_root = f"{path}/SKU110K_fixed/" if full_dataset else "../02_preprocess/"
        # One class ('object'); entries are newline-separated with no trailing newline.
        entries = [
            f"path: {dataset_root}",
            "train: images/train",
            "val: images/val",
            "nc: 1",
            "names: ['object']",
        ]
        fp.write("\n".join(entries))

# Point training at the preprocessed subset rather than the full downloaded dataset.
write_train_yaml("train.yaml", full_dataset=False)

In [None]:
# Start from an existing checkpoint; the file must already be present under ./weights/.
model = YOLO('weights/epoch14.pt')

# NOTE(review): training is pinned to the CPU even though the first cell queries
# the GPU — confirm this is intended.
train_results = model.train(
    data="train.yaml",  # path to dataset YAML
    epochs=1,  # number of training epochs
    imgsz=640,  # training image size
    device="cpu",  # device to run on, e.g. device=0 or device=0,1,2,3 or device=cpu
    save_period=1  # checkpoint save interval in epochs (see Ultralytics train settings)
)

In [None]:
# Evaluate model performance on the validation set.
# No arguments are passed, so the dataset settings are presumably carried over
# from the training run above — TODO confirm.
metrics = model.val()

In [44]:
# Save the trained model to 'latest.pt' (relative to the working directory);
# the prediction cell reloads it from the same path.
model.save('latest.pt')

In [None]:
# Load trained model (optional; lets this cell run without re-training)
model = YOLO('latest.pt')

# Run inference on one image. With save=True the annotated image is written under
# the Ultralytics runs/ directory (typically runs/detect/predict{n} — confirm).
results = model.predict(
    source=image_paths[-1],      # Last subset image (test split, copy in 01_datasets)
    conf=0.4,                    # Confidence threshold for predictions
    save=True,                   # Save the output image
    line_width=3,                # Line thickness for bounding boxes
    show_labels=True,            # Show the class labels
    show_conf=True               # Show the confidence scores
)

# Show result on the screen
results[0].show()