In [None]:
import pandas as pd
import numpy as np
import os
from PIL import Image

# Input locations: the CSV with the RLE annotations and the folder of images
csv_file = 'ship_dataset_train_v2.csv'
image_folder = 'ship_dataset_train_v2/'
# Destination folder for the generated YOLO label files
output_folder = 'yolo_label_ship_train'

# exist_ok=True avoids the check-then-create race and is a no-op when the
# folder is already there
os.makedirs(output_folder, exist_ok=True)

# Load the annotations; the conversion loop below reads the 'ImageId' and
# 'EncodedPixels' columns
df = pd.read_csv(csv_file)

# Function to decode run-length encoding (RLE)
def rle_decode(mask_rle, shape=(768, 768)):
    """Decode a run-length-encoded mask string into a binary 2-D array.

    Args:
        mask_rle: Whitespace-separated ``start length`` pairs. ``start`` is a
            1-based index into the mask flattened column by column.
        shape: ``(height, width)`` of the array to return.

    Returns:
        A ``uint8`` array of shape ``shape`` holding 1 inside the encoded
        runs and 0 everywhere else.

    Raises:
        ValueError: If the string does not contain an even number of tokens.
    """
    tokens = mask_rle.split()
    if len(tokens) % 2:
        raise ValueError("RLE string must contain start/length pairs")

    starts, lengths = [np.asarray(x, dtype=int) for x in (tokens[0::2], tokens[1::2])]
    starts -= 1  # RLE starts are 1-based; array indices are 0-based
    ends = starts + lengths

    flat = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for lo, hi in zip(starts, ends):
        flat[lo:hi] = 1

    # The runs index a column-major flattening, so rebuild in Fortran order.
    # For square shapes this equals reshape(shape).T; for non-square shapes
    # it keeps the requested (height, width) instead of returning it swapped.
    return flat.reshape(shape, order='F')

# Function to extract bounding boxes from a binary mask
def extract_bounding_boxes(mask):
    """Return the tight box around every non-zero cell of a 2-D mask.

    The result is ``(xmin, ymin, xmax, ymax)`` where the max values are the
    indices of the last non-zero column/row (inclusive). ``None`` is returned
    when the mask has no non-zero cell.
    """
    occupied_rows = np.where(np.any(mask, axis=1))[0]
    if occupied_rows.size == 0:
        return None  # No object found

    occupied_cols = np.where(np.any(mask, axis=0))[0]
    top, bottom = occupied_rows[0], occupied_rows[-1]
    left, right = occupied_cols[0], occupied_cols[-1]
    return left, top, right, bottom

# Function to normalize bounding boxes for YOLO format
def normalize_bbox(xmin, ymin, xmax, ymax, img_width, img_height):
    """Turn corner coordinates into a center/size box scaled by the image size.

    Returns ``(x_center, y_center, width, height)``: the x values are divided
    by ``img_width`` and the y values by ``img_height``.
    """
    mid_x = (xmin + xmax) / 2
    mid_y = (ymin + ymax) / 2
    span_x = xmax - xmin
    span_y = ymax - ymin
    return (
        mid_x / img_width,
        mid_y / img_height,
        span_x / img_width,
        span_y / img_height,
    )
# Cap on how many ImageId groups are visited (groups that end up skipped
# count too). Kept at 99 to match the previous debug limit; set to None to
# convert the whole CSV.
max_images = 99

# Process each image in the CSV file: one label file per image, one line per ship
for count, (img_id, encoded_pixels) in enumerate(
        df.groupby('ImageId')['EncodedPixels'], start=1):
    if max_images is not None and count > max_images:
        break

    # Skip images without encoded pixels (NaN); they get no label file
    rles = encoded_pixels.dropna()
    if rles.empty:
        continue

    # Only the dimensions are needed; the context manager closes the file
    with Image.open(os.path.join(image_folder, img_id)) as img:
        img_width, img_height = img.size

    # Each RLE row describes one ship, so each gets its own bounding box
    # rather than one box around the union of all masks.
    label_lines = []
    for rle in rles:
        mask = rle_decode(rle, shape=(img_height, img_width))
        if mask.shape != (img_height, img_width):
            raise ValueError(
                f"decoded mask shape {mask.shape} does not match image "
                f"size {(img_height, img_width)} for {img_id}"
            )

        bbox = extract_bounding_boxes(mask)
        if bbox is None:
            continue

        xmin, ymin, xmax, ymax = bbox
        # xmax/ymax are the last occupied pixel indices (inclusive); the box
        # edge lies one pixel further, otherwise a 1-pixel-wide ship would
        # get a zero-width box.
        x_center, y_center, width, height = normalize_bbox(
            xmin, ymin, xmax + 1, ymax + 1, img_width, img_height)

        # YOLO format: <class_id> <x_center> <y_center> <width> <height>
        # Assuming class_id for "ship" is 0 (change if needed)
        label_lines.append(f"0 {x_center} {y_center} {width} {height}\n")

    if label_lines:
        # Create a corresponding .txt file for the YOLO format
        yolo_txt_file = os.path.join(output_folder, f"{os.path.splitext(img_id)[0]}.txt")
        with open(yolo_txt_file, 'w') as f:
            f.writelines(label_lines)

print("YOLO .txt files generated successfully!")


# NOTE(review): stray expression statement, presumably a leftover notebook
# scratch cell; its value is not assigned or used anywhere in view.
['mmmm']
