In [None]:
#PANGEA LABELS TO YOLO FORMAT
import csv
import os

def convert_to_yolo(csv_file, output_dir, img_width=1920, img_height=1080):
    """Convert a CSV of pixel-space bounding boxes into YOLO label files.

    The CSV must have the columns ``image``, ``bbox_x``, ``bbox_y``,
    ``bbox_w`` and ``bbox_h``. ``bbox_x``/``bbox_y`` are treated as the
    left/top edge of the box: the centre is computed as ``x + w / 2`` and
    ``y + h / 2``. Every box is written with class id ``0``.

    One ``<image stem>.txt`` file is written per distinct ``image`` value,
    containing one ``0 x_center y_center width height`` line per box, with
    all four values divided by the image size and formatted to six decimals.

    Args:
        csv_file: Path of the annotation CSV to read.
        output_dir: Directory that receives the ``.txt`` files. It is created
            if it does not exist yet.
        img_width: Width in pixels used to normalise x/width values.
        img_height: Height in pixels used to normalise y/height values.

    Raises:
        KeyError: If a required column is missing from the CSV.
        ValueError: If a bounding-box field cannot be parsed as a float.
    """
    image_bboxes = {}

    # newline='' lets the csv module handle line endings itself, which also
    # keeps newlines embedded in quoted fields intact.
    with open(csv_file, 'r', newline='') as file:
        for row in csv.DictReader(file):
            x = float(row['bbox_x'])
            y = float(row['bbox_y'])
            w = float(row['bbox_w'])
            h = float(row['bbox_h'])

            # YOLO stores the box centre and size as fractions of the image size.
            x_center = (x + w / 2) / img_width
            y_center = (y + h / 2) / img_height
            width = w / img_width
            height = h / img_height

            yolo_line = f"0 {x_center:.6f} {y_center:.6f} {width:.6f} {height:.6f}"
            image_bboxes.setdefault(row['image'], []).append(yolo_line)

    # An empty output_dir means "current directory"; os.makedirs('') would raise.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    for image_name, bboxes in image_bboxes.items():
        output_file = os.path.join(output_dir, os.path.splitext(image_name)[0] + '.txt')
        with open(output_file, 'w') as file:
            file.writelines(bbox + '\n' for bbox in bboxes)

# Usage: convert the Pangea CSV annotations into per-image YOLO label files.
csv_file = 'RawDatasets/Pangea/luderick_seagrass_all.csv'  
output_dir = 'RawDatasets/Pangea/YoloLabels'  

# Make sure the destination folder exists before the label files are written into it.
os.makedirs(output_dir, exist_ok=True)
convert_to_yolo(csv_file, output_dir)

In [None]:
#CONVERT ALL FISH4KNOWLEDGE DATASET CLASSES TO 0
import os

def convert_labels_to_single_class(labels_dir):
    """Rewrite every ``.txt`` label file in ``labels_dir`` to use class id 0.

    Each file is read completely and then overwritten in place. Only lines
    that split into exactly five whitespace-separated fields are kept; their
    first field is replaced by ``0`` and the remaining four are re-joined with
    single spaces. Lines with any other field count (blank lines included) are
    not written back. Files without a ``.txt`` suffix are left untouched.

    Args:
        labels_dir: Directory containing the label files.

    Returns:
        None. The total number of rewritten label lines is printed.
    """
    total_rewritten = 0
    label_names = (name for name in os.listdir(labels_dir) if name.endswith(".txt"))

    for label_name in label_names:
        label_path = os.path.join(labels_dir, label_name)

        # Read everything first: the same path is reopened for writing below.
        with open(label_path, 'r') as handle:
            fields_per_line = [raw.strip().split() for raw in handle]

        relabelled = [
            f"0 {' '.join(fields[1:])}\n"
            for fields in fields_per_line
            if len(fields) == 5
        ]

        with open(label_path, 'w') as handle:
            handle.writelines(relabelled)

        total_rewritten += len(relabelled)

    print(f"Converted {total_rewritten} labels to class 0")

# Usage: collapse every class id in the Fish4Knowledge label folder to 0.
# The .txt files in this folder are overwritten in place.
labels_dir = "RawDatasets/Fish4Knowledge Dataset/Labels_Fish_4_Knowledge"
convert_labels_to_single_class(labels_dir)

In [None]:
#FISH4KNOWLEDGE DATASET IMAGE INPAINTING TO REMOVE TIMESTAMPS
import os
from PIL import Image

def replace_rows(image_path, rows=8):
    """Overwrite the top ``rows`` pixel rows of an image with the rows just below.

    The image is opened, converted to RGB if needed, and rows
    ``rows .. 2*rows - 1`` are copied over rows ``0 .. rows - 1``. The result
    is saved back to ``image_path``, replacing the original file. Per the
    cell header this is used to hide the timestamp strip at the top of the
    frames.

    Images with fewer than ``2 * rows`` pixel rows have no source strip to
    copy from; they are reported and left untouched instead of raising.

    Args:
        image_path: Path of the image file to modify in place.
        rows: Number of rows at the top of the image to replace (default 8).
    """
    with Image.open(image_path) as source:
        rgb = source if source.mode == 'RGB' else source.convert('RGB')
        width, height = rgb.size

        if height < 2 * rows:
            print(f"Skipping {image_path}: Image height is less than {2 * rows} pixels")
            return

        new_img = Image.new('RGB', (width, height))
        new_img.paste(rgb, (0, 0))

        # Copy the strip as one block rather than pixel by pixel:
        # crop box is (left, upper, right, lower), lower bound exclusive.
        strip = rgb.crop((0, rows, width, 2 * rows))
        new_img.paste(strip, (0, 0))

        new_img.save(image_path)

def process_directory(directory_path):
    """Run ``replace_rows`` on every image file directly inside a directory.

    A file counts as an image when its lower-cased name ends with .png, .jpg,
    .jpeg, .bmp or .gif. Sub-directories are not descended into. A
    "Processed: <filename>" line is printed after each call.

    Args:
        directory_path: Directory whose image files are modified in place.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    for entry in os.listdir(directory_path):
        if not entry.lower().endswith(image_suffixes):
            continue
        replace_rows(os.path.join(directory_path, entry))
        print(f"Processed: {entry}")

# Usage: process every image in the Fish4Knowledge image folder.
# The image files in this folder are overwritten in place.
directory_path = 'RawDatasets/Fish4Knowledge Dataset/Images_Fish_4_Knowledge'
process_directory(directory_path)

In [None]:
#PANGEA DATASET IMAGE INPAINTING TO REMOVE TIMESTAMPS
import os
from PIL import Image
import numpy as np

def process_image(image_path):
    """Paint over the top-left 600x80 pixel region of an image with a flat colour.

    The image is opened (and converted to RGB if needed). The fill colour is
    the per-channel mean of the top-left patch of 20 rows by 60 columns,
    truncated to integers. The top 80 rows of the first 600 columns are then
    set to that colour and the image is saved back to ``image_path``. Per the
    cell header this is used to hide the timestamp overlay.

    Images narrower than 600 pixels or shorter than 80 pixels are reported
    and left untouched.

    Args:
        image_path: Path of the image file to modify in place.
    """
    with Image.open(image_path) as img:
        rgb = img.convert('RGB') if img.mode != 'RGB' else img

        width, height = rgb.size
        too_small = width < 600 or height < 80
        if too_small:
            print(f"Skipping {image_path}: Image size is less than 600x80 pixels")
            return

        pixels = np.array(rgb)

        # Mean over rows and columns leaves one value per colour channel.
        corner_patch = pixels[:20, :60]
        fill_color = corner_patch.mean(axis=(0, 1)).astype(int)

        # A single RGB triple broadcasts across the whole 80x600 region.
        pixels[:80, :600] = fill_color.astype(np.uint8)

        Image.fromarray(pixels).save(image_path)
        print(f"Processed: {image_path}")

def process_folder(folder_path):
    """Run ``process_image`` on every image file directly inside a folder.

    A file counts as an image when its lower-cased name ends with .png, .jpg,
    .jpeg, .bmp or .gif. Sub-folders are not descended into.

    Args:
        folder_path: Folder whose image files are passed to ``process_image``.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.gif')

    image_names = [
        entry for entry in os.listdir(folder_path)
        if entry.lower().endswith(image_suffixes)
    ]
    for image_name in image_names:
        process_image(os.path.join(folder_path, image_name))

# Usage: process every image in the Pangea luderick-seagrass folder.
# The image files in this folder are overwritten in place.
folder_path = 'RawDatasets/Pangea/luderick-seagrass'  
process_folder(folder_path)