In [None]:
# Use the %pip magic so the package is installed into the environment of the
# running kernel. TODO: pin the version (ultralytics==X.Y.Z) for reproducibility.
%pip install ultralytics

In [None]:
import os 
import shutil 
import random 
from PIL import Image
import numpy as np


def put_together_labels(video_label_dir, all_label_dir):
    """Flatten every ``.txt`` label file found under a directory tree into one folder.

    Args:
        video_label_dir: Root directory that is walked recursively.
        all_label_dir: Destination folder; it is created if it does not exist.

    Note:
        Files are copied under their base name only, so two label files with
        the same name in different sub-directories overwrite each other (the
        one visited last wins).
    """
    # Create the destination here so the function works on its own instead of
    # failing with FileNotFoundError when the caller forgot to prepare it.
    os.makedirs(all_label_dir, exist_ok=True)

    for root, _dirs, files in os.walk(video_label_dir):
        for file in files:
            # Extension match is case-insensitive ('.txt', '.TXT', ...).
            if file.lower().endswith('.txt'):
                src_file = os.path.join(root, file)
                dst_file = os.path.join(all_label_dir, file)
                shutil.copy(src_file, dst_file)

    print(f'All labels have been copied to {all_label_dir}')




# Define source and destination directories
video_label_dir = '/kaggle/input/labels-dark-normalized/labels'
all_label_dir = '/kaggle/working/labels_t'

# Create the destination directory if it doesn't exist.
# exist_ok=True already makes this a no-op for an existing directory, so no
# separate os.path.exists() check is needed.
os.makedirs(all_label_dir, exist_ok=True)

# Gather every label file from the per-video folders into the flat folder.
put_together_labels(video_label_dir, all_label_dir)

 

In [None]:
def select_labels_train(label_directory, label_train_directory, background_ratio=0.1):
    """Copy all non-empty label files plus a random sample of empty ones.

    A label file of size 0 is treated as a "background" label. Every
    non-empty label file is copied to ``label_train_directory``; background
    files are then sampled at random so that their number is
    ``round(background_ratio * <number of non-empty files>)``, capped at the
    number of background files actually available.

    Args:
        label_directory: Folder containing the label files to choose from.
        label_train_directory: Destination folder; created if missing.
        background_ratio: Background files to add per non-empty label file
            (default 0.1, the value previously hard-coded).
    """
    print(label_directory)
    print(label_train_directory)

    os.makedirs(label_train_directory, exist_ok=True)

    count_labelled_files = 0
    background_labels_filename = []
    for filename in os.listdir(label_directory):
        label_path = os.path.join(label_directory, filename)
        # Sub-directories cannot be copied with shutil.copy; ignore them.
        if not os.path.isfile(label_path):
            continue
        if os.path.getsize(label_path) == 0:
            background_labels_filename.append(filename)
            continue
        shutil.copy(label_path, os.path.join(label_train_directory, filename))
        count_labelled_files += 1

    # random.sample raises ValueError when asked for more items than exist,
    # so never request more background files than were found.
    wanted = round(count_labelled_files * background_ratio)
    sample_size = max(0, min(wanted, len(background_labels_filename)))
    for filename in random.sample(background_labels_filename, sample_size):
        label_path = os.path.join(label_directory, filename)
        label_train_path = os.path.join(label_train_directory, filename)
        shutil.copy(label_path, label_train_path)

            
    
# Example usage
label_directory = "/kaggle/working/labels_t"  # Replace with your directory path
label_train_directory = "/kaggle/working/labels_train"  # Replace with your directory path
# exist_ok=True replaces the separate existence check and keeps the cell
# re-runnable.
os.makedirs(label_train_directory, exist_ok=True)
select_labels_train(label_directory, label_train_directory)



In [None]:

def convert_float32_grayscale_to_png(image_data, filename):
  """
  Min-max normalizes a grayscale image and saves it as an 8-bit PNG.

  The pixel values are rescaled so that the minimum maps to 0 and the maximum
  to 255, truncated to unsigned 8-bit integers and written in PIL mode 'L'.
  An image whose pixels are all equal has no range to stretch and is saved
  as all zeros instead of dividing by zero.

  Args:
      image_data: Grayscale pixel data; anything ``np.asarray`` can convert
          (a NumPy array or an array-like image object).
      filename: Path to save the converted PNG image.
  """
  pixels = np.asarray(image_data)

  lowest = np.min(pixels)
  value_range = np.max(pixels) - lowest

  if value_range == 0:
    # Constant image: (x - min) / 0 would produce NaN and an undefined cast.
    normalized = np.zeros(pixels.shape, dtype=np.float32)
  else:
    # Min-max normalization to the range 0-1
    normalized = (pixels - lowest) / value_range

  # Scale the data to the range 0-255 and cast to unsigned 8-bit integers
  image_8bit = (normalized * 255).astype(np.uint8)

  # Convert the data to a PIL Image object in grayscale mode
  image = Image.fromarray(image_8bit, mode='L')

  # Save the image as PNG
  image.save(filename, format="PNG")
  
  

def select_images_train(label_train_directory,img_directory,img_train_directory):
    """Convert the TIFF image belonging to each selected label into a PNG.

    For every entry ``<name>.<ext>`` in ``label_train_directory`` the image
    ``<img_directory>/<prefix>_<first '_'-field of name>/<name>.tif`` is
    opened and written as ``<name>.png``. ``<prefix>`` is the part before the
    first ``_`` of the first entry listed in ``img_directory``.

    Args:
        label_train_directory: Folder whose file names select the images.
        img_directory: Root folder holding one sub-folder of TIFFs per group.
        img_train_directory: Output folder for the PNGs (created if needed).
            If falsy, each PNG is written next to its source TIFF.

    Raises:
        FileNotFoundError: If there are labels to process but
            ``img_directory`` contains no entries.
    """
    label_names = os.listdir(label_train_directory)
    if not label_names:
        return

    # The sub-folder prefix does not depend on the label, so read the
    # directory listing once instead of once per label.
    image_entries = os.listdir(img_directory)
    if not image_entries:
        raise FileNotFoundError(f"No image sub-directories found in {img_directory}")
    folder_prefix = image_entries[0].split('_')[0]

    if img_train_directory:
        # Create output directory if it doesn't exist
        os.makedirs(img_train_directory, exist_ok=True)

    for label_name in label_names:
        filename = label_name.split('.')[0] + '.tif'
        subfolder = folder_prefix + '_' + filename.split('_')[0]
        # Get full path of the TIFF image
        tiff_path = os.path.join(img_directory, subfolder, filename)

        if img_train_directory:
            png_filename = os.path.join(img_train_directory, os.path.splitext(filename)[0] + ".png")
        else:
            png_filename = os.path.splitext(tiff_path)[0] + ".png"  # Replace TIFF extension with PNG

        try:
            with Image.open(tiff_path) as im:
                # Hand over a real array so the conversion does not depend on
                # arithmetic between an image object and NumPy scalars.
                convert_float32_grayscale_to_png(np.asarray(im), png_filename)
        except OSError as e:
            # Best effort: report the failing image and continue with the rest.
            print(f"Error: Could not open or save image files for {tiff_path}")
            print(e)
                        
# Example usage
label_train_directory = '/kaggle/working/labels_train'
img_directory = "/kaggle/input/segmented-images-new/archive"  # Replace with your directory path
img_train_directory = "/kaggle/working/images_train"  # Output directory for the converted PNGs
os.makedirs(img_train_directory, exist_ok=True)

# The image directory is an input. Creating it when it is missing would only
# hide the fact that the dataset is not attached, so fail with a clear message.
if not os.path.isdir(img_directory):
    raise FileNotFoundError(f"Image directory not found: {img_directory}")

# Convert images in the directory
select_images_train(label_train_directory, img_directory, img_train_directory=img_train_directory)


### K-fold cross-validation

In [None]:
from pathlib import Path

# Root of the working dataset (replace with 'path/to/dataset' for custom data).
dataset_path = Path("/kaggle/working/")
# Every label file located in a 'labels_train' folder below the root, sorted by path.
labels = list(dataset_path.rglob("labels_train/*.txt"))
labels.sort()

In [None]:
import pandas as pd

# Class indices present in the dataset (a single class, id 0).
cls_idx = [0]
# The extension-less file name of each label file serves as the sample ID.
indx = [label_file.stem for label_file in labels]
# Empty table: one row per sample, one column per class.
labels_df = pd.DataFrame([], index=indx, columns=cls_idx)

In [None]:
from collections import Counter

# Fill labels_df with the number of annotations of each class per label file.
for label in labels:
    lbl_counter = Counter()

    with open(label, "r") as lf:
        for l in lf:
            fields = l.split()
            # Blank lines (e.g. a trailing newline) carry no annotation and
            # would otherwise crash the int() conversion below.
            if not fields:
                continue
            # classes for YOLO label uses integer at first position of each line
            lbl_counter[int(fields[0])] += 1

    labels_df.loc[label.stem] = lbl_counter

labels_df = labels_df.fillna(0.0)  # replace `nan` values with `0.0`

In [None]:
from sklearn.model_selection import KFold

# Number of cross-validation folds.
ksplit = 5
# Shuffled K-fold splitter; the fixed random_state makes the folds repeatable.
kf = KFold(n_splits=ksplit, random_state=20, shuffle=True)

# Materialise the (train_indices, val_indices) pair of every fold.
kfolds = [fold for fold in kf.split(labels_df)]

In [None]:
# One column per fold ("split_1" ... "split_<ksplit>"), one row per sample.
folds = [f"split_{n}" for n in range(1, ksplit + 1)]
folds_df = pd.DataFrame(columns=folds, index=indx)

# Mark each sample as "train" or "val" within every fold.
for idx, (train, val) in enumerate(kfolds, start=1):
    fold_column = f"split_{idx}"
    folds_df.loc[labels_df.index[train], fold_column] = "train"
    folds_df.loc[labels_df.index[val], fold_column] = "val"

In [None]:
# Per fold: ratio of validation to training annotation counts for each class.
fold_lbl_distrb = pd.DataFrame(columns=cls_idx, index=folds)

for n, (train_indices, val_indices) in enumerate(kfolds, start=1):
    val_totals = labels_df.iloc[val_indices].sum()
    train_totals = labels_df.iloc[train_indices].sum()

    # To avoid division by zero, we add a small value (1E-7) to the denominator
    ratio = val_totals / (train_totals + 1e-7)
    fold_lbl_distrb.loc[f"split_{n}"] = ratio

In [None]:
#images.extend(sorted((dataset_path / "images_train").rglob("*.png")))

In [None]:
import datetime
import yaml

supported_extensions = [".jpg", ".jpeg", ".png"]

# Collect the image files below images_train, one extension at a time
# (sorted within each extension).
images = []
for ext in supported_extensions:
    matches = (dataset_path / "images_train").rglob(f"*{ext}")
    images.extend(sorted(matches))

# Root folder of this cross-validation run, named after today's date and the fold count.
save_path = dataset_path / f"{datetime.date.today().isoformat()}_{ksplit}-Fold_Cross-val"
save_path.mkdir(parents=True, exist_ok=True)
ds_yamls = []

for split in folds_df.columns:
    # Directory layout: <split>/{train,val}/{images,labels}
    split_dir = save_path / split
    split_dir.mkdir(parents=True, exist_ok=True)
    for subset in ("train", "val"):
        for kind in ("images", "labels"):
            (split_dir / subset / kind).mkdir(parents=True, exist_ok=True)

    # Dataset description file for this fold
    dataset_yaml = split_dir / f"{split}_dataset.yaml"
    ds_yamls.append(dataset_yaml)

    fold_config = {
        "path": split_dir.as_posix(),
        "train": "train",
        "val": "val",
        'names': {'0': 'dark_islands'},
    }
    with open(dataset_yaml, "w") as ds_y:
        yaml.safe_dump(fold_config, ds_y)

In [None]:
import shutil

# Pair every image with its label by file stem. Pairing by position
# (zip(images, labels)) silently mismatches files as soon as a single image
# is missing (e.g. a failed TIFF conversion) or the two lists are ordered
# differently.
labels_by_stem = {label_file.stem: label_file for label_file in labels}

for image in images:
    label = labels_by_stem.get(image.stem)
    if label is None:
        print(f"Skipping {image.name}: no matching label file")
        continue

    for split, k_split in folds_df.loc[image.stem].items():
        # Destination directory
        img_to_path = save_path / split / k_split / "images"
        lbl_to_path = save_path / split / k_split / "labels"

        # Copy image and label files to new directory (an existing
        # destination file is overwritten)
        shutil.copy(image, img_to_path / image.name)
        shutil.copy(label, lbl_to_path / label.name)

In [None]:
#!rm -r /kaggle/working

In [None]:

# Function to count occurrences of objects '0' and '1' in a YOLO format file
def count_objects(filename):
    """Count the class '0' and class '1' annotations in a YOLO-format label file.

    The class is the first whitespace-separated field of each line. Blank
    lines are ignored; lines of any other class are not counted.

    Args:
        filename: Path of the label file to read.

    Returns:
        Tuple ``(count_0, count_1)``.
    """
    count_0 = 0
    count_1 = 0
    with open(filename, 'r') as file:
        for line in file:
            fields = line.split()
            # A blank line has no fields; indexing it would raise IndexError.
            if not fields:
                continue
            object_class = fields[0]
            if object_class == '0':
                count_0 += 1
            elif object_class == '1':
                count_1 += 1
    return count_0, count_1

# Folder holding the training label files
current_dir = '/kaggle/working/labels_train'

# Full paths of all .txt files in that folder
txt_files = [
    os.path.join(current_dir, name)
    for name in os.listdir(current_dir)
    if name.endswith('.txt')
]

# Running totals: objects per class, files seen (t) and files with at least one object
total_0 = total_1 = 0
t = img_count = 0

for txt_file in txt_files:
    count_0, count_1 = count_objects(txt_file)
    total_0, total_1 = total_0 + count_0, total_1 + count_1
    has_objects = count_0 != 0 or count_1 != 0
    img_count += int(has_objects)
    t += 1

# Print the total counts
print("Total occurrences of object 0:", total_0)
print("Total occurrences of object 1:", total_1)
print('Total images', t)
print('Total images containing objects', img_count)

In [None]:
#!rm -r /kaggle/working/

In [None]:
import os
import shutil
import random

# Parameters
image_dir = '/kaggle/working/images_train/'
label_dir = '/kaggle/working/labels_train/'
train_ratio = 0.75
val_ratio = 1 - train_ratio  # derived so the two ratios cannot drift apart
#test_ratio = 0.1
split_seed = 20  # fixed seed for a repeatable split (same value as the K-fold random_state)

# Create the necessary directories
for split in ['train', 'val']:
    os.makedirs(f'/kaggle/working/yolo/images/{split}', exist_ok=True)
    os.makedirs(f'/kaggle/working/yolo/labels/{split}', exist_ok=True)

# Get all image files. os.listdir returns them in arbitrary order, so sort
# first: together with the seeded shuffle this makes the split reproducible.
image_files = sorted(f for f in os.listdir(image_dir) if f.endswith(('.jpg', '.png')))

# Shuffle the files with a private generator so the global random state is untouched
random.Random(split_seed).shuffle(image_files)

# Split the files
train_split = int(train_ratio * len(image_files))

train_files = image_files[:train_split]
val_files = image_files[train_split:]

# Function to move files
def move_files(file_list, split, src_image_dir=None, src_label_dir=None,
               dest_root='/kaggle/working/yolo'):
    """Move images and their label files into ``<dest_root>/{images,labels}/<split>``.

    Args:
        file_list: Image file names (not paths) to move.
        split: Name of the target sub-folder, e.g. 'train' or 'val'.
        src_image_dir: Folder containing the images; defaults to the
            module-level ``image_dir``.
        src_label_dir: Folder containing the labels; defaults to the
            module-level ``label_dir``.
        dest_root: Root of the output dataset.

    Raises:
        FileNotFoundError: If the label of an image is missing. The check is
            done before anything is moved for that image, so an image is
            never separated from its label.
    """
    if src_image_dir is None:
        src_image_dir = image_dir
    if src_label_dir is None:
        src_label_dir = label_dir

    image_dest = os.path.join(dest_root, 'images', split)
    label_dest = os.path.join(dest_root, 'labels', split)
    os.makedirs(image_dest, exist_ok=True)
    os.makedirs(label_dest, exist_ok=True)

    for file in file_list:
        # Swap only the real extension; str.replace would also rewrite a
        # '.jpg' / '.png' that happens to occur in the middle of the name.
        label_file = os.path.splitext(file)[0] + '.txt'
        label_src = os.path.join(src_label_dir, label_file)
        if not os.path.isfile(label_src):
            raise FileNotFoundError(f"Label file not found for image {file}: {label_src}")

        # Move image file
        shutil.move(os.path.join(src_image_dir, file), os.path.join(image_dest, file))
        # Move label file
        shutil.move(label_src, os.path.join(label_dest, label_file))

# Move files to respective directories
move_files(train_files, 'train')
move_files(val_files, 'val')

print("Dataset split completed successfully.")


In [None]:
import yaml

def create_yolo_yaml(config):
    """Serialize ``config`` as YAML into 'yolo_config.yml' in the working directory.

    Args:
        config: The configuration object to dump.
    """
    yaml_text = yaml.dump(config)
    with open('yolo_config.yml', 'w') as file:
        file.write(yaml_text)

# Define YOLO configuration
yolo_config = {
    'path': '/kaggle/working/yolo',
    'train': 'images/train',
    'val': 'images/val',
    'names': {
        '0': 'dark_islands',
        #'1': 'bright_islands'
    }

    
}

# Create YOLO YAML file
create_yolo_yaml(yolo_config)

In [None]:
#from ultralytics import YOLOv10
import wandb
from ultralytics import YOLO
#model = YOLOv10()
# If you want to finetune the model with pretrained weights, you could load the 
# pretrained weights like below
# model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
# or
# wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt

# SECURITY: a Weights & Biases API key was hard-coded here (also in a
# commented-out line). That key must be treated as leaked and revoked.
# The key is now read from the WANDB_API_KEY environment variable (populate it
# from your platform's secret store); without it wandb.login() falls back to
# its own credential lookup.
wandb_api_key = os.environ.get("WANDB_API_KEY")
if wandb_api_key:
    wandb.login(key=wandb_api_key)
else:
    wandb.login()
#model.tune(data='/kaggle/working/yolo_config.yml', epochs=100, imgsz=224, lr0 = 0.01)


results = {}

# Define your additional arguments here
batch = 16
project = "kfold_demo"  # NOTE(review): not passed to train(), so the output location is unchanged
epochs = 100

# Train one model per fold and keep its metrics, keyed by fold index.
for k in range(ksplit):
    wandb.init(project = 'dark_islands_kfold_yolov10', name = f'yolov10m_{k}')
    try:
        # Load the yolov10m.pt checkpoint anew for every fold so no weights
        # carry over from the previous fold.
        model = YOLO("yolov10m.pt")
        dataset_yaml = ds_yamls[k]
        # epochs/batch now come from the variables above instead of a
        # duplicated literal; the values are the same as before.
        model.train(data=dataset_yaml, epochs=epochs, batch=batch, imgsz=224, lr0 = 0.001)
        results[k] = model.metrics  # save output metrics for further analysis
    finally:
        # Close this fold's run even if training fails, so the next
        # wandb.init() starts a separate run.
        wandb.finish()