In [1]:
import os
import shutil
import random

# Raw dataset location and the root of the reorganised copy built by this notebook
dataset_folder = '../SAR-Ship-Dataset/ship_dataset_v0'
target_folder = "../data/ShipDataset"

# Images and labels go into sibling subfolders under the target root.
images_folder, labels_folder = (
    os.path.join(target_folder, subdir) for subdir in ('images', 'labels')
)

# exist_ok=True makes re-running this cell harmless when the folders are already there
for folder in (images_folder, labels_folder):
    os.makedirs(folder, exist_ok=True)

In [2]:
# Derive the train/test directories under both the images root and the labels root
train_images_folder, test_images_folder = (
    os.path.join(images_folder, split_name) for split_name in ('train', 'test')
)
train_labels_folder, test_labels_folder = (
    os.path.join(labels_folder, split_name) for split_name in ('train', 'test')
)

# Create all four directories; already-existing ones are left alone
for split_folder in (
    train_images_folder,
    test_images_folder,
    train_labels_folder,
    test_labels_folder,
):
    os.makedirs(split_folder, exist_ok=True)

In [3]:
# Take one snapshot of the dataset directory's entries and report how many there are
all_files = os.listdir(dataset_folder)
print(f'{len(all_files)}')

79458


In [4]:
# Collect the image files and derive the label filename expected for each one.
# os.listdir returns entries in arbitrary order, so sort to get a stable starting order.
image_files = sorted(f for f in all_files if f.endswith('.jpg'))
# Swap only the extension: str.replace would also rewrite a '.jpg' that appears
# earlier in the filename, producing a label name that does not exist.
label_files = [os.path.splitext(f)[0] + '.txt' for f in image_files]

# Keep only the images whose label file actually exists in the dataset folder
image_label_pairs = [
    (img, lbl)
    for img, lbl in zip(image_files, label_files)
    if os.path.exists(os.path.join(dataset_folder, lbl))
]

# Split the image-label pairs into train and test sets
def split_pairs(pairs, train_ratio=0.8, seed=None):
    """Randomly split image-label pairs into training and testing sets.

    The input sequence is left untouched; a shuffled copy is split instead, so
    the caller's list keeps its original order.

    Args:
        pairs: Sequence of (image_filename, label_filename) tuples.
        train_ratio: Fraction of the pairs assigned to the training set.
        seed: Optional seed for a private random generator, making the split
            repeatable for the same input order. When None, the module-level
            ``random`` state is used (so a prior ``random.seed(...)`` call
            still takes effect).

    Returns:
        A ``(train_pairs, test_pairs)`` tuple of lists.
    """
    rng = random.Random(seed) if seed is not None else random
    # Shuffle a copy: random.shuffle works in place and would otherwise
    # reorder the list owned by the caller.
    shuffled = list(pairs)
    rng.shuffle(shuffled)
    split_idx = int(len(shuffled) * train_ratio)
    return shuffled[:split_idx], shuffled[split_idx:]

# Seed the module-level RNG so the shuffle performed by split_pairs, and therefore
# the train/test membership, is repeatable for a given input order.
random.seed(42)
train_pairs, test_pairs = split_pairs(image_label_pairs)


In [5]:
# Report the size of the training split, then the test split, one per line
print(len(train_pairs), len(test_pairs), sep='\n')

31783
7946


In [6]:
# Function to move files to their respective directories
def move_files(pairs, image_dest_folder, label_dest_folder, source_folder=None):
    """Move each image/label pair from the source folder into the destination folders.

    A pair is moved as a unit: if the label cannot be moved after its image
    already has been, the image is moved back so the pair is never left split
    between source and destination. Failures are reported per pair and do not
    stop the remaining pairs from being processed.

    Args:
        pairs: Iterable of (image_filename, label_filename) tuples.
        image_dest_folder: Directory that receives the image files.
        label_dest_folder: Directory that receives the label files.
        source_folder: Directory the files are read from. Defaults to the
            notebook-level ``dataset_folder`` when None.
    """
    if source_folder is None:
        source_folder = dataset_folder

    for img_file, lbl_file in pairs:
        img_source_path = os.path.join(source_folder, img_file)
        lbl_source_path = os.path.join(source_folder, lbl_file)
        img_dest_path = os.path.join(image_dest_folder, img_file)
        lbl_dest_path = os.path.join(label_dest_folder, lbl_file)

        # Only filesystem errors are treated as recoverable; anything else
        # (e.g. a malformed pair) is a programming error and should surface.
        try:
            shutil.move(img_source_path, img_dest_path)
            try:
                shutil.move(lbl_source_path, lbl_dest_path)
            except OSError:
                # Undo the image move so image and label stay together.
                shutil.move(img_dest_path, img_source_path)
                raise
            print(f'Moved image and label: {img_file}, {lbl_file}')
        except OSError as e:
            print(f'Error moving files {img_file} and {lbl_file}: {e}')


In [7]:
# Relocate the training pairs into the train image/label folders
move_files(
    pairs=train_pairs,
    image_dest_folder=train_images_folder,
    label_dest_folder=train_labels_folder,
)

Moved image and label: Sen_ship_hh_02016070501027017.jpg, Sen_ship_hh_02016070501027017.txt
Moved image and label: Gao_ship_hh_02018031467030205.jpg, Gao_ship_hh_02018031467030205.txt
Moved image and label: Sen_ship_hv_02016122601040046.jpg, Sen_ship_hv_02016122601040046.txt
Moved image and label: Sen_ship_hh_0201703030204024.jpg, Sen_ship_hh_0201703030204024.txt
Moved image and label: Gao_ship_vv_0201611300301018026.jpg, Gao_ship_vv_0201611300301018026.txt
Moved image and label: Gao_ship_hh_0201608254401030063.jpg, Gao_ship_hh_0201608254401030063.txt
Moved image and label: Gao_ship_vv_0201701156501021017.jpg, Gao_ship_vv_0201701156501021017.txt
Moved image and label: Gao_ship_hv_020170927780101010.jpg, Gao_ship_hv_020170927780101010.txt
Moved image and label: Sen_ship_hv_02018012601045038.jpg, Sen_ship_hv_02018012601045038.txt
Moved image and label: Sen_ship_hv_02016081001043020.jpg, Sen_ship_hv_02016081001043020.txt
Moved image and label: Gao_ship_vh_0201611300301014013.jpg, Gao_ship

In [8]:
# Relocate the test pairs into the test image/label folders
move_files(
    pairs=test_pairs,
    image_dest_folder=test_images_folder,
    label_dest_folder=test_labels_folder,
)

Moved image and label: Gao_ship_hh_0201702157902039053.jpg, Gao_ship_hh_0201702157902039053.txt
Moved image and label: Gao_ship_hv_02017040596020101.jpg, Gao_ship_hv_02017040596020101.txt
Moved image and label: Gao_ship_vh_0201701156403027014.jpg, Gao_ship_vh_0201701156403027014.txt
Moved image and label: Sen_ship_hh_0201703200105802.jpg, Sen_ship_hh_0201703200105802.txt
Moved image and label: Gao_ship_hh_0201608254401011058.jpg, Gao_ship_hh_0201608254401011058.txt
Moved image and label: Gao_ship_hh_020170309680309013.jpg, Gao_ship_hh_020170309680309013.txt
Moved image and label: Sen_ship_vv_02018011301044017.jpg, Sen_ship_vv_02018011301044017.txt
Moved image and label: Gao_ship_hv_020171118980202008.jpg, Gao_ship_hv_020171118980202008.txt
Moved image and label: Sen_ship_hh_02017013101038047.jpg, Sen_ship_hh_02017013101038047.txt
Moved image and label: Gao_ship_hh_0201608254402028015.jpg, Gao_ship_hh_0201608254402028015.txt
Moved image and label: Gao_ship_hh_0201702275502012017.jpg, Ga

In [9]:
# move_files reports per-file errors without raising, so confirm that every pair
# actually reached its destination before claiming success.
split_destinations = (
    (train_pairs, train_images_folder, train_labels_folder),
    (test_pairs, test_images_folder, test_labels_folder),
)
missing_pairs = [
    (img, lbl)
    for pairs, img_dir, lbl_dir in split_destinations
    for img, lbl in pairs
    if not (
        os.path.isfile(os.path.join(img_dir, img))
        and os.path.isfile(os.path.join(lbl_dir, lbl))
    )
]
if not missing_pairs:
    print('Files have been successfully moved to the respective train and test subfolders.')
else:
    print(f'{len(missing_pairs)} image/label pair(s) are missing from the train/test subfolders; see the move errors above.')

Files have been successfully moved to the respective train and test subfolders.
