In [3]:
import os
import random
import shutil

In [4]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at a named mount point
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [5]:
# --- Paths ---
# This is a forward-slash path, so a plain string literal is sufficient
# (no raw-string prefix is needed because there are no backslashes to escape).
base_path = "/content/drive/MyDrive/deeplearning/Assignment/augmented_data"
image_dir = os.path.join(base_path, 'images')
label_dir = os.path.join(base_path, 'labels')

# Split results are written to an 'output' folder inside base_path
output_dir = os.path.join(base_path, 'output')
train_ratio = 0.7  # fraction of matched image/label pairs assigned to training

# The four destination folders: images and labels for each split
output_paths = {
    'train_images': os.path.join(output_dir, 'train_images'),
    'train_labels': os.path.join(output_dir, 'train_labels'),
    'valid_images': os.path.join(output_dir, 'valid_images'),
    'valid_labels': os.path.join(output_dir, 'valid_labels')
}

# Create the folders. exist_ok=True means existing folders are reused as-is,
# so any files already inside them are left in place.
for path in output_paths.values():
    os.makedirs(path, exist_ok=True)

# --- Processing ---
# Map each filename stem (name without extension) to its full filename,
# skipping hidden entries such as '.DS_Store'.
# NOTE: if two entries in the same folder share a stem (e.g. 'a.jpg' and
# 'a.png'), only one of them survives in the map.
image_map = {os.path.splitext(f)[0]: f for f in os.listdir(image_dir) if not f.startswith('.')}
label_map = {os.path.splitext(f)[0]: f for f in os.listdir(label_dir) if not f.startswith('.')}

# Stems present in both folders.
# Sort BEFORE shuffling: the iteration order of a set of strings can differ
# between interpreter sessions (string hash randomization), so shuffling the
# raw set would produce a different split after every kernel restart even
# though the seed is fixed. Sorting gives the shuffle a stable starting order.
common_names = sorted(set(image_map) & set(label_map))
random.seed(42)  # With the sorted input above, the split is identical on every run
random.shuffle(common_names)

# Split logic: the first train_ratio share goes to training, the rest to validation
split_idx = int(len(common_names) * train_ratio)
train_names = common_names[:split_idx]
valid_names = common_names[split_idx:]

def distribute_files(names, img_dest, lbl_dest):
    """Copy the image/label pair for every stem in `names` into the given folders.

    Filenames are looked up in the module-level `image_map` and `label_map`,
    and the source files are read from the module-level `image_dir` and
    `label_dir`. Files keep their original names (including extension).

    Args:
        names: Iterable of filename stems to copy.
        img_dest: Directory that receives the image files.
        lbl_dest: Directory that receives the label files.

    Raises:
        KeyError: If a stem is missing from `image_map` or `label_map`.
    """
    for stem in names:
        # Resolve both filenames up front, then run the two copies in order:
        # image first, label second.
        copy_jobs = (
            (image_dir, image_map[stem], img_dest),
            (label_dir, label_map[stem], lbl_dest),
        )
        for src_dir, filename, dest_dir in copy_jobs:
            shutil.copy(os.path.join(src_dir, filename), os.path.join(dest_dir, filename))

# Copy each split into its own image/label folder pair
distribute_files(train_names, output_paths['train_images'], output_paths['train_labels'])
distribute_files(valid_names, output_paths['valid_images'], output_paths['valid_labels'])

# Summary of what was written and how many pairs landed in each split
print("Process Complete!")  # plain literal: there is nothing to interpolate
print(f"Results saved to: {output_dir}")
print(f"Training pairs: {len(train_names)}")
print(f"Validation pairs: {len(valid_names)}")

Process Complete!
Results saved to: /content/drive/MyDrive/deeplearning/Assignment/augmented_data/output
Training pairs: 50
Validation pairs: 22


In [6]:
import os
import shutil
from tqdm import tqdm # Optional: shows a progress bar

# --- Source and destination locations ---
source_folder = '/content/drive/MyDrive/deeplearning/Assignment/Data'
destination_folder = '/content/drive/MyDrive/deeplearning/Assignment/augmented_data/output/test'

# Make sure the target directory is present before anything is copied into it
os.makedirs(destination_folder, exist_ok=True)

# --- Select the files to copy ---
# Every entry name found directly inside the source directory
all_files = os.listdir(source_folder)

# Keep only names ending in '.jpg'; lower-casing makes '.JPG' etc. match too
jpg_files = list(filter(lambda entry: entry.lower().endswith('.jpg'), all_files))

print(f"Found {len(jpg_files)} .jpg files. Starting copy...")

# --- Copy, with tqdm reporting progress ---
for file_name in tqdm(jpg_files):
    source_path = os.path.join(source_folder, file_name)
    destination_path = os.path.join(destination_folder, file_name)
    shutil.copy(source_path, destination_path)

print(f"Successfully copied all .jpg files to: {destination_folder}")

Found 18 .jpg files. Starting copy...


100%|██████████| 18/18 [00:07<00:00,  2.44it/s]

Successfully copied all .jpg files to: /content/drive/MyDrive/deeplearning/Assignment/augmented_data/output/test



