In [1]:
import os
import cv2
import numpy as np 
import random
import math
import re
import shutil
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as F

from torchvision import tv_tensors
from torchvision.transforms import v2, PILToTensor
from torchvision.transforms.functional import pil_to_tensor
from torchvision.io import read_image, write_jpeg
from torchvision.transforms.v2 import functional as F
from torchvision import tv_tensors

from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor

from PIL import Image


In [2]:
# Locate the repository root from the current working directory: keep everything
# before the first occurrence of 'dslab25' and re-append the folder name.
# NOTE(review): if the working directory does not contain 'dslab25', this silently
# yields "<cwd>dslab25/" — confirm the notebook is always started inside the repo.
repo_dir = os.getcwd().partition('dslab25')[0] + 'dslab25/'

# Dataset root; switch to the commented line to process the qwen dataset instead.
root_dir = f"{repo_dir}training/vacuum_pump"
# root_dir = repo_dir + "training/qwen"

# Untouched source data.
original_images = os.path.join(root_dir, "images/original")
original_annotations = os.path.join(root_dir, "annotation/original")

# Augmentations read from and write to the same "augmented" tree, so the input
# (base_dir_*) and output (out_base_*) folders are identical paths.
base_dir_images = os.path.join(root_dir, "images/augmented")
base_dir_annotations = os.path.join(root_dir, "annotation/augmented")
out_base_images = base_dir_images
out_base_annotations = base_dir_annotations


In [3]:
# Number of stage folders (stage_0 ... stage_{N_STAGES - 1}) that the augmentation cells iterate over.
N_STAGES = 8


# Copy originals to augmented folder

In [None]:
# Seed the augmented folders with the untouched originals.
# dirs_exist_ok=True (Python 3.8+) makes this cell re-runnable: without it,
# copytree raises FileExistsError as soon as the destination folder exists.
shutil.copytree(original_images, base_dir_images, dirs_exist_ok=True)
shutil.copytree(original_annotations, base_dir_annotations, dirs_exist_ok=True)

# Delete screw permutations (optional)

In [14]:
# def delete_non_hello_files(folder_path, perm_number_to_keep):
# 	"""
# 	Deletes all files in the specified folder whose name does not contain perm_number_to_keep.
	
# 	Args:
# 		folder_path (str): Path to the target folder.
# 	"""
# 	if not os.path.isdir(folder_path):
# 		print(f"The path {folder_path} is not a valid directory.")
# 		return

# 	for filename in os.listdir(folder_path):
# 		file_path = os.path.join(folder_path, filename)
# 		if os.path.isfile(file_path) and perm_number_to_keep not in filename:
# 			try:
# 				os.remove(file_path)
# 				print(f"Deleted: {filename}")
# 			except Exception as e:
# 				print(f"Could not delete {filename}: {e}")
# delete_non_hello_files(os.path.join(base_dir_images, "stage_5"), "perm_7")
# delete_non_hello_files(os.path.join(base_dir_images, "stage_7"), "perm_31")

# Rotate images (takes up to 2 mins)

In [None]:
def rotate_image(image_path, out_image_path, out_annotation_path, angle):
	"""
	Rotate one image by a fixed angle and save the result as a JPEG.

	Parameters:
		image_path (str): Path of the image to read.
		out_image_path (str): Destination path of the rotated JPEG (quality 95).
		out_annotation_path (str): Path at which an empty annotation file is
			created once the rotated image has been written.
		angle (int | float): Rotation angle in degrees.

	Returns:
		bool: True on success; False if any step raised (the error is printed).
	"""
	try:
		# Try using torchvision's read_image first
		try:
			image = read_image(image_path)
		except Exception:
			# Fallback to PIL. Only ordinary errors are caught so that
			# KeyboardInterrupt / SystemExit still stop the cell.
			with Image.open(image_path) as pil_image:
				image = PILToTensor()(pil_image.convert("RGB"))

		# write_jpeg only accepts 1- or 3-channel tensors; drop an alpha channel
		# (e.g. from an RGBA PNG) instead of failing the whole rotation.
		if image.shape[0] == 4:
			image = image[:3]

		# degrees=(angle, angle) pins the "random" rotation to exactly `angle`;
		# expand=True enlarges the canvas so the rotated image is not clipped.
		rotate = v2.RandomRotation(degrees=(angle, angle), expand=True)
		rotated_image = rotate(image)

		# Save rotated image
		write_jpeg(rotated_image, out_image_path, quality=95)

		# Create empty annotation file
		with open(out_annotation_path, 'w'):
			pass

		return True

	except Exception as e:
		print(f"Error processing {image_path}: {str(e)}")
		return False

# Rotate every not-yet-rotated image of every stage in 20° steps (20, 40, ..., 360).
img_counter = 0
stages = [f"stage_{i}" for i in range(N_STAGES)]
for stage in stages:
	image_folder = os.path.join(base_dir_images, stage)
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)

	# Create output directories if they don't exist
	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# List only original images (skip already augmented ones)
	image_files = [f for f in os.listdir(image_folder) if f.lower().endswith((".jpg", ".jpeg", ".png")) and "_rot" not in f]

	def process_rotation(task):
		"""
		Rotate one image by one angle and copy its annotation next to the result.

		Parameters:
			task (tuple): (filename, angle) with the image file name inside the
				current stage folder and the rotation angle in degrees.

		Returns:
			tuple: (out_image_path, angle, success) where success is the bool
			returned by rotate_image.
		"""
		filename, angle = task
		image_path = os.path.join(image_folder, filename)
		base_filename = os.path.splitext(filename)[0]
		annotation_filename = base_filename + ".txt"
		annotation_path = os.path.join(in_annotation_folder, annotation_filename)

		out_image_filename = f"{base_filename}_rot{angle}.jpg"
		out_annotation_filename = f"{base_filename}_rot{angle}.txt"
		out_image_path = os.path.join(out_image_folder, out_image_filename)
		out_annotation_path = os.path.join(out_annotation_folder, out_annotation_filename)

		success = rotate_image(image_path, out_image_path, out_annotation_path, angle)

		# Copy annotation (comment this block if not using qwen).
		# Only done when the rotated image was actually written, so a failed
		# rotation does not leave an annotation file without a matching image.
		if success and os.path.exists(annotation_path):
			with open(annotation_path, 'r') as f:
				content = f.read()
			with open(out_annotation_path, 'w') as f:
				f.write(content)

		return (out_image_path, angle, success)

	# Prepare tasks: (filename, angle)
	tasks = [(filename, angle) for filename in image_files for angle in range(20, 361, 20)]

	# Parallel execution; executor.map yields results in task order.
	failed_rotations = 0
	with ThreadPoolExecutor(max_workers=16) as executor:
		for idx, result in enumerate(executor.map(process_rotation, tasks)):
			out_image_path, angle, success = result
			if not success:
				failed_rotations += 1
			elif idx % 1000 == 0:
				print(f"Processed {out_image_path} with rotation {angle}°")

	# Individual errors are printed by rotate_image; add a per-stage summary so
	# they are not lost between the sparse progress messages.
	if failed_rotations:
		print(f"{stage}: {failed_rotations} of {len(tasks)} rotations failed")


# Rotate (labels correctly)

For this step you need to manually label all 18 rotations (360°/20°) of render images 1, 2 and 4.

In [None]:
# Folder holding the hand-made base labels; resolved against the current working directory.
BASE_LABELS_DIR = os.path.join(os.getcwd(), 'stage_0/labels/')

# File-name patterns.
# Base labels: only renders 1, 2 and 4 of stage 0 are expected, one file per rotation.
base_pattern = re.compile(r'^stage_0_case_render_([124])_rot(\d+)\.txt$')
# Annotation files: optional _perm_x / _var_x parts are matched but not captured.
annot_pattern = re.compile(r'^stage_(\d+)(?:_perm_\d+)?(?:_var_\d+)?_case_render_(\d+)_rot(\d+)\.txt$')

# rotation (string, as it appears in the file name) -> {'render_<n>': (x, y, w, h)}.
rotation_data = {}

# Collect the base boxes of renders 1, 2 and 4, grouped by rotation.
for fname in os.listdir(BASE_LABELS_DIR):
	base_match = base_pattern.match(fname)
	if base_match is None:
		continue
	render_number, rot = base_match.groups()

	# Each file is expected to hold a single line "class x y w h".
	with open(os.path.join(BASE_LABELS_DIR, fname), 'r') as label_file:
		fields = label_file.read().strip().split()

	# The class column is discarded; files with a different number of fields
	# or non-numeric values are reported and skipped.
	try:
		_, x, y, w, h = fields
		box = (float(x), float(y), float(w), float(h))
	except Exception as e:
		print(f"Error processing {fname}: {e}")
		continue

	rotation_data.setdefault(rot, {})[f'render_{render_number}'] = box

# Now, compute for each rotation the base values, col_shift and row_shift.
# Iterate over a snapshot (list(...)) because incomplete rotations are removed
# from rotation_data inside the loop.
for rot, data in list(rotation_data.items()):
	try:
		base_x, base_y, w, h = data['render_1']
		col_x, col_y, _, _ = data['render_2']
		row_x, row_y, _, _ = data['render_4']
	except KeyError:
		print(f"Missing base files for rotation {rot}. Skipping.")
		# Actually drop the rotation: leaving the raw {'render_n': ...} entry in
		# place would make later lookups of rot_info['base'] fail with KeyError.
		del rotation_data[rot]
		continue

	# Calculate shifts: offset of render 2 (next column) and render 4 (next row)
	# relative to render 1.
	col_shift = (col_x - base_x, col_y - base_y)
	row_shift = (row_x - base_x, row_y - base_y)
	# Save computed values back
	rotation_data[rot] = {
		'base': (base_x, base_y),
		'col_shift': col_shift,
		'row_shift': row_shift,
		'w': w,
		'h': h
	}

# Derive the box centre of an arbitrary render from the per-rotation grid data
def compute_new_coords(render_num, rot_info):
	"""
	Compute the box centre for a render placed on a 3x3 grid.

	Parameters:
		render_num (int | str): Render number; 1..9 map directly onto the grid
			(row-major), larger numbers are folded back first.
		rot_info (dict): Must provide 'base', 'col_shift' and 'row_shift', each
			an (x, y) pair.

	Returns:
		tuple: (new_x, new_y) = base + column * col_shift + row * row_shift.

	NOTE(review): for render numbers >= 18 the fold-back formula produces values
	outside 1..9 (e.g. 18 -> -1, 19 -> 0), which yields a negative row index —
	confirm this matches the render numbering scheme.
	"""
	cell = int(render_num)
	if cell > 9:
		wraps = cell // 9
		cell -= 9 * wraps + (wraps - 1)

	# divmod uses the same floor semantics as the // and % operators.
	grid_y, grid_x = divmod(cell - 1, 3)

	origin_x, origin_y = rot_info['base']
	col_dx, col_dy = rot_info['col_shift']
	row_dx, row_dy = rot_info['row_shift']

	return (
		origin_x + grid_x * col_dx + grid_y * row_dx,
		origin_y + grid_x * col_dy + grid_y * row_dy,
	)

# Now, process each annotation file in the annotations folder and overwrite it
# with a single box derived from the base labels of the matching rotation.
img_counter = 0

for stage_folder in os.listdir(base_dir_annotations):
	stage_path = os.path.join(base_dir_annotations, stage_folder)
	if not os.path.isdir(stage_path):
		continue
	# Expect folder name like stage_0, stage_1, etc.
	stage_match = re.match(r'stage_(\d+)', stage_folder)
	if not stage_match:
		continue
	# The class written to the annotation is the stage number of the folder.
	class_id = stage_match.group(1)
	# Process each file in the stage folder
	for fname in os.listdir(stage_path):
		annot_match = annot_pattern.match(fname)
		if not annot_match:
			continue
		file_class, render_str, rot = annot_match.groups()
		# We ignore file_class (it might be redundant with the folder) and any perm parts
		rot_info = rotation_data.get(rot)
		# A usable entry carries the computed 'base' key; anything else (missing
		# rotation or incomplete base labels) cannot be used to place the box.
		if rot_info is None or 'base' not in rot_info:
			print(f"Rotation {rot} not found in base data for file {fname}. Skipping.")
			continue
		# Compute new x and y using the grid derived from render number
		new_x, new_y = compute_new_coords(render_str, rot_info)
		w = rot_info['w']
		h = rot_info['h']

		# Create new annotation line (class from folder, then new_x, new_y, w, h)
		new_line = f"{class_id} {new_x} {new_y} {w} {h}\n"
		# Overwrite the file
		out_path = os.path.join(stage_path, fname)
		with open(out_path, 'w') as f:
			f.write(new_line)
		# Report only every 1000th file to keep the cell output readable.
		if img_counter % 1000 == 0:
			print(f"Updated {out_path} with: {new_line.strip()}")
		img_counter += 1

# SAM (2.1)

In [None]:
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Run on the GPU when one is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the SAM 2.1 image predictor from the Hugging Face hub.
# The device is passed explicitly so the model is built on the device selected
# above rather than on the library's default device.
sam_model = "facebook/sam2.1-hiera-large"
predictor = SAM2ImagePredictor.from_pretrained(sam_model, device=device)
def pca_color_feature(feature_map, n_components=3):
	"""
	Reduce a feature map to n_components channels with PCA (3 gives an RGB-like image).

	Parameters:
		feature_map: torch.Tensor of shape (channels, height, width).
		n_components: number of principal components to keep (default 3).

	Returns:
		torch.Tensor of shape (height, width, n_components), each component
		min-max normalised to the range [0, 1].
	"""
	# Channels last, then one row per spatial position: (H*W, C).
	channels_last = feature_map.permute(1, 2, 0)
	height, width, channels = channels_last.shape
	pixels = channels_last.reshape(-1, channels)

	# torch.pca_lowrank returns (U, S, V); only the projection matrix V of
	# shape (C, n_components) is used.
	_, _, components = torch.pca_lowrank(pixels, q=n_components)

	# Project every pixel onto the principal directions and restore the grid.
	projected = torch.matmul(pixels, components).reshape(height, width, n_components)

	# Per-component min-max scaling; the small epsilon avoids division by zero
	# for a constant component.
	per_pixel = projected.reshape(-1, n_components)
	lowest = per_pixel.min(dim=0)[0]
	highest = per_pixel.max(dim=0)[0]
	return (projected - lowest) / (highest - lowest + 1e-5)

def display_edges_with_pca(image_path, output_file=None):
	"""
	Load an image from disk, take its embedding from the module-level SAM
	`predictor`, and reduce that feature map to 3 channels (RGB) with PCA.
	The visualisation is intended to emphasise edges and high-variance features.

	Parameters:
	image_path (str): Path to a jpg or png image.
	output_file (str, optional): Path where the output image will be saved,
		resized to the resolution of the input image. If None, the PCA image
		is displayed with matplotlib instead.
	"""

	# Load the image and convert it to RGB.
	raw_image = Image.open(image_path).convert("RGB")
	predictor.set_image(raw_image)

	with torch.no_grad():
		# 1 x C x H x W  → take first batch dim
		feature_map = predictor.get_image_embedding()[0]

	pca_result = pca_color_feature(feature_map)
	pca_array = pca_result.cpu().detach().numpy()

	# Either save the image or show it.
	if output_file is not None:
		# pca_array holds values in [0, 1]; scale to 8-bit.
		np_img = (pca_array * 255).astype(np.uint8)

		# Resize PCA result to match original image resolution
		original_size = raw_image.size  # (width, height)
		np_img_resized = cv2.resize(np_img, original_size, interpolation=cv2.INTER_CUBIC)

		# Convert RGB -> BGR and save with OpenCV
		np_img_bgr = cv2.cvtColor(np_img_resized, cv2.COLOR_RGB2BGR)
		cv2.imwrite(output_file, np_img_bgr)
	else:
		# matplotlib is imported here because the notebook's import cell does not
		# provide `plt`; without this the display branch raised NameError.
		import matplotlib.pyplot as plt

		# Create a figure and display the PCA-reduced feature map.
		plt.figure(figsize=(10, 10))
		plt.imshow(pca_array)
		plt.title("PCA Reduced Feature Map (Edges Emphasized)")
		plt.axis("off")

		plt.show()

# Example usage:
# To save the output image:
# display_edges_with_pca("path/to/your/image.jpg", output_file="output_pca_image.png")
# To simply display the image:
# display_edges_with_pca("path/to/your/image.jpg")


	
# Create a "_sam" variant (PCA view of the SAM embedding) for every image of every stage.
img_counter = 0
stages = [f"stage_{i}" for i in range(N_STAGES)]
for stage in stages:
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)

	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# Only .jpg inputs; files that already carry the "_sam" tag are outputs of
	# this cell and are not processed again.
	image_files = [
		f for f in os.listdir(in_image_folder)
		if f.lower().endswith(".jpg") and "_sam" not in f
	]

	for filename in image_files:
		base_filename = os.path.splitext(filename)[0]
		image_path = os.path.join(in_image_folder, filename)
		annotation_filename = base_filename + ".txt"
		annotation_path = os.path.join(in_annotation_folder, annotation_filename)

		out_image_filename = f"{base_filename}_sam.jpg"
		out_annotation_filename = f"{base_filename}_sam.txt"
		out_image_path = os.path.join(out_image_folder, out_image_filename)
		out_annotation_path = os.path.join(out_annotation_folder, out_annotation_filename)

		# Skip images whose SAM variant already exists.
		if os.path.exists(out_image_path):
			continue

		# Write the PCA visualisation of the SAM embedding.
		display_edges_with_pca(image_path, out_image_path)

		# The annotation is carried over unchanged.
		if os.path.exists(annotation_path):
			with open(annotation_path, 'r') as f:
				content = f.read()
			with open(out_annotation_path, 'w') as f:
				f.write(content)

		# Progress message for every 1000th newly created image.
		if img_counter % 1000 == 0:
			print(f"Processed sam augmentation: {out_image_path}")
		img_counter += 1

# Example usage:
# Replace the path below with the location of your local image.
# display_edges_with_pca("path/to/your/image.jpg")

# After you are done using predictor:
# drop the notebook's reference to it so the model can be garbage-collected.
del predictor  # delete the object
if device == "cuda":
	# Ask PyTorch to hand unused cached GPU memory back to the driver and to
	# clean up CUDA IPC memory that is no longer referenced.
	torch.cuda.empty_cache()
	torch.cuda.ipc_collect()

# Tint patches

In [10]:
def tint_image_with_patches(image_path, annotation_path, out_image_path, out_annotation_path, patch_size=64, alpha=0.3):
	"""
	Tint an image tile by tile with random colours and copy its annotation.

	The image is split into patch_size x patch_size tiles (smaller at the right
	and bottom borders). Each tile is blended with one random solid colour:
	(1 - alpha) * tile + alpha * colour.

	Parameters:
		image_path (str): Image to read.
		annotation_path (str): Annotation belonging to the image (may be missing).
		out_image_path (str): Where the tinted image is written.
		out_annotation_path (str): Where the unchanged annotation is copied to.
		patch_size (int): Edge length of a tile in pixels.
		alpha (float): Weight of the random colour in the blend.

	Returns:
		None. If the image cannot be read a message is printed and nothing is written.
	"""
	source = cv2.imread(image_path)
	if source is None:
		print("Failed to read image:", image_path)
		return

	height, width, _ = source.shape
	tinted_img = source.copy()

	# Walk the tiles row by row, left to right.
	for top in range(0, height, patch_size):
		bottom = min(top + patch_size, height)
		for left in range(0, width, patch_size):
			right = min(left + patch_size, width)

			# One random colour per tile, drawn channel by channel.
			color = np.array([random.randint(0, 255) for _ in range(3)], dtype=np.uint8)
			# Solid overlay with exactly the size of the (possibly cropped) tile.
			overlay = np.full((bottom - top, right - left, 3), color, dtype=np.uint8)

			tile = tinted_img[top:bottom, left:right]
			tinted_img[top:bottom, left:right] = cv2.addWeighted(tile, 1 - alpha, overlay, alpha, 0)

	cv2.imwrite(out_image_path, tinted_img)

	# The annotation is carried over unchanged.
	if not os.path.exists(annotation_path):
		return
	with open(annotation_path, 'r') as f:
		content = f.read()
	with open(out_annotation_path, 'w') as f:
		f.write(content)

tag = "colorpatch"
img_counter = 0
stages = [f"stage_{i}" for i in range(N_STAGES)]

for stage in stages:
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)
	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# Cache the list of original image files (skip those already augmented)
	image_files = [f for f in os.listdir(in_image_folder)
						if f.lower().endswith(".jpg") and "_colorpatch" not in f]

	def process_image(filename):
		"""Create the colour-patch variant of one image unless it already exists."""
		image_path = os.path.join(in_image_folder, filename)
		base_filename = os.path.splitext(filename)[0]
		annotation_filename = base_filename + ".txt"
		annotation_path = os.path.join(in_annotation_folder, annotation_filename)

		out_image_filename = f"{base_filename}_{tag}.jpg"
		out_annotation_filename = f"{base_filename}_{tag}.txt"
		out_image_path = os.path.join(out_image_folder, out_image_filename)
		out_annotation_path = os.path.join(out_annotation_folder, out_annotation_filename)

		if not os.path.exists(out_image_path):
			tint_image_with_patches(image_path, annotation_path, out_image_path, out_annotation_path)

	# NOTE(review): worker processes must be able to resolve process_image; with
	# the "spawn" start method this may fail for functions defined in a notebook —
	# confirm on the target platform.
	with ProcessPoolExecutor(max_workers=8) as executor:
		futures = [(filename, executor.submit(process_image, filename)) for filename in image_files]
		# Retrieve every result: an exception raised in a worker only surfaces
		# when the result is requested, so an unconsumed executor.map() would
		# hide all failures.
		for filename, future in futures:
			try:
				future.result()
			except Exception as e:
				print(f"Error processing {filename}: {e}")


# Brightness

In [11]:
def adjust_brightness_and_copy_annotation(image_path, annotation_path, out_image_path, out_annotation_path, factor):
	"""
	Scale the pixel values of an image by `factor` and copy its annotation.

	Parameters:
		image_path (str): Image to read.
		annotation_path (str): Annotation belonging to the image (may be missing).
		out_image_path (str): Where the adjusted image is written.
		out_annotation_path (str): Where the unchanged annotation is copied to.
		factor (float): Multiplier passed to cv2.convertScaleAbs as alpha
			(values above 1 brighten, values below 1 darken).

	Returns:
		None. If the image cannot be read a message is printed and nothing is written.
	"""
	source = cv2.imread(image_path)
	if source is None:
		print("Failed to read image:", image_path)
		return

	# convertScaleAbs multiplies by alpha, adds beta (0 here) and converts the
	# result back to 8-bit.
	cv2.imwrite(out_image_path, cv2.convertScaleAbs(source, alpha=factor, beta=0))

	# The annotation is carried over unchanged.
	if not os.path.exists(annotation_path):
		return
	with open(annotation_path, 'r') as f:
		content = f.read()
	with open(out_annotation_path, 'w') as f:
		f.write(content)

img_counter = 0
stages = [f"stage_{i}" for i in range(N_STAGES)]
# Define brightness augmentation factors (file-name tag -> multiplier).
# NOTE(review): the tags read like 1.25 / 0.75 but the factors applied are
# 1.5 / 0.5 — confirm which is intended. The tags are left unchanged because
# they are part of the output file names.
brightness_aug = {
	"bright125": 1.5,
	"dark075": 0.5,
}

for stage in stages:
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)
	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# Cache the list of original image files (skip those already augmented)
	image_files = [f for f in os.listdir(in_image_folder)
						if f.lower().endswith(".jpg") and "_bright" not in f and "_dark" not in f]

	def process_brightness_aug(task):
		"""Create one brightness variant; task is (filename, tag, factor)."""
		filename, tag, factor = task
		image_path = os.path.join(in_image_folder, filename)
		base_filename = os.path.splitext(filename)[0]
		annotation_filename = base_filename + ".txt"
		annotation_path = os.path.join(in_annotation_folder, annotation_filename)

		out_image_filename = f"{base_filename}_{tag}.jpg"
		out_annotation_filename = f"{base_filename}_{tag}.txt"
		out_image_path = os.path.join(out_image_folder, out_image_filename)
		out_annotation_path = os.path.join(out_annotation_folder, out_annotation_filename)

		if not os.path.exists(out_image_path):
			adjust_brightness_and_copy_annotation(image_path, annotation_path, out_image_path, out_annotation_path, factor)

	# Prepare all (filename, tag, factor) tasks
	tasks = [(filename, tag, factor) for filename in image_files for tag, factor in brightness_aug.items()]

	# Parallel execution
	with ThreadPoolExecutor(max_workers=16) as executor:
		futures = [(task, executor.submit(process_brightness_aug, task)) for task in tasks]
		# Retrieve every result: an exception raised in a worker only surfaces
		# when the result is requested, so an unconsumed executor.map() would
		# hide all failures.
		for task, future in futures:
			try:
				future.result()
			except Exception as e:
				print(f"Error processing {task[0]} ({task[1]}): {e}")

# Obscure

For each image in the base directories, read the corresponding annotation file,
apply the obscure augmentation by drawing a random, smaller rectangle within each bounding box,
and then save the resulting image and a copy of the annotation file in the output directories.

In [None]:
def obscure_object_in_image(image, annotation_path):
	"""
	Black out a random part of every annotated object.

	For each 5-field YOLO line (class cx cy w h, normalised) in the annotation
	file, a filled black rectangle of 10-40 % of the box width and height,
	randomly placed and randomly rotated, is drawn onto the image.

	Parameters:
		image: image array; it is modified in place.
		annotation_path (str): YOLO annotation file of the image.

	Returns:
		The same image object (unchanged if the annotation file does not exist).
	"""
	img_h, img_w = image.shape[:2]
	if not os.path.exists(annotation_path):
		return image

	with open(annotation_path, 'r') as label_file:
		label_lines = label_file.readlines()

	for raw_line in label_lines:
		fields = raw_line.strip().split()
		# Anything that is not a 5-field line is ignored.
		if len(fields) != 5:
			continue
		cx, cy, bw, bh = (float(value) for value in fields[1:])

		# Bounding box in pixel coordinates.
		box_w = bw * img_w
		box_h = bh * img_h
		box_x1 = cx * img_w - box_w / 2
		box_y1 = cy * img_h - box_h / 2
		box_x2 = box_x1 + box_w
		box_y2 = box_y1 + box_h

		# Size of the obscuring rectangle: 10-40 % of the box in each direction.
		rect_w = box_w * random.uniform(0.1, 0.4)
		rect_h = box_h * random.uniform(0.1, 0.4)

		# Range of centres for which the unrotated rectangle stays inside the box.
		min_cx = box_x1 + rect_w / 2
		max_cx = box_x2 - rect_w / 2
		min_cy = box_y1 + rect_h / 2
		max_cy = box_y2 - rect_h / 2
		if max_cx < min_cx or max_cy < min_cy:
			# No valid range: fall back to the centre of the box.
			rect_center_x = (box_x1 + box_x2) / 2
			rect_center_y = (box_y1 + box_y2) / 2
		else:
			rect_center_x = random.uniform(min_cx, max_cx)
			rect_center_y = random.uniform(min_cy, max_cy)

		# Random orientation; corner points are obtained from OpenCV.
		angle = random.uniform(0, 360)
		corners = cv2.boxPoints(((rect_center_x, rect_center_y), (rect_w, rect_h), angle))
		corners = corners.astype(np.int32)

		# Fill the rotated rectangle with black.
		cv2.fillPoly(image, [corners], (0, 0, 0))

	return image


img_counter = 0
# Use the shared N_STAGES setting (instead of a hard-coded 8) so this cell
# covers the same stage folders as the other augmentation cells.
stages = [f"stage_{i}" for i in range(N_STAGES)]

for stage in stages:
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)
	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# Cache the list of original image files (skip those already augmented).
	image_files = [f for f in os.listdir(in_image_folder)
						if f.lower().endswith(".jpg") and "_obscure" not in f]

	def process_obscure(filename):
		"""
		Create the "_obscure" variant of one image and copy its annotation.

		Returns:
			The output image path if a new image was written, otherwise None
			(output already present or input unreadable).
		"""
		image_path = os.path.join(in_image_folder, filename)
		base_filename = os.path.splitext(filename)[0]
		annotation_filename = base_filename + ".txt"
		annotation_path = os.path.join(in_annotation_folder, annotation_filename)
		out_image_filename = f"{base_filename}_obscure.jpg"
		out_image_path = os.path.join(out_image_folder, out_image_filename)

		if not os.path.exists(out_image_path):
			img = cv2.imread(image_path)
			if img is None:
				print("Failed to read image:", image_path)
				return  # Skip if image reading failed

			# Obscure objects in the image using their bounding boxes
			obscured_img = obscure_object_in_image(img, annotation_path)
			cv2.imwrite(out_image_path, obscured_img)

			# Copy annotation unchanged
			if os.path.exists(annotation_path):
				out_annotation_path = os.path.join(out_annotation_folder, f"{base_filename}_obscure.txt")
				with open(annotation_path, 'r') as f_in, open(out_annotation_path, 'w') as f_out:
					f_out.write(f_in.read())

			return out_image_path  # For optional progress tracking

	# Prepare list of filenames to process
	tasks = [f for f in image_files if not os.path.exists(os.path.join(out_image_folder, os.path.splitext(f)[0] + "_obscure.jpg"))]

	# Parallel execution
	with ThreadPoolExecutor(max_workers=16) as executor:
		for idx, result in enumerate(executor.map(process_obscure, tasks)):
			if result is not None and idx % 1000 == 0:
				print(f"Processed obscure augmentation: {result}")

# Scale

In [None]:
def _scale_crop_yolo_line(line, original_w, original_h, scale_factor, x_offset, y_offset):
	"""
	Map one annotation line through the zoom-and-centre-crop transform.

	Returns:
		- the line unchanged if it does not consist of exactly 5 fields,
		- None if the box lies completely outside the cropped image,
		- otherwise the adjusted "class cx cy w h" line (normalised, 6 decimals),
		  with the box clipped to the image borders when it sticks out.
	"""
	parts = line.strip().split()
	if len(parts) != 5:
		# For lines that do not match the expected format, copy them as is.
		return line

	class_id, cx, cy, bw, bh = parts
	cx, cy, bw, bh = float(cx), float(cy), float(bw), float(bh)

	# Normalised -> pixels in the original image -> pixels in the scaled image.
	scaled_bw = bw * original_w * scale_factor
	scaled_bh = bh * original_h * scale_factor
	# Shift into the coordinate frame of the crop.
	cropped_cx = cx * original_w * scale_factor - x_offset
	cropped_cy = cy * original_h * scale_factor - y_offset

	x1, x2 = cropped_cx - scaled_bw / 2, cropped_cx + scaled_bw / 2
	y1, y2 = cropped_cy - scaled_bh / 2, cropped_cy + scaled_bh / 2

	# Boxes that are fully visible keep the directly computed values; only boxes
	# reaching beyond the crop are clipped so the normalised values stay in [0, 1].
	if x1 < 0 or y1 < 0 or x2 > original_w or y2 > original_h:
		x1, y1 = max(x1, 0.0), max(y1, 0.0)
		x2, y2 = min(x2, float(original_w)), min(y2, float(original_h))
		if x2 <= x1 or y2 <= y1:
			# Nothing of the object is left inside the crop.
			return None
		cropped_cx, cropped_cy = (x1 + x2) / 2, (y1 + y2) / 2
		scaled_bw, scaled_bh = x2 - x1, y2 - y1

	# Back to coordinates normalised by the output size (same as the original size).
	new_cx = cropped_cx / original_w
	new_cy = cropped_cy / original_h
	new_bw = scaled_bw / original_w
	new_bh = scaled_bh / original_h
	return f"{class_id} {new_cx:.6f} {new_cy:.6f} {new_bw:.6f} {new_bh:.6f}\n"


def random_scale_and_crop_annotation(image_path, annotation_path, out_image_path, out_annotation_path, scale_range=(1.0, 2.0)):
	"""
	Scales the image by a random factor and then crops the center region to
	restore the original image size. Adjusts YOLO annotations accordingly.

	Parameters:
		image_path (str): Image to read.
		annotation_path (str): YOLO annotation of the image (normalised
			"class cx cy w h" lines); may be missing.
		out_image_path (str): Where the zoomed image is written.
		out_annotation_path (str): Where the adjusted annotation is written
			(only if annotation_path exists).
		scale_range (tuple): (min, max) of the uniformly drawn scale factor.

	Returns:
		None. Nothing is written if the image cannot be read or if the scaled
		image would be smaller than the original. Boxes that stick out of the
		crop are clipped to the image; boxes completely outside are dropped.
	"""
	# Read the original image.
	img = cv2.imread(image_path)
	if img is None:
		print("Failed to read image:", image_path)
		return

	original_h, original_w = img.shape[:2]

	# Randomly select a scale factor.
	scale_factor = random.uniform(scale_range[0], scale_range[1])

	# Compute new dimensions for the scaled image.
	new_w = int(original_w * scale_factor)
	new_h = int(original_h * scale_factor)

	# Ensure the scaled image is large enough for the crop (checked before the
	# resize so no work is wasted on a result that is discarded).
	if new_w < original_w or new_h < original_h:
		print("Scaled image is smaller than original; adjust scale_range or add padding.")
		return

	# Resize (scale) the image.
	scaled_img = cv2.resize(img, (new_w, new_h), interpolation=cv2.INTER_LINEAR)

	# Compute crop offsets to extract the center region.
	x_offset = (new_w - original_w) // 2
	y_offset = (new_h - original_h) // 2

	# Crop the center region.
	cropped_img = scaled_img[y_offset:y_offset + original_h, x_offset:x_offset + original_w]

	# Write the augmented image.
	cv2.imwrite(out_image_path, cropped_img)

	# Read and update the annotation file.
	# Assumption: annotations are in YOLO normalized format.
	if os.path.exists(annotation_path):
		with open(annotation_path, 'r') as file:
			adjusted_lines = [
				_scale_crop_yolo_line(line, original_w, original_h, scale_factor, x_offset, y_offset)
				for line in file
			]
		# Save the adjusted annotations (boxes outside the crop were mapped to None).
		with open(out_annotation_path, 'w') as file:
			file.writelines(line for line in adjusted_lines if line is not None)
	

# Create one randomly zoomed "_scale" variant for every image of every stage.
stages = [f"stage_{i}" for i in range(N_STAGES)]
img_counter = 0

for stage in stages:
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)

	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# Only .jpg inputs that are not themselves "_scale" outputs.
	image_files = [
		f for f in os.listdir(in_image_folder)
		if f.lower().endswith(".jpg") and "_scale" not in f
	]

	def process_scale(filename):
		"""
		Create the "_scale" variant of one image.

		Returns:
			The output image path if the augmentation was attempted, or None
			if the output already existed.
		"""
		base_filename = os.path.splitext(filename)[0]
		out_image_path = os.path.join(out_image_folder, f"{base_filename}_scale.jpg")
		if os.path.exists(out_image_path):
			return None

		random_scale_and_crop_annotation(
			os.path.join(in_image_folder, filename),
			os.path.join(in_annotation_folder, base_filename + ".txt"),
			out_image_path,
			os.path.join(out_annotation_folder, f"{base_filename}_scale.txt"),
		)
		return out_image_path  # For optional progress tracking

	# Prepare only tasks where output doesn't exist yet
	tasks = [
		f for f in image_files
		if not os.path.exists(os.path.join(out_image_folder, os.path.splitext(f)[0] + "_scale.jpg"))
	]

	# Parallel execution; a progress line is printed for every 1000th task.
	with ThreadPoolExecutor(max_workers=16) as executor:
		for idx, result in enumerate(executor.map(process_scale, tasks)):
			if idx % 1000 == 0 and result is not None:
				print(f"Processed scale augmentation: {result}")


# Translate

In [None]:
def random_translate_and_update_annotation(image_path, annotation_path, out_image_path, out_annotation_path):
	"""
	Shift an image by a random offset and move its annotation box centers to match.

	The offset is drawn independently per axis:
	  - x direction: uniform in [-width/4, +width/4] pixels.
	  - y direction: uniform in [-height/4, +height/4] pixels.
	The translated image is written with the same dimensions as the input.

	Annotation lines with exactly five whitespace-separated fields are read as
		class_id x_center y_center box_width box_height
	where x_center and y_center are fractions of the image width and height:
	the pixel offset is divided by the image size before it is added to them.
	Box width and height are copied through unchanged, and the shifted center
	is not clipped, so it may fall outside [0, 1] when the box leaves the frame.
	Lines with any other number of fields are copied (stripped) unchanged.

	Args:
		image_path: Path of the image to read.
		annotation_path: Path of the matching annotation file; if it does not
			exist, only the image is written.
		out_image_path: Destination path of the translated image.
		out_annotation_path: Destination path of the updated annotation file.

	Returns:
		None. If the image cannot be read or the translated image cannot be
		written, a message is printed and no annotation file is produced.
	"""
	# Read the image.
	img = cv2.imread(image_path)
	if img is None:
		print("Failed to read image:", image_path)
		return

	# Get image dimensions.
	height, width = img.shape[:2]

	# Maximum shift is a quarter of the image size along each axis.
	max_tx = width / 4.0
	max_ty = height / 4.0

	# Randomly select translation values (they can be negative or positive).
	tx = random.uniform(-max_tx, max_tx)
	ty = random.uniform(-max_ty, max_ty)

	# Affine matrix for a pure translation by (tx, ty).
	M = np.float32([[1, 0, tx],
					[0, 1, ty]])

	# Apply the translation; the output canvas keeps the original dimensions.
	translated_img = cv2.warpAffine(img, M, (width, height))
	if not cv2.imwrite(out_image_path, translated_img):
		# Do not emit an annotation for an image that was never written.
		print("Failed to write image:", out_image_path)
		return

	# If the annotation file exists, update the coordinates.
	if os.path.exists(annotation_path):
		# Offsets expressed in the same normalized units as the box centers.
		dx = tx / width
		dy = ty / height
		new_lines = []
		with open(annotation_path, 'r') as f:
			for line in f:
				parts = line.strip().split()
				# Check if the line has the expected format.
				if len(parts) == 5:
					class_id, x_center, y_center, box_w, box_h = parts
					x_center = float(x_center) + dx
					y_center = float(y_center) + dy
					# Six decimals keep sub-pixel accuracy; two decimals would
					# quantize the center to 1% of the image size.
					new_lines.append(f"{class_id} {x_center:.6f} {y_center:.6f} {box_w} {box_h}")
				else:
					# If the line doesn't match the expected format, copy it unchanged.
					new_lines.append(line.strip())
		with open(out_annotation_path, 'w') as f:
			f.write("\n".join(new_lines))
	
	
# Translation augmentation driver: for every stage folder, create one
# "<name>_translate" image/annotation pair per source image that lacks one.
stages = [f"stage_{stage_idx}" for stage_idx in range(N_STAGES)]
img_counter = 0

for stage in stages:
	# Source and destination folders for the current stage.
	in_image_folder = os.path.join(base_dir_images, stage)
	in_annotation_folder = os.path.join(base_dir_annotations, stage)
	out_image_folder = os.path.join(out_base_images, stage)
	out_annotation_folder = os.path.join(out_base_annotations, stage)

	os.makedirs(out_image_folder, exist_ok=True)
	os.makedirs(out_annotation_folder, exist_ok=True)

	# Candidate inputs: JPEGs whose name does not already carry the "_translate" marker.
	image_files = [
		name
		for name in os.listdir(in_image_folder)
		if "_translate" not in name and name.lower().endswith(".jpg")
	]

	def process_translation(filename):
		"""
		Create the translated variant of one image of the current stage.

		Returns the output image path when the augmentation was run, or None
		when the output image already exists.
		"""
		stem = os.path.splitext(filename)[0]
		translated_image_path = os.path.join(out_image_folder, f"{stem}_translate.jpg")
		if os.path.exists(translated_image_path):
			return None

		random_translate_and_update_annotation(
			os.path.join(in_image_folder, filename),
			os.path.join(in_annotation_folder, stem + ".txt"),
			translated_image_path,
			os.path.join(out_annotation_folder, f"{stem}_translate.txt"),
		)
		return translated_image_path  # For optional progress tracking

	# Only schedule files whose "_translate" output is still missing.
	tasks = [
		name
		for name in image_files
		if not os.path.exists(os.path.join(out_image_folder, f"{os.path.splitext(name)[0]}_translate.jpg"))
	]

	# Run the augmentation on a thread pool; report every 1000th processed item.
	with ThreadPoolExecutor(max_workers=16) as executor:
		for idx, result in enumerate(executor.map(process_translation, tasks)):
			if result is None or idx % 1000 != 0:
				continue
			print(f"Processed translation augmentation: {result}")

# Visualize

In [None]:
# import matplotlib.pyplot as plt

# def show_image_with_bbox(filename, bbox):
# 	"""
# 	Display an image with a bounding box drawn on it.

# 	Parameters:
# 	- filename: str, path to the image file.
# 	- bbox: tuple of (class_id, x_center, y_center, width, height) in YOLO format (all normalized).
# 	"""
# 	# Load the image
# 	image = cv2.imread(filename)
# 	if image is None:
# 		print(f"Error: Unable to load image at {filename}")
# 		return

# 	# Get image dimensions (height, width)
# 	h, w, _ = image.shape

# 	# Convert YOLO normalized coordinates to absolute pixel values
# 	class_id, x_center, y_center, box_width, box_height = bbox
# 	x_center_pixel = x_center * w
# 	y_center_pixel = y_center * h
# 	box_width_pixel = box_width * w
# 	box_height_pixel = box_height * h

# 	# Calculate top-left and bottom-right coordinates of the bounding box
# 	x1 = int(x_center_pixel - box_width_pixel / 2)
# 	y1 = int(y_center_pixel - box_height_pixel / 2)
# 	x2 = int(x_center_pixel + box_width_pixel / 2)
# 	y2 = int(y_center_pixel + box_height_pixel / 2)

# 	# Convert image color from BGR (OpenCV default) to RGB for matplotlib
# 	image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# 	# Draw the bounding box (red color, thickness 2)
# 	cv2.rectangle(image_rgb, (x1, y1), (x2, y2), (255, 0, 0), 2)

# 	# Optionally add the class id as text above the bounding box
# 	label = str(int(class_id))
# 	cv2.putText(image_rgb, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (255, 0, 0), 2)

# 	# Display the image with matplotlib
# 	plt.figure(figsize=(8, 8))
# 	plt.imshow(image_rgb)
# 	plt.axis('off')
# 	plt.title("Image with Bounding Box")
# 	plt.show()

# # Example usage
# # 0.173398437
# if __name__ == "__main__":
# 	image_filename = f"{base_dir_images}/stage_0/stage_0_case_render_4_rot20.jpg"  # Replace with your image file path
# 	bounding_box_filename = f"{base_dir_annotations}stage_0/stage_0_case_render_4_rot20.txt"
# 	for stage in os.listdir(base_dir_annotations):
# 		if stage != "stage_1":
# 			continue
# 		for filename in os.listdir(os.path.join(base_dir_images, stage))[1000:1100]:
# 			if filename.endswith(".jpg"):
# 				image_filename = os.path.join(base_dir_images, stage, filename)
# 				bounding_box_filename = os.path.join(base_dir_annotations, stage, filename.replace(".jpg", ".txt"))
# 				with open(bounding_box_filename, 'r') as f:
# 					bbox_info = f.read().strip().split()
# 					bbox_info = (0, float(bbox_info[1]), float(bbox_info[2]), float(bbox_info[3]), float(bbox_info[4]))
# 					print(bounding_box_filename)
# 					show_image_with_bbox(image_filename, bbox_info)

# # 1: x 0.317216797 0.670823568 0.5 0.51
# # 2: x 0.490615234 0.670823568 0.5 0.51
# # 3: x 0.664013671 0.670823568 0.5 0.51
# # 4: x 0.317216797 0.497425131 0.5 0.51
# # 5: x 0.490615234 0.497425131 0.5 0.51
# # 6: x 0.664013671 0.497425131 0.5 0.51
# # 7: x 0.317216797 0.324026694 0.5 0.51
# # 8: x 0.490615234 0.324026694 0.5 0.51
# # 9: x 0.664013671 0.324026694 0.5 0.51



# Rename files from Roboflow (not used)

In [None]:
# import os
# import re

# def rename_files_in_folder(folder_path):
# 	pattern = re.compile(r"(.*)_jpg\.rf\.[a-f0-9]+\.jpg$")

# 	for filename in os.listdir(folder_path):
# 		if filename.endswith(".txt"):
# 			match = pattern.match(filename)
# 			if match:
# 				new_filename = f"{match.group(1)}.txt"
# 				old_path = os.path.join(folder_path, filename)
# 				new_path = os.path.join(folder_path, new_filename)
# 				os.rename(old_path, new_path)
# 				print(f"Renamed: {filename} -> {new_filename}")

# # Example usage
# folder = "/Users/georgye/Documents/repos/ethz/dslab25/obj_detection/preprocessing/stage_0/images"
# rename_files_in_folder(folder)

In [None]:
# import os
# import re
# import shutil

# def rename_and_duplicate(folder_path):
# 	# Regex pattern to match files like stage_x_case_render_n.txt
# 	pattern = re.compile(r"^(stage_\d+)_case_render_(\d+)\.txt$")
	
# 	for filename in os.listdir(folder_path):
# 		match = pattern.match(filename)
# 		if match:
# 			stage, render = match.groups()
# 			original_path = os.path.join(folder_path, filename)
			
# 			# New filenames
# 			new_name_var_0 = f"{stage}_var_0_case_render_{render}.txt"
# 			new_name_var_1 = f"{stage}_var_1_case_render_{render}.txt"
# 			path_var_0 = os.path.join(folder_path, new_name_var_0)
# 			path_var_1 = os.path.join(folder_path, new_name_var_1)

# 			# Rename original file to var_0
# 			os.rename(original_path, path_var_0)
# 			print(f"Renamed: {filename} -> {new_name_var_0}")
			
# 			# Duplicate to var_1
# 			shutil.copy(path_var_0, path_var_1)
# 			print(f"Duplicated: {new_name_var_0} -> {new_name_var_1}")
# rename_and_duplicate('/Users/georgye/Documents/repos/ethz/dslab25/assets/vacuum_pump/rendered/anno/stage_6')