In [None]:
from PIL import Image
import numpy as np
import os
from tqdm import tqdm

# Crop images

In [None]:
# Sanity check before cropping: report the on-disk size and pixel dimensions of one sample image

image_path ='/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/Basel_colored_texture/Basel_color_texture_resized/id0/trialnum0.png'
file_size = os.stat(image_path).st_size  # size of the file on disk, in bytes

# Open the image only long enough to read its dimensions; the context manager closes the file
with Image.open(image_path) as img:
    width = img.width
    height = img.height

print(
    f"File size: {file_size} bytes",
    f"Dimensions: {width} x {height} pixels",
    sep="\n",
)

In [None]:
# Crop images
# Source folder with the original renders, and the folder that receives the cropped copies
data_path = '/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/vbsle_50k_texture_colorbg/20250501_Var6vbsl_im10000_Dan_anger_4'
save_path = '/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/vbsle_50k_texture_colorbg/vbsl8000_dan_anger_4'

# Create the output folder if it does not exist yet
os.makedirs(save_path, exist_ok=True)

# Collect the .png filenames, then order them numerically by the index embedded in the name.
# The key takes the text after the last "_", drops its first 5 characters and the 4-character
# ".png" suffix, and parses the rest as an int.
# NOTE(review): assumes every name ends in "_<5-char prefix><number>.png" - a name that does
# not fit raises ValueError here; confirm against the folder contents.
all_filenames = [name for name in os.listdir(data_path) if name.endswith('.png')]
all_filenames.sort(key=lambda name: int(name.rsplit("_", 1)[-1][5:-4]))

print(f"Number of images found: {len(all_filenames)}")


def crop_alpha(img):
    """
    Crop an image to the bounding box of its non-transparent pixels.

    Args:
        img: A PIL image. Its last band is treated as the alpha channel when that
            band is "A" (or premultiplied "a"), e.g. modes "RGBA" and "LA".

    Returns:
        A new image cropped to the bounding box of the non-zero alpha region.
        The original `img` is returned unchanged when it has no alpha band or
        when the alpha channel is zero everywhere.
    """
    bands = img.getbands()
    # Without an alpha band the last channel is colour data (e.g. blue for RGB);
    # cropping on it would trim dark pixels rather than transparent ones.
    if not bands or bands[-1] not in ("A", "a"):
        return img

    alpha = img.getchannel(len(bands) - 1)  # the alpha band, selected by index
    # To ignore unwanted regions, zero them in a numpy copy of `alpha` before
    # computing the box (e.g. alpha_np[:, 1800:] = 0) and rebuild the image from it.
    bbox = alpha.getbbox()  # None when every alpha value is zero
    if bbox is None:
        return img
    return img.crop(bbox)

# Track images that could not be read or written so they can be fixed later.
# Both lists always refer to the SOURCE image (its position and filename in all_filenames).
error_img_index_list = []
error_img_filename_list = []

# Cropped files are saved as trialnum<N>.png, with N counted up from this offset
trial_index_offset = 8000

# Process each image
for img_index, filename in tqdm(enumerate(all_filenames), total=len(all_filenames)):
    new_name = f'trialnum{img_index + trial_index_offset}.png'
    try:
        # convert() returns a separate in-memory image, so the source file can be closed
        # right away instead of keeping one open handle per processed image.
        with Image.open(os.path.join(data_path, filename)) as source_img:
            img = source_img.convert("RGBA")
    except Exception:  # In case image file is corrupted or unreadable
        error_img_index_list.append(img_index)
        error_img_filename_list.append(filename)
        continue

    cropped_img = crop_alpha(img)

    # Save under the new name; a failed save is reported and recorded, then the loop moves on
    try:
        cropped_img.save(os.path.join(save_path, new_name), compress_level=3)
    except Exception as e:
        print(f"Problem saving img {img_index}: {e}")
        error_img_index_list.append(img_index)
        error_img_filename_list.append(filename)


print("These images are corrupted or had issues:", error_img_filename_list)


# Split into train/validation folders

In [None]:
# Randomly split training and validation sets
import os
import random
import shutil

# Source folder to sample from, and the train / validation folders that receive the copies
source_dir1 = '/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/vbsle_50k_texture_colorbg/vbsl4000_kedar_anger_4'
train_dir1 = '/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/vbsle_50k_texture_colorbg/16way_IDEM/train/kedar_anger'
val_dir1 = '/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/vbsle_50k_texture_colorbg/16way_IDEM/val/kedar_anger'

# Create both destination folders up front (no error if they already exist)
for destination_dir in (train_dir1, val_dir1):
    os.makedirs(destination_dir, exist_ok=True)

# Function to split data
def split_data(source_dir, train_dir, val_dir, train_size=5000, val_size=1250, seed=None):
    """
    Randomly copy files from `source_dir` into a training and a validation folder.

    The source files are left in place (they are copied, not moved). The two
    subsets never overlap: the first `train_size` shuffled files go to
    `train_dir`, the next `val_size` go to `val_dir`.

    Args:
        source_dir: Folder holding the files to split. Sub-directories are ignored.
        train_dir: Destination folder for the training subset (created if missing).
        val_dir: Destination folder for the validation subset (created if missing).
        train_size: Number of files to copy into `train_dir`.
        val_size: Number of files to copy into `val_dir`.
        seed: Optional seed for a reproducible split. When None, the global
            `random` state is used, as before.

    Returns:
        None. If the source holds fewer than `train_size + val_size` files, the
        subsets are shorter than requested and a warning is printed.
    """
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(val_dir, exist_ok=True)

    # Keep regular files only (shutil.copy fails on directories) and sort them so the
    # shuffle starts from a defined order rather than the arbitrary os.listdir order.
    all_files = sorted(
        name for name in os.listdir(source_dir)
        if os.path.isfile(os.path.join(source_dir, name))
    )
    rng = random.Random(seed) if seed is not None else random
    rng.shuffle(all_files)

    # Split files into train and validation
    train_files = all_files[:train_size]
    val_files = all_files[train_size:train_size + val_size]
    if len(train_files) < train_size or len(val_files) < val_size:
        print(
            f"Warning: only {len(all_files)} files in {source_dir}; "
            f"copying {len(train_files)} train / {len(val_files)} val "
            f"(requested {train_size} / {val_size})"
        )

    # Copy each subset into its destination directory
    for dest_dir, file_names in ((train_dir, train_files), (val_dir, val_files)):
        for file_name in file_names:
            shutil.copy(
                os.path.join(source_dir, file_name),
                os.path.join(dest_dir, file_name),
            )

# Run the split: 5000 training files and 1250 validation files
split_data(
    source_dir=source_dir1,
    train_dir=train_dir1,
    val_dir=val_dir1,
    train_size=5000,
    val_size=1250,
)

# Apply transformations (blur, jitter, grayscale, etc.)

In [None]:
import os
from PIL import Image
from torchvision import transforms
from tqdm import tqdm
from torchvision.transforms import Lambda
from PIL import ImageFilter
# import kornia.augmentation as K

# Folder the source images are read from, and folder the augmented copies are written to
input_dir = "/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/LFW_Bush_Powell/Bush"   # Replace with your folder
output_dir = "/mnt/smb/locker/issa-locker/users/Seojin/data/face_data/LFW_Bush_Powell_augmented3/Bush"  # Replace with your desired output folder

# Create the output folder if needed (no error when it already exists)
os.makedirs(output_dir, exist_ok=True)
from torchvision import transforms

# define transformation
# The steps run in the listed order. Everything up to the flips works on the PIL image;
# the image is turned into a tensor only for random erasing and then converted back.
augmentation_steps = [
    transforms.Grayscale(num_output_channels=3),  # convert to grayscale, kept as 3 channels
    transforms.GaussianBlur(kernel_size=15, sigma=(4, 10)),  # gaussian blur
    # random color jitter
    # NOTE(review): the image is already grayscale at this point, so the saturation and
    # hue jitter likely have little or no visible effect - confirm the intended order.
    transforms.ColorJitter(brightness=0.8, contrast=0.8, saturation=0.4, hue=0.2),
    # random crop, resized to 224
    transforms.RandomResizedCrop(size=224, scale=(0.3, 1.0), ratio=(0.6, 1.0)),
    transforms.RandomRotation(degrees=(0, 150)),  # random rotation
    transforms.RandomHorizontalFlip(p=0.5),  # horizontal flip
    transforms.RandomVerticalFlip(p=0.5),  # vertical flip
    # convert to tensor BEFORE random erasing
    transforms.ToTensor(),
    # random erasing
    transforms.RandomErasing(p=0.5, scale=(0.025, 0.3), ratio=(0.3, 3.3)),
    # convert back to PIL to save/display as an image
    transforms.ToPILImage(),
]
transform = transforms.Compose(augmentation_steps)

# Apply the augmentation pipeline to every .png in input_dir and save the result under the
# same filename in output_dir.
# NOTE(review): only files ending in ".png" (case-insensitive) are processed; any other
# image format in the folder is skipped silently - confirm this matches the input data.
for fname in tqdm(os.listdir(input_dir), desc="Processing images"):
    if not fname.lower().endswith(".png"):
        continue
    path = os.path.join(input_dir, fname)
    # convert() returns a separate in-memory image, so the source file is closed right
    # away instead of leaving one open handle per processed image.
    with Image.open(path) as source_img:
        img = source_img.convert("RGB")  # Make sure it's 3 channels

    transformed = transform(img)
    transformed.save(os.path.join(output_dir, fname))
