In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset

import numpy as np
import time
import os
import copy
import argparse
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from PIL import Image
import random


def set_seed(seed=42):
    """Seed every random number generator used in this file.

    The same value is handed to Python's ``random`` module, NumPy's global
    generator and PyTorch (default generator plus all CUDA devices). cuDNN is
    then switched to deterministic mode with benchmarking turned off.

    Args:
        seed: Integer seed shared by all generators. Defaults to 42.
    """
    # The generators are independent of each other, so the order in which
    # they are seeded does not matter.
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,  # Only relevant when CUDA is in use
    )
    for seeder in seeders:
        seeder(seed)

    # Disable cuDNN benchmarking and request deterministic behaviour.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True

def preprocess_data(data_dir, seed=42):
    """Build the torchvision transform pipelines for training and validation.

    The global random number generators are re-seeded through ``set_seed``
    before the pipelines are constructed. No data is read here: ``data_dir``
    is accepted but not used by this function.

    Args:
        data_dir: Dataset location; currently unused.
        seed: Seed forwarded to ``set_seed``. Defaults to 42.

    Returns:
        dict: ``{'train': Compose, 'val': Compose}``. The training pipeline
        adds random flip / rotation / resized-crop augmentation; both
        pipelines end with ``ToTensor`` followed by ``Normalize``.
    """
    set_seed(seed)

    # torchvision reads a two-element size as (height, width).
    crop_size = (1500, 1024)

    def tensor_tail():
        # Fresh instances per pipeline: PIL image -> float tensor -> per-channel
        # normalisation (these values match the widely used ImageNet statistics).
        return [
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        ]

    train_steps = [
        transforms.CenterCrop(crop_size),
        # Mirror left/right for 30% of the samples.
        transforms.RandomHorizontalFlip(p=0.3),
        # For 30% of the samples, rotate by a random angle in [-30, +30] degrees.
        transforms.RandomApply([transforms.RandomRotation(degrees=30)], p=0.3),
        # Crop a random region and resize it back to crop_size, so every
        # output has the same dimensions.
        # NOTE(review): per the torchvision docs, `scale` bounds the crop's
        # area as a fraction of the input image area (it is not a zoom
        # factor), so the 1.2 upper bound is larger than the whole image.
        # Confirm that (0.8, 1.2) is what is intended.
        transforms.RandomResizedCrop(crop_size, scale=(0.8, 1.2)),
    ]
    train_steps.extend(tensor_tail())

    val_steps = [transforms.CenterCrop(crop_size)]
    val_steps.extend(tensor_tail())

    return {
        'train': transforms.Compose(train_steps),
        'val': transforms.Compose(val_steps),
    }

def save_preprocessed_images(data_dir, output_dir):
    """Apply the training augmentations to each JPEG in a folder and save them.

    Every ``.jpg`` / ``.jpeg`` file (matched case-insensitively) directly
    inside ``data_dir`` is opened, converted to RGB, passed through the
    image-space steps of the ``'train'`` pipeline from ``preprocess_data`` and
    written to ``output_dir`` under the same file name.

    Args:
        data_dir: Folder containing the source images (not searched
            recursively).
        output_dir: Folder that receives the processed images; created if it
            does not exist. If it is the same folder as ``data_dir`` the
            source files are overwritten.
    """
    os.makedirs(output_dir, exist_ok=True)
    train_pipeline = preprocess_data(data_dir)['train']  # Use train transforms

    # ToTensor/Normalize prepare network input. A normalised float tensor has
    # values outside [0, 1], and converting it back to an 8-bit image corrupts
    # the colours. Only the image-space steps are applied, so the result stays
    # a PIL image that can be saved as-is.
    tensor_only = (transforms.ToTensor, transforms.Normalize)
    image_pipeline = transforms.Compose(
        [step for step in train_pipeline.transforms
         if not isinstance(step, tensor_only)]
    )

    # os.listdir returns names in arbitrary order. Sorting keeps the pairing
    # between files and the seeded random augmentations stable across runs.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.lower().endswith((".jpg", ".jpeg")):
            continue

        img_path = os.path.join(data_dir, filename)
        # convert() returns a fully loaded copy, so the file handle can be
        # released as soon as the block exits.
        with Image.open(img_path) as source:
            image = source.convert("RGB")

        processed_image = image_pipeline(image)
        processed_image.save(os.path.join(output_dir, filename))

    print(f"Processed images saved to: {output_dir}")

In [None]:
# Placeholder paths: replace "xxxx" with the folder holding the source images
# (data_dir) and the folder that should receive the processed copies
# (output_dir) before running this cell.
data_dir = r"xxxx"
output_dir = r"xxxx"

# Run the training-time preprocessing over the JPEG images in data_dir and
# write the results into output_dir.
save_preprocessed_images(data_dir, output_dir)