# Step 1: Diffusion models that create the dataset

In [1]:
import os
from PIL import Image
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader, random_split
import torch
from torch import nn
import torch.optim as optim
import torch.nn.functional as F

In [None]:
class ImagePairsDataset(Dataset):
    """Dataset of (original, filtered) image pairs matched by file name.

    Every entry found in ``originals_dir`` is expected to have a counterpart
    with the same file name in ``filtered_dir``.

    Args:
        originals_dir: Directory containing the original images.
        filtered_dir: Directory containing the filtered images, using the
            same file names as ``originals_dir``.
        transform: Optional callable applied independently to both images
            of a pair.
    """

    def __init__(self, originals_dir, filtered_dir, transform=None):
        self.originals_dir = originals_dir
        self.filtered_dir = filtered_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> file mapping (and any seeded split built on top of it)
        # stable across runs and machines.
        self.image_names = sorted(os.listdir(originals_dir))

    def __len__(self):
        """Return the number of image pairs."""
        return len(self.image_names)

    @staticmethod
    def _load_rgb(path):
        """Open the image at ``path`` and return it as a 3-channel RGB image."""
        # The context manager releases the file handle right away; convert()
        # returns an independent, fully loaded copy. Forcing RGB keeps the
        # channel count identical for grayscale, palette and RGBA files.
        with Image.open(path) as img:
            return img.convert("RGB")

    def __getitem__(self, idx):
        """Return the ``(original, filtered)`` pair stored at position ``idx``.

        Both images are loaded as RGB and, when a transform was supplied,
        passed through it separately.
        """
        image_name = self.image_names[idx]
        original_image = self._load_rgb(
            os.path.join(self.originals_dir, image_name)
        )
        filtered_image = self._load_rgb(
            os.path.join(self.filtered_dir, image_name)
        )

        if self.transform is not None:
            original_image = self.transform(original_image)
            filtered_image = self.transform(filtered_image)

        return original_image, filtered_image

Main

In [None]:
# Seed PyTorch's global RNG so repeated runs behave the same way
torch.manual_seed(42)

# Locations of the paired images (original / modified)
originals_dir = '/kaggle/input/e2gan-images/original_images'
filtered_dir = '/kaggle/input/e2gan-images/modified_images'

# Prefer the GPU when one is visible, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Preprocessing applied to every image: resize to 256x256, convert to a
# tensor, then shift/scale each of the three channels with mean 0.5 and
# std 0.5 (maps ToTensor's [0, 1] output to [-1, 1])
transform = transforms.Compose([
    transforms.Resize(size=(256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

In [None]:
# Build the paired dataset from the two image directories
dataset = ImagePairsDataset(originals_dir, filtered_dir, transform=transform)

# Split the dataset into train (80%), validation (10%), and test (10%).
# A dedicated seeded generator makes the partition independent of the global
# RNG state, so re-running this cell alone still yields the same split.
train_set, val_set, test_set = random_split(
    dataset,
    [0.8, 0.1, 0.1],
    generator=torch.Generator().manual_seed(42),
)

# DataLoaders that serve the subsets in batches
batch_size = 32
train_loader = DataLoader(
    train_set,
    batch_size=batch_size,
    shuffle=True,  # reshuffle the training samples every epoch
    num_workers=4,  # load samples in parallel worker processes
    # Pinned host memory only helps host-to-GPU copies; skip it on CPU-only
    # machines, where requesting it just triggers a warning.
    pin_memory=torch.cuda.is_available(),
)
# Validation and test data are iterated in a fixed order (no shuffling)
val_loader = DataLoader(val_set, batch_size=batch_size)
test_loader = DataLoader(test_set, batch_size=batch_size)

# Report the size of each subset
print(f"Dataset totale: {len(dataset)} immagini")
print(f"Training set: {len(train_set)} immagini")
print(f"Validation set: {len(val_set)} immagini")
print(f"Test set: {len(test_set)} immagini")