# Exercise 3: Evaluate Quality of Generated Images

Next, we will train a CNN to classify whether an image is original or generated. A low accuracy (near 50%) indicates that the generated images are indistinguishable from the original images, while a higher accuracy indicates noticeable differences between the two.

In [None]:
import torchvision.transforms as transforms
import torch.nn as nn
import random

### Load original dataset, label as 0 for "not generated"

In [None]:
# Side length (in pixels) every image is resized to.
image_size = 64

# Evaluation-time preprocessing: same as the training transform, but with no
# horizontal flip so the original images are left untouched.
eval_preprocess = transforms.Compose(
    [
        transforms.Resize((image_size, image_size)),  # square resize
        transforms.ToTensor(),  # PIL image -> tensor in (0, 1)
        transforms.Normalize([0.5], [0.5]),  # (0, 1) -> (-1, 1)
    ]
)

# Real butterfly images that the generated samples are compared against.
original_dataset = datasets.load_dataset(
    "huggan/smithsonian_butterflies_subset",
    split="train",
)

# Define function for transformation and labeling as original images
def transform_and_label(dataset):
    """Preprocess a batch of original images and label every one of them 0.

    Args:
        dataset: Mapping with an ``"image"`` entry holding an iterable of
            images that support ``.convert("RGB")``.

    Returns:
        Dict with ``"images"`` (the preprocessed images stacked into one
        tensor) and ``"labels"`` (a ``torch.long`` tensor of zeros, one per
        image; 0 means "not generated").
    """
    tensors = []
    for picture in dataset["image"]:
        # Force three channels before running the evaluation transform.
        tensors.append(eval_preprocess(picture.convert("RGB")))

    zero_labels = torch.zeros(len(tensors), dtype=torch.long)
    return {"images": torch.stack(tensors), "labels": zero_labels}

# Register transform_and_label as the dataset's transform, so indexing the dataset
# yields {"images", "labels"} (per the `datasets` docs the transform runs on access,
# not eagerly over the whole dataset).
original_dataset.set_transform(transform_and_label)

Repo card metadata block was not found. Setting CardData to empty.


### Generate images, label as 1 for "generated"

In [None]:
def generate_images(num_images, model, scheduler, device, image_size=64, channels=3):
    """Sample a batch of images by iterating the scheduler's denoising steps.

    Args:
        num_images: Number of images to sample in a single batch.
        model: Noise-prediction network. Called as ``model(samples, t)``; the
            result must expose the prediction as ``.sample``.
        scheduler: Object providing ``timesteps`` (iterable of timesteps) and
            ``step(noise_pred, t, samples)`` whose result exposes
            ``.prev_sample``.
        device: Device the samples are moved to before denoising.
        image_size: Height and width of the square samples. Defaults to 64,
            the value that used to be hard-coded.
        channels: Number of image channels. Defaults to 3.

    Returns:
        Tensor of shape ``(num_images, channels, image_size, image_size)``
        with values in [0, 1].

    Note:
        The output range is [0, 1]. Images produced by a transform that
        normalizes to [-1, 1] must be rescaled before being mixed with the
        output of this function.
    """
    # Start from pure Gaussian noise for the entire batch.
    samples = torch.randn(num_images, channels, image_size, image_size).to(device)

    # Sampling never needs gradients, so keep the whole loop (model call and
    # scheduler update) out of autograd.
    with torch.no_grad():
        for t in scheduler.timesteps:
            noise_pred = model(samples, t).sample
            samples = scheduler.step(noise_pred, t, samples).prev_sample

    # Clamp to [-1, 1] and rescale to [0, 1].
    return samples.clip(-1, 1) * 0.5 + 0.5

# How many images to generate for testing purposes.
num_generated = 10

# Every generated image gets label 1 ("generated").
generated_labels = torch.ones(num_generated, dtype=torch.long)

# NOTE(review): beta_start=0.001 must match the schedule the diffusion model was
# trained with; that training setup is not visible here -- confirm.
scheduler = diffusers.DDPMScheduler(
    num_train_timesteps=1000,
    beta_start=0.001,
    beta_end=0.02,
    beta_schedule="linear",
)
generated_images = generate_images(num_generated, model, scheduler, device)

### Split original dataset to match number of generated samples, combine
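Ideally, we would generate as many samples as there are images in the training set (about 1000), but we do not have enough memory to generate that many, so we instead draw a random subset of the original images that matches the number of generated samples.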

In [None]:
from torch.utils.data import random_split

# Define the size of the split
total_size = len(original_dataset)

# Split the dataset randomly, ensure split is same size as number of generated samples
split_data, _ = random_split(original_dataset, [num_generated, total_size - num_generated])

# random_split returns a Subset, which only supports integer indexing, so
# split_data["images"] would fail. Fetch the selected rows from the underlying
# dataset instead; indexing it with a list of indices returns the transformed
# batch as a dict with "images" and "labels".
original_batch = original_dataset[list(split_data.indices)]

# The original images were normalized to [-1, 1] while generate_images returns
# values in [0, 1]. Bring both to [0, 1] so the classifier cannot separate the
# two classes by value range alone.
original_images = original_batch["images"] * 0.5 + 0.5

# Generated samples may live on the GPU; torch.cat needs a single device.
generated_on_cpu = generated_images.detach().cpu()

# Combine datasets
combined_images = torch.cat([original_images, generated_on_cpu], dim=0)
combined_labels = torch.cat([original_batch["labels"], generated_labels.cpu()], dim=0)

### Create Combined Dataset

In [None]:
from torch.utils.data import TensorDataset, DataLoader

class CombinedDataset(TensorDataset):
    """Dataset of (image, label) pairs built from two aligned tensors.

    Args:
        images: Tensor whose first dimension indexes the samples.
        labels: Tensor with one label per image.

    Raises:
        ValueError: If ``images`` and ``labels`` contain a different number
            of samples.
    """

    def __init__(self, images, labels):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        # Initialize the TensorDataset base so inherited state (``tensors``)
        # exists; skipping this left the parent class half-constructed.
        super().__init__(images, labels)
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        return self.images[idx], self.labels[idx]

# Create dataset instance: wraps the combined image and label tensors so that
# indexing returns an (image, label) pair
full_dataset = CombinedDataset(combined_images, combined_labels)

### Split combined dataset into train/test

In [None]:
# 80% of the combined samples go to training; whatever is left is the test set.
train_size = int(len(full_dataset) * 0.8)
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset=full_dataset,
    lengths=[train_size, test_size],
)

# Batched loaders: the training data is reshuffled, the test data keeps its order.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=32,
)
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=32,
)

### Define simple CNN

In [None]:
import torch.optim as optim

class SimpleCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution, ReLU, 2x2 max-pool and a linear head.

    Args:
        image_size: Height and width of the square RGB input. Defaults to 64,
            the size the images in this file are resized to.
        num_classes: Number of output logits. Defaults to 2
            (original vs. generated).
    """

    def __init__(self, image_size=64, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.act1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2)
        # The padded 3x3 conv keeps the spatial size and the pool halves it,
        # so a 64x64 input yields 16 x 32 x 32 features (not 16 x 16 x 16).
        pooled_size = image_size // 2
        self.fc1 = nn.Linear(16 * pooled_size * pooled_size, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.pool(self.act1(self.conv1(x)))
        # Flatten everything except the batch dimension. The previous
        # view(-1, 16 * 16 * 16) silently folded features into the batch axis.
        x = x.flatten(start_dim=1)
        return self.fc1(x)

# Classifier, moved to the compute device.
cnn_model = SimpleCNN()
cnn_model = cnn_model.to(device)

# Cross-entropy over the two class logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=cnn_model.parameters(), lr=1e-3)

### Train Simple CNN

In [None]:
from tqdm import tqdm

# Training loop
# NOTE: iterate over train_loader, the loader built from the combined dataset;
# the previous name `train_dataloader` is not defined anywhere in this section.
cnn_model.train()
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = cnn_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean loss per batch for this epoch
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

print('Finished Training')

### Evaluate accuracy of CNN for classifying generated images

In [None]:
def evaluate_accuracy(data_loader, model, device=None):
    """Return the classification accuracy of ``model`` in percent.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches.
        model: Classifier returning one row of logits per image.
        device: Device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used, so the function no
            longer depends on a global ``device`` variable.

    Returns:
        Accuracy as a float in [0, 100]. Returns ``0.0`` when the loader
        yields no samples instead of dividing by zero.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    correct = 0
    total = 0

    # Evaluate in eval mode, then restore whatever mode the model was in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in data_loader:
                if device is not None:
                    images, labels = images.to(device), labels.to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

# Score the trained classifier on the held-out test split and report it.
test_accuracy = evaluate_accuracy(
    data_loader=test_loader,
    model=cnn_model,
)
print(f'Accuracy of the model on the test images: {test_accuracy}%')

In [None]:
# class CNN(nn.Module):
#     def __init__(self, verbose=False):
#         super().__init__()
#         self.conv1 = nn.Conv2d(1, 32, 3)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(32, 128, 3)
#         self.conv3 = nn.Conv2d(128, 256, 3)
#         self.fc1 = nn.Linear(256, 256)
#         self.fc2 = nn.Linear(256, 10)
#         self.verbose = verbose

#     def forward(self, x):
#         if self.verbose:
#           print(f"Input shape: {x.size()}")
#         x = self.pool(F.relu(self.conv1(x)))
#         if self.verbose:
#           print(f"After Layer 1: {x.size()}")
#         x = self.pool(F.relu(self.conv2(x)))
#         if self.verbose:
#           print(f"After Layer 2: {x.size()}")
#         x = self.pool(F.relu(self.conv3(x)))
#         if self.verbose:
#           print(f"After Layer 3: {x.size()}")
#         x = torch.flatten(x, 1)
#         if self.verbose:
#           print(f"Flattened: {x.size()}")
#         x = F.relu(self.fc1(x))
#         if self.verbose:
#           print(f"After first Fully connected layer: {x.size()}")
#         x = self.fc2(x)
#         if self.verbose:
#           print(f"Output shape: {x.size()}")
#         return x


# Part 4: Evaluate Quality of Generated Images

Next, we will train a CNN to classify whether an image is original or generated. A low accuracy (near 50%) indicates that the generated images are indistinguishable from the original images, while a higher accuracy indicates noticeable differences between the two.

In [None]:
import torchvision.transforms as transforms
import torch.nn as nn

### Load original dataset, label as 0 for "not generated"

In [None]:
# Side length (in pixels) every image is resized to.
image_size = 64

# Evaluation-time preprocessing: same as the training transform, but with no
# horizontal flip so the original images are left untouched.
eval_preprocess = transforms.Compose(
    [
        transforms.Resize((image_size, image_size)),  # square resize
        transforms.ToTensor(),  # PIL image -> tensor in (0, 1)
        transforms.Normalize([0.5], [0.5]),  # (0, 1) -> (-1, 1)
    ]
)

# Real butterfly images that the generated samples are compared against.
original_dataset = datasets.load_dataset(
    "huggan/smithsonian_butterflies_subset",
    split="train",
)

# Define function for transformation and labeling as original images
def transform_and_label(dataset):
    """Preprocess a batch of original images and label every one of them 0.

    Args:
        dataset: Mapping with an ``"image"`` entry holding an iterable of
            images that support ``.convert("RGB")``.

    Returns:
        Dict with ``"images"`` (the preprocessed images stacked into one
        tensor) and ``"labels"`` (a ``torch.long`` tensor of zeros, one per
        image; 0 means "not generated").
    """
    tensors = []
    for picture in dataset["image"]:
        # Force three channels before running the evaluation transform.
        tensors.append(eval_preprocess(picture.convert("RGB")))

    zero_labels = torch.zeros(len(tensors), dtype=torch.long)
    return {"images": torch.stack(tensors), "labels": zero_labels}

# Register transform_and_label as the dataset's transform, so indexing the dataset
# yields {"images", "labels"} (per the `datasets` docs the transform runs on access,
# not eagerly over the whole dataset).
original_dataset.set_transform(transform_and_label)

### Generate images, label as 1 for "generated"

In [None]:
def generate_images(num_images, model, scheduler, device, image_size=64, channels=3):
    """Sample a batch of images by iterating the scheduler's denoising steps.

    Args:
        num_images: Number of images to sample in a single batch.
        model: Noise-prediction network. Called as ``model(samples, t)``; the
            result must expose the prediction as ``.sample``.
        scheduler: Object providing ``timesteps`` (iterable of timesteps) and
            ``step(noise_pred, t, samples)`` whose result exposes
            ``.prev_sample``.
        device: Device the samples are moved to before denoising.
        image_size: Height and width of the square samples. Defaults to 64,
            the value that used to be hard-coded.
        channels: Number of image channels. Defaults to 3.

    Returns:
        Tensor of shape ``(num_images, channels, image_size, image_size)``
        with values in [0, 1].

    Note:
        The output range is [0, 1]. Images produced by a transform that
        normalizes to [-1, 1] must be rescaled before being mixed with the
        output of this function.
    """
    # Start from pure Gaussian noise for the entire batch.
    samples = torch.randn(num_images, channels, image_size, image_size).to(device)

    # Sampling never needs gradients, so keep the whole loop (model call and
    # scheduler update) out of autograd.
    with torch.no_grad():
        for t in scheduler.timesteps:
            noise_pred = model(samples, t).sample
            samples = scheduler.step(noise_pred, t, samples).prev_sample

    # Clamp to [-1, 1] and rescale to [0, 1].
    return samples.clip(-1, 1) * 0.5 + 0.5

# How many images to generate for testing purposes.
num_generated = 10

# Every generated image gets label 1 ("generated").
generated_labels = torch.ones(num_generated, dtype=torch.long)

# Sample the images with the model and scheduler already in scope.
generated_images = generate_images(num_generated, model, scheduler, device)

### Split original dataset to match number of generated samples, combine
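Ideally, we would generate as many samples as there are images in the training set (about 1000), but we do not have enough memory to generate that many, so we instead draw a random subset of the original images that matches the number of generated samples.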

In [None]:
# random_split is used below; import it here so this cell does not depend on a
# later cell having been run first.
from torch.utils.data import random_split

# Define the size of the split
total_size = len(original_dataset)

# Split the dataset randomly, ensure split is same size as number of generated samples
split_data, _ = random_split(original_dataset, [num_generated, total_size - num_generated])

# random_split returns a Subset, which only supports integer indexing, so
# split_data["images"] would fail. Fetch the selected rows from the underlying
# dataset instead; indexing it with a list of indices returns the transformed
# batch as a dict with "images" and "labels".
original_batch = original_dataset[list(split_data.indices)]

# The original images were normalized to [-1, 1] while generate_images returns
# values in [0, 1]. Bring both to [0, 1] so the classifier cannot separate the
# two classes by value range alone.
original_images = original_batch["images"] * 0.5 + 0.5

# Generated samples may live on the GPU; torch.cat needs a single device.
generated_on_cpu = generated_images.detach().cpu()

# Combine datasets
combined_images = torch.cat([original_images, generated_on_cpu], dim=0)
combined_labels = torch.cat([original_batch["labels"], generated_labels.cpu()], dim=0)

### Create Combined Dataset

In [None]:
from torch.utils.data import TensorDataset, DataLoader

class CombinedDataset(TensorDataset):
    """Dataset of (image, label) pairs built from two aligned tensors.

    Args:
        images: Tensor whose first dimension indexes the samples.
        labels: Tensor with one label per image.

    Raises:
        ValueError: If ``images`` and ``labels`` contain a different number
            of samples.
    """

    def __init__(self, images, labels):
        if len(images) != len(labels):
            raise ValueError(
                f"images and labels must have the same length, "
                f"got {len(images)} and {len(labels)}"
            )
        # Initialize the TensorDataset base so inherited state (``tensors``)
        # exists; skipping this left the parent class half-constructed.
        super().__init__(images, labels)
        self.images = images
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at position ``idx``."""
        return self.images[idx], self.labels[idx]

# Create dataset instance: wraps the combined image and label tensors so that
# indexing returns an (image, label) pair
full_dataset = CombinedDataset(combined_images, combined_labels)

### Split combined dataset into train/test

In [None]:
from torch.utils.data import random_split

# 80% of the combined samples go to training; whatever is left is the test set.
train_size = int(len(full_dataset) * 0.8)
test_size = len(full_dataset) - train_size
train_dataset, test_dataset = random_split(
    dataset=full_dataset,
    lengths=[train_size, test_size],
)

# Batched loaders: the training data is reshuffled, the test data keeps its order.
train_loader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=32,
)
test_loader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=32,
)

### Define simple CNN

In [None]:
import torch.optim as optim

class SimpleCNN(nn.Module):
    """Minimal CNN: one 3x3 convolution, ReLU, 2x2 max-pool and a linear head.

    Args:
        image_size: Height and width of the square RGB input. Defaults to 64,
            the size the images in this file are resized to.
        num_classes: Number of output logits. Defaults to 2
            (original vs. generated).
    """

    def __init__(self, image_size=64, num_classes=2):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, 3, padding=1)
        self.act1 = nn.ReLU()
        self.pool = nn.MaxPool2d(2)
        # The padded 3x3 conv keeps the spatial size and the pool halves it,
        # so a 64x64 input yields 16 x 32 x 32 features (not 16 x 16 x 16).
        pooled_size = image_size // 2
        self.fc1 = nn.Linear(16 * pooled_size * pooled_size, num_classes)

    def forward(self, x):
        """Return logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.pool(self.act1(self.conv1(x)))
        # Flatten everything except the batch dimension. The previous
        # view(-1, 16 * 16 * 16) silently folded features into the batch axis.
        x = x.flatten(start_dim=1)
        return self.fc1(x)

# Classifier, moved to the compute device.
cnn_model = SimpleCNN()
cnn_model = cnn_model.to(device)

# Cross-entropy over the two class logits, optimized with Adam.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=cnn_model.parameters(), lr=1e-3)

### Train Simple CNN

In [None]:
from tqdm import tqdm

# Training loop
# NOTE: iterate over train_loader, the loader built from the combined dataset;
# the previous name `train_dataloader` is not defined anywhere in this section.
cnn_model.train()
for epoch in range(10):  # Loop over the dataset multiple times
    running_loss = 0.0
    for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch + 1}"):
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = cnn_model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Mean loss per batch for this epoch
    print(f'Epoch {epoch + 1}, Loss: {running_loss / len(train_loader)}')

print('Finished Training')

### Evaluate accuracy of CNN for classifying generated images

In [None]:
def evaluate_accuracy(data_loader, model, device=None):
    """Return the classification accuracy of ``model`` in percent.

    Args:
        data_loader: Iterable yielding ``(images, labels)`` batches.
        model: Classifier returning one row of logits per image.
        device: Device the batches are moved to. When ``None`` (default) the
            device of the model's first parameter is used, so the function no
            longer depends on a global ``device`` variable.

    Returns:
        Accuracy as a float in [0, 100]. Returns ``0.0`` when the loader
        yields no samples instead of dividing by zero.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    correct = 0
    total = 0

    # Evaluate in eval mode, then restore whatever mode the model was in.
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in data_loader:
                if device is not None:
                    images, labels = images.to(device), labels.to(device)
                predicted = model(images).argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    if total == 0:
        return 0.0
    return 100 * correct / total

# Score the trained classifier on the held-out test split and report it.
test_accuracy = evaluate_accuracy(
    data_loader=test_loader,
    model=cnn_model,
)
print(f'Accuracy of the model on the test images: {test_accuracy}%')

In [None]:
# class CNN(nn.Module):
#     def __init__(self, verbose=False):
#         super().__init__()
#         self.conv1 = nn.Conv2d(1, 32, 3)
#         self.pool = nn.MaxPool2d(2, 2)
#         self.conv2 = nn.Conv2d(32, 128, 3)
#         self.conv3 = nn.Conv2d(128, 256, 3)
#         self.fc1 = nn.Linear(256, 256)
#         self.fc2 = nn.Linear(256, 10)
#         self.verbose = verbose

#     def forward(self, x):
#         if self.verbose:
#           print(f"Input shape: {x.size()}")
#         x = self.pool(F.relu(self.conv1(x)))
#         if self.verbose:
#           print(f"After Layer 1: {x.size()}")
#         x = self.pool(F.relu(self.conv2(x)))
#         if self.verbose:
#           print(f"After Layer 2: {x.size()}")
#         x = self.pool(F.relu(self.conv3(x)))
#         if self.verbose:
#           print(f"After Layer 3: {x.size()}")
#         x = torch.flatten(x, 1)
#         if self.verbose:
#           print(f"Flattened: {x.size()}")
#         x = F.relu(self.fc1(x))
#         if self.verbose:
#           print(f"After first Fully connected layer: {x.size()}")
#         x = self.fc2(x)
#         if self.verbose:
#           print(f"Output shape: {x.size()}")
#         return x
