# ECE 285 Final Project Proposal Demo

## Part 1 Install libraries

In [None]:
!pip install kaggle
!pip install datasets

## Part 2 Import libraries

In [2]:
# Python built-in libraries
import os

# Pytorch libraries
import torch
from torch import nn
from torch.nn import functional as F
from torchvision import transforms
from torch.optim import AdamW
from torch.utils.data import DataLoader

# Hugging face libraries
from datasets import load_dataset
from diffusers import UNet2DModel, DDPMScheduler, DDPMPipeline
from diffusers.optimization import get_scheduler
from diffusers.utils import make_image_grid

# Plotting libraries
from matplotlib import pyplot as plt

## Part 3 Set global parameters

In [4]:
# Device type: use the GPU when one is visible, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Reproducibility: seed torch's default random generator so that runs started
# from a fresh kernel draw the same random numbers
seed = 42
torch.manual_seed(seed)

# Graph parameters (images are resized to image_size x image_size)
image_size = 128

# Training parameters
batch_size = 16
epoch_size = 50         # number of training epochs
learning_rate = 1e-5
warm_up_steps = 100     # warm-up steps of the learning rate scheduler

# Output dir (created up front so later cells can write into them directly)
image_output_dir = "./images_output/"
os.makedirs(image_output_dir, exist_ok=True)

model_save_dir = "./model/"
os.makedirs(model_save_dir, exist_ok=True)

## Part 4 Load Dataset
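Run exactly one of the two dataset cells below (Butterfly or Pikachu). Each one defines `dataset` and `train_data`, so whichever cell runs last overwrites the other.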

### Butterfly dataset

In [5]:
# Butterfly dataset in tutorials

# Load dataset and force every image to 3-channel RGB
dataset_name = "huggan/smithsonian_butterflies_subset"
dataset = load_dataset(dataset_name, split="train")
dataset = [image.convert("RGB") for image in dataset["image"]]

# Preprocess image: resize to a square, random horizontal flip, convert to a
# tensor, then normalize each channel with mean 0.5 and std 0.5
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# Apply preprocess and place the tensors on the configured device
# (`device`, not a hard-coded .cuda(), so the cell also runs on CPU-only machines).
# NOTE: the transform runs once here, so the random flip is decided a single
# time per image instead of being re-sampled every epoch.
dataset = [transform(image).to(device) for image in dataset]

# Generate train data
train_data = DataLoader(dataset, batch_size=batch_size, shuffle=True)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/609 [00:00<?, ?B/s]

Repo card metadata block was not found. Setting CardData to empty.


dataset_infos.json:   0%|          | 0.00/1.65k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/237M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/1000 [00:00<?, ? examples/s]

### Pikachu dataset

In [None]:
# Pikachu dataset

# Please upload your kaggle.json file
from google.colab import files
files.upload()

! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json

# Download dataset and unzip
!kaggle datasets list -s pikachu
!kaggle datasets download -d hal0samuel/pikachu-classification-dataset
!unzip pikachu-classification-dataset.zip
!rm -r pikachu_dataset/train/not_pikachu    # remove non-pikachu image

# Load dataset
dataset_name = "./pikachu_dataset"
dataset = load_dataset(dataset_name, split="train")
dataset = [image.convert("RGB") for image in dataset["image"]]

# Preprocess image
transform = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# Apply preprocess
dataset = [transform(image).cuda() for image in dataset]

# Generate train data
train_data = DataLoader(dataset, batch_size=batch_size, shuffle=True)

## Part 5 Define model

In [None]:
# Define model
def UNet():
    """Build a new `UNet2DModel` for 3-channel images of side `image_size`.

    The network has six resolution stages with two layers per block. The
    fifth down block and the second up block are the attention variants;
    every other block is a plain down/up block.

    Returns:
        The newly constructed `UNet2DModel` (not yet moved to any device).
    """
    channels_per_stage = (128, 128, 256, 256, 512, 512)

    encoder_blocks = (
        "DownBlock2D",
        "DownBlock2D",
        "DownBlock2D",
        "DownBlock2D",
        "AttnDownBlock2D",
        "DownBlock2D",
    )
    decoder_blocks = (
        "UpBlock2D",
        "AttnUpBlock2D",
        "UpBlock2D",
        "UpBlock2D",
        "UpBlock2D",
        "UpBlock2D",
    )

    return UNet2DModel(
        sample_size=image_size,
        in_channels=3,
        out_channels=3,
        layers_per_block=2,
        block_out_channels=channels_per_stage,
        down_block_types=encoder_blocks,
        up_block_types=decoder_blocks,
    )

# Put model on certain device
# `Module.to` returns the module itself, so build and move in one assignment;
# ending the cell on an assignment also keeps the full layer-by-layer repr of
# the network out of the cell output.
model = UNet().to(device)

In [7]:
# This cell is for test: push one image through the network and check shapes
sample_input_image = dataset[0]                                  # First image
sample_input_image = sample_input_image.unsqueeze(0).to(device)  # Add the batch_size dim, match the model's device
print(f"Input shape: {sample_input_image.shape}")

# Shape check only, so run the forward pass without autograd bookkeeping
with torch.no_grad():
    sample_output_image = model(sample_input_image, timestep=0)
sample_output_image = sample_output_image.sample
print(f"Output shape: {sample_output_image.shape}")

Input shape: torch.Size([1, 3, 128, 128])
Output shape: torch.Size([1, 3, 128, 128])


### Schedulers and optimizers

In [8]:
# Noise scheduler (DDPM with 1000 training timesteps)
noise_scheduler = DDPMScheduler(num_train_timesteps=1000)

# Optimizer over all model parameters
optimizer = AdamW(model.parameters(), lr=learning_rate)

# Learning rate scheduler: linear schedule with warm-up, sized for
# epoch_size passes over the batches of train_data
total_training_steps = epoch_size * len(train_data)
learning_rate_scheduler = get_scheduler(
    name="linear",
    optimizer=optimizer,
    num_warmup_steps=warm_up_steps,
    num_training_steps=total_training_steps,
)

### Functions for saving and loading

In [9]:
# Save function
def save_model(model, filename):
    """Write the state dict of `model` to `filename` inside `model_save_dir`.

    Args:
        model: Module whose `state_dict()` is serialized.
        filename: File name appended directly to `model_save_dir`.
    """
    weights = model.state_dict()
    checkpoint_path = model_save_dir + filename
    torch.save(weights, checkpoint_path)

def load_model(model, filename):
    """Load the weights stored in `model_save_dir + filename` into `model` in place.

    Args:
        model: Module that receives the weights via `load_state_dict`.
        filename: File name appended directly to `model_save_dir`.
    """
    # Deserialize onto the CPU so a checkpoint written on a GPU machine can
    # still be read where no GPU is available; load_state_dict then copies the
    # values into the model's existing parameters on whatever device they use.
    state_dict = torch.load(model_save_dir + filename,
                            map_location="cpu",
                            weights_only=True)
    model.load_state_dict(state_dict)

### Functions for training and evaluation

In [10]:
# Train function
def train_model(model, train_data, noise_scheduler, optimizer, learning_rate_scheduler):
    """Train `model` to predict the noise added to images (DDPM objective).

    Runs for the global `epoch_size` epochs. After each epoch, the summed
    batch loss is printed, and the weights are written to "model.pth" via
    `save_model` whenever that sum is the lowest seen so far.

    Args:
        model: Network called as `model(noisy_images, timesteps)`; its
            `.sample` output is compared against the injected noise.
        train_data: Iterable yielding image batches as tensors.
        noise_scheduler: Scheduler providing `add_noise` and
            `config.num_train_timesteps`.
        optimizer: Optimizer stepped once per batch.
        learning_rate_scheduler: LR scheduler stepped once per batch.
    """
    min_loss = float("inf")
    model.train()

    # Sample timesteps over the scheduler's own range rather than a
    # hard-coded 1000, so the two can never drift apart.
    num_train_timesteps = noise_scheduler.config.num_train_timesteps

    # Batches are moved to wherever the weights live, so the loop does not
    # depend on the dataset having been pre-loaded onto that device.
    model_device = next(model.parameters()).device

    for epoch in range(epoch_size):
        # Accumulate loss for each epoch (sum of per-batch MSE, not the mean)
        epoch_loss = 0

        for graphs in train_data:
            graphs = graphs.to(model_device)

            # Get batch, final batch might not be full
            current_batch_size = graphs.shape[0]

            # Generate random noises and timesteps
            noises = torch.randn(graphs.shape, device=graphs.device)
            timesteps = torch.randint(0, num_train_timesteps,
                                      (current_batch_size,),
                                      device=graphs.device,
                                      dtype=torch.long)

            # Add noise to graphs
            noisy_graphs = noise_scheduler.add_noise(graphs, noises, timesteps)

            # Train: predict the injected noise and regress onto it
            optimizer.zero_grad()
            pred_noises = model(noisy_graphs, timesteps).sample
            loss = F.mse_loss(pred_noises, noises)

            loss.backward()
            optimizer.step()
            learning_rate_scheduler.step()
            epoch_loss += loss.item()

        # Save model which has lower loss
        if epoch_loss < min_loss:
            min_loss = epoch_loss
            save_model(model, "model.pth")

        # Print training log
        print(f"Epoch {epoch+1}, MSE loss: {epoch_loss}.")



In [11]:
# Evaluation function
def eval_model(model, noise_scheduler, filename):
    """Sample a batch of images from `model` and save them as one grid PNG.

    Generates the global `batch_size` images with a `DDPMPipeline` and writes
    the grid to `<image_output_dir>/<filename>.png`.

    Args:
        model: Trained UNet used as the pipeline's denoiser.
        noise_scheduler: Scheduler driving the reverse process; its
            `config.num_train_timesteps` sets the number of inference steps.
        filename: Output file name without the ".png" extension.
    """
    from math import isqrt

    # Evaluate model
    model.eval()

    # Generate graphs in batch, denoising over the scheduler's full range
    # instead of a hard-coded 1000 steps
    pipeline = DDPMPipeline(unet=model, scheduler=noise_scheduler)
    images = pipeline(
        batch_size=batch_size,
        num_inference_steps=noise_scheduler.config.num_train_timesteps,
    ).images

    # Put images together. make_image_grid needs rows * cols == len(images),
    # so derive the most square exact factorisation from the image count
    # (16 images -> 4 x 4) instead of assuming batch_size is 16.
    num_images = len(images)
    rows = next(r for r in range(isqrt(num_images), 0, -1) if num_images % r == 0)
    cols = num_images // rows
    image_grid = make_image_grid(images, rows=rows, cols=cols)

    # Save images (os.path.join avoids a doubled separator when
    # image_output_dir already ends with "/")
    image_grid.save(os.path.join(image_output_dir, f"{filename}.png"))


## Part 6 Train and evaluate

In [12]:
# Train: run the training loop on the objects built in the cells above
train_model(
    model=model,
    train_data=train_data,
    noise_scheduler=noise_scheduler,
    optimizer=optimizer,
    learning_rate_scheduler=learning_rate_scheduler,
)

Epoch 1, MSE loss: 58.39319711923599.
Epoch 2, MSE loss: 23.894571363925934.
Epoch 3, MSE loss: 8.60437098890543.
Epoch 4, MSE loss: 6.595658659934998.
Epoch 5, MSE loss: 5.4847811087965965.
Epoch 6, MSE loss: 5.029978543519974.
Epoch 7, MSE loss: 4.811599746346474.
Epoch 8, MSE loss: 4.843118667602539.
Epoch 9, MSE loss: 3.863627605140209.
Epoch 10, MSE loss: 4.1508523896336555.
Epoch 11, MSE loss: 4.001143971458077.
Epoch 12, MSE loss: 3.5623564925044775.
Epoch 13, MSE loss: 3.424580542370677.
Epoch 14, MSE loss: 3.2442902382463217.
Epoch 15, MSE loss: 3.343209221959114.
Epoch 16, MSE loss: 3.3788177371025085.
Epoch 17, MSE loss: 3.0356263052672148.
Epoch 18, MSE loss: 2.9128500390797853.
Epoch 19, MSE loss: 2.8952809665352106.
Epoch 20, MSE loss: 2.713100653141737.
Epoch 21, MSE loss: 2.8898843061178923.
Epoch 22, MSE loss: 2.675487097352743.
Epoch 23, MSE loss: 2.5537211764603853.
Epoch 24, MSE loss: 2.79895406588912.
Epoch 25, MSE loss: 2.742738911882043.
Epoch 26, MSE loss: 2.489

In [14]:
# Evaluate
# Build a fresh network and restore the saved "model.pth" checkpoint into it,
# so the samples come from the checkpoint rather than the in-memory weights.
model = UNet().to(device)
load_model(model, "model.pth")
eval_model(model, noise_scheduler, "test")

  0%|          | 0/1000 [00:00<?, ?it/s]