# Diffusion Model Practice Notebook

This notebook provides a professional workflow for learning and practicing diffusion models from scratch. It covers data loading, preprocessing, model definition, training, sampling, and evaluation.

## 1. Import Required Libraries
Import essential libraries such as numpy, pandas, matplotlib, torch, and any diffusion model utilities.

In [None]:
# Import Required Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
# If you have custom diffusion utilities, import them here
# from diffusion_utils import *

## 2. Load and Visualize Dataset
Load the dataset relevant to diffusion model practice and visualize sample data points to understand its structure.

In [None]:
# Load and Visualize Dataset
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# ToTensor yields pixel values in [0, 1]; Normalize with mean 0.5 and std 0.5
# then rescales them to [-1, 1].
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

# Pull the first (index, batch) pair out of the loader for a quick preview.
# `example_data` and `example_targets` are reused by the preprocessing cell.
examples = enumerate(dataloader)
batch_idx, (example_data, example_targets) = next(examples)

# Show the first nine images of that batch, with their labels, in a 3x3 grid.
plt.figure(figsize=(8, 8))
for i in range(9):
    image = example_data[i][0]  # first (only) channel of the i-th image
    label = example_targets[i].item()
    plt.subplot(3, 3, i + 1)
    plt.imshow(image, cmap='gray')
    plt.title(f"Label: {label}")
    plt.axis('off')
plt.tight_layout()
plt.show()

## 3. Preprocess Data for Diffusion Model
Apply necessary preprocessing steps such as normalization, reshaping, and splitting the data for training and testing.

In [None]:
# Preprocess Data for Diffusion Model
from sklearn.model_selection import train_test_split

# Collapse every image of the preview batch into one flat feature row so it can
# be fed to a fully connected model; labels are converted to NumPy alongside.
# NOTE(review): only the single batch held in `example_data` is used here, not
# the full dataset — confirm this is intended.
num_images = len(example_data)
X = example_data.view(num_images, -1).numpy()
y = example_targets.numpy()

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
split = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")

## 4. Define Diffusion Model Architecture
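Implement the model architecture in PyTorch. The network below is a minimal fully connected stand-in for the denoiser; the forward (noising) and reverse (denoising) diffusion processes are not yet implemented in this notebook.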

In [None]:
# Define Diffusion Model Architecture
class SimpleDiffusionModel(nn.Module):
    """Small fully connected network: input_dim -> hidden_dim -> input_dim.

    A single hidden layer with a ReLU activation sits between two linear
    layers, so the output has the same feature width as the input.

    Args:
        input_dim: Number of features in each input row (and in each output row).
        hidden_dim: Width of the hidden layer.
    """

    def __init__(self, input_dim: int, hidden_dim: int) -> None:
        super().__init__()
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run ``x`` through the layer stack and return the result."""
        prediction = self.net(x)
        return prediction

# Build the network: its input/output width is the number of columns in the
# flattened training rows, with a 128-unit hidden layer.
hidden_dim = 128
input_dim = X_train.shape[1]
model = SimpleDiffusionModel(input_dim=input_dim, hidden_dim=hidden_dim)
print(model)

## 5. Train the Diffusion Model
Set up the training loop, define loss functions, and train the diffusion model on the preprocessed dataset.

In [None]:
# Train the Diffusion Model
num_epochs = 10
learning_rate = 1e-3
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# The training data does not change between epochs, so convert it to a tensor
# once here instead of rebuilding it on every iteration.
inputs = torch.tensor(X_train, dtype=torch.float32)
# The target is the input itself: the network is trained to reproduce what it
# is fed, scored with mean squared error.
# NOTE(review): no noise is added to `inputs`, so this is plain reconstruction
# rather than denoising — confirm whether a noising step was intended.
targets = inputs

# Training mode only needs to be set once; nothing in the loop changes it.
model.train()
for epoch in range(num_epochs):
    # One full-batch gradient step per epoch.
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

## 6. Generate Samples Using the Trained Model
Use the trained diffusion model to generate new samples and visualize the results.

In [None]:
# Generate Samples Using the Trained Model
# Put the model in evaluation mode and run it on the held-out rows without
# tracking gradients.
# NOTE: the "generated" images are the model's outputs for X_test inputs; they
# are not drawn from random noise.
model.eval()
with torch.no_grad():
    test_inputs = torch.tensor(X_test, dtype=torch.float32)
    generated = model(test_inputs)

# Plot the first nine outputs, reshaped to 28x28, in a 3x3 grid.
plt.figure(figsize=(8, 8))
for i in range(9):
    sample = generated[i].view(28, 28)
    plt.subplot(3, 3, i + 1)
    plt.imshow(sample, cmap='gray')
    plt.title(f"Generated Sample {i+1}")
    plt.axis('off')
plt.tight_layout()
plt.show()

## 7. Evaluate Model Performance
Assess the performance of the diffusion model using appropriate metrics and visualizations.

In [None]:
# Evaluate Model Performance
from sklearn.metrics import mean_squared_error

# Recompute the model outputs for the held-out rows so this cell can be run on
# its own, with gradients disabled.
model.eval()
with torch.no_grad():
    test_inputs = torch.tensor(X_test, dtype=torch.float32)
    generated = model(test_inputs)

# Mean squared error between the held-out rows and the model's outputs.
mse = mean_squared_error(X_test, generated.numpy())
print(f"Test MSE: {mse:.4f}")

# Two-row comparison: originals on the top row, model outputs directly below.
plt.figure(figsize=(12, 6))
for i in range(5):
    panels = (
        (i + 1, X_test[i].reshape(28, 28), "Original"),
        (i + 6, generated[i].view(28, 28), "Generated"),
    )
    for position, image, caption in panels:
        plt.subplot(2, 5, position)
        plt.imshow(image, cmap='gray')
        plt.title(caption)
        plt.axis('off')
plt.tight_layout()
plt.show()