In [1]:
import os
import sys

# 1. Define your Repo details
REPO_URL = "https://github.com/msamir-dls/DIFFUSION-COOKBOOK.git"
REPO_NAME = "DIFFUSION-COOKBOOK"

# 2. Clone or Update the repo
if not os.path.exists(REPO_NAME):
    print(f"[*] Cloning {REPO_NAME}...")
    !git clone {REPO_URL}
    %cd {REPO_NAME}
else:
    print(f"[*] {REPO_NAME} already exists. Updating...")
    %cd {REPO_NAME}
    !git pull

# 3. Fix the Python Path
# This is crucial so that the scripts can find the 'src' module
sys.path.append(os.getcwd())

# 4. Install requirements
!pip install -r requirements.txt

[*] Cloning DIFFUSION-COOKBOOK...
Cloning into 'DIFFUSION-COOKBOOK'...
remote: Enumerating objects: 26, done.[K
remote: Counting objects: 100% (26/26), done.[K
remote: Compressing objects: 100% (23/23), done.[K
remote: Total 26 (delta 2), reused 26 (delta 2), pack-reused 0 (from 0)[K
Receiving objects: 100% (26/26), 16.01 KiB | 315.00 KiB/s, done.
Resolving deltas: 100% (2/2), done.
/content/DIFFUSION-COOKBOOK
Collecting mlflow>=2.0.0 (from -r requirements.txt (line 6))
  Downloading mlflow-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-skinny==3.8.1 (from mlflow>=2.0.0->-r requirements.txt (line 6))
  Downloading mlflow_skinny-3.8.1-py3-none-any.whl.metadata (31 kB)
Collecting mlflow-tracing==3.8.1 (from mlflow>=2.0.0->-r requirements.txt (line 6))
  Downloading mlflow_tracing-3.8.1-py3-none-any.whl.metadata (19 kB)
Collecting Flask-CORS<7 (from mlflow>=2.0.0->-r requirements.txt (line 6))
  Downloading flask_cors-6.0.2-py3-none-any.whl.metadata (5.3 kB)
Collecting dock

In [2]:
import os

import torch

# Report whether this runtime exposes a CUDA device, and which one.
gpu_available = torch.cuda.is_available()
if not gpu_available:
    print("WARNING: No GPU found. Go to Runtime > Change runtime type and select GPU.")
else:
    print(f"Success! Connected to GPU: {torch.cuda.get_device_name(0)}")

# Make sure the working folders exist; existing folders are left untouched.
for folder in ("checkpoints", "data", "outputs"):
    os.makedirs(folder, exist_ok=True)

Success! Connected to GPU: Tesla T4


In [3]:
import os
import threading
import time
from google.colab import output

# 1. Kill any existing MLflow processes to fix "Address already in use"
!pkill -f mlflow || echo "No existing mlflow process found."

# 2. Start MLflow with security flags for Colab
def run_mlflow():
    # --host 0.0.0.0 allows external connections
    # --allowed-hosts "*" bypasses the "Invalid Host header" security check
    # --x-frame-options NONE allows the UI to be embedded in the Colab iframe
    !mlflow ui --port 5000 --host 0.0.0.0 --allowed-hosts "*" --x-frame-options NONE

# Start server in background thread
threading.Thread(target=run_mlflow, daemon=True).start()

# Wait for server to boot
time.sleep(5)

print("Opening MLflow Dashboard...")

# 3. Try to serve as an iframe (best for inline viewing)
try:
    output.serve_kernel_port_as_iframe(5000)
except:
    print("Iframe failed, providing window link instead.")

# 4. Provide a clickable window link as a backup
output.serve_kernel_port_as_window(5000)

^C
Opening MLflow Dashboard...


<IPython.core.display.Javascript object>

Try `serve_kernel_port_as_iframe` instead. [0m


<IPython.core.display.Javascript object>

In [6]:
%%writefile configs/stable_diffusion.yaml
# Configuration file written to configs/stable_diffusion.yaml by this notebook cell.
# NOTE(review): presumably read by train_sd.py; confirm against the script.
project_name: "mnist-diffusion-comparison"
run_name: "stable-diffusion-latent"

# Input dataset settings.
dataset:
  name: "MNIST"
  root: "./data"
  img_size: 32
  channels: 1

# VAE settings.
# NOTE(review): pretrained_path points into ./checkpoints; confirm the file exists before training.
vae:
  latent_dim: 4
  scaling_factor: 0.18215
  pretrained_path: "./checkpoints/sd_vae_mnist.pth"

# Denoising model settings (assumed to describe a U-Net over VAE latents; TODO confirm).
model:
  type: "latent_unet"
  base_channels: 64
  channel_mult: [1, 2]
  num_res_blocks: 2

# Noise schedule settings.
diffusion:
  timesteps: 1000
  beta_start: 0.00085
  beta_end: 0.0120
  schedule: "scaled_linear"

# Training loop settings.
# NOTE(review): device "cuda" presumably requires a GPU runtime; verify how the script handles CPU-only sessions.
train:
  batch_size: 256
  lr: 0.0001
  epochs: 15
  device: "cuda"

# MLflow settings (assumed to name the experiment that runs are logged under; confirm).
mlflow:
  experiment_name: "MNIST_Diffusion"

Overwriting configs/stable_diffusion.yaml


In [7]:
print("--- Training Stable Diffusion (Latent Space) ---")
!python train_sd.py

--- Training Stable Diffusion (Latent Space) ---
Encoding images into latent space...
Traceback (most recent call last):
  File "/content/DIFFUSION-COOKBOOK/train_sd.py", line 77, in <module>
    main()
  File "/content/DIFFUSION-COOKBOOK/train_sd.py", line 47, in main
    latent_ds = LatentMNISTDataset(vae, pixel_loader, device)
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/content/DIFFUSION-COOKBOOK/src/dataset.py", line 50, in __init__
    latent = vae.reparameterize(moments) 
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: VAE.reparameterize() missing 1 required positional argument: 'logvar'


In [4]:
# Run run_all.py through the notebook shell; its output streams into this cell.
# Per the original author's note, run_all.py handles the full sequence.
# NOTE(review): the recorded output was truncated by the notebook to its last 5000
# lines of progress-bar updates, and this cell's execution count (In [4]) is out of
# order with the cells placed before it; re-run the notebook top to bottom.
!python run_all.py

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 16 | Loss: 0.0191:   7%|▋         | 35/469 [00:06<01:20,  5.36it/s]
Epoch 16 | Loss: 0.0191:   8%|▊         | 36/469 [00:06<01:20,  5.36it/s]
Epoch 16 | Loss: 0.0195:   8%|▊         | 36/469 [00:07<01:20,  5.36it/s]
Epoch 16 | Loss: 0.0195:   8%|▊         | 37/469 [00:07<01:20,  5.40it/s]
Epoch 16 | Loss: 0.0205:   8%|▊         | 37/469 [00:07<01:20,  5.40it/s]
Epoch 16 | Loss: 0.0205:   8%|▊         | 38/469 [00:07<01:19,  5.40it/s]
Epoch 16 | Loss: 0.0171:   8%|▊         | 38/469 [00:07<01:19,  5.40it/s]
Epoch 16 | Loss: 0.0171:   8%|▊         | 39/469 [00:07<01:19,  5.42it/s]
Epoch 16 | Loss: 0.0203:   8%|▊         | 39/469 [00:07<01:19,  5.42it/s]
Epoch 16 | Loss: 0.0203:   9%|▊         | 40/469 [00:07<01:19,  5.42it/s]
[32mINFO[0m:     127.0.0.1:50570 - "[1mPOST /api/2.0/mlflow/runs/log-metric HTTP/1.1[0m" [32m200 OK[0m
Epoch 16 | Loss: 0.0179:   9%|▊         | 40/469 [00:07<01:19,  5.42it/s]
Epoch 16 | L

In [5]:
import os

import matplotlib.pyplot as plt
from PIL import Image

# File this cell looks for in the current working directory.
COMPARISON_IMAGE_PATH = "i2i_comparison.png"

# Display the comparison grid
if os.path.exists(COMPARISON_IMAGE_PATH):
    # Open the image in a context manager so the file handle is released
    # deterministically; imshow copies the pixel data while the file is open.
    with Image.open(COMPARISON_IMAGE_PATH) as img:
        plt.figure(figsize=(20, 10))
        plt.imshow(img)
    plt.title("I2I Benchmark: Source | DDPM | DDIM | Stable Diffusion", fontsize=16)
    plt.axis('off')
    plt.show()
else:
    print("Comparison image not found. Ensure the benchmark script ran successfully.")

Comparison image not found. Ensure the benchmark script ran successfully.
