# 🧪 Train a DDPM from Scratch on the Mango Leaf Dataset (Kaggle Ready)

In [None]:
!pip install diffusers==0.27.2 transformers accelerate -q

In [None]:
import os
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from tqdm import tqdm
from diffusers import UNet2DModel, DDPMScheduler
from accelerate import Accelerator

class MangoLeafDataset(Dataset):
    """Image-only dataset over a single folder of mango-leaf pictures.

    Every file directly inside ``folder`` whose extension is listed in
    ``IMAGE_EXTENSIONS`` (matched case-insensitively) becomes one sample.
    Each sample is returned as an RGB tensor resized to 64x64 and rescaled
    from ``[0, 1]`` to ``[-1, 1]``.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, folder):
        """Index the images in *folder* and build the preprocessing pipeline.

        Args:
            folder: Directory that directly contains the image files;
                sub-directories are not searched.
        """
        self.image_paths = self._list_image_paths(folder)
        self.transform = transforms.Compose([
            transforms.Resize((64, 64)),
            transforms.ToTensor(),
            # Map [0, 1] -> [-1, 1]; one mean/std entry per RGB channel
            # instead of relying on implicit broadcasting of a single value.
            transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
        ])

    @classmethod
    def _list_image_paths(cls, folder):
        """Return the sorted paths of the image files directly inside *folder*."""
        # Lower-casing the name keeps camera-style names such as "IMG_1.JPG";
        # sorting makes the index -> file mapping reproducible, because
        # os.listdir() returns entries in arbitrary order.
        return sorted(
            os.path.join(folder, name)
            for name in os.listdir(folder)
            if name.lower().endswith(cls.IMAGE_EXTENSIONS)
        )

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.image_paths)

    def __getitem__(self, idx):
        """Load image *idx*, convert it to RGB and return the transformed tensor."""
        # Image.open is lazy and keeps the file handle open; the context
        # manager releases it as soon as convert() has produced an in-memory
        # copy, so long runs do not accumulate open file descriptors.
        with Image.open(self.image_paths[idx]) as img:
            image = img.convert("RGB")
        return self.transform(image)

# Data: point the dataset at one class folder (here the Anthracnose images).
dataset_path = "/kaggle/input/mango-dataset/Anthracnose"
dataset = MangoLeafDataset(dataset_path)
dataloader = DataLoader(dataset, shuffle=True, batch_size=32)

# Model: a three-level UNet for 64x64 RGB images, with attention in the
# deepest down block and the matching first up block.
unet_config = dict(
    sample_size=64,
    in_channels=3,
    out_channels=3,
    layers_per_block=2,
    block_out_channels=(64, 128, 128),
    down_block_types=("DownBlock2D", "DownBlock2D", "AttnDownBlock2D"),
    up_block_types=("AttnUpBlock2D", "UpBlock2D", "UpBlock2D"),
)
model = UNet2DModel(**unet_config)

# Noise schedule (1000 diffusion steps) and optimizer.
scheduler = DDPMScheduler(num_train_timesteps=1000)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

# Hand model, optimizer and dataloader to Accelerate so it takes care of
# device placement.
accelerator = Accelerator()
model, optimizer, dataloader = accelerator.prepare(model, optimizer, dataloader)

# Train with the standard DDPM objective: corrupt each clean batch with noise
# at a random timestep and regress the network output onto that noise.
num_epochs = 10
model.train()
for epoch in range(num_epochs):
    progress = tqdm(dataloader, desc=f"Epoch {epoch + 1}/{num_epochs}")
    for batch in progress:
        noise = torch.randn_like(batch)
        # One independent timestep per image; torch.randint already returns
        # int64, so no extra cast is needed.
        timesteps = torch.randint(
            0,
            scheduler.config.num_train_timesteps,
            (batch.shape[0],),
            device=batch.device,
        )
        noisy_images = scheduler.add_noise(batch, noise, timesteps)
        noise_pred = model(noisy_images, timesteps).sample
        loss = torch.nn.functional.mse_loss(noise_pred, noise)
        optimizer.zero_grad()
        accelerator.backward(loss)
        optimizer.step()
        # Show the running loss so a diverging run is visible immediately.
        progress.set_postfix(loss=loss.item())

# Save model
# accelerator.prepare() may wrap the model (e.g. for distributed training), and
# the wrapper has no save_pretrained(); unwrap first and write the checkpoint
# once, from the main process, after every process has finished training.
accelerator.wait_for_everyone()
if accelerator.is_main_process:
    accelerator.unwrap_model(model).save_pretrained("/kaggle/working/AnthracnoseDiffusionModel")

In [None]:
# Generate 10,000 Images
from diffusers import UNet2DModel, DDPMScheduler
from torchvision.utils import save_image
import os

# Imported here as well so this cell also runs on its own (e.g. after a kernel
# restart) instead of depending on the training cell's imports.
import torch

num_images = 10000
# Images are denoised together in batches: sampling one image at a time would
# need num_images * 1000 sequential UNet calls.
sample_batch_size = 64

# Fall back to the CPU when no GPU is attached rather than crashing in .to().
device = "cuda" if torch.cuda.is_available() else "cpu"

model = UNet2DModel.from_pretrained("/kaggle/working/AnthracnoseDiffusionModel").to(device)
scheduler = DDPMScheduler(num_train_timesteps=1000)
# Sample with the full training schedule (timesteps 999 ... 0), stated
# explicitly instead of relying on the scheduler's constructor defaults.
scheduler.set_timesteps(scheduler.config.num_train_timesteps)
model.eval()
os.makedirs("/kaggle/working/AnthracnoseGenerated", exist_ok=True)

# Nothing in sampling needs gradients, so disable autograd for the whole loop.
with torch.no_grad():
    for start in range(0, num_images, sample_batch_size):
        current_batch = min(sample_batch_size, num_images - start)
        # Reverse diffusion starts from pure Gaussian noise.
        x = torch.randn((current_batch, 3, 64, 64), device=device)
        for t in scheduler.timesteps:
            residual = model(x, t).sample
            x = scheduler.step(residual, t, x).prev_sample
        # Invert the training normalisation ([-1, 1] -> [0, 1]). Per-image
        # min-max scaling (normalize=True) would stretch contrast and shift
        # colours differently for every sample.
        images = (x / 2 + 0.5).clamp(0, 1)
        # Write one file per image, keeping the original image_00000-style
        # numbering; passing the whole batch to save_image would produce a grid.
        for offset, image in enumerate(images):
            i = start + offset
            save_image(image, f"/kaggle/working/AnthracnoseGenerated/image_{i:05}.png")