In [1]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets
from diffusers import StableDiffusionPipeline, DDPMScheduler

# 1. Define Dataset and Transformations
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

data_dir = "modified-mini-GCD"
train_dir = os.path.join(data_dir, "train")
test_dir = os.path.join(data_dir, "test")

train_dataset = datasets.ImageFolder(root=train_dir, transform=transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False)

# 2. Load Pre-trained Diffusion Model
model_id = "CompVis/stable-diffusion-v1-4"
model = StableDiffusionPipeline.from_pretrained(model_id)
scheduler = DDPMScheduler.from_pretrained(model_id, subfolder="scheduler")

# 3. Define the Classifier Head
class ClassifierHead(nn.Module):
    def __init__(self, input_dim, num_classes):
        super(ClassifierHead, self).__init__()
        self.fc = nn.Linear(input_dim, num_classes)

    def forward(self, x):
        return self.fc(x)

# Assuming the diffusion model's output dimension is 512
input_dim = 512
num_classes = len(train_dataset.classes)
classifier_head = ClassifierHead(input_dim, num_classes)

# 4. Define Loss Function and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier_head.parameters(), lr=0.001)

# 5. Fine-tune the Model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
classifier_head.to(device)

num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    classifier_head.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass through the diffusion model
        with torch.no_grad():
            features = model(images, output_type="latent")

        # Forward pass through the classifier head
        outputs = classifier_head(features)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader)}")

# 6. Evaluate the Model
model.eval()
classifier_head.eval()
correct = 0
total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass through the diffusion model
        features = model(images, output_type="latent")

        # Forward pass through the classifier head
        outputs = classifier_head(features)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy: {100 * correct / total}%")


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

AttributeError: 'StableDiffusionPipeline' object has no attribute 'train'