In [19]:
import inspect
import math
import os
import pdb
import datetime
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Callable, Iterable, List, Optional, Union, Literal
import pandas as pd
import diffusers
import numpy as np
import torch
import torch.nn.functional as F
from diffusers import StableDiffusionInpaintPipeline, StableDiffusionPipeline, DPMSolverMultistepScheduler
from IPython.display import display
from PIL import Image
from torch import Tensor, autocast
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import functional as TVF
from torchvision.utils import make_grid
from tqdm.rich import tqdm, trange
from transformers import AutoTokenizer

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [1]:
from datasets import load_dataset

# Directory handed to `load_dataset` as its cache location
cache_directory = "/n/scratch/users/t/thb286/hf_cache"

# Load the sample dataset from the Hugging Face Hub (or from the cache above)
dataset = load_dataset(
    path="tbuckley/synthetic-derm-10k",
    cache_dir=cache_directory,
)

In [6]:
# Show the "json" metadata of the first training example (label, md5hash, method, ...)
dataset["train"][0]["json"]

{'generation_num': '00',
 'label': 'all',
 'md5hash': '0393479daef8938467fae9bfd1f7b358',
 'method': 'finetune_inpaint',
 'name': 'finetune_inpaint_all_inpaint-outpaint_00_0393479daef8938467fae9bfd1f7b358.png',
 'submethod': 'inpaint-outpaint'}

In [58]:
# Import the Hugging Face image feature under an alias: importing it as `Image`
# would shadow `PIL.Image`, which this notebook also imports under that name.
from datasets import load_dataset, Image as HFImage

# Load the sample dataset
cache_directory = "/n/scratch/users/t/thb286/hf_cache"

dataset = load_dataset("tbuckley/synthetic-derm-10k", cache_dir=cache_directory)

# Ensure the 'png' column is treated as images
dataset = dataset.cast_column('png', HFImage())

# Device and autograd
# Inference mode is entered here and deliberately never exited, so every
# later cell in this kernel session runs without autograd tracking.
ctx = torch.inference_mode()
ctx.__enter__()
device = 'cuda'
dtype = torch.float16

# Set up the experiment
prompt = 'An image of {}, a skin disease'  # `{}` is filled with each example's label
resolution = 512  # used for both the resize of real images and the generated height/width
batch_size = 16
model_type = "text-to-image"  # the model cell accepts 'inpaint' or 'text-to-image'
#pretrained_model_name_or_path = "runwayml/stable-diffusion-inpainting"
pretrained_model_name_or_path = "stabilityai/stable-diffusion-2-1-base"
start_index = 0  # first repeat index; also offsets the random seed
num_generations_per_image = 1  # number of repeats generated per real image
seed = 42
guidance_scale = 3.0
num_inference_steps = 50




In [59]:
# Model: pick the diffusers pipeline class that matches `model_type`
print('Loading model')
pipeline_classes = {
    'inpaint': StableDiffusionInpaintPipeline,
    'text-to-image': StableDiffusionPipeline,
}
if model_type not in pipeline_classes:
    raise ValueError(model_type)

# Both pipeline variants are loaded with the safety checker disabled
pipeline = pipeline_classes[model_type].from_pretrained(
    pretrained_model_name_or_path,
    torch_dtype=dtype,
    safety_checker=None,
    feature_extractor=None,
    requires_safety_checker=False,
)
pipeline.set_progress_bar_config(disable=True)
pipeline.to(device)

unet_param_count = sum(p.numel() for p in pipeline.unet.parameters())
print(f'Loaded pipeline with {unet_param_count:_} unet parameters')


Loading model


model_index.json:   0%|          | 0.00/543 [00:00<?, ?B/s]

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]

text_encoder/config.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/460 [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/346 [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/807 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.36G [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/911 [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/553 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.46G [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loaded pipeline with 865_910_724 unet parameters


In [60]:
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms

class CustomDataset(Dataset):
    """Map-style dataset that turns each wrapped record into a generation sample.

    Every record of ``hf_dataset`` is expected to expose a ``"json"`` mapping
    (with ``"md5hash"`` and ``"label"`` keys) and a ``"png"`` entry.

    Args:
        hf_dataset: Indexable, sized collection of records.
        instance_prompt: Format string; its ``{}`` placeholder receives the label.
        transform: Optional callable applied to the ``"png"`` entry.
    """

    def __init__(self, hf_dataset, instance_prompt, transform=None):
        self.dataset = hf_dataset
        self.transform = transform
        self.instance_prompt = instance_prompt

    def __len__(self):
        """Number of records in the wrapped dataset."""
        return len(self.dataset)

    def __getitem__(self, idx):
        """Return a dict with ``prompt``, ``image_name`` and ``pixel_values`` for ``idx``."""
        record = self.dataset[idx]
        metadata = record["json"]

        # The md5 hash doubles as the identifier of the sample
        name = metadata["md5hash"]
        text = self.instance_prompt.format(metadata["label"])

        pixels = record["png"]
        pixels = self.transform(pixels) if self.transform else pixels

        return {"prompt": text, "image_name": name, "pixel_values": pixels}


In [61]:
# Preprocessing for the real images: resize to a square of side `resolution`,
# convert to a tensor, then normalize with mean 0.5 / std 0.5.
transform = transforms.Compose([
    transforms.Resize(size=(resolution, resolution)),
    transforms.ToTensor(),
    # (x - 0.5) / 0.5 maps ToTensor's [0, 1] range to [-1, 1];
    # the inverse is x * 0.5 + 0.5.
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

In [62]:
from torch.utils.data import DataLoader

# Wrap the training split so each item carries its prompt, name and pixel tensor
custom_dataset = CustomDataset(dataset['train'], instance_prompt=prompt, transform=transform)

# Iterate in dataset order. The generation loop skips a batch only when every
# output file of that batch already exists, so batch composition must be the
# same on every run for an interrupted job to resume where it stopped.
# A fixed order also keeps the pairing between prompts and the seeded
# generator reproducible.
dataloader = DataLoader(custom_dataset, batch_size=batch_size, shuffle=False)


In [63]:
# Randomness: one generator on the target device, seeded from the base seed
# shifted by the first repeat index. `manual_seed` returns the generator itself.
generator = torch.Generator(device=device).manual_seed(seed + start_index)
generator

<torch._C.Generator at 0x7f19b0937b10>

In [64]:
# Root directory under which all generated files are written
output_dir_path = Path("/n/scratch/users/t/thb286/generation_test")

def get_output_paths(batch: dict, stage: str, idx: int) -> list[Path]:
    """Build the output file path of every image in ``batch``.

    Each path has the form ``<output_dir_path>/<stage>/<idx as 2 digits>/<image_name>.png``.

    Args:
        batch: Mapping whose ``'image_name'`` entry is an iterable of names.
        stage: Sub-directory name for this kind of output.
        idx: Repeat index, zero-padded to at least two digits.

    Returns:
        One ``Path`` per image name, in the same order.
    """
    paths = []
    for name in batch['image_name']:
        paths.append(output_dir_path.joinpath(stage, f'{idx:02d}', f'{name}.png'))
    return paths

def save(image, path):
    """Save ``image`` to ``path``, creating missing parent directories first.

    Args:
        image: Any object exposing a ``save(path)`` method (e.g. a PIL image).
        path: Destination as a ``str``, ``pathlib.Path`` or any other
            ``os.PathLike`` object.
    """
    # Always coerce: Path() accepts str, Path and arbitrary os.PathLike values,
    # whereas only converting `str` left other path-like objects without `.parent`.
    path = Path(path)
    path.parent.mkdir(exist_ok=True, parents=True)
    image.save(path)



In [65]:
# text-to-image generation
# One pass over the dataloader per repeat index; each generated image is saved
# under its source image's name, and the first few batches also get a preview grid.
for idx in range(start_index, start_index + num_generations_per_image):
    for batch_idx, batch in enumerate(tqdm(dataloader)):

        # Shared arguments
        gen_kwargs = dict(
            prompt=batch["prompt"],
            guidance_scale=guidance_scale,
            generator=generator,
            num_inference_steps=num_inference_steps,
            height=resolution,
            width=resolution,
        )

        # Text-to-image
        # NOTE(review): no other model_type is handled in this loop; with any
        # other value the loop iterates without generating anything.
        if model_type == 'text-to-image':
            output_paths = get_output_paths(batch, 'text-to-image', idx)
            if all(output_path.is_file() for output_path in output_paths):
                continue  # Images have already been generated, skip this batch

            # Generate images
            images = pipeline(**gen_kwargs).images
            assert len(images) == len(output_paths)
            for image, path in zip(images, output_paths):
                save(image, path)

            # Image grid (only for the first 10 batches of each repeat)
            if batch_idx < 10:
                grid_images = [transforms.ToTensor()(img) for img in images]
                # Undo the mean-0.5 / std-0.5 normalization of the real images
                original_images = [img * 0.5 + 0.5 for img in batch["pixel_values"]]
                # Row 1 holds the generated images, row 2 the originals. The row
                # width is the actual batch length so that a batch shorter than
                # `batch_size` still keeps each original under its generated image.
                grid = make_grid(grid_images + original_images, nrow=len(images), padding=4, pad_value=1.0)
                grid_path = output_dir_path / "grid" / f'{idx:02d}-batch-{batch_idx:02d}.png'
                save(transforms.ToPILImage()(grid), grid_path)

                # NOTE(review): nested under `batch_idx < 10`, so this only
                # fires for batch 0 — confirm that is the intended logging cadence.
                if batch_idx % 1000 == 0:
                    print(f'[Repeat {idx}, batch {batch_idx}] Saved image grid to {grid_path}')


Output()

  for batch_idx, batch in enumerate(tqdm(dataloader)):


KeyboardInterrupt: 

In [68]:
from IPython.display import display
from PIL import Image

# Location of the preview grid for repeat 00, batch 00
image_path = "/n/scratch/users/t/thb286/generation_test/grid/00-batch-00.png"
#display(Image.open(image_path))

In [None]:
# Generate a single batch of synthetic images, and display in this notebook using the Grid format that Luke designed

In [None]:
# Run the following script to generate lots of images

In [None]:
# In this notebook, generate about 100 synthetic images from a dummy dataset (with labels) to demonstrate how it works. There should be streamlined functionality to do this (just choose method, backbone, etc.) 

# generate_synthetic_dataset(
# real_images, # needs to be a metadata dataframe -- containing where to find an image, the label, etc.
# map_real_to_synthetic_label,  (this would be the hash for fitz, or some kind of unique ID which we need to store)
# method = "text-to-image", "inpaint", "outpaint",
# text_label, (for text-to-image, this is the image description "label")
# text_prompt,   # the prompt that will be applied to the label -- this is optional
# num_synthetic_per_real,  (defaults to 10)
# num_total, # option for specifying the total dataset size we want -- we will handle dataset balance
# num_total_type, # "balanced" or "same" -- used together with num_total: either balance the classes or keep the same class proportions as the real dataset
# output_dir,
# model_path = "xxx" can set this if we want to use a custom model
# )

# Then, we can show how to run the script for generating larger amounts of images

In [None]:
# Shell out to generate.py with the same model type, backbone and prompt template as the cells above.
# NOTE(review): expects FITZPATRICK17K_DATASET_DIR to be set in the shell environment, and relies on
# IPython passing `${...}` and `{}` through to the shell unchanged — confirm before running.
! python generate.py --output_root generations-pretrained --instance_data_dir=${FITZPATRICK17K_DATASET_DIR} --model_type "text-to-image" --pretrained_model_name_or_path="stabilityai/stable-diffusion-2-1-base" --instance_prompt="An image of {}, a skin disease" --disease_class=allergic-contact-dermatitis