Skip to content

Commit

Permalink
Added img2img
Browse files Browse the repository at this point in the history
  • Loading branch information
lucataco committed Nov 7, 2023
1 parent 1dd17f4 commit 418193e
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
__pycache__
.cog
model-cache
safety-cache
sdxl-cache
15 changes: 12 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,19 @@ First, download the pre-trained weights:

Then, you can run predictions:

cog predict -i prompt="with smoke, half ice and half fire and ultra realistic in detail.wolf, typography, dark fantasy, wildlife photography, vibrant, cinematic and on a black background" -i seed=36446545871
cog predict -i prompt="with smoke, half ice and half fire and ultra realistic in detail.wolf, typography, dark fantasy, wildlife photography, vibrant, cinematic and on a black background" -i seed=36446545872

## Example:
Or img2img:

"with smoke, half ice and half fire and ultra realistic in detail.wolf, typography, dark fantasy, wildlife photography, vibrant, cinematic and on a black background"
cog predict -i image=@output.0.png -i prompt="a wolf with pink and blue fur" -i seed=21272 -i disable_safety_checker=True


## Examples:

txt2img

![txt2img example output](output.0.png)

img2img

![img2img example output](output.img2img.png)
12 changes: 7 additions & 5 deletions cog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ build:
python_version: "3.11"
python_packages:
- "torch==2.0.1"
- "torchvision"
- "transformers"
- "accelerate"
- "safetensors"
- "git+https://github.com/huggingface/diffusers"
- "torchvision==0.15.2"
- "transformers==4.31.0"
- "diffusers==0.22.0"
- "accelerate==0.21.0"

run:
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
20 changes: 20 additions & 0 deletions feature-extractor/preprocessor_config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"crop_size": 224,
"do_center_crop": true,
"do_convert_rgb": true,
"do_normalize": true,
"do_resize": true,
"feature_extractor_type": "CLIPFeatureExtractor",
"image_mean": [
0.48145466,
0.4578275,
0.40821073
],
"image_std": [
0.26862954,
0.26130258,
0.27577711
],
"resample": 3,
"size": 224
}
Binary file modified output.0.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added output.img2img.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
179 changes: 167 additions & 12 deletions predict.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,34 @@

from cog import BasePredictor, Input, Path
import os
import time
import torch
import shutil
import subprocess
import numpy as np
from typing import List
from diffusers.utils import load_image
from transformers import CLIPImageProcessor
from diffusers import (
StableDiffusionXLPipeline,
DiffusionPipeline,
StableDiffusionXLImg2ImgPipeline,
StableDiffusionXLInpaintPipeline,
DDIMScheduler,
DPMSolverMultistepScheduler,
EulerAncestralDiscreteScheduler,
EulerDiscreteScheduler,
HeunDiscreteScheduler,
PNDMScheduler
)
from typing import List
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
)

MODEL_NAME = "segmind/SSD-1B"
MODEL_CACHE = "model-cache"
MODEL_CACHE = "./sdxl-cache"
SAFETY_CACHE = "./safety-cache"
FEATURE_EXTRACTOR = "./feature-extractor"
SAFETY_URL = "https://weights.replicate.delivery/default/sdxl/safety-1.0.tar"

class KarrasDPM:
def from_config(config):
Expand All @@ -31,15 +45,72 @@ def from_config(config):
"PNDM": PNDMScheduler,
}

def download_weights(url, dest):
    """Download weights from *url* to *dest* by invoking the ``pget`` CLI.

    Prints the source, the destination and the elapsed wall-clock time.

    Args:
        url: Location of the weights to fetch.
        dest: Destination path passed to ``pget``.

    Raises:
        subprocess.CalledProcessError: If ``pget`` exits with a non-zero status.
    """
    began = time.time()
    print("downloading url: ", url)
    print("downloading to: ", dest)
    # NOTE(review): "-x" presumably makes pget extract the downloaded archive
    # (SAFETY_URL is a .tar) — confirm against the pget version in cog.yaml.
    pget_command = ["pget", "-x", url, dest]
    subprocess.check_call(pget_command, close_fds=False)
    elapsed = time.time() - began
    print("downloading took: ", elapsed)

class Predictor(BasePredictor):
def setup(self) -> None:
    """Load the model into memory to make running multiple predictions efficient.

    Loads the safety checker (downloading it first when SAFETY_CACHE is
    missing), the CLIP feature extractor, and the txt2img pipeline from
    MODEL_CACHE. The img2img and inpaint pipelines are then assembled from
    the txt2img pipeline's own components, so all three reference the same
    vae / text encoders / tokenizers / unet / scheduler objects instead of
    loading the weights again.
    """
    start = time.time()
    # Flags read by predict(); nothing in setup switches them on.
    self.tuned_model = False
    self.is_lora = False

    print("Loading safety checker...")
    if not os.path.exists(SAFETY_CACHE):
        download_weights(SAFETY_URL, SAFETY_CACHE)
    self.safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        SAFETY_CACHE, torch_dtype=torch.float16
    ).to("cuda")
    self.feature_extractor = CLIPImageProcessor.from_pretrained(FEATURE_EXTRACTOR)

    # Base text-to-image pipeline; the other two pipelines reuse its parts.
    self.txt2img_pipe = DiffusionPipeline.from_pretrained(
        MODEL_CACHE,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    ).to("cuda")

    print("Loading SDXL img2img pipeline...")
    self.img2img_pipe = StableDiffusionXLImg2ImgPipeline(
        vae=self.txt2img_pipe.vae,
        text_encoder=self.txt2img_pipe.text_encoder,
        text_encoder_2=self.txt2img_pipe.text_encoder_2,
        tokenizer=self.txt2img_pipe.tokenizer,
        tokenizer_2=self.txt2img_pipe.tokenizer_2,
        unet=self.txt2img_pipe.unet,
        scheduler=self.txt2img_pipe.scheduler,
    )
    self.img2img_pipe.to("cuda")

    print("Loading SDXL inpaint pipeline...")
    self.inpaint_pipe = StableDiffusionXLInpaintPipeline(
        vae=self.txt2img_pipe.vae,
        text_encoder=self.txt2img_pipe.text_encoder,
        text_encoder_2=self.txt2img_pipe.text_encoder_2,
        tokenizer=self.txt2img_pipe.tokenizer,
        tokenizer_2=self.txt2img_pipe.tokenizer_2,
        unet=self.txt2img_pipe.unet,
        scheduler=self.txt2img_pipe.scheduler,
    )
    self.inpaint_pipe.to("cuda")
    print("setup took: ", time.time() - start)

def load_image(self, path):
    """Copy *path* to a fixed temp location and return it converted to RGB.

    Args:
        path: Path of the input image file.

    Returns:
        The result of the module-level ``load_image`` helper applied to the
        staged copy, converted with ``.convert("RGB")``.
    """
    # The staging path is fixed, so each call overwrites the previous copy.
    staged_path = "/tmp/image.png"
    shutil.copyfile(path, staged_path)
    # Inside the method body, the bare name resolves to the imported
    # module-level load_image helper, not to this method.
    loaded = load_image(staged_path)
    return loaded.convert("RGB")

def run_safety_checker(self, image):
    """Run the safety checker over a batch of generated images.

    Args:
        image: Iterable of images; each element is converted with
            ``np.array`` before being passed to the checker.

    Returns:
        A ``(images, has_nsfw_concept)`` tuple exactly as returned by
        ``self.safety_checker``.
    """
    # Features for the checker's CLIP input, moved to the GPU and cast to
    # float16 (the dtype the checker is loaded with in setup).
    clip_features = self.feature_extractor(image, return_tensors="pt").to("cuda")
    pixel_values = clip_features.pixel_values.to(torch.float16)

    as_arrays = [np.array(item) for item in image]
    checked, has_nsfw_concept = self.safety_checker(
        images=as_arrays,
        clip_input=pixel_values,
    )
    return checked, has_nsfw_concept

@torch.inference_mode()
def predict(
Expand All @@ -52,6 +123,14 @@ def predict(
description="Negative Input prompt",
default="scary, cartoon, painting"
),
image: Path = Input(
description="Input image for img2img or inpaint mode",
default=None,
),
mask: Path = Input(
description="Input mask for inpaint mode. Black areas will be preserved, white areas will be inpainted.",
default=None,
),
width: int = Input(
description="Width of output image",
default=768
Expand All @@ -77,17 +156,78 @@ def predict(
guidance_scale: float = Input(
description="Scale for classifier-free guidance", ge=1, le=50, default=7.5
),
prompt_strength: float = Input(
description="Prompt strength when using img2img / inpaint. 1.0 corresponds to full destruction of information in image",
ge=0.0,
le=1.0,
default=0.8,
),
seed: int = Input(
description="Random seed. Leave blank to randomize the seed", default=None
),
apply_watermark: bool = Input(
description="Applies a watermark to enable determining if an image is generated in downstream applications. If you have other provisions for generating or deploying images safely, you can use this to disable watermarking.",
default=True,
),
lora_scale: float = Input(
description="LoRA additive scale. Only applicable on trained models.",
ge=0.0,
le=1.0,
default=0.6,
),
replicate_weights: str = Input(
description="Replicate LoRA weights to use. Leave blank to use the default weights.",
default=None,
),
disable_safety_checker: bool = Input(
description="Disable safety checker for generated images. This feature is only available through the API. See https://replicate.com/docs/how-does-replicate-work#safety",
default=False
)
) -> List[Path]:
"""Run a single prediction on the model"""
if seed is None:
seed = int.from_bytes(os.urandom(2), "big")
print(f"Using seed: {seed}")
generator = torch.Generator("cuda").manual_seed(seed)

self.pipe.scheduler = SCHEDULERS[scheduler].from_config(self.pipe.scheduler.config)
if replicate_weights:
self.load_trained_weights(replicate_weights, self.txt2img_pipe)

# OOMs can leave vae in bad state
if self.txt2img_pipe.vae.dtype == torch.float32:
self.txt2img_pipe.vae.to(dtype=torch.float16)

sdxl_kwargs = {}
if self.tuned_model:
# consistency with fine-tuning API
for k, v in self.token_map.items():
prompt = prompt.replace(k, v)
print(f"Prompt: {prompt}")
if image and mask:
print("inpainting mode")
sdxl_kwargs["image"] = self.load_image(image)
sdxl_kwargs["mask_image"] = self.load_image(mask)
sdxl_kwargs["strength"] = prompt_strength
sdxl_kwargs["width"] = width
sdxl_kwargs["height"] = height
pipe = self.inpaint_pipe
elif image:
print("img2img mode")
sdxl_kwargs["image"] = self.load_image(image)
sdxl_kwargs["strength"] = prompt_strength
pipe = self.img2img_pipe
else:
print("txt2img mode")
sdxl_kwargs["width"] = width
sdxl_kwargs["height"] = height
pipe = self.txt2img_pipe

# toggles watermark for this prediction
if not apply_watermark:
watermark_cache = pipe.watermark
pipe.watermark = None

pipe.scheduler = SCHEDULERS[scheduler].from_config(pipe.scheduler.config)
generator = torch.Generator("cuda").manual_seed(seed)

common_args = {
"prompt": [prompt] * num_outputs,
Expand All @@ -97,16 +237,31 @@ def predict(
"num_inference_steps": num_inference_steps,
}

sdxl_kwargs = {}
sdxl_kwargs["width"] = width
sdxl_kwargs["height"] = height
if self.is_lora:
sdxl_kwargs["cross_attention_kwargs"] = {"scale": lora_scale}

output = self.pipe(**common_args, **sdxl_kwargs)
output = pipe(**common_args, **sdxl_kwargs)

if not apply_watermark:
pipe.watermark = watermark_cache
self.refiner.watermark = watermark_cache

if not disable_safety_checker:
_, has_nsfw_content = self.run_safety_checker(output.images)

output_paths = []
for i, _ in enumerate(output.images):
for i, image in enumerate(output.images):
if not disable_safety_checker:
if has_nsfw_content[i]:
print(f"NSFW content detected in image {i}")
continue
output_path = f"/tmp/out-{i}.png"
output.images[i].save(output_path)
image.save(output_path)
output_paths.append(Path(output_path))

if len(output_paths) == 0:
raise Exception(
f"NSFW content detected. Try running it again, or try a different prompt."
)

return output_paths
15 changes: 13 additions & 2 deletions script/download-weights
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ import os
import sys
import torch
from diffusers import StableDiffusionXLPipeline
from diffusers.pipelines.stable_diffusion.safety_checker import (
StableDiffusionSafetyChecker,
)

# append project directory to path so predict.py can be imported
sys.path.append('.')
Expand All @@ -14,11 +17,19 @@ from predict import MODEL_NAME, MODEL_CACHE
# Make sure the local model cache directory exists before saving into it.
os.makedirs(MODEL_CACHE, exist_ok=True)

# SSD-1B base pipeline: fetch the fp16 safetensors variant and store it once
# in the local cache.
pipe = StableDiffusionXLPipeline.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    use_safetensors=True,
    variant="fp16",
)
pipe.save_pretrained(MODEL_CACHE, safe_serialization=True)

# Safety checker: cached under the same directory that predict.py names in
# its SAFETY_CACHE constant.
safety = StableDiffusionSafetyChecker.from_pretrained(
    "CompVis/stable-diffusion-safety-checker",
    torch_dtype=torch.float16,
)
safety.save_pretrained("./safety-cache")

0 comments on commit 418193e

Please sign in to comment.