Add support for the timbrooks/instruct-pix2pix model

livepeer · Apr 11, 2024 · 77c1c14
1 parent 74e31fc
commit 77c1c14
Show file tree

Hide file tree

Showing 2 changed files with 10 additions and 0 deletions.
diff --git a/runner/app/pipelines/image_to_image.py b/runner/app/pipelines/image_to_image.py
@@ -3,6 +3,7 @@
 
 from diffusers import (
     AutoPipelineForImage2Image,
+    StableDiffusionInstructPix2PixPipeline,
     StableDiffusionXLPipeline,
     UNet2DConditionModel,
     EulerDiscreteScheduler,
@@ -22,6 +23,7 @@
 logger = logging.getLogger(__name__)
 
 SDXL_LIGHTNING_MODEL_ID = "ByteDance/SDXL-Lightning"
+PIX2PIX_MODEL_ID = "timbrooks/instruct-pix2pix"
 
 
 class ImageToImagePipeline(Pipeline):
@@ -87,6 +89,10 @@ def __init__(self, model_id: str):
             self.ldm.scheduler = EulerDiscreteScheduler.from_config(
                 self.ldm.scheduler.config, timestep_spacing="trailing"
             )
+        elif PIX2PIX_MODEL_ID in model_id:
+            self.ldm = StableDiffusionInstructPix2PixPipeline.from_pretrained(
+                model_id, **kwargs
+            ).to(torch_device)
         else:
             self.ldm = AutoPipelineForImage2Image.from_pretrained(
                 model_id, **kwargs

diff --git a/runner/dl_checkpoints.sh b/runner/dl_checkpoints.sh
@@ -60,6 +60,7 @@ if [ "$MODE" = "alpha" ]; then
 
     # Download text-to-image and image-to-image models.
     huggingface-cli download ByteDance/SDXL-Lightning --include "*unet.safetensors" --exclude "*lora.safetensors*" --cache-dir models
+    huggingface-cli download timbrooks/instruct-pix2pix --include "*fp16.safetensors" --exclude "*lora.safetensors*" --cache-dir models
 
     # Download image-to-video models (token-gated).
     printf "\nDownloading token-gated models...\n"
@@ -80,6 +81,9 @@ else
     # Download image-to-video models.
     huggingface-cli download stabilityai/stable-video-diffusion-img2vid-xt --include "*.fp16.safetensors" "*.json" --cache-dir models
 
+    # Download text-to-video models.
+    huggingface-cli download ali-vilab/text-to-video-ms-1.7b --include "*.fp16.safetensors" "*.json" --cache-dir models
+
     # Download image-to-video models (token-gated).
     printf "\nDownloading token-gated models...\n"
     check_hf_auth