## Stable Diffusion

In [1]:
# Upgrade pip itself, then botocore, before installing the SageMaker tooling.
# NOTE(review): the recorded output of this cell reports that awscli 1.27.70 pins
# botocore==1.29.70, so upgrading botocore on its own leaves the two out of sync.
! python -m pip install --upgrade pip
! pip install botocore --upgrade --quiet

Collecting pip
  Using cached pip-23.0.1-py3-none-any.whl (2.1 MB)
Installing collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.0
    Uninstalling pip-23.0:
      Successfully uninstalled pip-23.0
Successfully installed pip-23.0.1
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
awscli 1.27.70 requires botocore==1.29.70, but you have botocore 1.29.86 which is incompatible.[0m[31m
[0m

In [3]:
# Pin the SageMaker SDK and huggingface_hub versions used by the cells below.
# NOTE(review): the recorded output reports a conflict with transformers 4.26.1,
# which requires huggingface-hub>=0.11.0 -- confirm the 0.10.1 pin is still wanted.
!pip install "sagemaker==2.116.0" "huggingface_hub==0.10.1" --upgrade --quiet

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
transformers 4.26.1 requires huggingface-hub<1.0,>=0.11.0, but you have huggingface-hub 0.10.1 which is incompatible.[0m[31m
[0m

In [4]:
import sagemaker
import boto3
# Temporary session, used only to look up the default bucket name below.
sess = sagemaker.Session()
# sagemaker session bucket -> used for uploading data, models and logs
# sagemaker will automatically create this bucket if it does not exist
sagemaker_session_bucket=None
if sagemaker_session_bucket is None and sess is not None:
    # fall back to the session's default bucket when no bucket name is given
    sagemaker_session_bucket = sess.default_bucket()

# Resolve the execution role. When get_execution_role() raises ValueError
# (presumably when running outside SageMaker -- confirm), look up the ARN of an
# IAM role named 'sagemaker_execution_role' instead.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    iam = boto3.client('iam')
    role = iam.get_role(RoleName='sagemaker_execution_role')['Role']['Arn']

# Re-create the session bound to the chosen bucket; later cells use this `sess`.
sess = sagemaker.Session(default_bucket=sagemaker_session_bucket)

# Echo the resolved configuration so the reader can verify role, bucket and region.
print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {sess.boto_region_name}")

sagemaker role arn: arn:aws:iam::687912291502:role/service-role/AmazonSageMaker-ExecutionRole-20220807T143615
sagemaker bucket: sagemaker-ap-southeast-1-687912291502
sagemaker session region: ap-southeast-1


### Prepare BYOS (bring your own script) code

In [6]:
!mkdir code

In [7]:
%%writefile code/requirements.txt
# Extra Python packages shipped next to code/inference.py (presumably installed by
# the serving container at start-up -- confirm).
# NOTE(review): code/inference.py in this notebook imports only torch, diffusers and
# deepspeed directly; confirm that `clip` on PyPI is the package actually intended.
sentencepiece==0.1.97
accelerate==0.14.0
diffusers==0.9.0
transformers==4.24.0
huggingface-hub==0.11.1
tokenizers==0.12.1
ftfy==6.1.1
deepspeed==0.7.4
deepspeed-mii==0.0.3
triton==2.0.0.dev20221005
clip==0.2.0

Writing code/requirements.txt


In [8]:
%%writefile code/inference.py
import base64
import torch
from io import BytesIO
from diffusers import StableDiffusionPipeline,DiffusionPipeline
import deepspeed


def model_fn(model_dir):
    """Load the Stable Diffusion pipeline from ``model_dir`` and move it to the GPU.

    DeepSpeed-Inference is applied on a best-effort basis: when
    ``deepspeed.init_inference`` raises, the exception is printed and the plain
    fp16 pipeline is served instead of failing the whole model load.

    Args:
        model_dir: Directory containing the diffusers-format model files.

    Returns:
        The object ``predict_fn`` calls to generate images: the DeepSpeed engine
        when initialisation succeeds, otherwise the plain pipeline.
    """
    # Load stable diffusion in half precision
    pipe = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16, revision="fp16")

    # DeepSpeed acceleration is optional. The local run recorded in this notebook
    # fails here with "'StableDiffusionPipeline' object has no attribute 'children'",
    # so a failure must not prevent the endpoint from starting.
    try:
        engine = deepspeed.init_inference(
            model=getattr(pipe, "model", pipe),  # Transformers models
            mp_size=1,  # Number of GPU
            dtype=torch.float16,  # dtype of the weights (fp16)
            replace_method="auto",  # Lets DS automatically identify the layer to replace
            replace_with_kernel_inject=False,  # replace the model with the kernel injector
        )
    except Exception as exc:
        print("DeepSpeed Inference Engine could not be initialized, serving the plain pipeline instead:")
        print(exc)
    else:
        pipe = engine
        print("!!!!DeepSpeed Inference Engine initialized!!!!!!!!")

    # Move whichever object is served to the GPU and wait for the transfer to finish
    pipe = pipe.to("cuda")
    torch.cuda.synchronize("cuda")
    return pipe


def predict_fn(data, pipe):
    """Generate images for one request and return them base64-encoded.

    Args:
        data: Request payload (dict). ``"inputs"`` holds the prompt; when it is
            missing the payload itself is passed on as the prompt. Optional keys
            and their defaults: ``num_inference_steps`` (50),
            ``guidance_scale`` (7.5), ``num_images_per_prompt`` (4),
            ``width`` (512) and ``height`` (512). The recognised keys are
            popped from ``data``.
        pipe: Callable returned by ``model_fn``; it is called with the prompt
            and the generation parameters and must return a mapping with an
            ``"images"`` entry whose items offer ``save(fileobj, format=...)``.

    Returns:
        ``{"generated_images": [...]}`` with one base64 string (JPEG bytes) per
        generated image.
    """
    # get prompt & parameters
    prompt = data.pop("inputs", data)
    print(prompt)
    # set valid HP for stable diffusion
    num_inference_steps = data.pop("num_inference_steps", 50)
    guidance_scale = data.pop("guidance_scale", 7.5)
    num_images_per_prompt = data.pop("num_images_per_prompt", 4)
    width = data.pop("width", 512)
    height = data.pop("height", 512)

    # run generation with parameters; every value parsed above is forwarded so
    # that client-supplied steps / guidance are honoured rather than dropped
    generated_images = pipe(
        prompt,
        num_inference_steps=num_inference_steps,
        guidance_scale=guidance_scale,
        height=height,
        width=width,
        num_images_per_prompt=num_images_per_prompt,
    )["images"]

    # serialise each image to JPEG in memory and base64-encode the bytes
    encoded_images = []
    for image in generated_images:
        buffered = BytesIO()
        image.save(buffered, format="JPEG")
        encoded_images.append(base64.b64encode(buffered.getvalue()).decode())

    # create response
    return {"generated_images": encoded_images}


Writing code/inference.py


### Prepare pre-trained SD model

In [None]:
from distutils.dir_util import copy_tree
from pathlib import Path
from huggingface_hub import snapshot_download
import random

import os

HF_MODEL_ID="CompVis/stable-diffusion-v1-4"
# Read the Hugging Face token from the HF_TOKEN environment variable so the secret
# never has to be typed into (and saved with) the notebook.
# your hf token: https://huggingface.co/settings/tokens
HF_TOKEN = os.environ.get("HF_TOKEN", "")
assert len(HF_TOKEN) > 0, "Please set HF_TOKEN to your huggingface token. You can find it here: https://huggingface.co/settings/tokens"

# download the fp16 revision of the model repository and get its local path
snapshot_dir = snapshot_download(repo_id=HF_MODEL_ID,revision="fp16",use_auth_token=HF_TOKEN)

# create a working directory for the model with a random numeric suffix
model_tar = Path(f"model-{random.getrandbits(16)}")
model_tar.mkdir(exist_ok=True)

# copy snapshot to model dir
copy_tree(snapshot_dir, str(model_tar))

In [None]:
# Place the contents of the local code/ directory under <model dir>/code.
copy_tree("code/", str(model_tar / "code"))

In [None]:
import tarfile
import os

# helper to create the model.tar.gz
def compress(tar_dir=None,output_file="model.tar.gz"):
    """Pack the contents of ``tar_dir`` into a gzip-compressed tar archive.

    Each top-level entry of ``tar_dir`` is added (recursively) under its own
    name, so the archive has no leading directory component. The current
    working directory is never changed, so a failure part-way through cannot
    leave the notebook kernel in a different directory.

    Args:
        tar_dir: Directory whose contents are archived.
        output_file: Archive path; a relative path is resolved against the
            current working directory.

    Raises:
        TypeError: If ``tar_dir`` is None.
    """
    if tar_dir is None:
        raise TypeError("tar_dir must be a directory path, not None")

    # List the directory before the archive is created so that a bad tar_dir
    # fails early and does not leave an empty archive behind.
    entries = os.listdir(tar_dir)
    archive_path = os.path.join(os.getcwd(), output_file)
    with tarfile.open(archive_path, "w:gz") as tar:
        for item in entries:
            print(item)
            # arcname drops the tar_dir prefix from the stored member name
            tar.add(os.path.join(tar_dir, item), arcname=item)

# Archive the prepared model directory with the helper's default output name (model.tar.gz).
compress(str(model_tar))

In [None]:
from sagemaker.s3 import S3Uploader

# Send the packaged model to the session's default bucket under the
# "stable-diffusion-v1-4" prefix and keep the returned S3 URI for later cells.
s3_model_uri = S3Uploader.upload(
    local_path="model.tar.gz",
    desired_s3_uri=f"s3://{sess.default_bucket()}/stable-diffusion-v1-4",
)

print(f"model uploaded to: {s3_model_uri}")

In [None]:
# Scratch commands kept for reference (all disabled): listing/copying a checkpoint
# from S3, pinning diffusers, downloading a safetensors model, and converting
# either checkpoint format to the diffusers layout.
#!aws s3 ls s3://sagemaker-ap-southeast-1-687912291502/stable-diffusion/models/768-v-ema.ckpt
#! aws s3 cp s3://sagemaker-ap-southeast-1-687912291502/stable-diffusion/models/768-v-ema.yaml ./models_ckpt/
#!pip install diffusers==0.14.0
#!cd /root/dreambooth/models_safetensor/ && wget https://huggingface.co/Lykon/DreamShaper/resolve/main/DreamShaper_3.3_baked_vae.safetensors
#!python convert_original_stable_diffusion_to_diffusers.py  --checkpoint_path ./models_safetensor/DreamShaper_3.3_baked_vae.safetensors  --from_safetensor --dump_path ./models_diffuser
#!python convert_original_stable_diffusion_to_diffusers.py  --checkpoint_path ./models_ckpt/768-v-ema.ckpt   --dump_path ./models_diffuser
# Install the libraries into the notebook environment (presumably for the local
# pipeline test that follows -- confirm).
# NOTE(review): these installs are unpinned, so the resulting versions depend on
# when the cell is run.
!pip3 install --upgrade diffusers[torch]
!pip3 install transformers
!pip3 install omegaconf
!pip3 install safetensors


Collecting huggingface-hub<1.0,>=0.11.0
  Using cached huggingface_hub-0.12.1-py3-none-any.whl (190 kB)
Installing collected packages: huggingface-hub
  Attempting uninstall: huggingface-hub
    Found existing installation: huggingface-hub 0.10.1
    Uninstalling huggingface-hub-0.10.1:
      Successfully uninstalled huggingface-hub-0.10.1


In [9]:
import os
import json
from diffusers import StableDiffusionPipeline
from diffusers import StableDiffusionImg2ImgPipeline
import boto3
import sagemaker
import uuid
import torch
from torch import autocast
from PIL import Image
import io
import requests
import traceback
import os
import json
import torch
from diffusers import StableDiffusionPipeline
from diffusers import StableDiffusionImg2ImgPipeline
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
import deepspeed



# Local smoke test: load a diffusers-format model from disk, try to wrap it with
# DeepSpeed-Inference, then move it to the GPU.
# NOTE(review): model_dir is an absolute path from the author's environment.
model_dir='/root/dreambooth/models_diffuser/'
model = StableDiffusionPipeline.from_pretrained(model_dir, torch_dtype=torch.float16, revision="fp16")
print("model loaded:",model)
 
# Enable the cuDNN benchmark mode and allow TF32 for CUDA matmuls.
torch.backends.cudnn.benchmark = True
torch.backends.cuda.matmul.allow_tf32 = True


# Best effort: if DeepSpeed initialisation raises, the exception is printed and the
# plain pipeline stays in `model`. The recorded output of this cell shows that
# branch being taken ('StableDiffusionPipeline' object has no attribute 'children').
try:
    print("begin load deepspeed....")    
    model=deepspeed.init_inference(
        model=getattr(model,"model", model),      # Transformers models
        mp_size=1,        # Number of GPU
        dtype=torch.float16, # dtype of the weights (fp16)
        replace_method="auto", # Lets DS automatically identify the layer to replace
        replace_with_kernel_inject=False, # replace the model with the kernel injector
    )
    print('model accelarate with deepspeed!')
except Exception as e:
    print("deepspeed accelarate excpetion!")
    print(e)
    
# Move the resulting object to the GPU and turn on attention slicing.
model = model.to("cuda")
model.enable_attention_slicing()    

model loaded: StableDiffusionPipeline {
  "_class_name": "StableDiffusionPipeline",
  "_diffusers_version": "0.14.0",
  "feature_extractor": [
    null,
    null
  ],
  "requires_safety_checker": false,
  "safety_checker": [
    null,
    null
  ],
  "scheduler": [
    "diffusers",
    "PNDMScheduler"
  ],
  "text_encoder": [
    "transformers",
    "CLIPTextModel"
  ],
  "tokenizer": [
    "transformers",
    "CLIPTokenizer"
  ],
  "unet": [
    "diffusers",
    "UNet2DConditionModel"
  ],
  "vae": [
    "diffusers",
    "AutoencoderKL"
  ]
}

begin load deepspeed....
[2023-03-08 10:44:01,507] [INFO] [logging.py:77:log_dist] [Rank -1] DeepSpeed info: version=0.8.2, git-hash=unknown, git-branch=unknown
[2023-03-08 10:44:01,509] [INFO] [logging.py:77:log_dist] [Rank -1] quantize_bits = 8 mlp_extra_grouping = False, quantize_groups = 1
deepspeed accelarate excpetion!
'StableDiffusionPipeline' object has no attribute 'children'


### Deploy

In [None]:
from sagemaker.huggingface.model import HuggingFaceModel

#s3://sagemaker-us-east-1-549828897912/stable-diffusion-v1-4/model.tar.gz

# Describe the model to SageMaker: where the packaged model and script live, which
# IAM role creates the endpoint, and the transformers / pytorch / python versions used.
huggingface_model = HuggingFaceModel(
    model_data=s3_model_uri,
    role=role,
    transformers_version="4.17",
    pytorch_version="1.10",
    py_version='py38',
)

# Deploy the endpoint on a single ml.g5.xlarge instance.
predictor = huggingface_model.deploy(initial_instance_count=1, instance_type="ml.g5.xlarge")

### Inference

In [None]:
from PIL import Image
from io import BytesIO
from IPython.display import display
import base64
import matplotlib.pyplot as plt

# helper decoder
def decode_base64_image(image_string):
    """Decode a base64 string and open the resulting bytes as a PIL image."""
    raw_bytes = BytesIO(base64.b64decode(image_string))
    return Image.open(raw_bytes)

# display PIL images as grid
def display_images(images=None,columns=3, width=100, height=100):
    """Show the given images in a grid with ``columns`` images per row.

    Args:
        images: Iterable of images accepted by ``plt.imshow``; ``None`` is
            treated as "nothing to show".
        columns: Number of grid columns.
        width: Figure width passed to ``figsize``.
        height: Figure height passed to ``figsize``.
    """
    images = [] if images is None else list(images)
    plt.figure(figsize=(width, height))
    if not images:
        return

    # Ceiling division: exactly as many rows as needed, so no empty trailing row
    # appears when the image count is a multiple of `columns`.
    rows = -(-len(images) // columns)
    for i, image in enumerate(images):
        plt.subplot(rows, columns, i + 1)
        plt.axis('off')
        plt.imshow(image)

In [None]:
%%time
en = predictor.endpoint_name

# Prompts tried against the endpoint; only the last entry is actually sent.
example_prompts = [
    "A dog trying catch a flying pizza art drawn by disney concept artists, golden colour, high quality, highly detailed, elegant, sharp focus",
    "portrait photo headshot by mucha, sharp focus, elegant, render, octane, detailed, award winning photography, masterpiece, rim lit",
    "priest, blue robes, 68 year old nun, national geographic, portrait, photo, photography",
    "hotel room with a swimming pool outside of the window, TV on the table, moon in the sky",
    # "那人却在灯火阑珊处，色彩艳丽，古风，资深插画师作品，桌面高清壁纸 Van Gogh style",  # 3D painting
    # "interior design, open plan, kitchen and living room, modular furniture with cotton textiles, wooden floor, high ceiling, large steel windows viewing a city",
    "小桥流水人家，Van Gogh style",
]
prompt = example_prompts[-1]

# Send the prompt to the endpoint
response = predictor.predict(data={"inputs": prompt})

# Turn every returned base64 string back into an image
decoded_images = [decode_base64_image(encoded) for encoded in response["generated_images"]]

# Show the generated images as a grid
display_images(decoded_images)

### Delete

In [None]:
# Clean up the deployment: delete the SageMaker model, then the endpoint.
predictor.delete_model()
predictor.delete_endpoint()