## Create a SageMaker Model for Deployment
As a first step, we'll import the relevant libraries and configure several global variables, such as the hosting image that will be used and the S3 location of our model artifacts.

In [1]:
import sagemaker
from sagemaker.model import Model
from sagemaker import serializers, deserializers
from sagemaker import image_uris
import boto3
import os
import time
import json
from pathlib import Path

In [8]:
# Notebook-wide configuration: IAM role, SageMaker session, S3 locations and AWS clients.
role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session()  # sagemaker session for interacting with different AWS APIs
bucket = sess.default_bucket()  # bucket to house code artifacts
model_bucket = sess.default_bucket()  # bucket to house model checkpoints (same default bucket)
s3_code_prefix = (
    "stable-diffusion-2/code"  # folder within bucket where code artifact will go
)
s3_model_prefix = "stable-diffusion-2/model"  # folder where model checkpoint will go

# Use the public attribute rather than the private `sess._region_name`.
region = sess.boto_region_name
account_id = sess.account_id()

s3_client = boto3.client("s3")
sm_client = boto3.client("sagemaker")
smr_client = boto3.client("sagemaker-runtime")

In [9]:
# Region-specific ECR URI of the DJL inference container used to host the model.
inference_image_uri = f"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.20.0-deepspeed0.7.5-cu116"
print(f"Image going to be used is ---- > {inference_image_uri}")

Image going to be used is ---- > 763104351884.dkr.ecr.us-east-1.amazonaws.com/djl-inference:0.20.0-deepspeed0.7.5-cu116


In [10]:
def deploy_model(image_uri, model_data, role, endpoint_name, instance_type, sagemaker_session):
    """Create the SageMaker endpoint resources and return a predictor for them.

    Args:
        image_uri: URI of the serving container image.
        model_data: S3 URI of the model/code archive to deploy.
        role: IAM role passed to the SageMaker model.
        endpoint_name: Name given to the endpoint that is created.
        instance_type: Instance type the endpoint runs on (a single instance is used).
        sagemaker_session: Session used both to create the resources and by
            the returned predictor.

    Returns:
        A ``sagemaker.Predictor`` bound to ``endpoint_name`` that serializes
        requests to JSON and deserializes JSON responses.
    """
    # Pass the session explicitly so the model and endpoint are created with the
    # same session the caller supplied, instead of an implicitly created default.
    model = Model(
        image_uri=image_uri,
        model_data=model_data,
        role=role,
        sagemaker_session=sagemaker_session,
    )

    model.deploy(
        initial_instance_count=1,
        instance_type=instance_type,
        endpoint_name=endpoint_name,
    )

    predictor = sagemaker.Predictor(
        endpoint_name=endpoint_name,
        sagemaker_session=sagemaker_session,
        serializer=serializers.JSONSerializer(),
        deserializer=deserializers.JSONDeserializer(),
    )

    return predictor

## Deploy Model

In [11]:
# Copy the txt2img variants of the serving configuration and handler script
# to the file names used inside the sd_pagination/ package directory.
!cp sd_pagination/model_src/serving_txt2img.properties sd_pagination/serving.properties
!cp sd_pagination/model_src/txt2img.py sd_pagination/model.py

In [12]:
# Print the serving configuration with syntax highlighting and line numbers.
!pygmentize sd_pagination/serving.properties | cat -n

     1	[36mengine[39;49;00m=[33mPython[39;49;00m
     2	[36moption.model_id[39;49;00m=[33mstabilityai/stable-diffusion-2-1-base[39;49;00m
     3	[36moption.dtype[39;49;00m=[33mfp16[39;49;00m


In [13]:
!tar czvf sd_txt_model.tar.gz sd_pagination/

sd_pagination/
sd_pagination/model.py
sd_pagination/diffusers/
sd_pagination/diffusers/MANIFEST.in
sd_pagination/diffusers/CODE_OF_CONDUCT.md
sd_pagination/diffusers/setup.cfg
sd_pagination/diffusers/docs/
sd_pagination/diffusers/docs/source/
sd_pagination/diffusers/docs/source/ko/
sd_pagination/diffusers/docs/source/ko/_toctree.yml
sd_pagination/diffusers/docs/source/ko/quicktour.mdx
sd_pagination/diffusers/docs/source/ko/index.mdx
sd_pagination/diffusers/docs/source/ko/installation.mdx
sd_pagination/diffusers/docs/source/ko/in_translation.mdx
sd_pagination/diffusers/docs/source/en/
sd_pagination/diffusers/docs/source/en/_toctree.yml
sd_pagination/diffusers/docs/source/en/quicktour.mdx
sd_pagination/diffusers/docs/source/en/using-diffusers/
sd_pagination/diffusers/docs/source/en/using-diffusers/reusing_seeds.mdx
sd_pagination/diffusers/docs/source/en/using-diffusers/using_safetensors.mdx
sd_pagination/diffusers/docs/source/en/using-diffusers/using_safetensors
sd_pagination/diffusers/d

In [14]:
# Upload the packaged archive to the code prefix of the artifact bucket and
# keep the resulting S3 URI for deployment.
sd_s3_code_artifact = sess.upload_data(
    path="sd_model.tar.gz",
    bucket=bucket,
    key_prefix=s3_code_prefix,
)
print(f"S3 Code or Model tar ball uploaded to --- > {sd_s3_code_artifact}")

S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-east-1-152804913371/stable-diffusion-2/code/sd_model.tar.gz


In [28]:
# Generate a unique endpoint name from the "stable-diffusion" base, then
# create the endpoint and obtain a predictor for it.
sd_endpoint_name = sagemaker.utils.name_from_base("stable-diffusion")
sd_predictor = deploy_model(
    image_uri=inference_image_uri,
    model_data=sd_s3_code_artifact,
    role=role,
    endpoint_name=sd_endpoint_name,
    instance_type="ml.g5.4xlarge",
    sagemaker_session=sess,
)

------------------!

In [16]:
from io import BytesIO
from PIL import Image
import base64
from concurrent.futures import ThreadPoolExecutor
import time

In [33]:
import matplotlib.pyplot as plt
from IPython import display
from IPython.core.display import HTML
%matplotlib inline

In [18]:
def decode_image(img):
    """Decode a base64-encoded image string and open it as a PIL image."""
    raw_bytes = base64.b64decode(img.encode("utf8"))
    return Image.open(BytesIO(raw_bytes))

In [19]:
def invoke_endpoint(predictor, payload):
    """Send ``payload`` to the endpoint via ``predictor`` and return its response."""
    return predictor.predict(payload)

In [35]:
def run_inference(predictor, payload):
    """Yield interim images while the endpoint works through the diffusion steps.

    The endpoint is called repeatedly. Each response is expected to contain
    ``"images"``, ``"step"`` (the number of steps completed so far) and
    ``"latents"``. The step and latents are fed back into the next request as
    ``starting_step`` and ``latents`` so generation resumes where it stopped.
    The next request is started in a background thread *before* the current
    images are yielded, so the caller can display them while the endpoint
    keeps working.

    Args:
        predictor: Object whose ``predict(payload)`` invokes the endpoint.
        payload: Request dict with a ``"parameters"`` dict that contains
            ``"num_inference_steps"``. It is not modified.

    Yields:
        Each element of ``result["images"]`` from every response, in order.
    """
    num_inference_steps = payload["parameters"]["num_inference_steps"]
    if not 0 < num_inference_steps:
        # Nothing to run; no request is sent.
        return

    # Build requests from copies so the caller's payload is never mutated
    # (otherwise stale "latents" would leak into a later run with the same dict).
    request = {**payload, "parameters": {**payload["parameters"], "starting_step": 0}}

    # Keep the executor open for the whole generation. Leaving a `with` block
    # waits for submitted work, so a per-request executor would make every
    # "background" call synchronous.
    with ThreadPoolExecutor(max_workers=1) as executor:
        future = executor.submit(invoke_endpoint, predictor, request)
        while future is not None:
            result = future.result()  # blocks until the in-flight request completes
            steps_completed = result["step"]

            if steps_completed < num_inference_steps:
                # Start the next chunk before yielding so it overlaps with display.
                request = {
                    **request,
                    "parameters": {
                        **request["parameters"],
                        "starting_step": steps_completed,
                        "latents": result["latents"],
                    },
                }
                future = executor.submit(invoke_endpoint, predictor, request)
            else:
                # All steps are done: do not send a superfluous extra request.
                future = None

            for img in result["images"]:
                yield img
            

In [64]:
# Text prompt describing the image to generate.
prompt = """60s cartoon style photo of a Panda bear wearing underground clothes  in far off galaxy, style of Hannah Barbara, studio ghibli, akira toriyama, james gilleard, warner brothers, trending pixiv fanbox, acrylic palette knife, 8k, vibrant colors, devinart, trending on artstation, low details, smooth 
"""
# Negative prompt; it is sent to the endpoint as the "negative_prompt" parameter.
negative_prompt= "ugly, tiling, blurred, watermark, grainy, signature, cut off, draft, amateur, multiple,  text, poor, low, basic, worst, unprofessional"

In [65]:
# Request body sent to the endpoint: generation parameters plus the prompt.
payload = {
    "parameters": {
        "num_inference_steps": 50,
        "guidance_scale": 9,
        "num_images_per_prompt": 1,
        "starting_step": 0,
        "num_interim_images": 5,
        "negative_prompt": negative_prompt,
    },
    "prompt": prompt,
}

In [66]:
# run_inference is a generator function: this only creates the generator.
# No request is sent to the endpoint until it is iterated.
it = run_inference(sd_predictor, payload)

In [67]:
# Render each base64-encoded image inline as soon as the generator yields it.
for encoded_image in it:
    markup = f'''<div>
      <img src="data:image/png;base64, {encoded_image}" />
    </div>'''
    display.display(HTML(markup))
    time.sleep(0.01)

In [59]:
# fig, ax = plt.subplots(1,1, figsize=(10,10))
# ax.axis("off")

# for img in it:
#     ax.imshow(img)
#     fig.canvas.draw()
#     display.display(plt.gcf())
#     display.clear_output(wait=True)

In [27]:
# Clean up: delete the endpoint created above once you are finished with it.
sd_predictor.delete_endpoint()