# Setup

In [21]:
import os
!pip install huggingface_hub
!pip install PIL
!pip install matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib import rcParams
import sagemaker
import shutil
from distutils.dir_util import copy_tree
from pathlib import Path
import boto3
import sagemaker
from huggingface_hub import snapshot_download
from sagemaker.huggingface import HuggingFaceModel
import tarfile
import os
import json 
import base64
import io
from PIL import Image
sess = sagemaker.Session(boto3.session.Session())
bucket=sess.default_bucket()
print(bucket)

[31mERROR: Could not find a version that satisfies the requirement PIL (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for PIL[0m[31m
sagemaker-ap-south-1-057641535369


In [22]:
# Name of the ControlNet technique being packaged; it is used to build the
# local working directories below.
technique = 'tile'

# One folder for the inference code and one for the compressed archive.
for parent in ('script', 'compressed'):
    os.makedirs(f'{parent}/{technique}/', exist_ok=True)

In [23]:
%%writefile script/tile/inference.py
import json
import torch
from diffusers import ControlNetModel, StableDiffusionControlNetImg2ImgPipeline, DPMSolverMultistepScheduler
import os
import io
import base64
from pathlib import Path
from PIL import Image

def _encode(image):
    """Serialize an image to PNG and return it as base64 text.

    Args:
        image: Object exposing ``save(fileobj, format=...)``; the code only
            relies on that method (a PIL image in this script).

    Returns:
        str: The PNG bytes, base64-encoded and decoded to a ``str``.
    """
    buffer = io.BytesIO()
    image.save(buffer, format='PNG')
    return base64.b64encode(buffer.getvalue()).decode()

def _decode(image):
    """Turn a base64-encoded image payload back into a PIL image.

    Args:
        image: Base64 text (or bytes) holding an encoded image file.

    Returns:
        The object returned by ``Image.open`` on the decoded bytes.
    """
    raw_bytes = base64.b64decode(image)
    return Image.open(io.BytesIO(raw_bytes))

def model_fn(model_dir, extra=None):
    """Load the three-ControlNet img2img pipeline from the unpacked model archive.

    Args:
        model_dir: Directory containing the sub-folders ``sinkinai-0`` (base
            Stable Diffusion checkpoint) and ``lllyasviel-1`` / ``lllyasviel-2``
            (ControlNet weights).
        extra: Unused. It is optional so the loader works whether the serving
            toolkit calls it with one argument or with an additional context
            object -- NOTE(review): confirm which form the deployed container uses.

    Returns:
        The configured ``StableDiffusionControlNetImg2ImgPipeline``.
    """
    # Pick device and dtype together: half precision only makes sense on GPU,
    # and a hard-coded "cuda" device would crash on a CPU-only host.
    use_cuda = torch.cuda.is_available()
    device = "cuda" if use_cuda else "cpu"
    torch_dtype = torch.float16 if use_cuda else torch.float32

    # The first two ControlNets load the same `lllyasviel-1` weights; each one
    # receives its own conditioning image at request time.
    controlnets = [
        ControlNetModel.from_pretrained(f"{model_dir}/{folder}", torch_dtype=torch_dtype).to(device)
        for folder in ("lllyasviel-1", "lllyasviel-1", "lllyasviel-2")
    ]

    pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
        f"{model_dir}/sinkinai-0",
        controlnet=controlnets,
        torch_dtype=torch_dtype,
    ).to(device)

    # Sampler: DPM++ 2M Karras,
    # https://github.com/huggingface/diffusers/issues/1887
    # https://github.com/huggingface/diffusers/issues/1633
    # algorithm_type: dpmsolver or dpmsolver++ or sde-dpmsolver or sde-dpmsolver++
    pipe.scheduler = DPMSolverMultistepScheduler.from_config(
        pipe.scheduler.config, use_karras_sigmas=True, algorithm_type="dpmsolver++"
    )

    # xformers attention and accelerate's model offloading both need a GPU,
    # so only enable them when one is present.
    if use_cuda:
        pipe.enable_xformers_memory_efficient_attention()
        pipe.enable_model_cpu_offload()

    return pipe


def transform_fn(model, data, input_content_type, output_content_type):
    """Run one img2img generation request against the loaded pipeline.

    Expected request format (JSON; images are base64-encoded image files):
    request={
         "prompt":p_prompt,
         "negative_prompt":n_prompt,
         "starting_image":starting_image.decode(),
         "controlnet_1_image":cnet_image_1.decode(),
         "controlnet_2_image":cnet_image_2.decode(),
         "controlnet_3_image":cnet_image_3.decode(),
         "seed": 1441837673,
         "num_inference_steps": 50,
         "num_images_per_prompt": 1,
         "strength": 0.75,
         "guidance_scale": 7.00,
         "controlnet_1_conditioning_scale": 0.85,
         "controlnet_1_guidance_start": 0.23,
         "controlnet_1_guidance_end": 0.96,
         "controlnet_2_conditioning_scale": 0.65,
         "controlnet_2_guidance_start": 0.18,
         "controlnet_2_guidance_end": 0.80,
         "controlnet_3_conditioning_scale": 0.65,
         "controlnet_3_guidance_start": 0.18,
         "controlnet_3_guidance_end": 0.80
        }

    Args:
        model: Pipeline object returned by ``model_fn``.
        data: JSON request body (``str`` or ``bytes``).
        input_content_type: Unused.
        output_content_type: Unused.

    Returns:
        dict: ``{"output_images": [...]}`` with one base64-encoded PNG string
        per generated image.

    Raises:
        KeyError: If ``prompt`` or any of the four image fields is missing.
    """
    # Parse inference input.
    input_data = json.loads(data)

    # Fail fast, before any image decoding, with a message naming every
    # missing mandatory field.
    required_fields = (
        "prompt",
        "starting_image",
        "controlnet_1_image",
        "controlnet_2_image",
        "controlnet_3_image",
    )
    missing = [field for field in required_fields if field not in input_data]
    if missing:
        raise KeyError(f"missing required request field(s): {', '.join(missing)}")

    # Scalar settings: use the request value when present, else the default.
    seed = int(input_data.get("seed", 1441837673))
    num_inference_steps = int(input_data.get("num_inference_steps", 50))
    num_images_per_prompt = int(input_data.get("num_images_per_prompt", 1))
    strength = float(input_data.get("strength", 0.75))
    guidance_scale = float(input_data.get("guidance_scale", 7.00))

    # Per-ControlNet settings; each tuple holds the defaults for
    # controlnet_1, controlnet_2 and controlnet_3 respectively.
    controlnet_defaults = {
        "conditioning_scale": (0.85, 0.65, 0.65),
        "guidance_start": (0.23, 0.18, 0.18),
        "guidance_end": (0.96, 0.80, 0.80),
    }

    def per_controlnet(setting):
        """Collect ``controlnet_<i>_<setting>`` for i = 1..3 as floats."""
        return [
            float(input_data.get(f"controlnet_{index}_{setting}", default))
            for index, default in enumerate(controlnet_defaults[setting], start=1)
        ]

    controlnet_conditioning_scale_list = per_controlnet("conditioning_scale")
    control_guidance_start_list = per_controlnet("guidance_start")
    control_guidance_end_list = per_controlnet("guidance_end")

    starting_image = _decode(input_data["starting_image"])
    control_images = [
        _decode(input_data[f"controlnet_{index}_image"]) for index in (1, 2, 3)
    ]

    # Seeded generator on the GPU when one is available, otherwise on the CPU.
    generator_device = "cuda" if torch.cuda.is_available() else "cpu"
    generator = torch.Generator(generator_device).manual_seed(seed)

    # Generate output image(s).
    output_images = model(
        image=starting_image,
        control_image=control_images,
        controlnet_conditioning_scale=controlnet_conditioning_scale_list,
        control_guidance_start=control_guidance_start_list,
        control_guidance_end=control_guidance_end_list,
        prompt=input_data["prompt"],
        # Optional: a request without a negative prompt passes None.
        negative_prompt=input_data.get("negative_prompt"),
        num_inference_steps=num_inference_steps,
        num_images_per_prompt=num_images_per_prompt,
        strength=strength,
        guidance_scale=guidance_scale,
        generator=generator,
    ).images

    response = {
        "output_images": [_encode(image) for image in output_images]
    }
    return response

Overwriting script/tile/inference.py


In [24]:
%%writefile script/tile/requirements.txt
# Extra packages listed for installation alongside inference.py.
# NOTE(review): only diffusers and xformers are pinned; consider pinning the
# remaining packages so a rebuilt endpoint resolves the same versions.
# NOTE(review): inference.py imports none of opencv, controlnet-aux, qrcode,
# rembg or boto3 -- confirm they are needed. opencv-contrib-python and
# opencv-python-headless both ship the `cv2` module; verify they can coexist.
diffusers==0.19.3
accelerate
opencv-contrib-python
controlnet-aux
xformers==0.0.20
opencv-python-headless
transformers
qrcode
rembg
boto3

Overwriting script/tile/requirements.txt


In [25]:
%%time

model_id = ["sinkinai/GhostMix-V2-BakedVAE","lllyasviel/control_v11f1e_sd15_tile","lllyasviel/control_v11p_sd15_canny"]

# Creating a tarfile to to compress our model to a `tar.gz` format as required by SageMaker
tar = tarfile.open(f"compressed/tile/tile.tar.gz", "w:gz",compresslevel=2)
counter=0
for ids in model_id:
    
    model_tar_dir = Path(ids.split("/")[0] +"-"+ str(counter))
    if model_tar_dir.exists():
        shutil.rmtree(str(model_tar_dir))
    model_tar_dir.mkdir(exist_ok=True)    
    name=ids.split("/")[0]+"-"+ str(counter)
    
    snapshot_download(ids, local_dir=str(model_tar_dir), local_dir_use_symlinks=False) 
    tar.add(str(model_tar_dir),arcname=name)
    counter = counter + 1
    #!rm -r {str(model_tar_dir)}

Fetching 16 files:   0%|          | 0/16 [00:00<?, ?it/s]

(…)ature_extractor/preprocessor_config.json:   0%|          | 0.00/520 [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

scheduler/scheduler_config.json:   0%|          | 0.00/341 [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

safety_checker/config.json:   0%|          | 0.00/4.89k [00:00<?, ?B/s]

text_encoder/pytorch_model.bin:   0%|          | 0.00/492M [00:00<?, ?B/s]

safety_checker/pytorch_model.bin:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer/merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

tokenizer/vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

unet/config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/577 [00:00<?, ?B/s]

unet/diffusion_pytorch_model.bin:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

vae/diffusion_pytorch_model.bin:   0%|          | 0.00/335M [00:00<?, ?B/s]

Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

images/output.png:   0%|          | 0.00/1.24M [00:00<?, ?B/s]

diffusion_pytorch_model.bin:   0%|          | 0.00/1.45G [00:00<?, ?B/s]

images/original.png:   0%|          | 0.00/11.3k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/955 [00:00<?, ?B/s]

sd.png:   0%|          | 0.00/59.5k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/15.8k [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

diffusion_pytorch_model.fp16.safetensors:   0%|          | 0.00/723M [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.45G [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

control_net_canny.py:   0%|          | 0.00/1.38k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/16.6k [00:00<?, ?B/s]

diffusion_pytorch_model.bin:   0%|          | 0.00/1.45G [00:00<?, ?B/s]

diffusion_pytorch_model.fp16.bin:   0%|          | 0.00/723M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/996 [00:00<?, ?B/s]

images/bird_canny_out.png:   0%|          | 0.00/835k [00:00<?, ?B/s]

images/control.png:   0%|          | 0.00/29.1k [00:00<?, ?B/s]

images/bird_canny.png:   0%|          | 0.00/29.1k [00:00<?, ?B/s]

images/bird.png:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

images/image_out.png:   0%|          | 0.00/835k [00:00<?, ?B/s]

sd.png:   0%|          | 0.00/59.5k [00:00<?, ?B/s]

images/input.png:   0%|          | 0.00/1.07M [00:00<?, ?B/s]

CPU times: user 9min 1s, sys: 44.5 s, total: 9min 46s
Wall time: 14min 49s


In [26]:
# Add the inference script and requirements file to the archive under `code/`.
# Close the archive even if the add fails, so the gzip stream is always
# finalized and the file handle is released.
try:
    tar.add(f"script/{technique}/", arcname='code')
finally:
    tar.close()

# Upload to S3

In [27]:
# Destination of the packaged model inside the session's default bucket.
model_s3_uri=f"s3://{bucket}/model_controlnet/{technique}.tar.gz"
# Copy the archive with the AWS CLI; the `{...}` placeholders are filled in
# by IPython from the kernel's variables before the shell command runs.
!aws s3 cp compressed/{technique}/{technique}.tar.gz {model_s3_uri}

upload: compressed/tile/tile.tar.gz to s3://sagemaker-ap-south-1-057641535369/model_controlnet/tile.tar.gz


# Deploy model

In [28]:
from sagemaker.huggingface.model import HuggingFaceModel
from sagemaker import get_execution_role

# Take the region from the SageMaker session instead of hard-coding it: the
# model bucket and the endpoint live in the session's region, and the
# container image must be pulled from the same region.
# NOTE(review): the registry account id below is kept from the original URI;
# confirm it is valid for the target region before running elsewhere.
region = sess.boto_region_name
image_uri = (
    f"763104351884.dkr.ecr.{region}.amazonaws.com/"
    "huggingface-pytorch-inference:1.13.1-transformers4.26.0-gpu-py39-cu117-ubuntu20.04"
)

huggingface_model = HuggingFaceModel(
   model_data=model_s3_uri,  # S3 path of the packaged model archive
   role=get_execution_role(), # IAM role with permissions to create an Endpoint
   py_version="py39", # python version of the DLC
   image_uri=image_uri, # sagemaker container image uri
   sagemaker_session=sess, # same session (and region) used for the bucket above
   env={
       # Model-server limits -- presumably raised to fit base64 image
       # payloads and long generations; confirm units against the server docs.
       "MMS_MAX_REQUEST_SIZE": "2000000000",
       "MMS_MAX_RESPONSE_SIZE": "2000000000",
       "MMS_DEFAULT_RESPONSE_TIMEOUT": '9000',
       "SAGEMAKER_CONTAINER_LOG_LEVEL": "20",
       # Entry-point script and the directory it is unpacked to (the `code`
       # folder added to the archive).
       "SAGEMAKER_PROGRAM": "inference.py",
       "SAGEMAKER_REGION": region,
       "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code"
   }
)

In [29]:
from sagemaker.async_inference.async_inference_config import AsyncInferenceConfig
from sagemaker.s3 import s3_path_join

# Asynchronous endpoint configuration: results are written under this prefix
# in the default bucket.
async_output_s3_uri = s3_path_join("s3://", bucket, "qart_async_inference/output")

# Optional SNS notifications can be enabled by also passing, e.g.:
#   notification_config={
#       "SuccessTopic": "arn:aws:sns:us-east-2:123456789012:MyTopic",
#       "ErrorTopic": "arn:aws:sns:us-east-2:123456789012:MyTopic",
#   }
async_config = AsyncInferenceConfig(output_path=async_output_s3_uri)

In [30]:
%%time

# Deploying the model
from sagemaker.deserializers import JSONDeserializer,NumpyDeserializer
from sagemaker.serializers import CSVSerializer, JSONSerializer, IdentitySerializer
import datetime

ts = "{}".format(datetime.datetime.now().strftime("%d-%m-%y-%H-%M-%S"))
endpoint_name = "qart-face-async-tile-canny-"+ts

predictor = huggingface_model.deploy(
    endpoint_name=endpoint_name,
    initial_instance_count=2,
    instance_type= "ml.g4dn.4xlarge",
    async_inference_config=async_config
)

----------!CPU times: user 204 ms, sys: 14 ms, total: 218 ms
Wall time: 5min 33s
