-
Notifications
You must be signed in to change notification settings - Fork 34
/
inf2_sdxl.py
209 lines (186 loc) · 8.67 KB
/
inf2_sdxl.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
# # Deploy Stable Diffusion XL 1.0 on AWS Inferentia
# This example demonstrates how to deploy a
# [Stable Diffusion XL model from Hugging Face](https://huggingface.co/aws-neuron/stable-diffusion-xl-base-1-0-1024x1024)
# on AWS Inferentia2 using
# Runhouse. [AWS Inferentia2 instances](https://aws.amazon.com/ec2/instance-types/inf2/)
# are powered by AWS Inferentia2 chips, custom hardware accelerators for machine learning
# inference workloads that are programmed through the AWS Neuron SDK. This example uses a model
# that was pre-compiled for AWS Neuron, and is available on the Hugging Face Hub.
#
# ## Setup credentials and dependencies
#
# Optionally, set up a virtual environment:
# ```shell
# $ conda create -n rh-inf2 python=3.9.15
# $ conda activate rh-inf2
# ```
# Install the few required dependencies:
# ```shell
# $ pip install -r requirements.txt
# ```
#
# We'll be launching an AWS EC2 instance via [SkyPilot](https://github.com/skypilot-org/skypilot), so we need to
# make sure our AWS credentials are set up:
# ```shell
# $ aws configure
# $ sky check
# ```
# We'll be downloading the pre-compiled Stable Diffusion XL model from Hugging Face, so we need to set up our Hugging Face token:
# ```shell
# $ export HF_TOKEN=<your huggingface token>
# ```
#
# ## Setting up a model class
#
# We import runhouse and other required libraries:
import base64
import os
from io import BytesIO
import runhouse as rh
from PIL import Image
# Next, we define a class that will hold the model and allow us to send prompts to it.
# You'll notice this class inherits from `rh.Module`.
# This is a Runhouse class that allows you to
# run code in your class on a remote machine.
#
# Learn more in the [Runhouse docs on functions and modules](/docs/tutorials/api-modules).
class StableDiffusionXLPipeline(rh.Module):
    """Runhouse module that lazily loads a Neuron-compiled SDXL pipeline and generates images.

    The pipeline is only created on the first `generate` call. The model snapshot is
    downloaded into `model_dir` unless that directory already exists and is non-empty.
    """

    def __init__(
        self,
        model_id: str = "aws-neuron/stable-diffusion-xl-base-1-0-1024x1024",
        model_dir: str = "sdxl_neuron",
    ):
        """Store the model location settings; nothing is downloaded or loaded here.

        Args:
            model_id: Hugging Face Hub repository passed to `snapshot_download`.
            model_dir: Local directory the snapshot is saved to and loaded from.
        """
        super().__init__()
        self.model_id = model_id
        self.model_dir = model_dir
        # Filled in by `_load_pipeline` the first time `generate` is called.
        self.pipeline = None

    def _model_loaded_on_disk(self):
        """Report whether `model_dir` is set, is a directory, and contains at least one entry."""
        directory = self.model_dir
        if not directory:
            # Hand back the falsy value itself (e.g. "" or None), as an `and` chain would.
            return directory
        if not os.path.isdir(directory):
            return False
        return len(os.listdir(directory)) > 0

    def _load_pipeline(self):
        """Download the compiled model if it is not on disk yet, then build `self.pipeline`."""
        from huggingface_hub import snapshot_download
        from optimum.neuron import NeuronStableDiffusionXLPipeline

        if not self._model_loaded_on_disk():
            # Save the compiled model to the local directory: download it from the
            # Hugging Face Hub, using the revision as the Neuron version reference, and
            # exclude symlinks and "hidden" files such as .DS_Store, .gitignore, etc.
            download_options = {
                "revision": "2.15.0",
                "local_dir": self.model_dir,
                "local_dir_use_symlinks": False,
                "allow_patterns": ["[!.]*.*"],
            }
            snapshot_download(self.model_id, **download_options)

        # Load the local, already-converted model into a pipeline.
        self.pipeline = NeuronStableDiffusionXLPipeline.from_pretrained(
            self.model_dir,
            device_ids=[0, 1],
        )

    def generate(self, input_prompt: str, output_format: str = "JPEG", **parameters):
        """Generate images for a prompt, loading the pipeline first if necessary.

        Args:
            input_prompt: Text prompt passed to the pipeline.
            output_format: "PIL" to get the generated images back unchanged; any other
                value is used as the `format` argument when saving each image.
            **parameters: Extra keyword arguments forwarded to the pipeline call.

        Returns:
            The pipeline's "images" entry when `output_format` is "PIL"; otherwise a list
            of base64-encoded strings, one per generated image.
        """
        if not self.pipeline:
            self._load_pipeline()

        images = self.pipeline(input_prompt, **parameters)["images"]
        if output_format == "PIL":
            return images

        def to_base64(picture):
            # Serialize the image into an in-memory buffer, then base64-encode the bytes.
            stream = BytesIO()
            picture.save(stream, format=output_format)
            return base64.b64encode(stream.getvalue()).decode()

        return [to_base64(picture) for picture in images]
def decode_base64_image(image_string):
    """Decode a base64-encoded image and open it with PIL.

    Args:
        image_string: Base64 data holding an encoded image file, such as one element of
            the list returned by `StableDiffusionXLPipeline.generate`.

    Returns:
        The object returned by `Image.open` for the decoded bytes.
    """
    # Decode first so malformed input fails before any image handling is attempted.
    raw_bytes = base64.b64decode(image_string)
    return Image.open(BytesIO(raw_bytes))
# ## Setting up Runhouse primitives
#
# Now, we define the main function that will run locally when we run this script, and set up
# our Runhouse module on a remote cluster. First, we create a cluster with the desired instance type and provider.
# Our `instance_type` here is defined as `inf2.8xlarge`, which is one of the special
# [AWS Inferentia2 instance types](https://aws.amazon.com/ec2/instance-types/inf2/).
# We can alternatively specify an accelerator type and count, such as `A10G:1`,
# and any instance type with those specifications will be used.
#
# We use a specific `image_id`, which in this case is the
# [Deep Learning AMI Base Neuron](https://aws.amazon.com/releasenotes/aws-deep-learning-ami-base-neuron-ubuntu-20-04/)
# which comes with the AWS Neuron drivers preinstalled. The image_id is region-specific. To change the region,
# use the AWS CLI command on the page above under "Query AMI-ID with AWSCLI."
#
# The cluster we set up here also uses `tls` for the `server_connection_type`, which means that all communication
# will be over HTTPS and encrypted. We need to tell SkyPilot to open port 443 for this to work.
#
# We also set `den_auth` to `True`, which means that we will use [Runhouse Den](/dashboard) to
# authenticate public requests to this cluster. This means that we can open this cluster to the public internet, and
# only people who have run `runhouse login` and set up Runhouse accounts will be able to access it.
#
# Learn more in the [Runhouse docs on clusters](/docs/tutorials/api-clusters).
#
# :::note{.info title="Note"}
# Make sure that your code runs within an `if __name__ == "__main__":` block, as shown below. Otherwise,
# the script code will run when Runhouse attempts to run code remotely.
# :::
if __name__ == "__main__":
    cluster = rh.cluster(
        name="rh-inf2",
        instance_type="inf2.8xlarge",
        provider="aws",
        image_id="ami-0e0f965ee5cfbf89b",
        region="us-east-1",
        server_connection_type="tls",
        open_ports=[443],
        den_auth=True,
    )
    cluster = cluster.up_if_not()

    # Set up dependencies
    # We can run commands directly on the cluster via `cluster.run()`. Here, we install some
    # AWS Neuron-specific libraries ahead of setting up our environment. The `torch_neuronx` library needs to be
    # installed before the rest of the env is set up in order to avoid a
    # [common error](https://awsdocs-neuron.readthedocs-hosted.com/en/latest/frameworks/torch/torch-neuronx/
    # training-troubleshooting.html#protobuf-error-typeerror-descriptors-cannot-not-be-created-directly),
    # so we run this first.
    neuron_setup_commands = [
        "python -m pip config set global.extra-index-url https://pip.repos.neuron.amazonaws.com",
        "python -m pip install neuronx-cc==2.* torch-neuronx==1.13.1.1.13.1",
    ]
    cluster.run(neuron_setup_commands)

    # Next, we define the environment for our module. This includes the required dependencies that need
    # to be installed on the remote machine, as well as any secrets that need to be synced up from local to remote.
    # Passing `huggingface` to the `secrets` parameter will load the Hugging Face token we set up earlier.
    # We can also set environment variables, such as `NEURON_RT_NUM_CORES`, which is required for AWS Neuron.
    #
    # Learn more in the [Runhouse docs on envs](/docs/tutorials/api-envs).
    env = rh.env(
        name="sdxl_inference",
        reqs=["optimum-neuron==0.0.20", "diffusers==0.27.2"],
        secrets=["huggingface"],  # Hugging Face token used when downloading the model
        env_vars={"NEURON_RT_NUM_CORES": "2"},
    )

    # Finally, we define our module and run it on the remote cluster. We construct it normally and then call
    # `get_or_to` to run it on the remote cluster. Using `get_or_to` allows us to load the existing Module
    # by the name `sdxl_neuron` if it was already put on the cluster. If we want to update the module each
    # time we run this script, we can use `to` instead of `get_or_to`.
    #
    # Note that we also pass the `env` object to the `get_or_to` method, which will ensure that the environment is
    # set up on the remote machine before the module is run.
    local_module = StableDiffusionXLPipeline()
    model = local_module.get_or_to(cluster, env=env, name="sdxl_neuron")

    # ## Calling our remote function
    #
    # We can call the `generate` method on the model class instance as if it were running locally.
    # This will run the function on the remote cluster and return the response to our local machine automatically.
    # Further calls will also run on the remote machine, and maintain state that was updated between calls, like
    # `self.pipeline`.
    prompt = "A woman runs through a large, grassy field towards a house."
    generation_options = {
        "num_inference_steps": 25,
        "negative_prompt": "disfigured, ugly, deformed",
    }
    response = model.generate(prompt, **generation_options)

    # The default output format yields base64 strings; decode the first one and display it.
    img = decode_base64_image(response[0])
    img.show()