In [None]:
#%pip install huggingface-hub

In [1]:
import boto3
import sagemaker
import json
from sagemaker import Model, image_uris, serializers, deserializers
from utils import download_model

boto3_session=boto3.session.Session(region_name="us-west-2")
smr = boto3_session.client('sagemaker-runtime-demo')
sm = boto3_session.client('sagemaker')
role = sagemaker.get_execution_role()  # execution role for the endpoint
sess = sagemaker.session.Session(boto3_session, sagemaker_client=sm, sagemaker_runtime_client=smr)  # sagemaker session for interacting with different AWS APIs
bucket = sess.default_bucket()  # sagemaker session for interacting with different AWS APIs
region = sess._region_name  # region name of the current SageMaker Studio environment


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# download quantized model
local_model_path = download_model("TheBloke/Llama-2-13B-chat-GPTQ", "./Llama-2-13B-chat-GPTQ")

Model already exists at Llama-2-13B-chat-GPTQ
Skipping download


In [3]:

image_uri = f"763104351884.dkr.ecr.{region}.amazonaws.com/djl-inference:0.23.0-deepspeed0.9.5-cu118"

In [4]:
s3_model_location = sess.upload_data(path=local_model_path.as_posix(), bucket=bucket, key_prefix="Llama-2-13B-chat-GPTQ")

In [5]:
!sed -i "s|\(option\.s3url=\).*|\1$s3_model_location|" gptq-stream/serving.properties

In [6]:
%%sh
tar czvf gptq_stream.tar.gz gptq-stream/


gptq-stream/
gptq-stream/requirements.txt
gptq-stream/__pycache__/
gptq-stream/__pycache__/model.cpython-39.pyc
gptq-stream/serving.properties
gptq-stream/model.py


In [7]:
s3_code_prefix = "quantized-models/gptq-stream"
code_artifact = sess.upload_data("gptq_stream.tar.gz", bucket, s3_code_prefix)
print(f"S3 Code or Model tar ball uploaded to --- > {code_artifact}")
env = {"HUGGINGFACE_HUB_CACHE": "/tmp", "TRANSFORMERS_CACHE": "/tmp"}

model = Model(sagemaker_session=sess, image_uri=image_uri, model_data=code_artifact, env=env, role=role)

S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-west-2-152804913371/quantized-models/gptq-stream/gptq_stream.tar.gz


In [8]:
instance_type = "ml.g5.2xlarge"
endpoint_name = sagemaker.utils.name_from_base("gptq-stream")

model.deploy(
    initial_instance_count=1,
    instance_type=instance_type,
    endpoint_name=endpoint_name,
)

------------------!

In [15]:
import time 
time.sleep(20)

In [17]:
# invoke with streaming enabled

prompt = "I'm going to Paris. What should I do there?"
prompt_template=f'''[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
{prompt}[/INST]'''



body = {"prompt": prompt_template, "model_kwargs": {"max_new_tokens":1000, "temperature":0.8, "stream_enabled": True}}
resp = smr.invoke_endpoint_with_response_stream(EndpointName=endpoint_name, Body=json.dumps(body), ContentType="application/json")
event_stream = resp['Body']

for event in event_stream:
    print(json.loads(event['PayloadPart']['Bytes'].decode('utf-8'))["outputs"], end="")

 Bonjour! Paris, the City of Light, is a beautiful and vibrant destination that offers a wide range of activities and experiences for visitors. Here are some suggestions for things to do while you're there:

1. Explore iconic landmarks: Paris is home to many famous landmarks like the Eiffel Tower, Notre-Dame Cathedral, the Louvre Museum, and the Arc de Triomphe. Take a guided tour or simply admire these iconic structures on your own.
2. Stroll along the Seine: The Seine River is the lifeblood of Paris, and a stroll along its banks is a must-do activity. Take in the city's beauty, street performers, and boat tours.
3. Discover art and culture: Paris is known for its artistic heritage, and you can explore numerous museums like the Louvre, Musée d'Orsay, and the Pompidou Center. Each museum offers a unique collection of art and artifacts that showcase French culture and history.
4. Enjoy the city's green spaces: Paris has many beautiful parks and gardens, such as the Luxembourg Gardens, t

In [20]:
# invoke with streaming disabled

body = {"prompt": prompt_template, "model_kwargs": {"max_new_tokens":1000, "temperature":0.8, "stream_enabled": False}}
resp = smr.invoke_endpoint(EndpointName=endpoint_name, Body=json.dumps(body), ContentType="application/json")
output = resp["Body"].read().decode("utf-8")
print(output)

  Bonjour! Paris, the City of Light, is a beautiful and vibrant destination that offers a wide range of activities and experiences for visitors. Here are some suggestions for things to do while you're there:

1. Explore iconic landmarks: Paris is home to many famous landmarks like the Eiffel Tower, Notre-Dame Cathedral, the Louvre Museum, and the Arc de Triomphe. Take a guided tour or simply admire these iconic structures on your own.
2. Stroll along the Seine: The Seine River is the lifeblood of Paris, and a stroll along its banks is a must-do activity. Take in the city's beauty, street performers, and boat tours.
3. Discover art and culture: Paris is known for its artistic heritage, and you can explore numerous museums like the Louvre, Musée d'Orsay, and the Pompidou Center. Each museum offers a unique collection of art and artifacts that showcase French culture and history.
4. Enjoy the city's green spaces: Paris has many beautiful parks and gardens, such as the Luxembourg Gardens, 

In [None]:
sess.delete_endpoint(endpoint_name)
sess.delete_endpoint_config(endpoint_name)
model.delete_model()