In [None]:
# Uncomment to install the Hugging Face Hub client if it is missing from this kernel:
# %pip install huggingface-hub

In [1]:
import boto3
import sagemaker
import json
from sagemaker import Model, image_uris, serializers, deserializers
from utils import download_model

# Build every client from one boto3 session so they all target the same region.
boto3_session = boto3.session.Session(region_name="us-west-2")

# "sagemaker-runtime" is the public boto3 service name for the inference API
# (invoke_endpoint and invoke_endpoint_with_response_stream).
smr = boto3_session.client("sagemaker-runtime")
sm = boto3_session.client("sagemaker")

role = sagemaker.get_execution_role()  # execution role for the endpoint

# sagemaker session for interacting with different AWS APIs
sess = sagemaker.session.Session(
    boto3_session,
    sagemaker_client=sm,
    sagemaker_runtime_client=smr,
)
bucket = sess.default_bucket()  # default S3 bucket of the session; used for model/code uploads
region = sess.boto_region_name  # region of the boto3 session above (public attribute, not sess._region_name)


  from .autonotebook import tqdm as notebook_tqdm


In [2]:

# Fetch the non-quantized fp16 checkpoint into a local directory.
# download_model comes from the local utils module; its result is used below
# as a path-like object (local_model_path.as_posix()).
local_model_path = download_model(
    "TheBloke/Llama-2-13B-Chat-fp16",
    "./Llama-2-13B-Chat-fp16",
)

Model already exists at Llama-2-13B-Chat-fp16
Skipping download


In [3]:

# Container image for the endpoint: DJL inference 0.23.0 (DeepSpeed 0.9.5, CUDA 11.8),
# pulled from the ECR registry in the same region as the session.
image_uri = (
    f"763104351884.dkr.ecr.{region}.amazonaws.com"
    "/djl-inference:0.23.0-deepspeed0.9.5-cu118"
)

In [9]:
# Upload the downloaded model directory to the session bucket; the returned S3 URI
# is written into serving.properties in the next cell.
s3_model_location = sess.upload_data(
    path=local_model_path.as_posix(),
    bucket=bucket,
    key_prefix="Llama-2-13B-chat-fp16",
)

In [10]:
# Rewrite the `option.s3url=...` line of bnb-stream/serving.properties in place so that
# it points at s3_model_location. IPython substitutes $s3_model_location with the Python
# variable's value before the command is handed to the shell; `|` is used as the sed
# delimiter because the S3 URI contains `/`.
!sed -i "s|\(option\.s3url=\).*|\1$s3_model_location|" bnb-stream/serving.properties

In [11]:
%%sh
# Bundle the bnb-stream/ directory (serving.properties, model.py, requirements.txt)
# into the tarball that is uploaded as the endpoint's model_data.
# NOTE(review): this also archives bnb-stream/__pycache__/ - consider excluding it.
tar -czvf gptq_stream.tar.gz bnb-stream/


bnb-stream/
bnb-stream/requirements.txt
bnb-stream/__pycache__/
bnb-stream/__pycache__/model.cpython-39.pyc
bnb-stream/serving.properties
bnb-stream/model.py


In [12]:
# Environment passed to the container: both Hugging Face cache locations are set to /tmp.
env = {
    "HUGGINGFACE_HUB_CACHE": "/tmp",
    "TRANSFORMERS_CACHE": "/tmp",
}

# Upload the packaged serving code and keep the resulting S3 URI for the Model below.
s3_code_prefix = "quantized-models/gptq-stream"
code_artifact = sess.upload_data(
    path="gptq_stream.tar.gz",
    bucket=bucket,
    key_prefix=s3_code_prefix,
)
print(f"S3 Code or Model tar ball uploaded to --- > {code_artifact}")

# SageMaker model = container image + code tarball + environment + execution role.
model = Model(
    image_uri=image_uri,
    model_data=code_artifact,
    env=env,
    role=role,
    sagemaker_session=sess,
)

S3 Code or Model tar ball uploaded to --- > s3://sagemaker-us-west-2-152804913371/quantized-models/gptq-stream/gptq_stream.tar.gz


In [13]:
# Endpoint name derived from the "bnb-stream" base; later cells invoke and delete the
# endpoint through this name.
endpoint_name = sagemaker.utils.name_from_base("bnb-stream")
instance_type = "ml.g5.2xlarge"

# Create the endpoint on a single instance of the chosen type.
model.deploy(initial_instance_count=1, instance_type=instance_type, endpoint_name=endpoint_name)

------------------!

In [15]:
# invoke with streaming enabled

# User question, wrapped below in an [INST] / <<SYS>> chat template together with
# a system prompt. Both names are reused by the non-streaming cell further down.
prompt = "I'm going to Paris. What should I do there?"
prompt_template = f"""[INST] <<SYS>>
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information.
<</SYS>>
{prompt}[/INST]"""



# Request generation with streaming turned on and print the text as it arrives.
body = {"prompt": prompt_template, "model_kwargs": {"max_new_tokens":1000, "temperature":0.8, "stream_enabled": True}}
resp = smr.invoke_endpoint_with_response_stream(EndpointName=endpoint_name, Body=json.dumps(body), ContentType="application/json")
event_stream = resp['Body']


def _iter_stream_json(events):
    """Yield every complete JSON document carried by a response event stream.

    The byte chunks delivered in ``PayloadPart`` events are not guaranteed to
    line up with the JSON documents the model writes: one document may be
    split over several events, and one event may hold several documents.
    Bytes are therefore buffered and decoded incrementally instead of calling
    ``json.loads`` on each chunk.

    Args:
        events: iterable of event dicts; the payload bytes are read from
            ``event["PayloadPart"]["Bytes"]``.

    Yields:
        Each decoded JSON document, in arrival order.

    Raises:
        ValueError: if the stream ends while undecodable bytes are still
            buffered (truncated or malformed output).
    """
    decoder = json.JSONDecoder()
    pending = b""
    for event in events:
        part = event.get("PayloadPart")
        if not part:
            # No payload bytes in this event - nothing to decode.
            continue
        pending += part["Bytes"]
        try:
            text = pending.decode("utf-8")
        except UnicodeDecodeError:
            # The chunk ended in the middle of a multi-byte character; wait for the rest.
            continue
        while True:
            # raw_decode does not skip leading whitespace (e.g. newline separators).
            text = text.lstrip()
            if not text:
                break
            try:
                document, end = decoder.raw_decode(text)
            except json.JSONDecodeError:
                # Incomplete document so far; keep it buffered until more bytes arrive.
                break
            yield document
            text = text[end:]
        pending = text.encode("utf-8")
    if pending.strip():
        raise ValueError(f"response stream ended with an incomplete JSON document: {pending!r}")


for chunk in _iter_stream_json(event_stream):
    print(chunk["outputs"], end="")

 Bonjour! Paris, the City of Light, is a beautiful destination with a rich history, culture, and entertainment. Here are some suggestions for your trip:

1. Explore iconic landmarks: Visit the Eiffel Tower, the Louvre Museum, Notre-Dame Cathedral, and the Arc de Triomphe. Take a river cruise along the Seine to see these attractions from a different perspective.
2. Discover charming neighborhoods: Wander through Montmartre, Saint-Germain-des-Prés, and the Latin Quarter to experience the city's vibrant street art, cafés, and boutiques.
3. Enjoy French cuisine: Indulge in delicious pastries, cheeses, and wine at a traditional bistro or restaurant. Don't forget to try escargots, croissants, and macarons!
4. Attend a cultural event: Catch a ballet or opera performance at the Palais Garnier, or visit the Musée d'Orsay for an impressive collection of Impressionist art.
5. Relax in beautiful gardens: Stroll through the Luxembourg Gardens, the Tuileries Garden, or the Jardin des Plantes to esca

In [16]:
# invoke with streaming disabled: same prompt and generation settings, but the
# whole completion comes back in a single response body.
body = {
    "prompt": prompt_template,
    "model_kwargs": {
        "max_new_tokens": 1000,
        "temperature": 0.8,
        "stream_enabled": False,
    },
}
resp = smr.invoke_endpoint(
    EndpointName=endpoint_name,
    ContentType="application/json",
    Body=json.dumps(body),
)
raw_payload = resp["Body"].read()
output = raw_payload.decode("utf-8")
print(output)

  Bonjour! Paris, the City of Light, is a beautiful destination with a rich history, culture, and entertainment. Here are some suggestions for your trip:

1. Explore iconic landmarks: Visit the Eiffel Tower, the Louvre Museum, Notre-Dame Cathedral, and the Arc de Triomphe. Take a river cruise along the Seine to see these attractions from a different perspective.
2. Discover charming neighborhoods: Wander through Montmartre, Saint-Germain-des-Prés, and the Latin Quarter to experience the city's vibrant street art, cafés, and boutiques.
3. Enjoy French cuisine: Indulge in delicious pastries, cheeses, and wine at a traditional bistro or restaurant. Don't forget to try escargots, croissants, and macarons!
4. Attend a cultural event: Catch a ballet or opera performance at the Palais Garnier, or visit the Musée d'Orsay for an impressive collection of Impressionist art.
5. Relax in beautiful gardens: Stroll through the Luxembourg Gardens, the Tuileries Garden, or the Jardin des Plantes to esc

In [18]:
# Tear down the resources created above: endpoint, endpoint config, then the model.
sess.delete_endpoint(endpoint_name)
# The endpoint config is looked up by endpoint_name as well, i.e. this relies on the
# config having been created under the same name as the endpoint.
sess.delete_endpoint_config(endpoint_name)
model.delete_model()  # removes the SageMaker model created for `model` at deploy time