In [4]:
#load variables from previous notebook
%store -r
print(model_package_arn_domain)
print(model_package_arn_instruct)
print(model_tar_gz_s3_domain)
print(model_tar_gz_s3_instruct)
print(model_name)
print(model_id)

arn:aws:sagemaker:us-east-1:327216439222:model-package/gai-fine-tuned-domain-falcon-7b/1
arn:aws:sagemaker:us-east-1:327216439222:model-package/gai-fine-tuned-instruct-falcon-7b/1
s3://sagemaker-us-east-1-327216439222/huggingface-qlora-2023-07-26-12-50-47-2023-07-26-12-50-47-199/output/model.tar.gz
s3://sagemaker-us-east-1-327216439222/huggingface-qlora-2023-07-27-03-23-11-2023-07-27-03-23-11-897/output/model.tar.gz


In [6]:
#TODO remove
model_package_arn_domain = "arn:aws:sagemaker:us-east-1:327216439222:model-package/gai-fine-tuned-domain-falcon-7b/1"
model_package_arn_instruct = "arn:aws:sagemaker:us-east-1:327216439222:model-package/gai-fine-tuned-instruct-falcon-7b/1"
model_tar_gz_s3_domain = "s3://sagemaker-us-east-1-327216439222/huggingface-qlora-2023-07-26-12-50-47-2023-07-26-12-50-47-199/output/model.tar.gz"
model_tar_gz_s3_instruct = "s3://sagemaker-us-east-1-327216439222/huggingface-qlora-2023-07-27-03-23-11-2023-07-27-03-23-11-897/output/model.tar.gz"
model_name = "falcon-7b"
model_id = "tiiuae/falcon-7b"

In [2]:
!pip install huggingface_hub --upgrade --quiet
!pip install "transformers==4.30.2" "datasets[s3]==2.13.0" sagemaker --upgrade --quiet

If you are going to use Sagemaker in a local environment. You need access to an IAM Role with the required permissions for Sagemaker. You can find [here](https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html) more about it.



In [3]:
#required to work in local_mode on your notebook instance for development/debugging purpose
#!pip install 'sagemaker[local]' --upgrade --quiet
#!pip install docker-compose --quiet

In [7]:
import sagemaker
import boto3
import os

#uncomment to run in local mode
#from sagemaker import LocalSession
#sess = LocalSession()
#the below help setting up the container's root on the EBS volume of your instance.
#sess.config = {'local' : {'local_code' : True, 'container_root' : '/home/ec2-user/SageMaker/'}}
#if you're running local mode and run into out of space issues, consider running docker_scripts/prepare-docker.sh to set the docker root under /home/ec2-user/SageMaker

sess = sagemaker.Session()
region = sess.boto_region_name

#replace the below by a specific bucket if you need
sagemaker_session_bucket = sess.default_bucket()
role = sagemaker.get_execution_role()
s3_client = boto3.client("s3")
s3_prefix = "model-fine-tuning"

#local notebook path
notebook_home = "/home/ec2-user/SageMaker/"

print(f"sagemaker role arn: {role}")
print(f"sagemaker bucket: {sess.default_bucket()}")
print(f"sagemaker session region: {region}")

sagemaker role arn: arn:aws:iam::327216439222:role/Sagemaker
sagemaker bucket: sagemaker-us-east-1-327216439222
sagemaker session region: us-east-1


## Deploy the fined tuned model

In [8]:
import time
endpoint_name_domain = f'{model_name}-tuned-domain-{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}'
endpoint_name_instruct = f'{model_name}-tuned-instruct-{time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())}'

### from the registry

In [9]:
from sagemaker import ModelPackage
from time import gmtime, strftime

def deploy_from_registry(role, model_package_arn, sess, endpoint_name):
    model = ModelPackage(role=role, 
                         model_package_arn=model_package_arn, 
                         sagemaker_session=sess)
    model.deploy(initial_instance_count=1, instance_type='ml.g5.12xlarge', wait=False, endpoint_name=endpoint_name)

In [10]:
#domain
deploy_from_registry(role, model_package_arn_domain, sess, endpoint_name_domain)

In [11]:
#instruct
deploy_from_registry(role, model_package_arn_instruct, sess, endpoint_name_instruct)

### or directly with HuggingFaceModel sagemaker API

In [33]:
from sagemaker.huggingface import HuggingFaceModel

def deploy_with_huggingfacemodel(model_tar_gz_s3, role, endpoint_name):
    #URL: 763104351884.dkr.ecr.us-east-1.amazonaws.com/huggingface-pytorch-inference:2.0.0-transformers4.28.1-gpu-py310-cu118-ubuntu20.04
    
    # create Hugging Face Model Class
    huggingface_model = HuggingFaceModel(
       model_data=model_tar_gz_s3,
       role=role, 
       transformers_version="4.28.1", 
       pytorch_version="2.0.0", 
       py_version="py310",
       model_server_workers=4,
    )

    # deploy model to SageMaker Inference
    predictor_hf = huggingface_model.deploy(
        initial_instance_count=1,
        instance_type= "ml.g5.12xlarge",
        wait=False,
        endpoint_name=endpoint_name,
    )

    return predictor_hf.endpoint_name

Uncomment to use that way of deploying your model. by default it deploys from the registry.

In [35]:
#domain
deploy_with_huggingfacemodel(model_tar_gz_s3_domain, role, endpoint_name_domain)

'falcon-7b-tuned-domain-2023-07-31-00-56-09'

In [36]:
#instruct
deploy_with_huggingfacemodel(model_tar_gz_s3_instruct, role, endpoint_name_instruct)

'falcon-7b-tuned-instruct-2023-07-31-00-56-09'

## Deploy the original model for comparison with jumpstart APIs

In [12]:
from sagemaker.jumpstart.model import JumpStartModel

def deploy_with_jumpstart(jumpstart_model_id):
    model_id, model_version = jumpstart_model_id, "*"

    js_model = JumpStartModel(model_id=model_id, instance_type="ml.g5.12xlarge")
    predictor_js = js_model.deploy(wait=False)
    return predictor_js.endpoint_name

In [13]:
jumpstart_model_id = "huggingface-llm-falcon-7b-bf16"
original_model_endpoint_name = deploy_with_jumpstart(jumpstart_model_id)

### Querying the endpoints

In [21]:
import json
import time

def query_endpoint_with_json_payload(encoded_json, endpoint_name, content_type="application/json"):
    client = boto3.client("runtime.sagemaker")
    response = client.invoke_endpoint(
        EndpointName=endpoint_name, ContentType=content_type, Body=encoded_json
    )
    return response

#method used to parse the inference model's response. we pass it as part of the model's config
def parse_response_model(query_response):
    model_predictions = json.loads(query_response["Body"].read())
    return [gen["generated_text"] for gen in model_predictions]

def query_llm(payload, endpoint_name):
    start_time = time.time()
    query_response = query_endpoint_with_json_payload(json.dumps(payload).encode("utf-8"), endpoint_name=endpoint_name)
    print("--- query_llm -- %s seconds ---" % (time.time() - start_time))
    return parse_response_model(query_response)

In [22]:
prompt_template = f"Summarize the following text:\n{{text}}\n---\nSummary:\n"

In [23]:
text = """Ad sales boost Time Warner profit. Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier.The firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL.Time Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the company said AOL's underlying profit before exceptional items rose 8% on the back of stronger internet advertising revenues. It hopes to increase subscribers by offering the online service free to TimeWarner internet customers and will try to sign up AOL's existing customers for high-speed broadband. TimeWarner also has to restate 2000 and 2003 results following a probe by the US Securities Exchange Commission (SEC), which is close to concluding.Time Warner's fourth quarter profits were slightly better than analysts' expectations. But its film division saw profits slump 27% to $284m, helped by box-office flops Alexander and Catwoman, a sharp contrast to year-earlier, when the third and final film in the Lord of the Rings trilogy boosted results. For the full-year, TimeWarner posted a profit of $3.36bn, up 27% from its 2003 performance, while revenues grew 6.4% to $42.09bn. "Our financial performance was strong, meeting or exceeding all of our full-year objectives and greatly enhancing our flexibility," chairman and chief executive Richard Parsons said. For 2005, TimeWarner is projecting operating earnings growth of around 5%, and also expects higher revenue and wider profit margins. TimeWarner is to restate its accounts as part of efforts to resolve an inquiry into AOL by US market regulators. It has already offered to pay $300m to settle charges, in a deal that is under review by the SEC. The company said it was unable to estimate the amount it needed to set aside for legal reserves, which it previously set at $500m. It intends to adjust the way it accounts for a deal with German music publisher Bertelsmann's purchase of a stake in AOL Europe, which it had reported as advertising revenue. It will now book the sale of its stake in AOL Europe as a loss on the value of that stake."""

In [24]:
prompt = prompt_template.format(text=text)

In [37]:
payload = {
    "inputs": prompt,
    "parameters":{
        "do_sample": True,
        "top_p": 0.9,
        "temperature": 0.7,
        "max_new_tokens": 1024,
    }
}

In [38]:
print(prompt)

Summarize the following text:
Ad sales boost Time Warner profit. Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier.The firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL.Time Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the company said AOL's underlying profit before exceptional items rose 8% on the back of stronger internet advertising revenues. It hopes to increase subscribers by offering the online service free to TimeWarner internet customers a

In [39]:
print(f"original model's response\n:{query_llm(payload, original_model_endpoint_name)}")

--- query_llm -- 33.63211417198181 seconds ---
original model's response
:["Summary:\n- Time Warner's fourth quarter profits were slightly better than analysts' expectations. But its film division saw profits slump 27% to $284m, helped by box-office flops.\n- Time Warner said on Friday that it now owns 8% of search-engine Google.\n- AOL's underlying profit before exceptional items rose 8% on the back of stronger internet advertising revenues.\n- TimeWarner also has to restate 2000 and 2003 results following a probe by the US Securities Exchange Commission (SEC), which is close to concluding.\n- TimeWarner is projecting operating earnings growth of around 5%, and also expects higher revenue and wider profit margins.\n- TimeWarner is to restate its accounts as part of efforts to resolve an inquiry into AOL by US market regulators.\n- TimeWarner intends to adjust the way it accounts for a deal with German music publisher Bertelsmann's purchase of a stake in AOL Europe, which it had report

In [40]:
print(f"Domain tuned/adapted model's response\n:{query_llm(payload, endpoint_name_domain)}")

--- query_llm -- 35.98628759384155 seconds ---
Domain tuned/adapted model's response
:['Summarize the following text:\nAd sales boost Time Warner profit. Quarterly profits at US media giant TimeWarner jumped 76% to $1.13bn (£600m) for the three months to December, from $639m year-earlier.The firm, which is now one of the biggest investors in Google, benefited from sales of high-speed internet connections and higher advert sales. TimeWarner said fourth quarter sales rose 2% to $11.1bn from $10.9bn. Its profits were buoyed by one-off gains which offset a profit dip at Warner Bros, and less users for AOL.Time Warner said on Friday that it now owns 8% of search-engine Google. But its own internet business, AOL, had has mixed fortunes. It lost 464,000 subscribers in the fourth quarter profits were lower than in the preceding three quarters. However, the company said AOL\'s underlying profit before exceptional items rose 8% on the back of stronger internet advertising revenues. It hopes to i

In [42]:
print(f"Instruct tuned model's response\n:{query_llm(payload, endpoint_name_instruct)}")

ModelError: An error occurred (ModelError) when calling the InvokeEndpoint operation: Received server error (0) from primary with message "Your invocation timed out while waiting for a response from container primary. Review the latency metrics for each container in Amazon CloudWatch, resolve the issue, and try again.". See https://us-east-1.console.aws.amazon.com/cloudwatch/home?region=us-east-1#logEventViewer:group=/aws/sagemaker/Endpoints/falcon-7b-tuned-instruct-2023-07-31-00-56-09 in account 327216439222 for more information.

In [None]:
%store original_model_endpoint_name
%store endpoint_name_domain
%store endpoint_name_instruct