## In-context learning with FLAN-T5-XL 

#### Imports 

In [2]:
from sagemaker.predictor import Predictor
from sagemaker import get_execution_role
from sagemaker.model import Model
from sagemaker import script_uris
from sagemaker import image_uris 
from sagemaker import model_uris
import sagemaker
import logging
import boto3
import time
import json
import datetime

#### Setup essentials 

In [3]:
# Notebook-wide logger. It uses the 'sagemaker' logger name and is set to DEBUG,
# so everything logged under that name is echoed to the cell output.
logger = logging.getLogger('sagemaker')
logger.setLevel(logging.DEBUG)

# logging.getLogger returns the same logger object on every call, so re-running
# this cell would otherwise attach one more StreamHandler each time and every
# message would be printed once per attached handler. Attach it only once.
if not any(type(handler) is logging.StreamHandler for handler in logger.handlers):
    logger.addHandler(logging.StreamHandler())

In [4]:
# Record which SDK builds produced the outputs below.
sagemaker_version = sagemaker.__version__
logger.info(f'Using sagemaker=={sagemaker_version}')

boto3_version = boto3.__version__
logger.info(f'Using boto3=={boto3_version}')

Using sagemaker==2.143.0
Using boto3==1.26.103


In [25]:
# --- Model selection (passed to image_uris / model_uris lookups) ---
MODEL_ID = 'huggingface-text2text-flan-t5-xl'  # fixed for this notebook
MODEL_VERSION = '*'  # NOTE(review): presumably "any/latest" version - confirm
IMAGE_SCOPE = 'inference'

# --- Hosting footprint ---
INSTANCE_TYPE = 'ml.p3.2xlarge'
INSTANCE_COUNT = 1
EBS_VOLUME_SIZE = 256  # GB

# --- Deployment timeouts, in seconds (one hour each) ---
MODEL_DATA_DOWNLOAD_TIMEOUT = 60 * 60
CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT = 60 * 60

# --- Request format used for every endpoint invocation ---
CONTENT_TYPE = 'application/json'

# Runtime client for invoking the endpoint, and the role the model is created with.
client = boto3.client(service_name='sagemaker-runtime')
ROLE = get_execution_role()
logger.info(f'Role => {ROLE}')

Role => arn:aws:iam::106877348565:role/service-role/AmazonSageMaker-ExecutionRole-20230120T102711


In [6]:
# Build an endpoint name of the form genai-paris-<model id>-<YYYYmmddHHMMSS>;
# the timestamp is taken when this cell runs.
current_time = f'{datetime.datetime.now():%Y%m%d%H%M%S}'
endpoint_name = '-'.join(['genai-paris', MODEL_ID, current_time])
logger.info(f'Endpoint name: {endpoint_name}')

Endpoint name: genai-paris-huggingface-text2text-flan-t5-xl-20230403220849


#### I. Deploy FLAN-T5-XL out-of-the-box instruction-tuned model as a SageMaker endpoint

In [11]:
# 1) Look up the container image for this model id / version / instance type.
#    Region and framework are passed as None, exactly as before.
deploy_image_uri = image_uris.retrieve(
    model_id=MODEL_ID,
    model_version=MODEL_VERSION,
    image_scope=IMAGE_SCOPE,
    instance_type=INSTANCE_TYPE,
    region=None,
    framework=None,
)
logger.info(f'Deploy image URI => {deploy_image_uri}')

# 2) Look up the model artifact location for the same model id / version.
model_uri = model_uris.retrieve(
    model_scope=IMAGE_SCOPE,
    model_id=MODEL_ID,
    model_version=MODEL_VERSION,
)
logger.info(f'Model URI => {model_uri}')

# 3) Environment variables handed to the serving container (all values are strings).
env = {
    'SAGEMAKER_MODEL_SERVER_TIMEOUT': '3600',
    'MODEL_CACHE_ROOT': '/opt/ml/model',
    'SAGEMAKER_ENV': '1',
    'SAGEMAKER_SUBMIT_DIRECTORY': '/opt/ml/model/code/',
    'SAGEMAKER_PROGRAM': 'inference.py',
    'SAGEMAKER_MODEL_SERVER_WORKERS': '1',
    'TS_DEFAULT_WORKERS_PER_MODEL': '1',
}

# 4) Describe the model; the model reuses the endpoint name as its own name.
model = Model(
    name=endpoint_name,
    role=ROLE,
    image_uri=deploy_image_uri,
    model_data=model_uri,
    env=env,
    predictor_cls=Predictor,
)

Deploy image URI => 763104351884.dkr.ecr.eu-central-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04
Model URI => s3://jumpstart-cache-prod-eu-central-1/huggingface-infer/prepack/v1.0.3/infer-prepack-huggingface-text2text-flan-t5-xl.tar.gz


In [12]:
%%time

_ = model.deploy(
    initial_instance_count=INSTANCE_COUNT, 
    instance_type=INSTANCE_TYPE, 
    endpoint_name=endpoint_name, 
    volume_size=EBS_VOLUME_SIZE, 
    model_data_download_timeout=MODEL_DATA_DOWNLOAD_TIMEOUT, 
    container_startup_health_check_timeout=CONTAINER_STARTUP_HEALTH_CHECK_TIMEOUT
)

Creating model with name: genai-paris-huggingface-text2text-flan-t5-xl-20230403212649
CreateModel request: {
    "ModelName": "genai-paris-huggingface-text2text-flan-t5-xl-20230403212649",
    "ExecutionRoleArn": "arn:aws:iam::106877348565:role/service-role/AmazonSageMaker-ExecutionRole-20230120T102711",
    "PrimaryContainer": {
        "Image": "763104351884.dkr.ecr.eu-central-1.amazonaws.com/huggingface-pytorch-inference:1.10.2-transformers4.17.0-gpu-py38-cu113-ubuntu20.04",
        "Environment": {
            "SAGEMAKER_MODEL_SERVER_TIMEOUT": "3600",
            "MODEL_CACHE_ROOT": "/opt/ml/model",
            "SAGEMAKER_ENV": "1",
            "SAGEMAKER_SUBMIT_DIRECTORY": "/opt/ml/model/code/",
            "SAGEMAKER_PROGRAM": "inference.py",
            "SAGEMAKER_MODEL_SERVER_WORKERS": "1",
            "TS_DEFAULT_WORKERS_PER_MODEL": "1"
        },
        "ModelDataUrl": "s3://jumpstart-cache-prod-eu-central-1/huggingface-infer/prepack/v1.0.3/infer-prepack-huggingface-text2tex

------------!CPU times: user 233 ms, sys: 18 ms, total: 251 ms
Wall time: 6min 33s


#### II. Invoke the SageMaker endpoint to test the deployed model for natural language understanding (NLU) and natural language generation (NLG) tasks

In [22]:
# Sample customer-support transcript shared by every task below: each task cell
# appends its own query to this text to form the prompt sent to the endpoint.
# The newline right after the opening quotes and the one before the closing
# quotes are part of the string.
context = """
Customer: Hi there, I'm having a problem with my iPhone.
Agent: Hi! I'm sorry to hear that. What's happening?
Customer: The phone is not charging properly, and the battery seems to be draining very quickly. I've tried different charging cables and power adapters, but the issue persists.
Agent: Hmm, that's not good. Let's try some troubleshooting steps. Can you go to Settings, then Battery, and see if there are any apps that are using up a lot of battery life?
Customer: Yes, there are some apps that are using up a lot of battery.
Agent: Okay, try force quitting those apps by swiping up from the bottom of the screen and then swiping up on the app to close it.
Customer: I did that, but the issue is still there.
Agent: Alright, let's try resetting your iPhone's settings to their default values. This won't delete any of your data. Go to Settings, then General, then Reset, and then choose Reset All Settings.
Customer: Okay, I did that. What's next?
Agent: Now, let's try restarting your iPhone. Press and hold the power button until you see the "slide to power off" option. Slide to power off, wait a few seconds, and then turn your iPhone back on.
Customer: Alright, I restarted it, but it's still not charging properly.
Agent: I see. It looks like we need to run a diagnostic test on your iPhone. Please visit the nearest Apple Store or authorized service provider to get your iPhone checked out.
Customer: Do I need to make an appointment?
Agent: Yes, it's always best to make an appointment beforehand so you don't have to wait in line. You can make an appointment online or by calling the Apple Store or authorized service provider.
Customer: Okay, will I have to pay for the repairs?
Agent: That depends on whether your iPhone is covered under warranty or not. If it is, you won't have to pay anything. However, if it's not covered under warranty, you will have to pay for the repairs.
Customer: How long will it take to get my iPhone back?
Agent: It depends on the severity of the issue, but it usually takes 1-2 business days.
Customer: Can I track the repair status online?
Agent: Yes, you can track the repair status online or by calling the Apple Store or authorized service provider.
Customer: Alright, thanks for your help.
Agent: No problem, happy to help. Is there anything else I can assist you with?
Customer: No, that's all for now.
Agent: Alright, have a great day and good luck with your iPhone!
"""

##### Generation configuration and endpoint lookup

In [24]:
# Decoding parameters sent with every request in the task cells below
# (as 'max_length', 'num_return_sequences', 'top_k', 'top_p', 'do_sample').
MAX_LENGTH = 256
NUM_RETURN_SEQUENCES = 1
TOP_K = 0  # NOTE(review): presumably disables top-k filtering - confirm for this container
TOP_P = 0.7
DO_SAMPLE = True

# Reuse an endpoint that is already in service instead of the name generated above.
# Listing endpoints is a control-plane operation, so it must go through the
# 'sagemaker' client; the 'sagemaker-runtime' client bound to `client` only
# invokes endpoints and has no list_endpoints operation.
sm = boto3.client('sagemaker')

# NameContains filters server-side, so a match is not lost behind the first
# page of unrelated endpoints.
r = sm.list_endpoints(StatusEquals='InService', NameContains='flan-t5')
flan_t5_endpoints = [
    ep['EndpointName'] for ep in r['Endpoints'] if 'flan-t5' in ep['EndpointName']
]

# Fail with an actionable message rather than an IndexError on an empty list.
if not flan_t5_endpoints:
    raise RuntimeError(
        "No InService endpoint with 'flan-t5' in its name was found; "
        "deploy the model (section I) and wait for it to come up first."
    )
endpoint_name = flan_t5_endpoints[0]

#### A. Text Summarization 

In [26]:
# Task instruction; it is placed after the transcript, separated by a newline.
query = 'write a summary'
prompt = f'{context}\n{query}'

# Build and serialize the request in one expression; `payload` is UTF-8 JSON bytes.
payload = json.dumps({
    'text_inputs': prompt,
    'max_length': MAX_LENGTH,
    'num_return_sequences': NUM_RETURN_SEQUENCES,
    'top_k': TOP_K,
    'top_p': TOP_P,
    'do_sample': DO_SAMPLE,
}).encode('utf-8')

response = client.invoke_endpoint(
    Body=payload,
    ContentType=CONTENT_TYPE,
    EndpointName=endpoint_name,
)

# Decode the JSON response body and keep the first generated sequence.
model_predictions = json.load(response['Body'])
generated_text = model_predictions['generated_texts'][0]
logger.info(f'Response: {generated_text}')

Response: Customer's iPhone is not charging properly, and the battery is draining very quickly. Agent: Can you go to Settings, then Battery, and see if there are any apps that are using up a lot of battery? Force quit apps that are using up a lot of battery. Reset your iPhone's settings to their default values. Restart your iPhone. Contact Apple Store or authorized service provider. Make an appointment to get your iPhone checked out.


#### B. Abstractive Question Answering 

##### Q1

In [27]:
# Q1: ask for the troubleshooting steps mentioned in the transcript.
query = 'What troubleshooting steps were suggested to the customer to fix their iPhone charging issue?'
prompt = f'{context}\n{query}'

# Build and serialize the request in one expression; `payload` is UTF-8 JSON bytes.
payload = json.dumps({
    'text_inputs': prompt,
    'max_length': MAX_LENGTH,
    'num_return_sequences': NUM_RETURN_SEQUENCES,
    'top_k': TOP_K,
    'top_p': TOP_P,
    'do_sample': DO_SAMPLE,
}).encode('utf-8')

response = client.invoke_endpoint(
    Body=payload,
    ContentType=CONTENT_TYPE,
    EndpointName=endpoint_name,
)

# Decode the JSON response body and keep the first generated sequence.
model_predictions = json.load(response['Body'])
generated_text = model_predictions['generated_texts'][0]
logger.info(f'Response: {generated_text}')

Response: Force quit apps using a lot of battery. Reset your iPhone's settings to default. Restart your iPhone. Get your iPhone checked out.


##### Q2

In [28]:
# Q2: a yes/no question whose answer has to be inferred from the dialogue.
query = 'Was resetting the iPhone to its default settings able to solve the charging issue and battery drain problem?'
prompt = f'{context}\n{query}'

# Build and serialize the request in one expression; `payload` is UTF-8 JSON bytes.
payload = json.dumps({
    'text_inputs': prompt,
    'max_length': MAX_LENGTH,
    'num_return_sequences': NUM_RETURN_SEQUENCES,
    'top_k': TOP_K,
    'top_p': TOP_P,
    'do_sample': DO_SAMPLE,
}).encode('utf-8')

response = client.invoke_endpoint(
    Body=payload,
    ContentType=CONTENT_TYPE,
    EndpointName=endpoint_name,
)

# Decode the JSON response body and keep the first generated sequence.
model_predictions = json.load(response['Body'])
generated_text = model_predictions['generated_texts'][0]
logger.info(f'Response: {generated_text}')

Response: No, the problem persists.


##### Q3

In [29]:
# Q3: ask how the customer can book a repair appointment.
query = 'What steps can the customer take to make an appointment at the nearest Apple Store or authorized service provider for iPhone repair?'
prompt = f'{context}\n{query}'

# Build and serialize the request in one expression; `payload` is UTF-8 JSON bytes.
payload = json.dumps({
    'text_inputs': prompt,
    'max_length': MAX_LENGTH,
    'num_return_sequences': NUM_RETURN_SEQUENCES,
    'top_k': TOP_K,
    'top_p': TOP_P,
    'do_sample': DO_SAMPLE,
}).encode('utf-8')

response = client.invoke_endpoint(
    Body=payload,
    ContentType=CONTENT_TYPE,
    EndpointName=endpoint_name,
)

# Decode the JSON response body and keep the first generated sequence.
model_predictions = json.load(response['Body'])
generated_text = model_predictions['generated_texts'][0]
logger.info(f'Response: {generated_text}')

Response: Customer should make an appointment online or by calling the Apple Store or authorized service provider.


#### C. Sentiment Analysis

In [34]:
# Ask the model to label the sentiment of the whole conversation.
query = 'What is the overall sentiment and sentiment score of the conversation between the customer and the agent'
prompt = f'{context}\n{query}'

# Build and serialize the request in one expression; `payload` is UTF-8 JSON bytes.
payload = json.dumps({
    'text_inputs': prompt,
    'max_length': MAX_LENGTH,
    'num_return_sequences': NUM_RETURN_SEQUENCES,
    'top_k': TOP_K,
    'top_p': TOP_P,
    'do_sample': DO_SAMPLE,
}).encode('utf-8')

response = client.invoke_endpoint(
    Body=payload,
    ContentType=CONTENT_TYPE,
    EndpointName=endpoint_name,
)

# Keep the first generated sequence under the name `sentiment`; the phrase
# extraction cell interpolates it into its own query.
model_predictions = json.load(response['Body'])
sentiment = model_predictions['generated_texts'][0]
logger.info(f'Response: {sentiment}')

Response: positive


#### D. Sentiment Phrase Extraction

In [37]:
# Follow-up query built from `sentiment`, which the sentiment analysis cell
# must have set before this cell runs.
query = f'identify any specific words, phrases, or context that influenced the {sentiment} sentiment'
prompt = f'{context}\n{query}'

# Build and serialize the request in one expression; `payload` is UTF-8 JSON bytes.
payload = json.dumps({
    'text_inputs': prompt,
    'max_length': MAX_LENGTH,
    'num_return_sequences': NUM_RETURN_SEQUENCES,
    'top_k': TOP_K,
    'top_p': TOP_P,
    'do_sample': DO_SAMPLE,
}).encode('utf-8')

response = client.invoke_endpoint(
    Body=payload,
    ContentType=CONTENT_TYPE,
    EndpointName=endpoint_name,
)

# Decode the JSON response body and keep the first generated sequence.
model_predictions = json.load(response['Body'])
generated_text = model_predictions['generated_texts'][0]
logger.info(f'Response: {generated_text}')

Response: agent
