In [1]:
from transformers import AutoTokenizer,AutoModelForSeq2SeqLM
from transformers import pipeline
import torch
import base64

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Hugging Face Hub id of the checkpoint used for both the local run and the deployment.
checkpoint = 'MBZUAI/LaMini-T5-738M'

In [4]:
# Load the tokenizer that matches the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Load the seq2seq weights, explicitly requesting float32 tensors.
base_model = AutoModelForSeq2SeqLM.from_pretrained(
    checkpoint,
    torch_dtype=torch.float32,
)

In [None]:
from langchain.llms import HuggingFacePipeline

In [None]:
def llm_pipeline():
    """Build a LangChain LLM backed by the locally loaded model.

    Uses the notebook-level ``base_model`` and ``tokenizer`` to create a
    ``text2text-generation`` pipeline with sampling enabled, then wraps it in
    ``HuggingFacePipeline``.

    Returns:
        The ``HuggingFacePipeline`` wrapper around the generation pipeline.
    """
    # Generation settings are kept together so they are easy to tune.
    generation_settings = dict(
        max_length=256,
        do_sample=True,
        temperature=0.3,
        top_p=0.95,
    )
    text2text = pipeline(
        'text2text-generation',
        model=base_model,
        tokenizer=tokenizer,
        **generation_settings,
    )
    return HuggingFacePipeline(pipeline=text2text)

In [None]:
# Local smoke test: build the wrapped pipeline, then run a single prompt through it.
model = llm_pipeline()
input_prompt = 'Write an article on Artificial Intelligence'
generated_text = model(input_prompt)

# Last expression of the cell, so the notebook renders the generated text.
generated_text

In [None]:
import json
import sagemaker
import boto3
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Resolve the IAM role ARN that is passed to the SageMaker model below.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError, so look the role up by name via IAM instead.
    iam = boto3.client('iam')
    role_description = iam.get_role(RoleName='sagemaker_execution_role')
    role = role_description['Role']['Arn']

In [None]:
from sagemaker.huggingface.model import HuggingFaceModel

# Container environment: which Hub model to serve and for which task.
hub = {
    'HF_MODEL_ID':checkpoint, # model id from hf.co/models
    'HF_TASK': 'text2text-generation',  # NLP task you want to use for predictions
    # NOTE(review): 'torch_dtype' does not look like a variable the serving
    # container reads -- confirm it has any effect or drop it.
    'torch_dtype':'torch.float32'
}

# Create huggingface model class
# NOTE(review): an explicit image_uri is passed, so the three *_version
# arguments are presumably ignored by the SDK -- confirm before relying on them.
huggingface_model = HuggingFaceModel(
    env=hub,
    role=role,
    transformers_version='4.26',
    pytorch_version='1.13',
    py_version='py310',
    image_uri=get_huggingface_llm_image_uri('huggingface', version='0.8.2')
)

# Deploy model to Sagemaker Inference
# Fixed: the keyword is `instance_type` (not `isinstance_type`) and SageMaker
# instance names start with `ml.` (not `m1.`).
# NOTE(review): the LLM (TGI) image is normally run on GPU instances -- confirm
# that a CPU `ml.m5.xlarge` is sufficient for this checkpoint.
predictor = huggingface_model.deploy(
    initial_instance_count=1,
    instance_type='ml.m5.xlarge',
    container_startup_health_check_timeout=300
)

# example request: you always need to define 'inputs'
# A text2text-generation endpoint takes a single string, so the question and
# its context are combined into one prompt instead of a question/context dict.
data = {
    'inputs': (
        'question: What I used to teach? '
        'context: My Name is kK and I live in Bangalore. I used to teach data science.'
    )
}
#request
predictor.predict(data)

In [None]:
# Define the prompt that is sent to the deployed endpoint.
prompt = '''
You are helpful assistant, called Falcon. Knowing everything about AWS.
User: Can you tell me something about Amazon SageMaker?
Falcon:
'''
# Generation hyperparameters for the LLM.
payload = {
    'inputs': prompt,
    'parameters': {
        'do_sample': True,
        'top_p': 0.9,
        'temperature': 0.8,
        'max_new_tokens': 1024,
        'repetition_penalty': 1.03,
        # NOTE(review): the third stop sequence was an empty string, which is
        # not a usable stop marker. It is presumably '</s>' that was stripped
        # as an HTML tag during export -- confirm.
        'stop': ['\nUser:', '<|endoftext|>', '</s>']
    }
}
# Send the request through the predictor returned by deploy(); requests are
# made on the predictor, not on the HuggingFaceModel object.
response = predictor.predict(payload)

for seq in response:
    # Double-quoted f-string: reusing single quotes inside the replacement
    # field is a SyntaxError before Python 3.12.
    print(f"Result: {seq['generated_text']}")

In [None]:
# Name of the already-deployed SageMaker endpoint that the cells below invoke.
endpoint = 'huggingface-python-tgi-inference-2023-07-01-14-10-51-753'
import boto3

In [None]:
# Low-level runtime client used to call the endpoint without the SageMaker SDK.
runtime = boto3.client('runtime.sagemaker')

# Send the JSON-encoded payload to the endpoint and show the raw response.
response = runtime.invoke_endpoint(
    EndpointName=endpoint,
    ContentType='application/json',
    Body=json.dumps(payload),
)
print(response)

In [None]:
# Read the response body, decode it as UTF-8 and parse the JSON document.
raw_body = response['Body'].read()
prediction = json.loads(raw_body.decode('utf-8'))

# Only the last expression of a cell is rendered, so this line displays nothing.
prediction
prediction[0]['generated_text']

In [None]:
 ## Lambda function

import json
import boto3

# Name of the SageMaker endpoint that the Lambda function forwards requests to.
ENDPOINT = 'huggingface-python-tgi-inference-2023-07-01-14-10-51-753'
# Created once at import time so the handler can use it on every invocation.
# Fixed typo: `boto3.clinet` raised AttributeError when the module was loaded.
runtime = boto3.client('runtime.sagemaker')

def lambda_handler(event, context):
    """Forward the ``query`` query-string parameter to the SageMaker endpoint.

    Args:
        event: Invocation event (a dict). The prompt is read from
            ``event['queryStringParameters']['query']``.
        context: Lambda context object; not used.

    Returns:
        A dict with ``statusCode`` and a JSON-encoded ``body``: 200 with the
        generated text, or 400 when no ``query`` parameter was supplied.
    """
    # queryStringParameters may be missing or None when the request URL has no
    # query string; treat both as "no parameters" instead of crashing.
    query_params = event.get('queryStringParameters') or {}
    query = query_params.get('query')
    if not query:
        return {
            'statusCode': 400,
            'body': json.dumps({'error': "Missing required query parameter 'query'"})
        }

    payload = {
        'inputs': query,
        'parameters': {
            'do_sample': True,
            'top_p': 0.9,
            'temperature': 0.8,
            'max_new_tokens': 1024,
            'repetition_penalty': 1.03
        }
    }

    # Use the module-level ENDPOINT constant; the lowercase `endpoint` name is
    # not defined in the Lambda module and raised NameError.
    response = runtime.invoke_endpoint(
        EndpointName=ENDPOINT,
        ContentType='application/json',
        Body=json.dumps(payload)
    )
    prediction = json.loads(response['Body'].read().decode('utf-8'))

    final_result = prediction[0]['generated_text']

    # The API Gateway proxy integration expects the camel-case key `statusCode`.
    return {
        'statusCode': 200,
        'body': json.dumps(final_result)
    }