In [None]:
# https://docs.aws.amazon.com/sagemaker/latest/dg/sagemaker-roles.html
import locale
def getpreferredencoding(do_setlocale=True):
    """Return ``"UTF-8"`` unconditionally.

    Has the same signature as ``locale.getpreferredencoding`` so it can be
    swapped in for it; ``do_setlocale`` is accepted but ignored.
    """
    forced_encoding = "UTF-8"
    return forced_encoding


# Monkeypatch the stdlib: every later call to locale.getpreferredencoding()
# now reports UTF-8.
# NOTE(review): presumably a workaround for hosted notebooks whose `!` shell
# commands refuse to run under a non-UTF-8 locale — confirm it is still needed.
locale.getpreferredencoding = getpreferredencoding

!pip install "boto3"--upgrade --quiet
!pip install "langchain" --upgrade --quiet
!pip install "sagemaker>=2.163.0" --upgrade --quiet

In [None]:
import json
import os

import boto3
import sagemaker
from kaggle_secrets import UserSecretsClient
from sagemaker.huggingface import HuggingFaceModel, get_huggingface_llm_image_uri

# Copy the AWS credentials and default region out of the Kaggle secret store
# into the process environment (the standard variables boto3 reads).
user_secrets = UserSecretsClient()
for env_var, secret_label in (
    ("AWS_ACCESS_KEY_ID", "aws_access_key_id"),
    ("AWS_SECRET_ACCESS_KEY", "aws_secret_access_key"),
    ("AWS_DEFAULT_REGION", "aws_default_region"),
):
    os.environ[env_var] = user_secrets.get_secret(secret_label)

# Resolve the IAM role ARN handed to SageMaker when the model is created.
try:
    role = sagemaker.get_execution_role()
except ValueError:
    # get_execution_role() raised ValueError, so look the role up by name
    # through the IAM API instead and report what was found.
    iam = boto3.client("iam")
    role_description = iam.get_role(RoleName="AmazonSageMakerFullAccess")
    role = role_description["Role"]["Arn"]
    print(f"iam: {iam}\nrole: {role}")

# Container image for the Hugging Face LLM backend, pinned to version 0.8.2.
llm_image_uri = get_huggingface_llm_image_uri("huggingface", version="0.8.2")

# Each list holds the available presets; the trailing index picks the active
# one (currently the first entry of each list).
gpu_count = [1, 4][0]
deploy_instance_type = ["ml.g5.2xlarge", "ml.g5.12xlarge"][0]

# Environment passed to the container: which model to load and the GPU count,
# JSON-encoded to a string.
container_env = {
    "HF_MODEL_ID": f"{PEFT_MODEL}-merged",
    "SM_NUM_GPUS": json.dumps(gpu_count),
}

huggingface_model = HuggingFaceModel(
    image_uri=llm_image_uri,
    env=container_env,
    role=role,
)

# Create the endpoint, named after the part of PEFT_MODEL that follows the
# first "/", and keep the returned predictor for inference.
predictor = huggingface_model.deploy(
    endpoint_name=f"{PEFT_MODEL.split('/')[1]}-merged-endpoint",
    instance_type=deploy_instance_type,
    initial_instance_count=1,
    container_startup_health_check_timeout=600,
)


In [None]:
import os

import boto3
from kaggle_secrets import UserSecretsClient
from langchain import FewShotPromptTemplate, PromptTemplate
from sagemaker import Session
from sagemaker.huggingface.model import HuggingFacePredictor

# Copy the AWS credentials and default region out of the Kaggle secret store
# into the process environment (the standard variables boto3 reads), so this
# cell can run without the deployment cell having been executed first.
user_secrets = UserSecretsClient()
for env_var, secret_label in (
    ("AWS_ACCESS_KEY_ID", "aws_access_key_id"),
    ("AWS_SECRET_ACCESS_KEY", "aws_secret_access_key"),
    ("AWS_DEFAULT_REGION", "aws_default_region"),
):
    os.environ[env_var] = user_secrets.get_secret(secret_label)

# Attach to the already-deployed endpoint instead of deploying again.
# NOTE(review): PEFT_MODEL is not defined in any cell visible here; it must
# already exist in the kernel before this cell runs — confirm where it is set.
sess = Session(boto_session=boto3.Session())
predictor = HuggingFacePredictor(
    # Must be the exact name given to `huggingface_model.deploy(...)`, which
    # creates "<model>-merged-endpoint"; the previous "<model>-endpoint" did
    # not match the endpoint this notebook deploys.
    endpoint_name=f"{PEFT_MODEL.split('/')[1]}-merged-endpoint",
    sagemaker_session=sess,
)

# Read the question. A leading "/" switches off the Cenitex persona; that
# slash is dropped from the text that goes into the prompt.
raw_question = input("question: ")
CENITEX = not raw_question.startswith("/")
question = raw_question if CENITEX else raw_question[1:]

# Instruction line placed at the top of the prompt.
if CENITEX:
    PREFIX = "You are Knowledge Base Chatbot for Cenitex. Your task is to provide accurate and detailed answers to the following question."
else:
    PREFIX = "Your task is to provide accurate and detailed answers to the following question."

# Few-shot examples per mode; both modes currently have none.
EXAMPLES = {False: [], True: []}[CENITEX]

# Layout of a single worked example inside the prompt.
template = "\nquestion: {question}\nanswer: {answer}"
example_prompt = PromptTemplate(
    template=template,
    input_variables=["question", "answer"],
)

prefix = PREFIX
# .strip() drops the leading newline and the trailing space, so the rendered
# prompt ends with "answer:".
suffix = "\nquestion: {question}\nanswer: ".strip()

# Assemble prefix + examples + suffix, joined by single newlines.
prompt_template = FewShotPromptTemplate(
    prefix=prefix,
    examples=EXAMPLES,
    example_prompt=example_prompt,
    suffix=suffix,
    example_separator="\n",
    input_variables=["question"],
)
prompt = prompt_template.format(question=question)
# Generation settings; each index picks the active value from its list of
# presets (currently 200 new tokens and temperature 0.1).
generation_parameters = {
    "max_new_tokens": [1, 50, 100, 200][-1],
    "temperature": [0.1, 0.7, 1.0][0],
    "top_p": 0.7,
}
response = predictor.predict({"inputs": prompt, "parameters": generation_parameters})
generated_text = response[0]["generated_text"]

# Keep only what follows the first "answer: " marker when the marker is
# present. If it is absent (e.g. the endpoint returned just the completion),
# keep the whole text: the previous `find(...) + len(...)` arithmetic turned
# the -1 "not found" result into 7 and silently cut the first 7 characters.
_, marker, after_marker = generated_text.partition("answer: ")
output = (after_marker if marker else generated_text).strip()

print(f"predictor: {predictor}")
print(prompt)
print(output)
