In [1]:
!pip install -U sagemaker
!pip install -U -r requirements.txt

In [2]:
# List the installed versions of the packages whose names match the pattern below.
!pip freeze | grep -E "sagemaker|boto3|haystack|opensearch|transformers|torch"

boto3 @ file:///home/conda/feedstock_root/build_artifacts/boto3_1683763173043/work
farm-haystack==1.21.0
opensearch-py==2.3.1
sagemaker==2.188.0
sagemaker-experiments==0.1.43
sagemaker-pytorch-training==2.8.0
sagemaker-training==4.5.0
sentence-transformers==2.2.2
smdebug @ file:///tmp/sagemaker-debugger
torch==2.0.0
torchaudio==2.0.1
torchdata @ file:///opt/conda/conda-bld/torchdata_1679615656247/work
torchtext==0.15.1
torchvision==0.15.1
transformers==4.32.1


In [3]:
import boto3

# Region configured for a default boto3 session; reused by later cells when
# creating AWS clients.
# NOTE(review): presumably this can be None when no region is configured — confirm.
AWS_REGION_NAME = boto3.session.Session().region_name
AWS_REGION_NAME

In [4]:
import boto3
import json


def get_opensearch_endpoint(stack_name: str, region_name: str = 'us-east-1'):
    """Return the OpenSearch domain endpoint published by a CloudFormation stack.

    Looks through the outputs of ``stack_name`` for the one exported under the
    name ``OpenSearchDomainEndpoint`` and returns its value.

    Args:
        stack_name: Name of the CloudFormation stack to inspect.
        region_name: AWS region used for the CloudFormation client.

    Returns:
        The ``OutputValue`` of the matching stack output.

    Raises:
        IndexError: If the stack has no output exported as
            ``OpenSearchDomainEndpoint`` (same exception type the previous
            list-indexing implementation raised, now with a clear message).
    """
    export_name = 'OpenSearchDomainEndpoint'
    cf_client = boto3.client('cloudformation', region_name=region_name)
    response = cf_client.describe_stacks(StackName=stack_name)
    # A stack without any outputs may omit the "Outputs" key entirely.
    outputs = response["Stacks"][0].get("Outputs", [])

    for output in outputs:
        # Outputs that are not exported have no 'ExportName' key, so use .get()
        # instead of indexing to avoid a KeyError on unrelated outputs.
        if output.get('ExportName') == export_name:
            return output['OutputValue']

    raise IndexError(
        f"Stack {stack_name!r} has no output exported as {export_name!r}"
    )



def get_secret_name(stack_name: str, region_name: str = 'us-east-1'):
    """Return the Secrets Manager secret id published by a CloudFormation stack.

    Looks through the outputs of ``stack_name`` for the one exported under the
    name ``MasterUserSecretId`` and returns its value.

    Args:
        stack_name: Name of the CloudFormation stack to inspect.
        region_name: AWS region used for the CloudFormation client.

    Returns:
        The ``OutputValue`` of the matching stack output.

    Raises:
        IndexError: If the stack has no output exported as
            ``MasterUserSecretId`` (same exception type the previous
            list-indexing implementation raised, now with a clear message).
    """
    export_name = 'MasterUserSecretId'
    cf_client = boto3.client('cloudformation', region_name=region_name)
    response = cf_client.describe_stacks(StackName=stack_name)
    # A stack without any outputs may omit the "Outputs" key entirely.
    outputs = response["Stacks"][0].get("Outputs", [])

    for output in outputs:
        # Outputs that are not exported have no 'ExportName' key, so use .get()
        # instead of indexing to avoid a KeyError on unrelated outputs.
        if output.get('ExportName') == export_name:
            return output['OutputValue']

    raise IndexError(
        f"Stack {stack_name!r} has no output exported as {export_name!r}"
    )


def get_secret(secret_name: str, region_name: str = 'us-east-1'):
    """Fetch a secret from AWS Secrets Manager and decode it as JSON.

    Args:
        secret_name: Identifier passed as ``SecretId`` to Secrets Manager.
        region_name: AWS region used for the Secrets Manager client.

    Returns:
        The parsed JSON content of the response's ``SecretString`` field.
    """
    secrets_manager = boto3.client('secretsmanager', region_name=region_name)
    secret_value = secrets_manager.get_secret_value(SecretId=secret_name)
    # The secret body is stored as a JSON document in the string field.
    return json.loads(secret_value['SecretString'])

In [5]:
# Resolve the secret and the OpenSearch endpoint published by the CloudFormation stack.
stack_name = 'RAGHaystackOpenSearchStack'
secret_name = get_secret_name(stack_name, region_name=AWS_REGION_NAME)
secret = get_secret(secret_name, region_name=AWS_REGION_NAME)
# Show only which fields the secret contains; never render credential values
# (username/password) into the notebook output, where they would be saved and shared.
display(sorted(secret))

opensearch_endpoint = get_opensearch_endpoint(stack_name, region_name=AWS_REGION_NAME)
display(opensearch_endpoint)

In [6]:
# Connection settings for the OpenSearch domain, taken from the endpoint and
# the secret looked up in the previous cell.
OPENSEARCH_HOST = opensearch_endpoint
OPENSEARCH_PORT = 443  # standard HTTPS port
OPENSEARCH_USERNAME = secret['username']
OPENSEARCH_PASSWORD = secret['password']

In [7]:
# Name of the SageMaker endpoint queried for text generation by later cells.
# NOTE(review): presumably this endpoint must already be deployed — confirm.
SAGEMAKER_MODEL_ENDPOINT = 'llama-2-7b'
SAGEMAKER_MODEL_ENDPOINT

In [8]:
import warnings
# Suppress all Python warnings from this point on.
warnings.filterwarnings("ignore")  # avoid printing out absolute paths

import urllib3
# Also silence urllib3's own warnings.
# NOTE(review): this hides any TLS-related warnings as well — confirm that
# certificate verification is configured as intended for this connection.
urllib3.disable_warnings()

from haystack.document_stores import OpenSearchDocumentStore

# Document store backed by the OpenSearch domain configured above.
# NOTE(review): embedding_dim=384 presumably has to match the output size of the
# embedding model used by the retriever — confirm.
doc_store = OpenSearchDocumentStore(host=OPENSEARCH_HOST,
                                    port=OPENSEARCH_PORT,
                                    username=OPENSEARCH_USERNAME,
                                    password=OPENSEARCH_PASSWORD,
                                    embedding_dim=384)

In [9]:
# Sample request body for the text-generation endpoint: the prompt text plus
# the generation parameters sent along with it.
payload = dict(
    inputs=(
        "Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. "
        "Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.\n"
        "Daniel: Hello, Girafatron!\n"
        "Girafatron:"
    ),
    parameters=dict(
        max_new_tokens=50,
        return_full_text=False,
        do_sample=True,
        top_k=10,
    ),
)

# Formatting helpers used when printing the model output
# (ANSI escape sequences switch bold text on and off).
newline = '\n'
bold = '\033[1m'
unbold = '\033[0m'
endpoint_name = SAGEMAKER_MODEL_ENDPOINT


def query_endpoint(payload, endpont_name=endpoint_name, region_name='us-east-1'):
    """Send ``payload`` to a SageMaker endpoint and print the generated text.

    Args:
        payload: JSON-serialisable request; its ``'inputs'`` entry is echoed
            back in the printed report.
        endpont_name: Name of the SageMaker endpoint to invoke (the spelling is
            part of the existing signature). Defaults to the value of
            ``endpoint_name`` at definition time.
        region_name: AWS region used for the SageMaker runtime client.

    The response body is decoded as JSON and the ``'generation'`` field of its
    first element is printed, in bold, after the input text. Nothing is returned.
    """
    runtime = boto3.client("sagemaker-runtime", region_name=region_name)
    request_body = json.dumps(payload).encode('utf-8')
    raw_response = runtime.invoke_endpoint(
        EndpointName=endpont_name,
        ContentType="application/json",
        Body=request_body,
        CustomAttributes="accept_eula=true", # eula: End User Licence Agreement
    )
    predictions = json.loads(raw_response['Body'].read().decode("utf8"))
    generated_text = predictions[0]['generation']
    report = (
        f"Input Text: {payload['inputs']}{newline}"
        f"Generated Text: {bold}{generated_text}{unbold}{newline}"
    )
    print(report)

# Invoke the endpoint in the same region the rest of the notebook uses instead
# of relying on the function's hard-coded 'us-east-1' default.
query_endpoint(payload, region_name=AWS_REGION_NAME)

Input Text: Girafatron is obsessed with giraffes, the most glorious animal on the face of this Earth. Giraftron believes all other animals are irrelevant when compared to the glorious majesty of the giraffe.
Daniel: Hello, Girafatron!
Girafatron:
Generated Text: [1m Hello! Thank you for visiting my page!
Daniel: So, what are your favorite giraffe facts?
Girafatron: The giraffe has the highest blood pressure.
Daniel: That's an excellent fact![0m



In [10]:
# Print the version of the AWS CLI available in this environment.
!aws --version

aws-cli/1.27.132 Python/3.10.8 Linux/4.14.322-244.539.amzn2.x86_64 botocore/1.29.132


In [11]:
# Show the AWS CLI configuration in effect (profile, credential source, region).
# NOTE(review): the output includes partially masked access keys — consider
# clearing it before sharing the notebook.
!aws configure list

      Name                    Value             Type    Location
      ----                    -----             ----    --------
   profile                <not set>             None    None
access_key     ****************CMMA   container-role    
secret_key     ****************pGUv   container-role    
    region                us-east-1              env    AWS_DEFAULT_REGION


In [12]:
from haystack.nodes import (
    AnswerParser,
    EmbeddingRetriever,
    PromptNode,
    PromptTemplate
)

# The PromptNode talks to the SageMaker endpoint named above.
model_name_or_path = SAGEMAKER_MODEL_ENDPOINT
model_kwargs = {
  # Use the region detected at the top of the notebook rather than a hard-coded
  # one, so this cell stays consistent with the other AWS calls.
  "aws_region_name": AWS_REGION_NAME,
  "aws_custom_attributes": {"accept_eula": "true"}
}

# Prompt that asks the model to answer strictly from the retrieved documents.
# NOTE(review): the AnswerParser looks for "Document[n]" references, but the
# prompt joins the documents without such labels — confirm this is intended.
question_answering = PromptTemplate(prompt="Given the context please answer the question. If the answer is not contained within the context below, say 'I don't know'.\n"
                                            "Context: {join(documents)};\n Question: {query};\n Answer: ",
                                    output_parser=AnswerParser(reference_pattern=r"Document\[(\d+)\]"))

gen_qa_with_references = PromptNode(default_prompt_template=question_answering,
                                    model_name_or_path=model_name_or_path,
                                    model_kwargs=model_kwargs)

In [13]:
# Embedding-based retriever over the OpenSearch document store; runs on CPU and
# returns 5 documents by default (top_k can be overridden per query).
retriever = EmbeddingRetriever(document_store=doc_store,
                               embedding_model="sentence-transformers/all-MiniLM-L12-v2",
                               devices=["cpu"],
                               top_k=5)

In [14]:
from haystack import Pipeline

# Two-stage pipeline: the query goes to the retriever, whose output feeds the
# prompt node.
pipe = Pipeline()
pipe.add_node(component=retriever, name='Retriever', inputs=['Query'])
pipe.add_node(component=gen_qa_with_references, name='GenQAWithRefPromptNode', inputs=['Retriever'])

In [15]:
%%time
from haystack.utils import print_answers

# Run the pipeline for a sample question, overriding the retriever's top_k
# so that 3 documents are requested for this query.
result = pipe.run("What is the opensearch?",
                 params={"Retriever": {"top_k": 3}})

In [16]:
# Print the answers from the pipeline run at the "minimum" detail level.
print_answers(results=result, details="minimum")

In [17]:
from haystack.pipelines import ExtractiveQAPipeline
from haystack.utils import print_answers

# Same question through Haystack's ready-made ExtractiveQAPipeline.
# NOTE(review): `gen_qa_with_references` is the PromptNode built earlier, passed
# here as the `reader` — confirm that ExtractiveQAPipeline is meant to be used
# with a generative node rather than an extractive reader.
p_extractive_premade = ExtractiveQAPipeline(reader=gen_qa_with_references, retriever=retriever)
res = p_extractive_premade.run(
    query="What is the opensearch?", params={"Retriever": {"top_k": 3}}
)
print_answers(res, details="minimum")