In [1]:
import boto3
from langchain.embeddings import BedrockEmbeddings
import ipywidgets as ipw
from IPython.display import display, clear_output
boto3_bedrock = boto3.client("bedrock-runtime")

br_embeddings = BedrockEmbeddings(model_id="amazon.titan-embed-g1-text-02", client=boto3_bedrock)

In [3]:
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

# s3_path = "s3://jumpstart-cache-prod-us-east-2/training-datasets/Amazon_SageMaker_FAQs/Amazon_SageMaker_FAQs.csv"
# !aws s3 cp $s3_path ./rag_data/Amazon_SageMaker_FAQs.csv

loader = CSVLoader("../data/sample.sql") # --- > 219 docs with 400 chars, each row consists in a question column and an answer column
documents_aws = loader.load() #
print(f"Number of documents={len(documents_aws)}")

docs = CharacterTextSplitter(chunk_size=500, chunk_overlap=0, separator=",").split_documents(documents_aws)

print(f"Number of documents after split and chunking={len(docs)}")

vectorstore_faiss_aws = FAISS.from_documents(
    documents=docs,
    embedding = br_embeddings
)

print(f"vectorstore_faiss_aws: number of elements in the index={vectorstore_faiss_aws.index.ntotal}::")

Number of documents=891
Number of documents after split and chunking=891
vectorstore_faiss_aws: number of elements in the index=891::


In [9]:
question = "How many survived"
v = br_embeddings.embed_query(question)
print(v[0:10])
results = vectorstore_faiss_aws.similarity_search_by_vector(v, k=4)
for r in results:
    print(r.page_content)
    print('----')

[-1.0625, -0.7421875, -0.25585938, -0.048095703, -0.37890625, 1.3515625, -0.0006904602, -0.0007019043, -0.064453125, 0.02722168]
Survived: 1
PassengerId: 371
Pclass: 1
Name: Harder, Mr. George Achilles
Sex: male
Age: 25
SibSp: 1
Parch: 0
Fare: 55.4417
Embarked: C
----
Survived: 1
PassengerId: 797
Pclass: 1
Name: Leader, Dr. Alice (Farnham)
Sex: female
Age: 49
SibSp: 0
Parch: 0
Fare: 25.9292
Embarked: S
----
Survived: 1
PassengerId: 633
Pclass: 1
Name: Stahelin-Maeglin, Dr. Max
Sex: male
Age: 32
SibSp: 0
Parch: 0
Fare: 30.5
Embarked: C
----
Survived: 1
PassengerId: 126
Pclass: 3
Name: Nicola-Yarred, Master. Elias
Sex: male
Age: 12
SibSp: 1
Parch: 0
Fare: 11.2417
Embarked: C
----


In [10]:
results

[Document(page_content='Survived: 1\nPassengerId: 371\nPclass: 1\nName: Harder, Mr. George Achilles\nSex: male\nAge: 25\nSibSp: 1\nParch: 0\nFare: 55.4417\nEmbarked: C', metadata={'source': '../data/titanic.csv', 'row': 370}),
 Document(page_content='Survived: 1\nPassengerId: 797\nPclass: 1\nName: Leader, Dr. Alice (Farnham)\nSex: female\nAge: 49\nSibSp: 0\nParch: 0\nFare: 25.9292\nEmbarked: S', metadata={'source': '../data/titanic.csv', 'row': 796}),
 Document(page_content='Survived: 1\nPassengerId: 633\nPclass: 1\nName: Stahelin-Maeglin, Dr. Max\nSex: male\nAge: 32\nSibSp: 0\nParch: 0\nFare: 30.5\nEmbarked: C', metadata={'source': '../data/titanic.csv', 'row': 632}),
 Document(page_content='Survived: 1\nPassengerId: 126\nPclass: 3\nName: Nicola-Yarred, Master. Elias\nSex: male\nAge: 12\nSibSp: 1\nParch: 0\nFare: 11.2417\nEmbarked: C', metadata={'source': '../data/titanic.csv', 'row': 125})]

In [12]:
import json

input = """

Human:  Answer the question based on the context provided
Context = {results}
Question: {question}

Assistant:
"""

body = json.dumps({
                    "prompt": input,
                    "max_tokens_to_sample":4096,
                    "temperature":0.5,
                    "top_k":250,
                    "top_p":0.5,
                    "stop_sequences": ["\n\nHuman:"]
                  }) 


modelId = 'anthropic.claude-instant-v1'
#modelId = 'anthropic.claude-v2' # change this to use a different version from the model provider
accept = 'application/json'
contentType = 'application/json'

response = boto3_bedrock.invoke_model(body=body, modelId=modelId, accept=accept, contentType=contentType)
response_body = json.loads(response.get('body').read())

print(response_body.get('completion'))

 I'm afraid I don't have enough context to answer the question. Could you please provide more details about the results and/or question?
