In [None]:
pip install sentence_transformers

The embeddings are created and stored locally for later retrieval. This allows us to generate the embeddings once and reuse them without having to regenerate them.

Now that the embeddings have been generated and stored locally, we can run inference on the model using retrieval-augmented generation (RAG). In this process, we first perform a similarity search on the FAISS index to retrieve context for the query, and then submit the query together with the retrieved context to the machine learning model to answer the query.

In [1]:
# Directory holding the locally saved FAISS index that is loaded further down.
FAISS_INDEX_DIR = "faiss_index"

In [2]:
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings

# Embedding model created with the library defaults.
# NOTE(review): presumably this must be the same model that was used when the
# index was built — confirm against the indexing notebook.
embeddings = HuggingFaceEmbeddings()

# Load the previously saved FAISS index from disk instead of re-embedding.
# NOTE(review): loading a saved index is understood to unpickle stored data —
# only point this at index directories you trust.
new_db = FAISS.load_local(folder_path=FAISS_INDEX_DIR, embeddings=embeddings)

In [9]:
# User question that is sent through the RAG pipeline (trailing space preserved).
# Alternative example question: "what is the concept for tokens"
query = "my airpods are not connecting to the iphone, how do I fix that "


In [10]:
from typing import List
from typing import Dict
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain import SagemakerEndpoint
from langchain.llms.sagemaker_endpoint import LLMContentHandler
from langchain.chains.question_answering import load_qa_chain
import json

from langchain.docstore.document import Document

class ContentHandler(LLMContentHandler):
    """Translate between prompt text and the endpoint's JSON request/response.

    Requests are sent as ``{"inputs": <prompt>, **model_kwargs}`` and the
    answer is read from ``response[0]["generated_text"]``.
    """

    # MIME types used for the request body and the expected response.
    content_type = "application/json"
    accepts = "application/json"

    def transform_input(self, inputs: str, model_kwargs: Dict) -> bytes:
        """Serialize a prompt and its generation options into a request body.

        Args:
            inputs: Prompt text to send to the model.
            model_kwargs: Extra top-level request fields (for example a
                ``"parameters"`` dict); merged next to ``"inputs"``.

        Returns:
            The UTF-8 encoded JSON payload.
        """
        input_str = json.dumps({"inputs": inputs, **model_kwargs})
        return input_str.encode("utf-8")

    def transform_output(self, output: bytes) -> str:
        """Extract the generated text from the endpoint response.

        Args:
            output: Response body. Either an object exposing ``read()``
                (which is what the original implementation required) or the
                raw bytes themselves.

        Returns:
            The ``"generated_text"`` field of the first element of the
            decoded JSON response.

        Raises:
            KeyError, IndexError, TypeError: If the decoded response does not
                have the expected ``[{"generated_text": ...}]`` shape.
        """
        # Accept both a file-like streaming body and plain bytes.
        raw = output.read() if hasattr(output, "read") else output
        response_json = json.loads(raw.decode("utf-8"))
        return response_json[0]["generated_text"]


# Handler instance that is passed to the SagemakerEndpoint LLM further down.
content_handler = ContentHandler()



In [11]:
# LLM wrapper around the deployed SageMaker endpoint.
# Alternative endpoint names kept from the original cell (swap endpoint_name to use one):
#   hf-llm-falcon-7b-bf16-2023-06-24-20-08-14-262
#   hf-llm-falcon-40b-bf16-2023-06-24-20-20-44-608
# NOTE(review): do_sample is False while top_p/temperature are also set —
# confirm how the serving container treats that combination.
llm2 = SagemakerEndpoint(
    endpoint_name="hf-llm-falcon-40b-instruct-bf16-2023-06-23-19-34-33-102",
    region_name="us-east-1",
    content_handler=content_handler,
    model_kwargs={
        "parameters": {
            "do_sample": False,
            "top_p": 0.9,
            "temperature": 0.1,
            "max_new_tokens": 400,
        }
    },
)



In [12]:
# Question used for both retrieval and generation; currently the raw user query.
# (An earlier variant prefixed it with "Using the text provided above answer the question ".)
llm_query = query

# Prompt layout: a leading newline, the retrieved context, then the question
# marker and an answer cue for the model to complete.
prompt_template = (
    "\n"
    "{context}\n"
    ">>QUESTION<<: {question}\n"
    ">>ANSWER<<:"
)

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)



In [13]:
# Retrieve the indexed passages most similar to the question; they are handed
# to the chain as its input documents.
# To see how the model answers without any context, use this instead:
# docs = [Document(page_content=" ",    )]
docs = new_db.similarity_search(query=llm_query)

# Build the question-answering chain around the endpoint LLM and the custom prompt.
chain = load_qa_chain(llm=llm2, prompt=PROMPT)

# return_only_outputs=False keeps the inputs (documents, question) in the
# result next to the generated answer.
output = chain(
    {"input_documents": docs, "question": llm_query},
    return_only_outputs=False,
)
print(output)


{'input_documents': [Document(page_content="If you need help connecting to your AirPods, learn what to do.\n\n## If you can't connect to your iPhone, iPad, or iPod touch\n\n  1. Make sure that your iPhone or iPod touch has the latest version of iOS or that your iPad has the latest version of iPadOS.\n  2. Put both AirPods in the charging case and make sure that both AirPods are charging.\n  3. To make sure that Bluetooth is on, go to Settings > Bluetooth.\n  4. If your AirPods are connected, make sure that they're selected as your audio device. If your AirPods appear in the list of devices but they don't connect, go to the next step.\n  5. Close the lid, wait 15 seconds, then open the lid. Press and hold the setup button on the back of the charging case for up to 10 seconds. The status light on the front of the charging case should flash white, which means that your AirPods are ready to connect.", metadata={'source': 'alliphonedocs.txt'}), Document(page_content="6. Hold the charging ca