In [1]:
import logging
import os

import torch
from langchain.chains import RetrievalQA
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS

In [2]:
# The speech text contains non-ASCII punctuation (curly quotes, em dashes), so
# pin the encoding instead of relying on the platform default.
loader = TextLoader("../../state_of_the_union.txt", encoding="utf-8")

In [3]:
# Configure the splitter (chunk size 1000, no overlap between chunks), then
# read the source file and split it into the chunks that get indexed below.
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
documents = loader.load()
docs = text_splitter.split_documents(documents=documents)

In [4]:
# Sentence-transformers embedding model used to vectorise the chunks.
model_name = "sentence-transformers/all-mpnet-base-v2"
# Prefer the GPU, but fall back to CPU so the notebook still runs on machines
# without CUDA instead of failing when the model is placed on the device.
model_kwargs = {'device': 'cuda' if torch.cuda.is_available() else 'cpu'}
# Embeddings are left un-normalised, as in the original configuration.
encode_kwargs = {'normalize_embeddings': False}
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)

In [5]:
# Embed every chunk and build the FAISS index from the results.
vector_db = FAISS.from_documents(documents=docs, embedding=embeddings)

In [6]:
# Persist the index to the local "faiss_index" folder.
vector_db.save_local(folder_path="faiss_index")

In [7]:
# Baseline: query the FAISS index directly, without any LLM query rewriting.
# NOTE(review): this rebinds `docs`, which until now held the split chunks;
# re-running the index-building cell after this one would index the search
# hits instead. Consider a distinct name once all later uses are checked.
question = "What did the president say about economics?"
docs = vector_db.similarity_search(query=question)
best_match = docs[0]
best_match.page_content

'We’re going after the criminals who stole billions in relief money meant for small businesses and millions of Americans.  \n\nAnd tonight, I’m announcing that the Justice Department will name a chief prosecutor for pandemic fraud. \n\nBy the end of this year, the deficit will be down to less than half what it was before I took office.  \n\nThe only president ever to cut the deficit by more than one trillion dollars in a single year. \n\nLowering your costs also means demanding more competition. \n\nI’m a capitalist, but capitalism without competition isn’t capitalism. \n\nIt’s exploitation—and it drives up prices. \n\nWhen corporations don’t have to compete, their profits go up, your prices go up, and small businesses and family farmers and ranchers go under. \n\nWe see it happening with ocean carriers moving goods in and out of America. \n\nDuring the pandemic, these foreign-owned companies raised prices by as much as 1,000% and made record profits.'

In [49]:
# Log at INFO so the retriever reports the alternative queries it generates.
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)

# The Azure OpenAI key is a secret: read it from the environment rather than
# committing it in the notebook. A missing variable raises KeyError up front.
# Any key that was ever stored in a shared notebook should be rotated.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vector_db.as_retriever(),
    llm=AzureChatOpenAI(
        temperature=0,
        openai_api_key=os.environ["OPENAI_API_KEY"],
        openai_api_base="https://llm-x-gpt.openai.azure.com/",
        deployment_name='LLM-X-GPT35-TURBO',
        openai_api_version="2023-03-15-preview",
    ),
)

# Retrieve with the LLM-generated query variants and count the documents returned.
unique_docs = retriever_from_llm.get_relevant_documents(query=question)
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ["1. Can you provide any information on the president's statements regarding the field of economics?", "2. I'm interested in knowing the president's views and comments on the subject of economics. Could you share any relevant information?", '3. Could you please share any insights or remarks made by the president in relation to economics?']


6

In [55]:
# Show the log level currently in effect for the multi-query retriever's logger.
# (The previous cell content was a pasted log prefix, which Python parsed as an
# annotated expression and which therefore raised NameError.)
multi_query_log_level = logging.getLevelName(
    logging.getLogger('langchain.retrievers.multi_query').getEffectiveLevel()
)
multi_query_log_level

In [53]:
# Question answering over the plain FAISS retriever. RetrievalQA is imported in
# the notebook's import cell.
# The Azure OpenAI key is a secret: read it from the environment rather than
# committing it in the notebook. A missing variable raises KeyError up front.
qa_chain = RetrievalQA.from_chain_type(
    llm=AzureChatOpenAI(
        temperature=0,
        openai_api_key=os.environ["OPENAI_API_KEY"],
        openai_api_base="https://llm-x-gpt.openai.azure.com/",
        deployment_name='LLM-X-GPT35-TURBO',
        openai_api_version="2023-03-15-preview",
    ),
    retriever=vector_db.as_retriever(),
)

# Run the chain on the same question used for the retrieval examples above.
qa_chain({"query": question})

{'query': 'What did the president say about economics?',
 'result': "The president discussed several economic topics in the given context. Here are some key points he made:\n\n1. The president announced the appointment of a chief prosecutor for pandemic fraud, indicating a focus on combating the theft of relief money meant for small businesses and Americans.\n\n2. He highlighted his achievement of reducing the deficit by more than one trillion dollars in a single year, emphasizing his commitment to fiscal responsibility.\n\n3. The president emphasized the importance of competition in capitalism, stating that when corporations don't have to compete, it leads to exploitation, higher prices, and negative impacts on small businesses and farmers.\n\n4. He mentioned the issue of inflation and outlined his plan to fight it, which includes cutting the cost of prescription drugs, increasing domestic production, investing in infrastructure, and reducing the deficit.\n\n5. The president discussed