In [None]:
import getpass
import os
import pickle

import faiss
import langchain
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

In [None]:
class DictDocumentStore:
    """Tiny in-memory document store backed by a plain mapping.

    Instances are handed to the FAISS vector store as its ``docstore``
    further down in this notebook.
    """

    def __init__(self, documents):
        """Keep a reference to ``documents``, a mapping of id -> document."""
        self.documents = documents

    def search(self, doc_id):
        """Return the entry stored under ``doc_id``, or ``None`` if absent."""
        store = self.documents
        return store.get(doc_id)

In [None]:
# Never store a real key in the notebook: reuse the value already exported in
# the environment and only prompt (without echoing input) when it is missing.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

In [None]:
# Completion model shared by the rest of the notebook: sampling temperature
# 0.9, with each completion capped at 100 tokens.
llm = OpenAI(
    temperature=0.9,
    max_tokens=100,
)

In [None]:
# Yahoo Finance articles fetched as the source material for this notebook.
NEWS_ARTICLE_URLS = [
    "https://finance.yahoo.com/news/jpmorgan-says-sp-500-will-fall-next-year-amid-challenging-macro-backdrop-201119875.html",
    "https://finance.yahoo.com/news/richest-country-world-now-bankrupt-141245951.html",
    "https://finance.yahoo.com/news/buffetts-berkshire-hathaway-says-haslams-192514140.html",
]

loaders = UnstructuredURLLoader(urls=NEWS_ARTICLE_URLS)
data = loaders.load()

# Cell output: how many items the loader returned.
len(data)

In [None]:
# Break the loaded pages into chunks of up to 1000 characters with a
# 300-character overlap, trying the separators in the order listed.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=300,
    separators=['\n\n', '\n', '.', ','],
)

docs = text_splitter.split_documents(data)

In [None]:
# Build a FAISS vector store from the chunks, embedding them with OpenAI.
embeddings = OpenAIEmbeddings()
vectorindex_openai = FAISS.from_documents(
    documents=docs,
    embedding=embeddings,
)

In [None]:
# Persist the raw FAISS index together with two of its attributes.
# Only the index object is pickled here, not the whole LangChain vector store.
# NOTE(review): LangChain's FAISS wrapper also offers save_local/load_local,
# which persist the docstore too -- consider switching both cells together.
index = vectorindex_openai.index
metadata = {
    "d": index.d,            # `d` attribute of the index at save time
    "ntotal": index.ntotal,  # `ntotal` attribute of the index at save time
}

file_path = "vector_index.pkl"
with open(file_path, "wb") as f:
    pickle.dump((index, metadata), f)

In [None]:
# SECURITY: pickle.load can execute arbitrary code while unpickling, so only
# open a file that this notebook itself wrote.
file_path = "vector_index.pkl"
with open(file_path, "rb") as f:
    loaded_index, loaded_metadata = pickle.load(f)

# Sanity-check the restored index against the metadata saved next to it.
if (loaded_index.d, loaded_index.ntotal) != (loaded_metadata["d"], loaded_metadata["ntotal"]):
    raise ValueError(
        f"{file_path} is inconsistent: index reports d={loaded_index.d}, "
        f"ntotal={loaded_index.ntotal} but saved metadata is {loaded_metadata}."
    )

# The mapping below pairs index row i with docs[i]; that is only valid when
# the index holds exactly one vector per chunk currently in `docs`.
if loaded_index.ntotal != len(docs):
    raise ValueError(
        f"{file_path} holds {loaded_index.ntotal} vectors but `docs` has "
        f"{len(docs)} chunks; rebuild and re-save the index before querying."
    )

# Rebuild the pieces the FAISS wrapper needs around the bare index:
# a docstore keyed by position and an identity row -> docstore-id mapping.
docstore = DictDocumentStore(dict(enumerate(docs)))
index_to_docstore_id = {i: i for i in range(len(docs))}

embedding_function = OpenAIEmbeddings()

vectorindex_openai = FAISS(
    index=loaded_index,
    docstore=docstore,
    index_to_docstore_id=index_to_docstore_id,
    embedding_function=embedding_function,
)

In [None]:
# Combine the LLM with a retriever over the vector store into a
# RetrievalQAWithSourcesChain, then display the chain as the cell output.
retriever = vectorindex_openai.as_retriever()
chain = RetrievalQAWithSourcesChain.from_llm(llm=llm, retriever=retriever)
chain

In [None]:
# Ask the chain a question; the call's result is shown as the cell output.
query = " who led the JPMorgan equity strategists?"
chain(
    {"question": query},
    return_only_outputs=True,
)

In [None]:
# Second question for the same chain; the result is shown as the cell output.
query = " what does the robert kiyosakhi tells about USA economy"
chain(
    {"question": query},
    return_only_outputs=True,
)