### Import OpenAI API key

In [2]:
import os
from secret_key import openai_key

# Publish the key as an environment variable; the OpenAI-backed objects created
# later in this notebook are constructed without an explicit key argument.
os.environ.update(OPENAI_API_KEY=openai_key)


### Data Extraction

In [1]:
from langchain.document_loaders import UnstructuredURLLoader

# Loader for the single source page used throughout this notebook.
url_loader = UnstructuredURLLoader(urls=['https://en.wikipedia.org/wiki/Finance'])

# Download and parse the page into a list of documents.
data = url_loader.load()

# By default the loader logs fetch/parse errors and carries on, which can leave
# `data` empty. Stop here with a clear message instead of letting the failure
# surface later as an obscure error while building the vector index.
if not data:
    raise RuntimeError(
        "No documents were loaded from the URL; check network access and "
        "that the 'unstructured' package is installed."
    )

### Text splitter

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: chunk_size=1000 with chunk_overlap=200, so neighbouring
# chunks share some text (sizes are in characters with the splitter's default
# length function -- TODO confirm if a custom one is ever passed).
text_chunks = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# Break the loaded page into chunk-level documents.
docs = text_chunks.split_documents(documents=data)

In [25]:
# Number of chunks produced by the splitter.
len(docs)

65

In [26]:
# Spot-check the text of one chunk. The index is hard-coded, so this raises
# IndexError if the splitter yields fewer than 61 chunks.
docs[60].page_content

"Buffett, W.; Cunningham, L.A. (2009). The Essays of Warren Buffett: Lessons for Investors and Managers. John Wiley & Sons (Asia) Pte Limited. ISBN 978-0-470-82441-2.\n\nStanley, Thomas J.; Danko, W.D. (1998). The Millionaire Next Door. Gallery Books. ISBN 978-0-671-01520-6. LCCN 98046515.\n\nSoros, George (1988). The Alchemy of Finance: Reading the Mind of the Market. A Touchstone book. Simon & Schuster. ISBN 978-0-671-66238-7. LCCN 87004745.\n\nFisher, Philip Arthur (1996). Common Stocks and Uncommon Profits and Other Writings. Wiley Investment Classics. Wiley. ISBN 978-0-471-11927-2. LCCN 95051449.\n\nExternal links\n\n[edit]\n\nFinance at Wikipedia's sister projects\n\nDefinitions from Wiktionary\n\nMedia from Commons\n\nNews from Wikinews\n\nQuotations from Wikiquote\n\nTexts from Wikisource\n\nTextbooks from Wikibooks\n\nResources from Wikiversity\n\nFinance Definition (Investopedia)\n\nHypertextual Finance Glossary (Campbell Harvey)\n\nGlossary of financial risk management terms

### Create embeddings and save them to FAISS

In [11]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

# Embedding model (library defaults) used to vectorise every chunk.
embeddings = OpenAIEmbeddings()

# Embed the chunks in `docs` and build a FAISS vector index from them.
vectorstore_openai = FAISS.from_documents(documents=docs, embedding=embeddings)

# Persist the index to a local folder so it can be reloaded later.
file_path = 'faiss_store'
vectorstore_openai.save_local(folder_path=file_path)

### RAG for information extraction

In [13]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import OpenAI

# Language model that writes the final answer.
llm = OpenAI(
    temperature=0.7,
    max_tokens=500,
)

# Reload the FAISS index from the folder it was saved to.
# NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
# loading; only use it on index folders this notebook created itself, never on
# files from an untrusted source.
vectorStore = FAISS.load_local(
    folder_path=file_path,
    embeddings=embeddings,
    allow_dangerous_deserialization=True,
)

# Question-answering chain that retrieves chunks from the vector store and
# passes them to the LLM.
chain = RetrievalQAWithSourcesChain.from_llm(
    llm=llm,
    retriever=vectorStore.as_retriever(),
)



### Query to extract info

In [29]:
# Question put to the retrieval chain.
query = "what is finance?"
# Alternative prompt to try instead: 'Summarize the content'

# Run the chain through `invoke`; calling the chain object directly is
# deprecated in recent LangChain releases. `return_only_outputs=True` keeps the
# result limited to the chain's output keys, as before.
result = chain.invoke({"question": query}, return_only_outputs=True)

In [30]:
# Show only the generated answer text from the chain's output dictionary.
result['answer']

' Finance is the management of monetary resources and the study of money, currency, assets and liabilities. It involves the raising and administering of funds and the relationships between profit-seeking enterprises and fund suppliers. It also includes various subfields such as asset management, money management, risk management, investment management, and financial analysis. Its history dates back to ancient civilizations and has evolved into a complex and multidisciplinary field. \n'