In [24]:
from langchain.document_loaders import HuggingFaceDatasetLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from transformers import AutoTokenizer, pipeline
from langchain.chains import RetrievalQA
import sys
import pandas as pd
from langchain_huggingface import HuggingFacePipeline
from langchain_huggingface import HuggingFaceEmbeddings
import torch
import os
from langchain_core.documents import Document

## Load Documents

In [2]:
directory = "../utils/investopedia-dictionary"
documents = []
for filename in os.listdir(directory):
    if filename.endswith(".txt"):
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            document = Document(page_content=file.read(), metadata={"source": filename[:-4]})
            documents.append(document)

print(f'Loaded {len(documents)} documents\n')
# Print the first few entries as a sample
for i, document in enumerate(documents[:5]):
    print(f"Text {i+1}:\n{document.page_content[:256]}...\n")  # Print the first 256 characters for preview

Loaded 6286 documents

Text 1:
What Is the Volcker Rule? The Volcker Rule is a federal regulation that generally prohibits banks from conducting certain investment activities with their own accounts and limits their dealings with hedge funds and privateequity funds, also called covered ...

Text 2:
What Is a Global Registered Share (GRS)? A global registered share (GRS), or a global share, is a security that is issued in the United States, but it is registered in multiple markets around the world and trades in multiple currencies. With global shares,...

Text 3:
Volatility is a statistical measure of returns for a given security or market index.What Is Volatility? Volatility is a statistical measure of the dispersion of returns for a given security or market index. It is often measured from either the standard dev...

Text 4:
What Is a Bid? The term bid refers to an offer made by an individual orcorporationto purchase an asset. Buyers commonly make bids at auctions and in various marke

In [3]:
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
fragments = text_splitter.split_documents(documents)
print(f'Created {len(fragments)} fragments')

Created 50676 fragments


## Document Transformation

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-l12-v2",     # Provide the pre-trained model's path
    model_kwargs={'device':device}, # Pass the model configuration options
    encode_kwargs={'normalize_embeddings': False} # Pass the encoding options
)

db = FAISS.from_documents(fragments, embeddings)
# db = FAISS.load_local(save_directory, embeddings) ##untested##

  return torch._C._cuda_getDeviceCount() > 0


### Test Document Retrieval

In [5]:
question = "Are mutual funds safe investment?"
searchDocs = db.similarity_search(question)
print(searchDocs[0].page_content)

from the income the fund has earned by the securities they hold. If the fund holds bonds, then it will earn interest on them.However, returns are not guaranteed, and the performance of a mutual fund depends on market conditions, the fund's management, what assets it holds, and its investment strategy.What Are the Risks of Mutual Funds?Depending on the assets they hold, mutual funds carry several investment risks, including market, interest rate, andmanagement risk. Market risk arises from the potential decline in the value of the securities within the fund. Interest rate risk affects funds holding bonds and other fixed-income securities, as rising interest rates can lead to a decrease in bond prices.Management risk is linked to the performance of the fund's management team. You are putting your money in their hands, and poor investment decisions will negatively impact your returns. Before investing, it's important for investors to carefully review the fund's prospectus and consider


## Preparing the LLM Model

In [9]:
llm_model_name = "Intel/dynamic_tinybert"

# Create a tokenizer object by loading the pretrained "Intel/dynamic_tinybert" tokenizer.
tokenizer = AutoTokenizer.from_pretrained(llm_model_name)

# Create a question-answering model object by loading the pretrained "Intel/dynamic_tinybert" model.
model = AutoModelForQuestionAnswering.from_pretrained(llm_model_name)

# Define a question-answering pipeline using the model and tokenizer
question_answerer = pipeline(
    "question-answering", 
    model=llm_model_name, 
    tokenizer=tokenizer,
    return_tensors='pt',
    device=device
)

# Create an instance of the HuggingFacePipeline, which wraps the question-answering pipeline
# with additional model-specific arguments (temperature and max_length)
llm = HuggingFacePipeline(
    pipeline=question_answerer,
    model_kwargs={"temperature": 0.9, "max_length": 512},
)

### Retrieval QA Chain

In [10]:
# Create a retriever object from the 'db' with a search configuration where it retrieves up to 4 relevant splits/documents.
retriever = db.as_retriever(search_kwargs={"k": 4})

# Create a question-answering instance (qa) using the RetrievalQA class.
# It's configured with a language model (llm), a chain type "refine," the retriever we created, and an option to not return source documents.
qa = RetrievalQA.from_chain_type(llm=llm, chain_type="refine", retriever=retriever, return_source_documents=False)

In [11]:
def err_remove(er):
    lin = "------------"
    er = str(er)
    start_index = er.find(lin) + len(lin)
    end_index = er.rfind(lin)
    answer = er[start_index:end_index].strip()
    return answer

In [20]:
question = "What is stock exchange?"
answer = None
try:
    result = qa.invoke({"query": question})
    answer = result["result"]
except:
    _,error,_ = sys.exc_info()
    #answer = err_remove(error).encode('utf-8').decode('unicode_escape')[1:]
    answer = err_remove(error)

print(answer)

The stock market is made up of investors buying, selling, and trading shares of companies, reflecting these firms' collective value and performance.The stock market as a whole is an exchange mechanism that helps investors buy and sell shares in publicly traded companies. Though you can visit the New York Stock Exchange (NYSE) and offices of the NASDAQ, these are just components in a broader marketplace. Trades are conducted mostly through electronic means between participants who are remote from each other. The mechanism is an excellent means for businesses to raise capital from investors. Additionally, analysts closely examine its traded prices for signals of economic strength or weakness.Key TakeawaysThe stock market is defined as the collective trading network involving company shares and their derivatives.The stock market, is a central part of modern economies since it's where companies raise vast sums of money to accelerate successful startups, expand existing businesses, or


## Save DB

In [23]:
db.save_local('./models/investopedia_faiss')