In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
from dotenv import load_dotenv, find_dotenv
import nest_asyncio
import warnings

# Load variables from the nearest .env file into the process environment; the
# return value is deliberately discarded.
_ = load_dotenv(find_dotenv())
# Patch asyncio to tolerate nested event loops.
# NOTE(review): presumably needed because the notebook kernel already runs a
# loop — confirm which downstream call requires it.
nest_asyncio.apply()
# NOTE(review): this silences every warning for the rest of the session,
# including deprecation notices from the libraries used below.
warnings.filterwarnings('ignore')

## Instantiate signature

In [22]:
import dspy

# Bedrock provider handle, pinned to the region used throughout this notebook.
bedrock = dspy.Bedrock(region_name="us-east-1")

# Language model that DSPy modules will call.
lm = dspy.AWSAnthropic(
    bedrock,
    "anthropic.claude-3-5-sonnet-20240620-v1:0",
)

# Register the model as the DSPy-wide default.
dspy.settings.configure(lm=lm)

In [4]:
# DSPy signature with two inputs (`context_str`, `query_str`) and one output
# (`answer`). The field names match the `dest_key` values used when the query
# pipeline is wired up further down.
# NOTE(review): the docstring asks for "short factoid answers" while the
# `answer` field asks for "3 to 5 sentences"; presumably both strings end up in
# the prompt, so the mixed instruction should be confirmed as intentional.
class GenerateAnswer(dspy.Signature):
    """Answers questions with short factoid answers."""
    
    # Retrieved passages, already joined into a single string.
    context_str = dspy.InputField(desc="contains relevant facts")
    # The user's question.
    query_str = dspy.InputField()
    # Generated answer text.
    answer = dspy.OutputField(desc = "Often between 3 to 5 sentences.")

## Instantiate retriever

In [5]:
from llama_index.readers.web import SimpleWebPageReader
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
from llama_index.llms.bedrock_converse import BedrockConverse
from llama_index.embeddings.bedrock import BedrockEmbedding
from IPython.display import Markdown, display

## Creating Retriever

In [6]:
# Both Bedrock clients authenticate with the same key pair taken from the
# environment, so the pair is read once and shared.
_aws_credentials = {
    "aws_access_key_id": os.environ["AWS_ACCESS_KEY"],
    "aws_secret_access_key": os.environ["AWS_SECRET_ACCESS_KEY"],
}

# Default LLM for LlamaIndex components.
Settings.llm = BedrockConverse(
    model="anthropic.claude-3-5-sonnet-20240620-v1:0",
    region_name="us-east-1",
    **_aws_credentials,
)

# Default embedding model; its region comes from the environment rather than
# being hard-coded like the LLM's.
Settings.embed_model = BedrockEmbedding(
    model="amazon.titan-embed-text-v1",
    aws_region_name=os.environ["AWS_DEFAULT_REGION"],
    **_aws_credentials,
)

In [7]:
from llama_index.readers.web import SimpleWebPageReader
import requests
from llama_index.core import Document

class CustomWebPageReader(SimpleWebPageReader):
    """`SimpleWebPageReader` variant that sends a browser-like User-Agent.

    Many websites, including Investopedia, require headers like User-Agent to
    be set in the request to return the correct content, so `load_data` is
    overridden here to send one with every request.
    """

    def load_data(self, urls: list, timeout: float = 30.0) -> list:
        """Download each URL and wrap the page body in a `Document`.

        Args:
            urls: List of page URLs to fetch.
            timeout: Seconds to wait on each HTTP request. Without a timeout a
                stalled server would block the notebook indefinitely.

        Returns:
            One `Document` per URL, in input order. Each document uses the URL
            as its `id_`, and carries the result of `self._metadata_fn(url)`
            as metadata when a metadata function is configured (otherwise `{}`).

        Raises:
            ValueError: If `urls` is not a list.
            requests.HTTPError: If a page responds with an HTTP error status;
                raising keeps error or block pages out of the index.
        """
        if not isinstance(urls, list):
            raise ValueError("urls must be a list of strings.")

        if self.html_to_text:
            # Optional dependency: import once, and only when conversion is on.
            import html2text

        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }

        documents = []
        for url in urls:
            response = requests.get(url, headers=headers, timeout=timeout)
            # Fail loudly instead of indexing the body of a 4xx/5xx response.
            response.raise_for_status()

            text = response.text
            if self.html_to_text:
                text = html2text.html2text(text)

            metadata = None
            if self._metadata_fn is not None:
                metadata = self._metadata_fn(url)

            documents.append(Document(text=text, id_=url, metadata=metadata or {}))

        return documents

# Investopedia pages to ingest: market basics, options vs. futures,
# fundamental/technical analysis, individual technical indicators and
# valuation/liquidity ratios.
links = [
    "https://www.investopedia.com/terms/s/stockmarket.asp",
    "https://www.investopedia.com/ask/answers/difference-between-options-and-futures/",
    "https://www.investopedia.com/financial-edge/0411/5-essential-things-you-need-to-know-about-every-stock-you-buy.aspx",
    "https://www.investopedia.com/articles/fundamental/04/063004.asp",
    "https://www.investopedia.com/terms/t/technicalanalysis.asp",
    "https://www.investopedia.com/terms/i/ichimoku-cloud.asp",
    "https://www.investopedia.com/terms/a/aroon.asp",  
    "https://www.investopedia.com/terms/b/bollingerbands.asp",
    "https://www.investopedia.com/articles/forex/05/macddiverge.asp",
    "https://www.investopedia.com/terms/a/accumulationdistribution.asp",
    "https://www.investopedia.com/terms/s/stochasticoscillator.asp",
    "https://www.investopedia.com/terms/s/stochrsi.asp",
    "https://www.investopedia.com/terms/p/price-earningsratio.asp",
    "https://www.investopedia.com/terms/p/price-to-bookratio.asp",
    "https://www.investopedia.com/terms/p/price-to-salesratio.asp",
    "https://www.investopedia.com/terms/q/quickratio.asp"
]

In [8]:
# Fetch every page in `links`, converting the HTML to plain text on the way.
docs = CustomWebPageReader(html_to_text=True).load_data(urls=links)

## Qdrant 

In [10]:
import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore

In [11]:
QDRANT_URL = "http://localhost:6333"
COLLECTION_NAME = "investopedia"

# Sync and async clients for the same local Qdrant instance.
client = qdrant_client.QdrantClient(QDRANT_URL)
aclient = qdrant_client.AsyncQdrantClient(QDRANT_URL)

# Drop any previous copy of the collection so the ingestion below starts clean.
if client.collection_exists(COLLECTION_NAME):
    client.delete_collection(COLLECTION_NAME)

# Vector store over that collection, with a fastembed sparse model configured.
vector_store = QdrantVectorStore(
    collection_name=COLLECTION_NAME,
    client=client,
    aclient=aclient,
    fastembed_sparse_model="Qdrant/bm42-all-minilm-l6-v2-attentions",
)

Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

In [12]:
# Point LlamaIndex storage at the Qdrant-backed vector store ...
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# ... then embed the downloaded documents and write them into it.
index = VectorStoreIndex.from_documents(
    docs,
    storage_context=storage_context,
    embed_model=Settings.embed_model,
)

Test: run a hybrid query against the new index

In [15]:
# Query engine in hybrid mode, with separate top-k values for the dense and
# sparse legs.
qe = index.as_query_engine(
    vector_store_query_mode="hybrid",
    similarity_top_k=4,
    sparse_top_k=12,
)

response = qe.query(
    "Which is more important, the Aroon indicator or the stochastic RSI?"
)
display(Markdown(f"<b>{response}</b>"))

<b>It's difficult to say that one indicator is definitively more important than the other, as they serve different purposes and can be useful in different situations. 

The Stochastic RSI is a momentum oscillator that measures the level of the RSI relative to its high-low range over a set time period. It can be helpful for identifying overbought and oversold conditions as well as potential trend reversals.

The Aroon indicator, on the other hand, is designed to identify the beginning of new trends and measure the strength of a trend. It consists of two lines - the Aroon Up and Aroon Down - which can signal when an uptrend or downtrend may be starting.

Both indicators can provide valuable information to traders, but they analyze different aspects of price action. The choice between them would depend on a trader's specific strategy, the market being analyzed, and the type of signals they are looking for. Many traders use multiple indicators in combination to get a more comprehensive view of market conditions.

Ultimately, the importance of any indicator depends on how well it aligns with an individual trader's needs and trading style. It's generally recommended to thoroughly test and understand any indicator before relying on it for trading decisions.</b>

In [9]:
# index = VectorStoreIndex.from_documents(
#     docs,
#     embed_model=Settings.embed_model,
# )

In [8]:
# storage_dir = "./VectorIndex"

In [36]:
# index.storage_context.persist(
#     persist_dir = storage_dir
# )

In [11]:
# def load_index(persist_dir=storage_dir, links=links):
#     if os.path.exists(persist_dir):
#         storage_context = StorageContext.from_defaults(
#             persist_dir = storage_dir 
#         )
#         return load_index_from_storage(storage_context)
    
#     docs = CustomWebPageReader(
#         html_to_text=True
#     ).load_data(urls=links)
#     return VectorStoreIndex.from_documents(docs)

In [30]:
# index = load_index()

def load_index(embed_model,
               collection_name: str = "investopedia",
               client_port: int = 6333,
               host: str = "localhost",
               sparse_model: str = "Qdrant/bm42-all-minilm-l6-v2-attentions"):
    """Build a `VectorStoreIndex` on top of a Qdrant collection.

    Nothing is ingested here: the function only connects to Qdrant and wraps
    the named collection, so it can be re-run without re-downloading or
    re-embedding the source pages.

    Args:
        embed_model: Embedding model handed to the index.
        collection_name: Name of the Qdrant collection to wrap.
        client_port: Port of the Qdrant HTTP endpoint.
        host: Hostname of the Qdrant server. Defaults to the previously
            hard-coded "localhost".
        sparse_model: fastembed sparse model name passed to the vector store.
            Defaults to the model that was previously hard-coded.

    Returns:
        The index returned by `VectorStoreIndex.from_vector_store`.
    """
    # Imported locally so the function does not depend on an earlier cell.
    import qdrant_client
    from llama_index.vector_stores.qdrant import QdrantVectorStore

    url = f"http://{host}:{client_port}"
    client = qdrant_client.QdrantClient(url)
    aclient = qdrant_client.AsyncQdrantClient(url)
    vector_store = QdrantVectorStore(
        collection_name=collection_name,
        client=client,
        aclient=aclient,
        fastembed_sparse_model=sparse_model,
    )
    return VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=embed_model,
    )
    
# Reconnect to the default collection using the notebook-wide embedding model.
index = load_index(embed_model=Settings.embed_model)

Both client and aclient are provided. If using `:memory:` mode, the data between clients is not synced.


Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

Fetching 6 files:   0%|          | 0/6 [00:00<?, ?it/s]

In [31]:
# Retriever that feeds the DSPy pipeline below.
# `sparse_top_k` only takes effect in hybrid mode, so the mode is set
# explicitly, mirroring the hybrid query engine used in the test cell.
retriever = index.as_retriever(
    similarity_top_k=10,
    sparse_top_k=10,
    vector_store_query_mode="hybrid",
)

## Build query pipeline

In [17]:
from llama_index.core.query_pipeline import (
    QueryPipeline as QP,
    InputComponent,
    FnComponent
)
from dspy.predict.llamaindex import (
    DSPyComponent,
    LlamaIndexModule
)

In [18]:
# Sanity check: fetch the nodes the retriever returns for a sample question.
contexts = retriever.retrieve("What is technical analysis")

In [19]:
# Preview the retrieved passages as one string, separated by blank lines.
"\n\n".join(node.get_content() for node in contexts)

'* [Technical analysis](https://www.investopedia.com/terms/t/technical-analyst.asp) may be contrasted with fundamental analysis, which focuses on a company\'s financials rather than historical price patterns or stock trends.\n  * Technical analysis was introduced by Charles Dow.\n\n![Technical\nAnalysis](https://www.investopedia.com/thmb/IZujvEzbNudSvo4Z1vXcHHvgzBE=/1500x0/filters:no_upscale\\(\\):max_bytes\\(150000\\):strip_icc\\(\\)/Technical_Analysis_Final-4a96fc1863cf4dbc8a5c6f315ee49871.jpg)\n\nInvestopedia / Candra Huff\n\n##  Understanding Technical Analysis\n\nTechnical analysis is used to scrutinize the ways supply and demand for a\nsecurity affect changes in price, volume, and implied volatility. It assumes\nthat past trading activity and price changes of a\n[security](https://www.investopedia.com/terms/s/security.asp) can be valuable\nindicators of the security\'s future price movements when paired with\nappropriate investing or trading rules.\n\nTechnical analysis\' various

In [23]:
# Answer synthesizer: a DSPy chain-of-thought predictor over `GenerateAnswer`.
dspy_component = DSPyComponent(
    dspy.ChainOfThought(GenerateAnswer)
)
# Collapse the retrieved nodes into one context string. `get_content()` is
# called with its default metadata mode; the previous `get_content("")` passed
# an empty string, which is not a valid mode, and disagreed with the other
# call sites.
retriever_post = FnComponent(
    lambda contexts: "\n\n".join([n.get_content() for n in contexts])
)
p = QP(verbose=True)
p.add_modules(
    {
        "input": InputComponent(),
        "retriever": retriever,
        "retriever_post": retriever_post,
        "synthesizer": dspy_component,
    }
)
# The question goes both to the retriever and, unchanged, to the synthesizer.
p.add_link("input", "retriever")
p.add_link("retriever", "retriever_post")
p.add_link("input", "synthesizer", dest_key="query_str")
p.add_link("retriever_post", "synthesizer", dest_key="context_str")

# Wrap the pipeline as a DSPy module so it can be compiled by a teleprompter.
dspy_qp = LlamaIndexModule(p)

In [24]:
# Run the uncompiled pipeline once, end to end, and show the prediction.
output = dspy_qp(query_str="What is the difference between an option and a future?")
output

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the difference between an option and a future?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What is the difference between an option and a future?

[0m[1;3;38;2;155;135;227m> Running module retriever_post with input: 
contexts: [NodeWithScore(node=TextNode(id_='557709ad-c288-404d-92d1-48d1c072549d', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.S...

[0m[1;3;38;2;155;135;227m> Running module synthesizer with input: 
query_str: What is the difference between an option and a future?
context_str: Futures: What’s the Difference?
](https://www.investopedia.com/ask/answers/difference-between-options-and-
futures/)

Partner Links

Related Terms

[Sector Breakdown: What It Is and How It's
Used](htt...

[0m

Prediction(
    answer='The key difference is that options give the buyer the right, but not the obligation, to buy or sell an asset at a specific price before expiration, while futures create an obligation for both parties to transact at the agreed price on the expiration date. Options require paying a premium upfront and have limited downside risk for buyers, while futures have lower initial costs but potentially unlimited risk. Options are more flexible in how they can be settled, while futures contracts must be fulfilled or offset at expiration. Options are available on a wider range of underlying assets compared to futures.'
)

## Optimize query pipeline

Harvested Q&A training examples

In [25]:
from dspy import Example

# Hand-collected question/answer pairs used as the optimisation train set.
# Reference answers are plain single-line strings: the first one used to be a
# triple-quoted literal that embedded a newline plus source indentation, and
# two others started with a stray space. That whitespace became part of the
# text compared by the similarity metric.
train_examples = [
    Example(
        query_str="What is the difference between an option and a future?",
        answer=(
            "An option gives the buyer the right, but not the obligation, to buy (or sell) an asset at a specific price at any time during the life of the contract. "
            "A futures contract obligates the buyer to purchase a specific asset, and the seller to sell and deliver that asset, at a specific future date"
        ),
    ),
    Example(
        query_str="What are the similarities between an option and a future?",
        answer="Futures and options positions may be traded and closed ahead of expiration, but the parties to the futures contracts for commodities are typically obligated to make and accept deliveries on the settlement date.",
    ),
    Example(
        query_str="What is an option?",
        answer="Options are financial derivatives. An options contract gives an investor the right to buy or sell the underlying instrument at a specific price while the contract is in effect. Investors may choose not to exercise their options. Option holders do not own the underlying shares or enjoy shareholder rights unless they exercise an option to buy stock.",
    ),
    Example(
        query_str="What are the different options?",
        answer="There are only two kinds of options: Call options and put options. A call option confers the right to buy a stock at the strike price before the agreement expires. A put option gives the holder the right to sell a stock at a specific price.",
    ),
    Example(
        query_str="What's P/E ratio?",
        answer="This ratio is used to measure a company's current share price relative to its per-share earnings. The company can be compared to other, similar corporations so that analysts and investors can determine its relative value. So if a company has a P/E ratio of 20, this means investors are willing to pay $20 for every $1 per earnings. That might seem expensive but not if the company is growing fast. The P/E can be found by comparing the current market price to the cumulative earnings of the last four quarters.",
    ),
    Example(
        query_str="What's a dividend?",
        answer="Dividends are like interest in a savings account—you get paid regardless of the stock price. Dividends are distributions made by a company to its shareholders as a reward from its profits. The amount of the dividend is decided by its board of directors and are generally issued in cash, though it isn't uncommon for some companies to issue dividends in the form of stock shares.",
    ),
    Example(
        query_str="What's a balance sheet?",
        answer="A balance sheet is a financial statement that reports a company's assets, liabilities and shareholder equity at a specific point in time",
    ),
    Example(
        query_str="What's a current ratio?",
        answer="It's the total current assets divided by total current liabilities, commonly used by analysts to assess the ability of a company to meet its short-term obligations",
    ),
    Example(
        query_str="What are stocks?",
        answer="When you buy a stock or a share, you're getting a piece of that company. Owning shares gives you the right to part of the company's profits, often paid as dividends, and sometimes the right to vote on company matters",
    ),
    Example(
        query_str="What are REITs?",
        answer="Real estate investment trusts (REITs) are companies that own, manage, or finance real estate. Investors can buy shares in them, and they legally must provide 90% of their profits as dividends each year.",
    ),
    Example(
        query_str="What are brokers?",
        answer="Brokers in the stock market play the same role as in insurance and elsewhere, acting as a go-between for investors and the securities markets. They are licensed organizations that buy and sell stocks and other securities for individual and institutional clients.",
    ),
    Example(
        query_str="What is technical analysis?",
        answer="Technical analysis is used to scrutinize the ways supply and demand for a security affect changes in price, volume, and implied volatility. It assumes that past trading activity and price changes of a security can be valuable indicators of the security's future price movements when paired with appropriate investing or trading rules.",
    ),
    Example(
        query_str="What is the difference between fundamental and technical analysis?",
        answer="Fundamental analysis is a method of evaluating securities by attempting to measure the intrinsic value of a stock. Fundamental analysts study everything from the overall economy and industry conditions to the financial condition and management of companies. Technical analysis differs from fundamental analysis in that the stock's price and volume are the only inputs. The core assumption is that all publicly known fundamentals have factored into price; thus, there is no need to pay close attention to them. Technical analysts do not attempt to measure a security's intrinsic value, but instead, use stock charts to identify patterns and trends that suggest how a stock's price will move in the future.",
    ),
]

In [26]:
# Mark `query_str` as the only input field of each example.
train_examples = [
    example.with_inputs("query_str")
    for example in train_examples
]

Optimize the prompt with BootstrapFewShot

In [27]:
from dspy.teleprompt import BootstrapFewShot
from llama_index.core.evaluation import SemanticSimilarityEvaluator

# Evaluator behind the optimisation metric, configured with a similarity
# threshold of 0.5.
evaluator = SemanticSimilarityEvaluator(similarity_threshold=0.5)

def validate_context_and_answer(example, pred, trace=None):
    """Metric for the teleprompter: does the prediction match the reference?

    Args:
        example: Training example; its `answer` is used as the reference.
        pred: Pipeline prediction; its `answer` is the response being judged.
        trace: Accepted for the metric call signature; not used.

    Returns:
        The `passing` attribute of the evaluator's result.
    """
    verdict = evaluator.evaluate(response=pred.answer, reference=example.answer)
    return verdict.passing

In [28]:
# Compile the pipeline with BootstrapFewShot, scoring with
# `validate_context_and_answer`.
# NOTE(review): `max_labeled_demos=0` presumably keeps raw labelled examples out
# of the prompt so only bootstrapped demos are used — confirm.
# NOTE(review): the recorded run of this cell aborted with
# "AttributeError: 'QdrantVectorStore' object has no attribute '_client'".
# Presumably the teleprompter copies the module and the copied vector store
# loses its client — confirm and fix before relying on `compiled_dspy_qp`.
teleprompter = BootstrapFewShot(
    max_labeled_demos=0,
    metric=validate_context_and_answer
)
compiled_dspy_qp = teleprompter.compile(dspy_qp, trainset=train_examples)

  0%|          | 0/13 [00:00<?, ?it/s]

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is the difference between an option and a future?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What is the difference between an option and a future?

[0m

[2m2024-07-23T00:00:49.497497Z[0m [[31m[1merror    [0m] [1mFailed to run or to evaluate example Example({'query_str': 'What is the difference between an option and a future?', 'answer': 'An option gives the buyer the right, but not the obligation, to buy (or sell) an asset at a specific price at any time during the life of the contract.\n        A futures contract obligates the buyer to purchase a specific asset, and the seller to sell and deliver that asset, at a specific future date\n        '}) (input_keys={'query_str'}) with <function validate_context_and_answer at 0x7ff7d01ada80> due to 'QdrantVectorStore' object has no attribute '_client'.[0m [[0m[1m[34mdspy.teleprompt.bootstrap[0m][0m [36mfilename[0m=[35mbootstrap.py[0m [36mlineno[0m=[35m211[0m
  8%|▊         | 1/13 [00:01<00:14,  1.21s/it]

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What are the similarities between an option and a future?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What are the similarities between an option and a future?

[0m

[2m2024-07-23T00:00:49.793248Z[0m [[31m[1merror    [0m] [1mFailed to run or to evaluate example Example({'query_str': 'What are the similarities between an option and a future?', 'answer': 'Futures and options positions may be traded and closed ahead of expiration, but the parties to the futures contracts for commodities are typically obligated to make and accept deliveries on the settlement date.'}) (input_keys={'query_str'}) with <function validate_context_and_answer at 0x7ff7d01ada80> due to 'QdrantVectorStore' object has no attribute '_client'.[0m [[0m[1m[34mdspy.teleprompt.bootstrap[0m][0m [36mfilename[0m=[35mbootstrap.py[0m [36mlineno[0m=[35m211[0m
 15%|█▌        | 2/13 [00:01<00:07,  1.48it/s]

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is an option?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What is an option?

[0m

[2m2024-07-23T00:00:50.314657Z[0m [[31m[1merror    [0m] [1mFailed to run or to evaluate example Example({'query_str': 'What is an option?', 'answer': 'Options are financial derivatives. An options contract gives an investor the right to buy or sell the underlying instrument at a specific price while the contract is in effect. Investors may choose not to exercise their options. Option holders do not own the underlying shares or enjoy shareholder rights unless they exercise an option to buy stock.'}) (input_keys={'query_str'}) with <function validate_context_and_answer at 0x7ff7d01ada80> due to 'QdrantVectorStore' object has no attribute '_client'.[0m [[0m[1m[34mdspy.teleprompt.bootstrap[0m][0m [36mfilename[0m=[35mbootstrap.py[0m [36mlineno[0m=[35m211[0m
 23%|██▎       | 3/13 [00:02<00:06,  1.66it/s]

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What are the different options?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What are the different options?

[0m

[2m2024-07-23T00:00:50.860204Z[0m [[31m[1merror    [0m] [1mFailed to run or to evaluate example Example({'query_str': 'What are the different options?', 'answer': 'There are only two kinds of options: Call options and put options. A call option confers the right to buy a stock at the strike price before the agreement expires. A put option gives the holder the right to sell a stock at a specific price.'}) (input_keys={'query_str'}) with <function validate_context_and_answer at 0x7ff7d01ada80> due to 'QdrantVectorStore' object has no attribute '_client'.[0m [[0m[1m[34mdspy.teleprompt.bootstrap[0m][0m [36mfilename[0m=[35mbootstrap.py[0m [36mlineno[0m=[35m211[0m
 31%|███       | 4/13 [00:02<00:05,  1.72it/s]

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What's P/E ratio?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What's P/E ratio?

[0m

 31%|███       | 4/13 [00:02<00:06,  1.40it/s]


AttributeError: 'QdrantVectorStore' object has no attribute '_client'

In [48]:
# Ask the compiled pipeline a question that is not in the training set.
# NOTE(review): this needs `compiled_dspy_qp`, but the recorded compile cell
# ended in an AttributeError and the execution counts are out of order, so this
# output presumably came from an earlier kernel state — confirm on a fresh run.
compiled_dspy_qp(query_str="What is a candlestick chart?")

[1;3;38;2;155;135;227m> Running module input with input: 
query_str: What is a candlestick chart?

[0m[1;3;38;2;155;135;227m> Running module retriever with input: 
input: What is a candlestick chart?

[0m[1;3;38;2;155;135;227m> Running module retriever_post with input: 
contexts: [NodeWithScore(node=TextNode(id_='a39a4a89-6346-4f02-a1ce-72026c743e78', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.S...

[0m[1;3;38;2;155;135;227m> Running module synthesizer with input: 
query_str: What is a candlestick chart?
context_str: [A trader looks at
computers at the New York Stock Exchange
](https://www.investopedia.com/thmb/21-q3vCQLoG4FvwkkF1IwTkSr9o=/400x300/filters:no_upscale\(\):max_bytes\(150000\):strip_icc\(\)/GettyImage...

[0m

Prediction(
    answer='A candlestick chart is a type of financial chart used in technical analysis that displays the high, low, open, and close prices of a security for a given time period. The key features of a candlestick chart are:\n\n1. The "body" of the candlestick represents the opening and closing prices, with the body being filled ('
)

## Get finetuned prompt and port it over to LlamaIndex 

In [56]:
from dspy.predict.llamaindex import DSPyPromptTemplate

# Pull the optimised predictor out of the compiled pipeline and expose it as a
# LlamaIndex prompt template.
synthesizer = compiled_dspy_qp.query_pipeline.module_dict["synthesizer"]
qa_prompt_tmpl = DSPyPromptTemplate(synthesizer.predict_module)

In [59]:
from IPython.display import display, Markdown

In [79]:
# Plain LlamaIndex query engine that answers with the DSPy-derived prompt.
query_engine = index.as_query_engine(text_qa_template=qa_prompt_tmpl)

response = query_engine.query("What do investors look out for in a P/E ratio?")

display(Markdown(f"<b>{response}</b>"))

<b>The key points about P/E ratio from the context are:

1. The P/E (price-to-earnings) ratio measures a company's current share price relative to its per-share earnings. 

2. It is calculated by dividing a company's market value per share by its earnings per share.

3. The P/E ratio allows investors to compare the valuation of different companies. A higher P/E ratio generally indicates a company's stock is more expensive compared to its earnings.

4. A company with a higher P/E ratio than its peers may be justified if it is growing faster. A lower P/E ratio with fast growth could also be an attractive investment.

5. Comparing a company's P/E ratio to those of similar companies in the same industry can help determine if it is under- or over-valued.

In summary, the P/E ratio is a key metric used by investors to assess a company's valuation relative to its earnings performance and compared to its peers.</b>

In [82]:
# Persist the optimised prompt text. The `with` block closes the file on exit,
# so the explicit `file.close()` that used to sit inside it was redundant.
with open("prompt.txt", "w") as file:
    file.write(qa_prompt_tmpl.get_template())

In [83]:
from llama_index.core import PromptTemplate

# Read the saved prompt back; the context manager makes sure the handle is
# closed (the bare `open()` used before never was).
with open("prompt.txt", "r") as file:
    tmpl = PromptTemplate(file.read())

qe = index.as_query_engine(
    text_qa_template = tmpl
)
# Query through `qe`, the engine built from the reloaded template. The earlier
# code called `query_engine` here, so the round-tripped prompt was never
# actually exercised.
response = qe.query(
    "What do investors look out for in a P/E ratio?"
)

display(Markdown(f"<b>{response}</b>"))

<b>The key points about P/E ratio from the context are:

1. The P/E (price-to-earnings) ratio measures a company's current share price relative to its per-share earnings. 

2. It is calculated by dividing a company's market value per share by its earnings per share.

3. The P/E ratio allows investors to compare the valuation of different companies. A higher P/E ratio generally indicates a company's stock is more expensive compared to its earnings.

4. A company with a higher P/E ratio than its peers may be justified if it is growing faster. A lower P/E ratio with fast growth could also be an attractive investment.

5. Comparing a company's P/E ratio to those of similar companies in the same industry helps determine if it is under- or over-valued.

In summary, the P/E ratio is a key metric used by investors to assess a company's valuation relative to its earnings performance and compared to its peers.</b>

In [88]:
from llama_index.core.tools import (
    QueryEngineTool,
    ToolMetadata
)

# Name and description the agent sees when deciding whether to call the tool.
investopedia_tool_metadata = ToolMetadata(
    name="investopedia_tool",
    description=(
        "Provides conceptual information relating to the stock market, "
        "stock market instruments such as options and futures, "
        "as well as conceptual understanding for metrics used in "
        "technical analysis and fundamental analysis."
    ),
)

# Single-element tool list wrapping the prompt-optimised query engine.
query_engine_tool = [
    QueryEngineTool(
        query_engine=query_engine,
        metadata=investopedia_tool_metadata,
    )
]

In [91]:
from llama_index.core.agent import ReActAgent

# ReAct agent with the Investopedia tool as its only tool.
agent = ReActAgent.from_tools(
    query_engine_tool,
    llm=Settings.llm,
    verbose=True,
)

response = agent.chat(
    "What is the difference between technical and fundamental analysis"
)
display(Markdown(f"<b>{response}</b>"))

[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.
Action: investopedia_tool
Action Input: {'input': 'difference between technical and fundamental analysis'}
[0m[1;3;34mObservation: I do not have enough context to provide a meaningful answer to your query. The provided text discusses technical analysis, its underlying assumptions, and the differences between technical analysis and fundamental analysis. However, there is no specific question asked that I can directly respond to. If you have a more specific question related to the information provided, please feel free to ask, and I'll do my best to provide a helpful answer.
[0m[1;3;38;5;200mThought: I can answer the question about the difference between technical and fundamental analysis without using any more tools.
Answer: The main difference between technical analysis and fundamental analysis is the approach they take to evaluating and making investment decis

<b>The main difference between technical analysis and fundamental analysis is the approach they take to evaluating and making investment decisions.

Technical analysis focuses on studying the historical price and volume data of a security, with the goal of identifying patterns and trends that can be used to predict future price movements. Technical analysts believe that past market data, such as stock prices and trading volume, can be used to forecast future activity.

On the other hand, fundamental analysis looks at the underlying economic and financial factors that can influence a security's value, such as a company's financial statements, management, competitive position, and macroeconomic conditions. Fundamental analysts try to determine the intrinsic value of a security based on these factors, and then compare that to the current market price to identify undervalued or overvalued securities.

In summary, technical analysis is focused on the price action and trading patterns of a security, while fundamental analysis is focused on the underlying business and economic factors that can impact a security's value.</b>

## Export

In [93]:
%%writefile ../tools/prompt_optimization.py

import os
from dotenv import load_dotenv, find_dotenv
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage
)
from llama_index.core.evaluation import SemanticSimilarityEvaluator
from llama_index.core.query_pipeline import (
    QueryPipeline as QP,
    InputComponent,
    FnComponent
)

import dspy
from dspy import Example
from dspy.predict.llamaindex import (
    DSPyComponent,
    LlamaIndexModule
)
from dspy.teleprompt import BootstrapFewShot
from dspy.predict.llamaindex import DSPyPromptTemplate

import sys

# Make the project's `src` directory importable and choose where the persisted
# index lives, depending on whether the script is launched from the `tools`
# directory or from its parent.
__curdir__ = os.getcwd()
# Compare only the final path component: the earlier substring test
# (`"tools" in __curdir__`) also matched unrelated locations such as
# ".../devtools/project" and then picked the wrong relative paths.
if os.path.basename(os.path.normpath(__curdir__)) == "tools":
    sys.path.append(os.path.join(
        __curdir__,
        "../src"
    ))
    storage_dir = "../VectorIndex/"
else:
    sys.path.append("./src")
    storage_dir = "VectorIndex"
    
from utils import CustomWebPageReader
from llamaindex_config import llm, embed_model

### Define variables needed ###
# Load variables from the nearest .env file into the environment.
# NOTE(review): this runs after `from llamaindex_config import llm, embed_model`;
# if that module reads credentials from the environment at import time, the
# .env values arrive too late — confirm.
_ = load_dotenv(find_dotenv())

# DSPy language model used for prompt optimisation.
bedrock = dspy.Bedrock(region_name="us-west-2")
lm = dspy.AWSAnthropic(bedrock, 
                       "anthropic.claude-3-haiku-20240307-v1:0")
dspy.settings.configure(lm=lm)
# NOTE(review): the next two self-assignments are no-ops; both names are
# already bound by the `llamaindex_config` import.
llm = llm
embed_model = embed_model
# Pages ingested when no persisted index exists yet.
links = [
    "https://www.investopedia.com/terms/s/stockmarket.asp",
    "https://www.investopedia.com/ask/answers/difference-between-options-and-futures/",
    "https://www.investopedia.com/financial-edge/0411/5-essential-things-you-need-to-know-about-every-stock-you-buy.aspx",
    "https://www.investopedia.com/articles/fundamental/04/063004.asp",
    "https://www.investopedia.com/terms/t/technicalanalysis.asp"  
]
# Evaluator behind the optimisation metric, with a similarity threshold of 0.5.
evaluator = SemanticSimilarityEvaluator(similarity_threshold=0.5)

### Define utility functions ###
# DSPy signature with two inputs (`context_str`, `query_str`) and one output
# (`answer`); the field names match the `dest_key` values used in `main`.
# NOTE(review): the docstring asks for "short factoid answers" while the
# `answer` field asks for "3 to 5 sentences" — confirm the mix is intended.
class GenerateAnswer(dspy.Signature):
    """Answers questions with short factoid answers."""
    
    # Retrieved passages, already joined into a single string.
    context_str = dspy.InputField(desc="contains relevant facts")
    # The user's question.
    query_str = dspy.InputField()
    # Generated answer text.
    answer = dspy.OutputField(desc = "Often between 3 to 5 sentences.")
    
def load_index(persist_dir=storage_dir,
               links=links,
               embed_model=embed_model):
    """Load the vector index from disk, building and persisting it if absent.

    Args:
        persist_dir: Directory the index is loaded from and persisted to.
        links: URLs scraped to build the index when `persist_dir` does not
            exist yet.
        embed_model: Embedding model used to build the index, and attached to
            it when it is reloaded from storage.

    Returns:
        The index, either reloaded from `persist_dir` or freshly built.
    """
    if os.path.exists(persist_dir):
        # Reload from the directory that was just checked. The global
        # `storage_dir` was used here before, so a caller-supplied
        # `persist_dir` was tested for existence but never actually read.
        storage_context = StorageContext.from_defaults(persist_dir=persist_dir)
        # Extra keyword arguments are forwarded to the index constructor, so
        # the reloaded index uses the same embedding model as a fresh build.
        return load_index_from_storage(storage_context, embed_model=embed_model)

    # Nothing persisted yet: scrape the pages, embed them and save the result.
    docs = CustomWebPageReader(html_to_text=True).load_data(urls=links)
    index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)
    index.storage_context.persist(persist_dir=persist_dir)
    return index

### Prompt Optimization Metric ###
def validate_context_and_answer(example, pred, trace=None):
    """Metric for prompt optimization: does the prediction match the reference?

    Args:
        example: Training example; its `answer` attribute is the reference.
        pred: Prediction; its `answer` attribute is the generated response.
        trace: Accepted for the metric call signature; not used here.

    Returns:
        The `passing` attribute of the result produced by the module-level
        `evaluator`.
    """
    generated, expected = pred.answer, example.answer
    verdict = evaluator.evaluate(response=generated, reference=expected)
    return verdict.passing

### Main function to execute ###
def _build_query_pipeline(index):
    """Wire input -> retriever -> context joiner -> DSPy synthesizer."""
    retriever = index.as_retriever(similarity_top_k = 2)
    dspy_component = DSPyComponent(
        dspy.ChainOfThought(GenerateAnswer)
    )
    # Collapse the retrieved nodes into one context string for the synthesizer.
    retriever_post = FnComponent(
        lambda contexts: "\n\n".join([n.get_content() for n in contexts])
    )
    p = QP(verbose=True)
    p.add_modules(
        {
            "input": InputComponent(),
            "retriever": retriever,
            "retriever_post": retriever_post,
            "synthesizer": dspy_component,
        }
    )
    p.add_link("input", "retriever")
    p.add_link("retriever", "retriever_post")
    # The raw question and the joined context feed the two signature inputs.
    p.add_link("input", "synthesizer", dest_key="query_str")
    p.add_link("retriever_post", "synthesizer", dest_key="context_str")
    return p


def _training_examples():
    """Return the labelled question/answer pairs with `query_str` as the input."""
    train_examples = [
        Example(
            query_str = "What is the difference between an option and a future?",
            answer = """An option gives the buyer the right, but not the obligation, to buy (or sell) an asset at a specific price at any time during the life of the contract.
            A futures contract obligates the buyer to purchase a specific asset, and the seller to sell and deliver that asset, at a specific future date
            """
        ),
        Example(
            query_str = "What are the similarities between an option and a future?",
            answer = """Futures and options positions may be traded and closed ahead of expiration, but the parties to the futures contracts for commodities are typically obligated to make and accept deliveries on the settlement date."""
        ),
        Example(
            query_str = "What is an option?",
            answer = "Options are financial derivatives. An options contract gives an investor the right to buy or sell the underlying instrument at a specific price while the contract is in effect. Investors may choose not to exercise their options. Option holders do not own the underlying shares or enjoy shareholder rights unless they exercise an option to buy stock."
        ),
        Example(
            query_str = "What are the different options?",
            answer = "There are only two kinds of options: Call options and put options. A call option confers the right to buy a stock at the strike price before the agreement expires. A put option gives the holder the right to sell a stock at a specific price."
        ),
        Example(
            query_str="What's P/E ratio?",
            answer = """ This ratio is used to measure a company's current share price relative to its per-share earnings. The company can be compared to other, similar corporations so that analysts and investors can determine its relative value. So if a company has a P/E ratio of 20, this means investors are willing to pay $20 for every $1 per earnings. That might seem expensive but not if the company is growing fast. The P/E can be found by comparing the current market price to the cumulative earnings of the last four quarters."""
        ),
        Example(
            query_str="What's a dividend?",
            answer = """Dividends are like interest in a savings account—you get paid regardless of the stock price. Dividends are distributions made by a company to its shareholders as a reward from its profits. The amount of the dividend is decided by its board of directors and are generally issued in cash, though it isn't uncommon for some companies to issue dividends in the form of stock shares."""
        ),
        Example(
            query_str="What's a balance sheet?",
            answer=" A balance sheet is a financial statement that reports a company's assets, liabilities and shareholder equity at a specific point in time"
        ),
        Example(
            query_str="What's a current ratio?",
            answer = "It's the total current assets divided by total current liabilities, commonly used by analysts to assess the ability of a company to meet its short-term obligations"
        ),
        Example(
            query_str = "What are stocks?",
            answer = "When you buy a stock or a share, you're getting a piece of that company. Owning shares gives you the right to part of the company's profits, often paid as dividends, and sometimes the right to vote on company matters"
        ),
        Example(
            query_str = "What are REITs?",
            answer="Real estate investment trusts (REITs) are companies that own, manage, or finance real estate. Investors can buy shares in them, and they legally must provide 90% of their profits as dividends each year."
        ),
        Example(
            query_str = "What are brokers?",
            answer = "Brokers in the stock market play the same role as in insurance and elsewhere, acting as a go-between for investors and the securities markets. They are licensed organizations that buy and sell stocks and other securities for individual and institutional clients."
        ),
        Example(
            query_str = "What is technical analysis?",
            answer = "Technical analysis is used to scrutinize the ways supply and demand for a security affect changes in price, volume, and implied volatility. It assumes that past trading activity and price changes of a security can be valuable indicators of the security's future price movements when paired with appropriate investing or trading rules."
        ),
        Example(
            query_str="What is the difference between fundamental and technical analysis?",
            answer = "Fundamental analysis is a method of evaluating securities by attempting to measure the intrinsic value of a stock. Fundamental analysts study everything from the overall economy and industry conditions to the financial condition and management of companies. Technical analysis differs from fundamental analysis in that the stock's price and volume are the only inputs. The core assumption is that all publicly known fundamentals have factored into price; thus, there is no need to pay close attention to them. Technical analysts do not attempt to measure a security's intrinsic value, but instead, use stock charts to identify patterns and trends that suggest how a stock's price will move in the future."
        )
    ]
    # Only the question is an input; `answer` stays as the label for the metric.
    return [t.with_inputs("query_str") for t in train_examples]


def main(output_path="prompt.txt"):
    """Main function used to train a text_qa_template for a query engine

    Loads (or builds) the index, wraps a retrieval + DSPy synthesis pipeline
    in a DSPy module, compiles it with `BootstrapFewShot` against the training
    examples, and writes the resulting prompt template to a text file.

    Args:
        output_path: File the optimized prompt template is written to.
            Defaults to "prompt.txt" in the working directory.
    """
    index = load_index()
    dspy_qp = LlamaIndexModule(_build_query_pipeline(index))

    teleprompter = BootstrapFewShot(
        max_labeled_demos=0,
        metric=validate_context_and_answer
    )
    compiled_dspy_qp = teleprompter.compile(
        dspy_qp, trainset=_training_examples()
    )
    qa_prompt_tmpl = DSPyPromptTemplate(
        compiled_dspy_qp.query_pipeline.module_dict["synthesizer"].predict_module
    )
    # The `with` block closes the file on exit; no explicit close() is needed.
    with open(output_path, "w") as file:
        file.write(qa_prompt_tmpl.get_template())

if __name__ == "__main__":
    main()
    print("Execution complete. Optimized prompt saved to 'prompt.txt'")

Overwriting prompt_optimization.py


In [92]:
%%writefile ../tools/rag_tools.py

import os
from .prompt_optimization import load_index

import sys

# Locate src/ from this module's own location rather than from the working
# directory. The earlier check (`"tools" in os.getcwd()`) matched any path
# that merely contained the substring "tools" and broke whenever the module
# was imported from another directory.
__curdir__ = os.getcwd()  # still exposed for any code that reads this name
try:
    _module_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    # No __file__ (e.g. code pasted into an interactive session): fall back
    # to the working directory, as the original logic did.
    _module_dir = __curdir__

# This module is written to <project>/tools/, with src/ as a sibling folder.
if os.path.basename(_module_dir) == "tools":
    _project_root = os.path.dirname(_module_dir)
else:
    _project_root = _module_dir

# Make the project-local modules in src/ importable (added only once).
_src_dir = os.path.join(_project_root, "src")
if _src_dir not in sys.path:
    sys.path.append(_src_dir)
from llamaindex_config import llm, embed_model

from llama_index.core.tools import (
    QueryEngineTool,
    ToolMetadata
)
from llama_index.core import PromptTemplate

def get_rag_tools(prompt_path: str):
    """Returns a query engine tool with a DSPy optimized prompt template

    Args:
        prompt_path: Path of the text file holding the optimized prompt
            template.

    Returns:
        A one-element list containing the `QueryEngineTool` built on top of
        the index returned by `load_index()`.

    Raises:
        OSError: If `prompt_path` cannot be opened for reading.
    """
    # Read the template from the path the caller supplied. The handle opened
    # on `prompt_path` used to be replaced at once by one on a hard-coded
    # "prompt.txt", so the argument had no effect and neither file was closed.
    with open(prompt_path, "r") as prompt_file:
        tmpl = PromptTemplate(prompt_file.read())

    index = load_index()
    # `text_qa_template` is the keyword LlamaIndex documents for overriding
    # the question-answering prompt of a query engine.
    query_engine = index.as_query_engine(
        text_qa_template=tmpl,
        llm=llm,
        embed_model=embed_model,
    )
    tool_metadata = ToolMetadata(
        name="investopedia_tool",
        description = (
            "Provides conceptual information relating to the stock market, "
            "stock market instruments such as options and futures, "
            "as well as conceptual understanding for metrics used in "
            "technical analysis and fundamental analysis."
        )
    )
    return [
        QueryEngineTool(
            query_engine=query_engine,
            metadata=tool_metadata,
        )
    ]


Writing rag_tools.py
