In [1]:
import logging
import os
from typing import Iterator, Optional
import openai

from llama_index.query_engine.retriever_query_engine import RetrieverQueryEngine
from llama_index.callbacks.base import CallbackManager
from langchain.chat_models import ChatOpenAI
import chainlit as cl
from dotenv import load_dotenv

from llama_index.retrievers import VectorIndexRetriever
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.indices.postprocessor import SimilarityPostprocessor
from llama_index.chat_engine.types import BaseChatEngine, ChatMode

from llama_index import (
    LLMPredictor,
    ServiceContext,
    StorageContext,
    load_index_from_storage,
    set_global_service_context,
)
from llama_index import (
    VectorStoreIndex,
    get_response_synthesizer,
)

from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader

from llama_index.node_parser import SimpleNodeParser
from llama_index.node_parser.extractors import (
    MetadataExtractor,
    SummaryExtractor,
    QuestionsAnsweredExtractor,
    TitleExtractor,
    KeywordExtractor,
    EntityExtractor,
)
from llama_index.text_splitter import TokenTextSplitter
from llama_index import ServiceContext
from llama_index.llms import OpenAI
from llama_index.schema import MetadataMode
from llama_index import VectorStoreIndex
from llama_index.query_engine import SubQuestionQueryEngine
from llama_index.tools import QueryEngineTool, ToolMetadata




In [2]:
# Model configuration for the final answering LLM.
# Values are read from the environment so the notebook can be re-pointed at a
# different model without editing code; the defaults reproduce the previously
# hard-coded settings ("gpt-4", temperature 0.7).
#   GPT_MODEL        -> chat model name
#   GPT_TEMPERATURE  -> sampling temperature, parsed as float
gpt_model = os.getenv("GPT_MODEL", "gpt-4")
gpt_temperature = float(os.getenv("GPT_TEMPERATURE", "0.7"))

In [3]:
# Source files handed to SimpleDirectoryReader. Only the cookbook is indexed;
# the documentation file is kept here, disabled, so it can be switched back on.
document_list = [
    '../quant_scraper/docs/vbt_pro/cookbook.md',
    # '../quant_scraper/docs/vbt_pro/documentation.md',
]

## With Metadata Extraction

In [4]:
# LLM handed to the metadata extractors below (low temperature, short outputs).
llm_indexer = OpenAI(
    model="gpt-3.5-turbo",
    temperature=0.1,
    max_tokens=512,
)

# Token-based splitter with "## " as separator.
# NOTE(review): presumably chosen so chunks break at markdown H2 headings - confirm.
text_splitter = TokenTextSplitter(
    separator="## ",
    chunk_size=1024,
    chunk_overlap=128,
)

# Extractors that are currently switched on. The commented entries are the
# alternatives that were tried and left available for re-enabling.
active_extractors = [
    # TitleExtractor(nodes=3, llm=llm_indexer),
    # KeywordExtractor(keywords=3, llm=llm_indexer),
    # EntityExtractor(prediction_threshold=0.5, llm=llm_indexer),
    # SummaryExtractor(summaries=["prev", "self"], llm=llm_indexer),
    QuestionsAnsweredExtractor(questions=5, llm=llm_indexer),
]

metadata_extractor = MetadataExtractor(extractors=active_extractors)

In [5]:
# Read every file listed in document_list into llama_index documents.
documents = SimpleDirectoryReader(
    input_files=document_list,
).load_data()

# Parser that chunks with text_splitter and annotates each chunk through
# metadata_extractor.
node_parser = SimpleNodeParser.from_defaults(
    metadata_extractor=metadata_extractor,
    text_splitter=text_splitter,
)

In [6]:
# Split the loaded documents into nodes and run the configured metadata
# extraction on them. The recorded output below shows the question-extraction
# pass taking over two minutes for 62 nodes, so this is the slow cell.
index_nodes = node_parser.get_nodes_from_documents(documents, show_progress=True)

  from .autonotebook import tqdm as notebook_tqdm
Parsing documents into nodes: 100%|██████████| 35/35 [00:00<00:00, 330.47it/s]
Extracting questions: 100%|██████████| 62/62 [02:17<00:00,  2.22s/it]


In [7]:
# Spot-check the extracted metadata on the first and the last node.
for position in (0, -1):
    print(index_nodes[position].metadata)

{'file_path': '../quant_scraper/docs/vbt_pro/cookbook.md', 'creation_date': '2023-11-05', 'last_modified_date': '2023-11-05', 'last_accessed_date': '2023-11-05', 'questions_this_excerpt_can_answer': '1. What is the purpose of the "Cookbook" repository?\n2. What is the recommended way to import the required libraries for the code examples?\n3. How are the examples in the "Cookbook" repository different from the tutorials and documentation?\n4. How can users contribute to the documentation in the "Cookbook" repository?\n5. What is the file path of the "cookbook.md" file in the "quant_scraper" directory?'}
{'file_path': '../quant_scraper/docs/vbt_pro/cookbook.md', 'creation_date': '2023-11-05', 'last_modified_date': '2023-11-05', 'last_accessed_date': '2023-11-05', 'questions_this_excerpt_can_answer': "1. How can I measure the execution time of a code block by running it only once?\n2. How can I exclude compilation time from the estimate when measuring execution time?\n3. How can I measur

In [8]:
# Print the generated "questions this excerpt can answer" text of every node,
# in node order (no de-duplication is performed).
unique_entities = set()  # allocated here but never populated in this cell
for index_node in index_nodes:
    questions_text = index_node.metadata['questions_this_excerpt_can_answer']
    print(questions_text)


1. What is the purpose of the "Cookbook" repository?
2. What is the recommended way to import the required libraries for the code examples?
3. How are the examples in the "Cookbook" repository different from the tutorials and documentation?
4. How can users contribute to the documentation in the "Cookbook" repository?
5. What is the file path of the "cookbook.md" file in the "quant_scraper" directory?
1. What is the function `vbt.phelp()` used for in the Python package?
2. How can we list the attributes of a Python object using the `vbt.pdir()` function?
3. How can we print the specification of the TA-Lib's ATR using the `vbt.phelp()` function?
4. How can we print the properties and methods of the Portfolio class using the `vbt.pdir()` function?
5. How can we expand and pretty-format a vectorbtpro object to examine its contents using the `vbt.pprint()` function?
1. What is the file path of the cookbook.md file in the quant_scraper/docs/vbt_pro directory?
2. When was the cookbook.md fil

In [9]:
# Service context for the first index: default settings with a GPT-4 LLM.
index_service_context = ServiceContext.from_defaults(llm=OpenAI(model="gpt-4"))

# Embed the metadata-enriched nodes into a vector index.
index = VectorStoreIndex(
    nodes=index_nodes,
    service_context=index_service_context,
    show_progress=True,
)

# Write the index's storage context to disk next to the notebook directory.
index.storage_context.persist("../index_notebook")

Generating embeddings: 100%|██████████| 62/62 [00:04<00:00, 14.82it/s]


In [10]:
# Base query engine over `index`: take the 10 nearest nodes, then drop any
# whose similarity score is below 0.7.
engine = index.as_query_engine(
    similarity_top_k=10,
    node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.7)],
)

# Expose the base engine as a single named tool.
knowledge_base_tool = QueryEngineTool(
    query_engine=engine,
    metadata=ToolMetadata(
        name="quant_knowledge_base",
        description="technical documentation for vectorbt pro",
    ),
)

# Sub-question engine that answers through the tool above.
final_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=[knowledge_base_tool],
)

In [11]:
# Sample question used to exercise the query engines. The leading and trailing
# newlines are part of the value.
test_query = (
    "\n"
    "In my strategy I have 4 take profits and stop loss. "
    "How can I move stop loss to breakeven after the first take profit is hit?"
    "\n"
)

# Sub-question engine run, currently disabled:
# result = await final_engine.aquery(test_query)

In [12]:
# result

In [13]:
# Second pipeline: retriever + tree_summarize synthesizer driven by the
# notebook-level gpt_model / gpt_temperature settings.

# Answering LLM: a LangChain ChatOpenAI model wrapped in an LLMPredictor.
final_llm = LLMPredictor(
    llm=ChatOpenAI(
        temperature=gpt_temperature,
        model_name=gpt_model,
        max_tokens=2048,
        streaming=True,
    ),
)

service_context = ServiceContext.from_defaults(
    llm_predictor=final_llm,
    chunk_size=1024,
)

# Index over the same nodes, bound to the service context above.
index2 = VectorStoreIndex(
    nodes=index_nodes,
    service_context=service_context,
    show_progress=True,
)

response_synthesizer = get_response_synthesizer(
    response_mode="tree_summarize",
    service_context=service_context,
)

# Retrieve from index2. This previously pointed at the earlier `index`, which
# left index2 (and the embeddings generated for it) completely unused.
retriever = VectorIndexRetriever(
    index=index2,
    similarity_top_k=10,
)

# Assemble the query engine; retrieved nodes scoring below 0.7 are dropped.
# NOTE(review): when a ready-made response_synthesizer is passed, from_args
# appears to ignore `streaming=True` (it only applies to a synthesizer that
# from_args builds itself) - confirm against the installed llama_index version.
# Later cells read `result.response` as a plain string, so the non-streaming
# behaviour is deliberately left unchanged here.
query_engine2 = RetrieverQueryEngine.from_args(
    streaming=True,
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    service_context=service_context,
    node_postprocessors=[
        SimilarityPostprocessor(similarity_cutoff=0.7)
    ],
)

Generating embeddings: 100%|██████████| 62/62 [00:03<00:00, 17.41it/s]


In [14]:
# Run the sample question through the retriever-based engine.
# Top-level `await` works here because the cell runs under IPython/Jupyter;
# it is not valid in a plain Python script.
result = await query_engine2.aquery(test_query)

2023-11-06 03:33:51 - message='OpenAI API response' path=https://api.openai.com/v1/embeddings processing_ms=18 request_id=8211a9c25a57b7cbd49191cb8474c233 response_code=200


In [15]:
# Display the synthesized answer text as the cell output.
result.response

'To move your stop loss to breakeven after the first take profit is hit in your strategy, you can create a function using the `adjust_func_nb` feature. In this function, you need to set a condition to check if the first take profit has been hit. This can be done using the `vbt.pf_nb.is_stop_info_ladder_active_nb` function and `tp_info["step"]` where `tp_info` holds the information about the take profit. If the first take profit is hit (i.e., `tp_info["step"] == 0`), you adjust your stop loss to the entry price, effectively moving your stop loss to breakeven. Here is an example code snippet:\n\n```python\n@njit\ndef adjust_func_nb(c, atr, multipliers, exit_sizes):\n    tp_info = c.last_tp_info[c.col]\n    if vbt.pf_nb.is_stop_info_ladder_active_nb(tp_info):\n        if tp_info["step"] == 0:  # first take profit hit\n            sl_info = c.last_sl_info[c.col]\n            sl_info.stop = tp_info["entry_price"]  # adjust stop loss to entry price (breakeven)\n        if np.isnan(tp_info["s

In [16]:
# For every node that backed the answer, print its similarity score and then
# the questions its metadata lists.
for source_node in result.source_nodes:
    print(source_node.score)
    print(source_node.metadata['questions_this_excerpt_can_answer'])


0.7639840126631462
1. What happens if the number of active positions exceeds the maximum allowed?
2. How can you disable an entry signal?
3. How can you form a group out of all assets?
4. How can you activate a stop loss (SL) once a certain condition is met?
5. How can you dynamically set a ladder based on ATR multipliers?
0.7581331321547004
1. What is the purpose of the `adjust_func_nb` function in the code?
2. How is the target size for each trade dynamically determined and applied?
3. What is the risk management strategy used in the code?
4. How is the stop loss (SL) price calculated and activated?
5. How can you check if a signal has been executed at the end of a bar in the code?
0.7460657524914192
1. What is the exit size type used in the code?
2. What is the purpose of the adjust_func_nb function in the code?
3. How can position metrics such as open P&L and return be accessed in the code?
4. How can a stop loss be set based on a certain return percentage in the code?
5. What is t