In [1]:
import yaml, os, camelot
from typing import List, Dict
from llama_index.schema import IndexNode
from llama_index.llms import AzureOpenAI
from llama_index.llm_predictor import LLMPredictor
from llama_index import set_global_service_context
from llama_index.node_parser import SimpleNodeParser
from llama_index.retrievers import RecursiveRetriever
from llama_hub.file.pymu_pdf.base import PyMuPDFReader
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.response_synthesizers import get_response_synthesizer
from llama_index.query_engine import PandasQueryEngine, RetrieverQueryEngine
from llama_index import StorageContext, ServiceContext, load_index_from_storage
from vectorstore import *

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the credentials YAML file. Set LLMPRO_CREDENTIALS_PATH to point
# the notebook at a different file; when it is unset, the original local path
# is used so existing runs keep working.
credentials_path = os.environ.get(
    'LLMPRO_CREDENTIALS_PATH',
    '/Users/1zuu/Desktop/LLM RESEARCH/LLMPro/cadentials.yaml',
)

# safe_load only constructs plain YAML types (mappings, lists, scalars), which
# is all a key/value credentials file needs.
with open(credentials_path) as f:
    credentials = yaml.safe_load(f)

# Mirror the Azure OpenAI key into the process environment.
os.environ['AD_OPENAI_API_KEY'] = credentials['AD_OPENAI_API_KEY']

In [3]:
# Embedding model used by the service context.
embedding_llm = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Azure OpenAI connection settings, all read from the loaded credentials mapping.
azure_llm_settings = dict(
    deployment_name=credentials['AD_DEPLOYMENT_ID'],
    model=credentials['AD_ENGINE'],
    api_key=credentials['AD_OPENAI_API_KEY'],
    api_version=credentials['AD_OPENAI_API_VERSION'],
    azure_endpoint=credentials['AD_OPENAI_API_BASE'],
)
llm = AzureOpenAI(**azure_llm_settings)

# Wrap the LLM in a predictor, which is what the service context is given below.
chat_llm = LLMPredictor(llm)

# Bundle the embedding model and the predictor, then register the bundle as
# the global service context.
service_context = ServiceContext.from_defaults(
    embed_model=embedding_llm,
    llm_predictor=chat_llm,
)
set_global_service_context(service_context)

In [4]:
# Index stored under ./db/text_index.
text_storage_context = StorageContext.from_defaults(persist_dir="./db/text_index")
vector_index_text = load_index_from_storage(text_storage_context)

# Index stored under ./db/recursive_index (used by the recursive retriever).
recursive_storage_context = StorageContext.from_defaults(persist_dir="./db/recursive_index")
vector_index = load_index_from_storage(recursive_storage_context)

In [5]:
# Query engine over the text index, built with default settings.
vector_query_engine_text = vector_index_text.as_query_engine()

# Retriever over the recursive index; similarity_top_k=1 requests only the
# top-ranked match.
vector_retriever = vector_index.as_retriever(
    similarity_top_k=1,
)

In [7]:
## Recursive retriever: retrieval starts at the "vector" retriever and then
## recursively queries the matching table's query engine to find the relevant table.
# NOTE(review): df_id_query_engine_mapping is not defined in any cell visible
# here, and the execution counts skip In [6]. Presumably it is provided by
# `from vectorstore import *` — confirm this cell runs on a fresh kernel.
root_retrievers = {"vector": vector_retriever}
recursive_retriever = RecursiveRetriever(
    "vector",
    retriever_dict=root_retrievers,
    query_engine_dict=df_id_query_engine_mapping,
    verbose=True,
)

# Synthesizer that turns the retrieved results into the final answer.
response_synthesizer = get_response_synthesizer(response_mode="compact")

# End-to-end engine: recursive retrieval followed by response synthesis.
query_engine = RetrieverQueryEngine(
    retriever=recursive_retriever,
    response_synthesizer=response_synthesizer,
)

In [8]:
# Table-oriented question, sent through the recursive query engine.
table_question = "What's the net worth of the second richest billionaire in 2023?"
response = query_engine.query(table_question)

# Bare last expression so the notebook displays the answer text.
str(response)

[1;3;34mRetrieving with query id None: What's the net worth of the second richest billionaire in 2023?
[0m[1;3;38;5;200mRetrieved node with id, entering: pandas0
[0m[1;3;34mRetrieving with query id pandas0: What's the net worth of the second richest billionaire in 2023?
[0m[1;3;32mGot response: $180 billion
[0m

'The net worth of the second richest billionaire in 2023 is $180 billion.'

### Ask a question about the text content using both the `text query engine` and the `recursive query engine`

In [8]:
# Text-content question, asked of the text query engine.
text_question = "How many billionaires were there in 2009?"
response = vector_query_engine_text.query(text_question)

# print() converts the response with str() itself.
print(response)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Sorry, I cannot answer the query as there is no information provided in the given context about the number of billionaires in 2009.


In [9]:
# The same 2009 question, this time sent through the recursive query engine.
comparison_question = "How many billionaires were there in 2009?"
response = query_engine.query(comparison_question)

# print() converts the response with str() itself.
print(response)

[1;3;34mRetrieving with query id None: How many billionaires were there in 2009?
[0m[1;3;38;5;200mRetrieving text node: 7/1/23, 11:31 PM
The World's Billionaires - Wikipedia
https://en.wikipedia.org/wiki/The_World%27s_Billionaires
5/33
In the 34th annual Forbes list of the world's billionaires, the list included 2,095 billionaires with a
total net wealth of $8 trillion, down 58 members and $700 billion from 2019; 51% of these billionaires
had less wealth than they possessed last year.[13] The list was finalized as of 18 March, thus was
already partially influenced by the COVID-19 pandemic.[13]
No.
Name
Net worth (USD)
Age
Nationality
Source(s) of wealth
1 
Jeff Bezos
$113 billion 
56
 United States
Amazon
2 
Bill Gates
$98 billion 
64
 United States
Microsoft
3 
Bernard Arnault & family
$76 billion 
71
 France
LVMH
4 
Warren Buffett
$67.5 billion 
89
 United States
Berkshire Hathaway
5 
Larry Ellison
$59 billion 
75
 United States
Oracle Corporation
6 
Amancio Ortega
$55.1 billion 
