In [1]:
import hashlib
import os
from collections import OrderedDict

from dotenv import load_dotenv ## loads API keys
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores.chroma import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# Fail fast with an actionable message when the Hugging Face token is absent.
# Copying os.getenv(...) back into os.environ is a no-op when the variable is
# set, and raises an opaque "str expected, not NoneType" TypeError when it is not.
if not os.getenv("HUGGINGFACEHUB_API_TOKEN"):
    raise RuntimeError(
        "HUGGINGFACEHUB_API_TOKEN is not set; add it to your .env file "
        "or export it before starting the kernel."
    )

In [2]:
# Source PDF for the knowledge base. Other documents tried during development:
#   "../grades_trim.pdf", "../speech.pdf"
loader = PyPDFLoader("../econs.pdf")
docs = loader.load()

# NOTE: `docs` is split into chunks by the next cell, which defines
# `text_splitter` and `texts`. The identical split that used to live here was
# dropped so the work is not done twice and the chunk parameters exist in one place.

## Initialize embedding model
# Sentence-transformers model used both for the embedding check and by the vector store.
embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  embed_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Break the loaded pages into overlapping chunks for embedding and retrieval.
# chunk_size / chunk_overlap use the splitter's length function
# (presumably characters by default; confirm against the library docs).
splitter_options = {"chunk_size": 2000, "chunk_overlap": 300}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)
texts = text_splitter.split_documents(docs)

# Uncomment to eyeball a few chunks and their lengths:
# for chunk in texts[1:4]:
#     print(chunk.page_content)
#     print(len(chunk.page_content))
#     print("\n")

In [4]:
# Embed every chunk once to confirm the embedding model works end to end.
# `embeds` is not read by any later cell shown in this notebook.
try:
    chunk_bodies = [chunk.page_content for chunk in texts]
    embeds = embed_model.embed_documents(chunk_bodies)
    print("Vectors done!!!")
except Exception as err:
    # Report the failure without stopping the notebook.
    print(f"Error in embed process: {err}")

Vectors done!!!


In [5]:
## Vector store
# The collection is persisted under ./data, so whatever is added here is still
# there the next time the notebook runs.
vector_store = Chroma(embedding_function=embed_model, persist_directory="data")

# Give every chunk a deterministic id derived from its text. Without explicit
# ids each run appends a brand-new copy of every chunk to the persisted
# collection; with stable ids a re-run overwrites the existing entries instead.
# Chunks with identical text collapse to one entry, because ids must be unique
# within a single batch.
# NOTE(review): copies stored by earlier runs (random ids) are not removed by
# this change; clear the "data" directory once to start clean.
chunks_by_id = {}
for chunk in texts:
    chunk_id = hashlib.sha256(chunk.page_content.encode("utf-8")).hexdigest()
    chunks_by_id.setdefault(chunk_id, chunk)

_ = vector_store.add_documents(
    documents=list(chunks_by_id.values()),
    ids=list(chunks_by_id.keys()),
)

  vector_store = Chroma(embedding_function=embed_model, persist_directory="data")


In [6]:
TOP_N = 3          # distinct chunks to keep for inspection
CANDIDATE_K = 12   # over-fetch so that TOP_N distinct chunks survive de-duplication

# Defined before the try block so that later cells see an empty list, rather
# than an undefined or stale name, when the query fails.
final_results = []
try:
    test_query = "what is the view on inflation?"  # alt: "what is the state of the economy"
    # test_query = "what are the stress levels for the class?"
    # test_query = "whats current liquidity landscape?"
    results = vector_store.search(query=test_query, search_type='similarity', k=CANDIDATE_K)

    # Keep the first (best-ranked) hit for each distinct chunk text; the
    # persisted store can hold several copies of the same chunk.
    unique_results = OrderedDict()
    for doc in results:
        unique_results.setdefault(doc.page_content, doc)

    final_results = list(unique_results.values())[:TOP_N]
    if final_results:
        print(f"Top query results:\n{final_results[0].page_content}")
    else:
        # An empty store or no match: say so instead of raising IndexError.
        print("No results returned for the test query.")
except Exception as e:
    print(f"Error during test query: {e}")

Top query results:
Feb-25
120
130
140
150
160
170
180
190
200
Feb-23
Apr-23
Jun-23
Aug-23
Oct-23
Dec-23
Feb-24
Apr-24
Jun-24
Aug-24
Oct-24
Dec-24
Feb-25
0
20
40
60
80
100
120
140
160
0
10
20
30
40
50
60
70
80
Feb-23
Apr-23
Jun-23
Aug-23
Oct-23
Dec-23
Feb-24
Apr-24
Jun-24
Aug-24
Oct-24
Dec-24
Feb-25
35.2
193.8
170.3
154.9
24.1
20.7
Chart III.30: Spatial Distribution of Inflation
January 2025
(CPI-Combined, y-o-y), (per cent)
Note: Map is for illustrative purposes only.
Sources: NSO; and RBI Staff estimates.
<4 4-6 6-8


In [7]:
# Number of distinct chunks kept from the test query (the previous cell keeps at most three).
len(final_results)

3

In [8]:
# Dump each retained chunk under a "Content" banner for manual inspection.
for hit in final_results:
    print("\n\tContent\n", hit.page_content, sep="\n")


	Content

Feb-25
120
130
140
150
160
170
180
190
200
Feb-23
Apr-23
Jun-23
Aug-23
Oct-23
Dec-23
Feb-24
Apr-24
Jun-24
Aug-24
Oct-24
Dec-24
Feb-25
0
20
40
60
80
100
120
140
160
0
10
20
30
40
50
60
70
80
Feb-23
Apr-23
Jun-23
Aug-23
Oct-23
Dec-23
Feb-24
Apr-24
Jun-24
Aug-24
Oct-24
Dec-24
Feb-25
35.2
193.8
170.3
154.9
24.1
20.7
Chart III.30: Spatial Distribution of Inflation
January 2025
(CPI-Combined, y-o-y), (per cent)
Note: Map is for illustrative purposes only.
Sources: NSO; and RBI Staff estimates.
<4 4-6 6-8

	Content

ARTICLE
RBI Bulletin February 2025
25
State of the Economy
cent (m-o-m) in January amidst fresh US sanctions on 
Russia’s energy sector and cold weather pushing up 
demand (Chart II.7b). Base metal prices also increased 
in January, after China, the world’s largest consumer of 
base metals, indicated additional stimulus measures 
to boost their economy. Precious metals, particularly 
gold prices, recorded sharp increases in January, 
supported by safe-haven demand amids

In [37]:
from langchain_huggingface import HuggingFaceEndpoint

# Settings for the hosted text-generation endpoint used to answer questions.
llm_settings = {
    "repo_id": "meta-llama/Meta-Llama-3-8B-Instruct",
    "temperature": 0.8,
    "max_new_tokens": 1024,
}
hf_hub_llm = HuggingFaceEndpoint(**llm_settings)

In [31]:
# from langchain_huggingface.embeddings.huggingface import HuggingFaceEmbeddings

# help(HuggingFaceEmbeddings)

In [36]:
# from langchain_huggingface.embeddings.huggingface import HuggingFaceEmbeddings

# hf_hub_llm = HuggingFaceEmbeddings(repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
#                                            model_kwargs={"temperature": 0.8, "max_new_tokens": 1024}
#                                            )

In [32]:
# help(HuggingFaceEndpointEmbeddings)

In [40]:
from langchain_core.prompts import PromptTemplate

# Instruction text sent to the LLM. `{context}` and `{question}` are the two
# placeholders declared as input variables below.
prompt_template = """
You are a highly educated economist and central bank policy assistant, your role is to accurately interpret queries on the economy 
and provide responses using the specialized database provided in the Context.
Do not mention anything about charts or graphs. Provided your analysis as text response with facts from the data provided.

Context: {context}

Question: {question}

Answer:
"""

# Alternative persona kept for reference (disabled):
# prompt_template = """
# You are a highly educated class teacher and your role is to accurately interpret situation in the class based on the specialized database.

# Query: {context}

# Question: {question}

# Answer:
# """

custom_prompt = PromptTemplate(
    template=prompt_template,
    input_variables=["context", "question"],
)

In [41]:
from langchain.chains.retrieval_qa.base import RetrievalQA

# Retriever over the vector store, asking for 3 chunks per question.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Question-answering chain: retrieved chunks are passed to the LLM through
# `custom_prompt`, and the source documents are returned alongside the answer.
rag_chain = RetrievalQA.from_chain_type(
    llm=hf_hub_llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": custom_prompt},
    return_source_documents=True,
)

In [42]:
def get_response(question):
    """Run ``question`` through ``rag_chain`` and return the answer text.

    The chain's ``"result"`` string may or may not echo the prompt. When it
    contains the prompt's ``"Answer:"`` marker, only the text after the first
    marker is returned; when it does not, the whole result is returned.

    Args:
        question: The user question, passed to the chain under the ``"query"`` key.

    Returns:
        The answer with leading and trailing whitespace removed.
    """
    result = rag_chain.invoke({"query": question})
    response_text = result["result"]
    # partition() reports whether the marker was found. The previous
    # find(...) + len(...) arithmetic turned a miss (-1) into index 6 and
    # silently dropped the first six characters of the answer.
    _, marker, after_marker = response_text.partition("Answer:")
    answer = after_marker if marker else response_text
    return answer.strip()

In [43]:
# print(rag_chain({"query": "what is the view on inflation?"}))

In [44]:
# Other questions tried against the chain:
#   "whats current liquidity landscape?"
#   "what are the stress levels for the class?"
#   "what does the document say about union budget 2025-26?"
question = "what is the view on inflation?"
print(get_response(question))



on the data provided in the RBI Bulletin February 2025, the view on inflation is that headline inflation, as measured by y-o-y changes in the all-India consumer price index (CPI), declined to a five-month low of 4.3 per cent in January 2025 from 5.2 per cent in December 2024. The decline in inflation was primarily due to a negative price momentum of around 100 bps (m-o-m), partially offset by an adverse base effect of around 10 bps. Food inflation decelerated to 5.7 per cent in January from 7.7 per cent (y-o-y) in December. Fuel and light deflation was (-)1.4 per cent in January as compared with (-)1.3 per cent in December. Core inflation increased to 3.7 per cent in January 2025 from 3.6 per cent in December. Overall, the inflation rate is expected to remain under control due to the decline in food and fuel prices, but it may experience some upward pressure due to the increase in core inflation. The regional distribution of inflation also shows that rural and urban inflation was at 4.