# Importing Libraries

In [1]:
# Importing Libs
import os
from dotenv import load_dotenv
from pprint import pprint
from IPython.display import Markdown
import time
import tqdm

# For data
import pandas as pd
from PyPDF2 import PdfReader
import chromadb
from chromadb import Documents, EmbeddingFunction, Embeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

# For AI
import google.generativeai as genai


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
load_dotenv()

api_key = os.getenv("GOOGLE_API_KEY")
genai.configure(api_key=api_key)

In [3]:
# Available embedding models
for m in genai.list_models():
    if "embedContent" in m.supported_generation_methods:
        print(m.name)


models/embedding-001
models/text-embedding-004
models/gemini-embedding-exp-03-07
models/gemini-embedding-exp


# Extracting Knowledge from PDF

In [4]:
# Dealing with PDF Knowledge
def extract_text_from_pdf(file_path, starting_page, ending_page):
    pdf_reader = PdfReader(file_path)
    # num_pages = len(pdf_reader.pages)
    
    text = ""
    
    for page in range(starting_page, ending_page+1):
        text += pdf_reader.pages[page].extract_text()
    
    return text

text = extract_text_from_pdf("./context_docs/tdr2024_en.pdf", 14, 190)
print(text)

Trade and development report 2024
Rethinking development in the age of discontent
Chapter I
The 
macroeconomics 
of discontent
Global output growth shows signs of stabilizing at rates below those registered in 
the years prior to the pandemic, which itself marked a period of unsatisfactory global 
growth. Current growth trajectories are insufficient to meet global development and 
climate challenges and goals.
Prevailing global conditions are particularly worrisome in terms of debt dynamics 
as the combination of low growth and high interest rates exacerbates debt burdens. 
High public debt ratios in many economies are equally concerning. A hallmark of the 
new, post-pandemic norm, they heighten the risk of a return to austerity as a policy 
guideline.
The post-pandemic inflation spike was largely a supply issue, created by bottlenecks 
in global value chains and excessive concentration in key sectors. Overreliance on 
prolonged monetary tightening as the sole policy tool to lower infl

In [5]:
def cleaned_extracted_text(text):
    cleaned_text = ""
    
    for i, line in enumerate(text.split("\n")):
        if len(line) > 10:
            cleaned_text += line + "\n"
    
    cleaned_text = cleaned_text.replace(".", "")
    cleaned_text = cleaned_text.replace("~", "")
    cleaned_text = cleaned_text.replace("©", "")
    cleaned_text = cleaned_text.replace("_", "")
    cleaned_text = cleaned_text.replace(";", "")
    cleaned_text = cleaned_text.replace("...", "")
    return cleaned_text

In [6]:
cleaned_text = cleaned_extracted_text(text)
len(cleaned_text)

479362

In [7]:
# Split the cleaned text and store into vector db

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True
)

In [8]:
texts = text_splitter.create_documents([cleaned_text])
pprint(texts[0].page_content)

('Trade and development report 2024\n'
 'Rethinking development in the age of discontent\n'
 'macroeconomics \n'
 'of discontent\n'
 'Global output growth shows signs of stabilizing at rates below those '
 'registered in \n'
 'the years prior to the pandemic, which itself marked a period of '
 'unsatisfactory global \n'
 'growth Current growth trajectories are insufficient to meet global '
 'development and \n'
 'climate challenges and goals\n'
 'Prevailing global conditions are particularly worrisome in terms of debt '
 'dynamics \n'
 'as the combination of low growth and high interest rates exacerbates debt '
 'burdens \n'
 'High public debt ratios in many economies are equally concerning A hallmark '
 'of the \n'
 'new, post-pandemic norm, they heighten the risk of a return to austerity as '
 'a policy \n'
 'The post-pandemic inflation spike was largely a supply issue, created by '
 'bottlenecks \n'
 'in global value chains and excessive concentration in key sectors '
 'Overreliance

In [9]:
len(texts)

533

In [10]:
# adding chunks to documents list

documents = []

for chunk in texts:
    documents.append(chunk.page_content)

pprint(documents[0])

('Trade and development report 2024\n'
 'Rethinking development in the age of discontent\n'
 'macroeconomics \n'
 'of discontent\n'
 'Global output growth shows signs of stabilizing at rates below those '
 'registered in \n'
 'the years prior to the pandemic, which itself marked a period of '
 'unsatisfactory global \n'
 'growth Current growth trajectories are insufficient to meet global '
 'development and \n'
 'climate challenges and goals\n'
 'Prevailing global conditions are particularly worrisome in terms of debt '
 'dynamics \n'
 'as the combination of low growth and high interest rates exacerbates debt '
 'burdens \n'
 'High public debt ratios in many economies are equally concerning A hallmark '
 'of the \n'
 'new, post-pandemic norm, they heighten the risk of a return to austerity as '
 'a policy \n'
 'The post-pandemic inflation spike was largely a supply issue, created by '
 'bottlenecks \n'
 'in global value chains and excessive concentration in key sectors '
 'Overreliance

In [11]:
len(documents)

533

# Embedding Database

In [12]:
class GeminiEmbeddingFunction(EmbeddingFunction):
    def __call__(self, input: Documents) -> Embeddings:
        model = "models/text-embedding-004"
        
        # For better results, try to provide a title for each input if
        # the corpus covers a lot of domains
        
        title = "UN Trade and Development Report 2024"
        
        return genai.embed_content(
            model=model,
            content=input,
            task_type="retrieval_document",
            title=title)["embedding"]

In [68]:
def create_chroma_db(documents, name):
    chroma_client = chromadb.PersistentClient(path="./chroma_db")
    
    db = chroma_client.get_or_create_collection(
        name=name, embedding_function=GeminiEmbeddingFunction()
    )
    
    initial_size = db.count()
    
    for i, d in tqdm.tqdm(enumerate(documents), total=len(documents), desc="Creating Chroma DB"):
        db.add(
            documents=d,
            ids=str(i +initial_size)
        )
        time.sleep(1)
    return db

def get_chroma_db(name):
    chroma_client = chromadb.PersistentClient(path="./chroma_db")
    return chroma_client.get_collection(name=name, embedding_function=GeminiEmbeddingFunction())

In [16]:
db = create_chroma_db(documents, "trade_exports_db")
db.count()

  name=name, embedding_function=GeminiEmbeddingFunction()
Creating Chroma DB: 100%|██████████| 533/533 [16:59<00:00,  1.91s/it]  


539

In [19]:
db.peek(5)

{'ids': ['0', '1', '2', '3', '4'],
 'embeddings': array([[ 0.0013985 ,  0.02768529, -0.02378366, ..., -0.01452798,
          0.00418022, -0.04044196],
        [ 0.01977349,  0.03362851, -0.01529946, ..., -0.03913468,
         -0.02793716, -0.04625257],
        [ 0.03947063,  0.00363292, -0.02916559, ...,  0.00249584,
          0.00548787, -0.02123189],
        [ 0.03334087,  0.03045027, -0.03231112, ..., -0.01027207,
         -0.01276664, -0.05301384],
        [ 0.013999  ,  0.00221735, -0.03786378, ..., -0.02200131,
          0.01343829, -0.04302583]], shape=(5, 768)),
 'documents': ['Trade and development report 2024\nRethinking development in the age of discontent\nmacroeconomics \nof discontent\nGlobal output growth shows signs of stabilizing at rates below those registered in \nthe years prior to the pandemic, which itself marked a period of unsatisfactory global \ngrowth Current growth trajectories are insufficient to meet global development and \nclimate challenges and goals\nPr

# Querying DB

In [81]:
def get_relevant_passages(query, db, n_results=5):
    passages = db.query(query_texts=[query], n_results=n_results)['documents'][0]
    return passages

In [82]:
question = "How many percent did the US primary balance fell due to COVID-19?"
passages = get_relevant_passages(question, db, n_results=5)
Markdown(passages[0])

United States, the primary balance fell from a deficit of 35 per 
cent of GDP in 2019 to almost 12 per cent of GDP in 2020 The 
fiscal consolidation taking place since then is expected to bring 
fiscal accounts back to a primary deficit of 4 per cent of GDP in 
2024 Comparing the response in the United States to the global 
financial crisis and the COVID-19 shock, the fiscal impulse or 
change in the primary deficit was more significant in 2020 than in 
2009 (figure I12)
The euro area’s fiscal response to the global 
financial crisis and the COVID-19 shock was 
also a stimulus but of a smaller magnitude 
and with a faster budgetary consolidation 
than in the United States In recent years, 
the primary balance has fallen from a 
surplus of 07 per cent of GDP in 2019 to a 
deficit of approximately 6 per cent in 2020 Fiscal consolidation started in 2021 and is 
expected to bring the euro area to a deficit 
of roughly 1 per cent of GDP in 2024
In Asia, Japan responded to both the global

# Prompting Gemini Model

In [83]:
def make_prompt(query, relevant_passage):
    escaped = relevant_passage.replace("'", "").replace('"', "")
    
    prompt = f"""Question: {query}.\n
    Supplementary Information: {escaped}\n
    Answer the question according to your knowledge and supplemented with the supplementary information provided. If the question is not within the scope of the supplementary information, you should say "Good Question! But regretfully, this is out of my scope. I will still answer to the best of my knowledge" and then proceed answering only with your knowledge.\n
    If you refer to the supplementary information, you don't need to say "according to the supplementary information", instead say "according to my augmented knowledge on trade and exports".\n
    Your response:
    """
    
    return prompt

In [84]:
def convert_passages_to_string(passages):
    context = ""
    
    for passage in passages:
        context += passage + "\n"
        
    return context

In [85]:
prompt = make_prompt(question, convert_passages_to_string(passages))
prompt

'Question: How many percent did the US primary balance fell due to COVID-19?.\n\n    Supplementary Information: United States, the primary balance fell from a deficit of 35 per \ncent of GDP in 2019 to almost 12 per cent of GDP in 2020 The \nfiscal consolidation taking place since then is expected to bring \nfiscal accounts back to a primary deficit of 4 per cent of GDP in \n2024 Comparing the response in the United States to the global \nfinancial crisis and the COVID-19 shock, the fiscal impulse or \nchange in the primary deficit was more significant in 2020 than in \n2009 (figure I12)\nThe euro area’s fiscal response to the global \nfinancial crisis and the COVID-19 shock was \nalso a stimulus but of a smaller magnitude \nand with a faster budgetary consolidation \nthan in the United States In recent years, \nthe primary balance has fallen from a \nsurplus of 07 per cent of GDP in 2019 to a \ndeficit of approximately 6 per cent in 2020 Fiscal consolidation started in 2021 and is \ne

In [86]:
Markdown(prompt)

Question: How many percent did the US primary balance fell due to COVID-19?.

    Supplementary Information: United States, the primary balance fell from a deficit of 35 per 
cent of GDP in 2019 to almost 12 per cent of GDP in 2020 The 
fiscal consolidation taking place since then is expected to bring 
fiscal accounts back to a primary deficit of 4 per cent of GDP in 
2024 Comparing the response in the United States to the global 
financial crisis and the COVID-19 shock, the fiscal impulse or 
change in the primary deficit was more significant in 2020 than in 
2009 (figure I12)
The euro area’s fiscal response to the global 
financial crisis and the COVID-19 shock was 
also a stimulus but of a smaller magnitude 
and with a faster budgetary consolidation 
than in the United States In recent years, 
the primary balance has fallen from a 
surplus of 07 per cent of GDP in 2019 to a 
deficit of approximately 6 per cent in 2020 Fiscal consolidation started in 2021 and is 
expected to bring the euro area to a deficit 
of roughly 1 per cent of GDP in 2024
In Asia, Japan responded to both the global
to change in 2015, when the primary 
deficit started to increase By 2019, the 
deficit had reached 5 per cent of GDP the 
COVID-19 shock saw it deepen to almost 9 per cent of GDP in 2020 For 2024–2025, 
the expectation is for the primary deficit 
to stabilize at approximately 6 per cent of 
GDP  As outlined below, increases in primary 
deficits are reflected in the swelling of public 
debt stocks 
As expected after a recession, the 
COVID-19 shock raised public debt in 
almost all Group of 20 economies (table I2) 
The exceptions were Brazil and Türkiye, for The COVID-19 
shock raised public 
debt in almost all 
economies of the 
Group of 20
 Republic of Korea
 Russian F ederation
 Saudi Arabia
 South Africa
 United Kingdom
 United States
Countr y2019 2020 2021 2022 2023aChange
COVID-19 shock period
Public debt levels spiked in the aftermath of the COVID-19 shock 
General government gross debt in the economies of the Group of 20
(Percentage of GDP)
squeezing fiscal accounts and heightening 
potential risks to financial stabilityFigure I11
Several developing economies have suffered sharp depreciations of their 
currencies in 2024
Bilateral exchange rate depreciations relative to the United States dollar in nominal terms, 
selected developing countries, January–August 2024
(Percentage)
Source: UNCTAD based on London Stock Exchange Group Eikon
Note: Percentage change in the nominal exchange rate against the United States dollar between 1 January 
2024 and 22 August 2024Ethiopia 
Ongoing tight 
international 
conditions have 
put additional 
pressure on 
developing country 
currencies19
The macroeconomics of discontentE The phasing out of fiscal stimulus 
and increased public debt levels
The largest economies of the Group of 20 responded to the 
COVID-19 shock with fiscal stimuli of very different sizes In the 
United States, the primary balance fell from a deficit of 35 per
of roughly 1 per cent of GDP in 2024
In Asia, Japan responded to both the global 
financial crisis and COVID-19 shock with a 
similar fiscal impulse, a 5-percentage-point 
increase in the ratio of the primary deficit to 
Figure I12
Diverging fiscal balance dynamics among the world’s largest economies
Primary fiscal balance in selected economies of the Group of 20
(Percentage of GDP)
Source: IMF World Economic Outlook, April 2024
a Estimate2001 2005 2010 2015 2020-12-10-8-6-4-202
StatesEuro area
2024a2023a20
Trade and development report 2024
Rethinking development in the age of discontentChapter I
The macroeconomics of discontentGDP  In contrast, China became much more 
fiscally active after the pandemic compared 
to the global financial crisis In 2009, China 
had a temporary and small primary deficit, 
followed by an almost balanced primary 
budget in 2010–2014 The situation started 
to change in 2015, when the primary 
deficit started to increase By 2019, the
the COVID-19 crisis Global debt in 2019 
stood at a record 233 per cent of GDP 
and government debt at an historic 84 per 
cent The debt of developing countries 
totalled 180 per cent of GDP , led by private 
debt, which rose to 126 per cent of GDP  
Four fifths of developing countries had 
higher debt, both domestic and external, 
than in 2010 (Kose et al, 2021) 
Overall, the impact of the global financial 
crisis on the South, while not immediate, 
was much more severe than on the North 
because it fundamentally changed the 
growth trajectory (figure IV3) From 2001 
to 2008, the average annual growth of real GDP was about 67 per cent for 
developing countries, surpassing that 
of developed countries (23 per cent) by 
44 percentage points In the aftermath 
of the global financial crisis, from 2008 to 
2019, average growth dropped for both 
developing countries (50 per cent) and 
developed countries (15 per cent), but the 
gap narrowed to 35 percentage points


    Answer the question according to your knowledge and supplemented with the supplementary information provided. If the question is not within the scope of the supplementary information, you should say "Good Question! But regretfully, this is out of my scope. I will still answer to the best of my knowledge" and then proceed answering only with your knowledge.

    If you refer to the supplementary information, you don't need to say "according to the supplementary information", instead say "according to my augmented knowledge on trade and exports".

    Your response:
    

# Generating GEMINI Response

In [87]:
model = genai.GenerativeModel("gemini-2.0-flash")

In [88]:
answer = model.generate_content(prompt)
answer.text

'According to my augmented knowledge on trade and exports, the US primary balance fell from a deficit of 3.5 percent of GDP in 2019 to almost 12 percent of GDP in 2020 due to COVID-19.\n'

In [89]:
Markdown(answer.text)

According to my augmented knowledge on trade and exports, the US primary balance fell from a deficit of 3.5 percent of GDP in 2019 to almost 12 percent of GDP in 2020 due to COVID-19.


# The Pipeline

Pipeline flow:
1. Provide question
2. Search the Chroma database
3. Convert the passages from a list to a string
4. Create the prompt
5. Give the question + context to the model
6. Get the answer

In [90]:
# Step 1
# Example irrelevant question
# question = "What are the furthest countries from Indonesia?" 

# Example of relevant question
question = "What threatens to replace exports from developing countries with local production in advanced economies?"

# Step 2
db = get_chroma_db("trade_exports_db")
passages = get_relevant_passages(question, db, n_results=5)

# Step 3
context = convert_passages_to_string(passages)

# Step 4
prompt = make_prompt(question, context)

# Step 5
model = genai.GenerativeModel("gemini-2.0-flash")
answer = model.generate_content(prompt)

# Step 6
Markdown(answer.text)

  return chroma_client.get_collection(name=name, embedding_function=GeminiEmbeddingFunction())


According to my augmented knowledge on trade and exports, several factors threaten to replace exports from developing countries with local production in advanced economies:

*   **Changing technology:** Disruptive technologies, especially the combination of artificial intelligence and new additive manufacturing processes like 3D printing, enable rapid prototyping and customization. This allows for the production of goods closer to the end user, reducing the need for imports from developing countries.

*   **Trade Fragmentation:** Policy-driven changes in global trade flows and the rearrangement of global value chains, often guided by strategic considerations, can shift production back to advanced economies.

*   **Increased Logistics Costs:** Rising logistics costs make local production in advanced economies more competitive compared to importing from developing countries.

In summary, advancements in manufacturing technologies and shifts in global trade policies are incentivizing advanced economies to prioritize local production, posing a significant threat to the export-led growth strategies of developing nations.
