In [None]:
# Install all required packages.
# %pip (rather than !pip) installs into the environment of the running kernel.
# NOTE(review): only langchain, langchain-community and langchain-core are
# pinned; the other packages resolve to whatever is latest at install time,
# so results may not be reproducible across machines — consider pinning them.
# NOTE(review): confirm that langchain-core==0.3.21 satisfies the
# langchain-core requirement declared by langchain==0.3.12 and
# langchain-community==0.3.12; if it does not, pip will fail to resolve — TODO verify.
%pip install docling google-generativeai python-dotenv pillow langchain==0.3.12 langchain-community==0.3.12 langchain-chroma langchain-ibm langchain-docling langchain-core==0.3.21 langchain-experimental
# Freshly installed packages are only importable after the kernel restarts.
print("\n✓ Installation complete! Restart kernel and re-run all cells.")

# RAG Annual Report Analysis
**Setup Instructions:**
1. Select the Python 3.12.7 (.venv) kernel from the kernel selector (top-right)
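2. Run all cells from top to bottom. If the install cell had to install packages, restart the kernel afterwards and run all cells again.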

**Required PDFs:** Place 3 PDF annual reports in `Company Annual Report/input/`

In [None]:
import docling_util

In [None]:
# Stage 1: hand the PDF input folder to docling_util together with the
# image-annotation option (annotation is switched off for this run).
pdf_input_dir = "Company Annual Report/input"
image_annotation_option = "no image annotation"
docling_util.process_documents_to_md(pdf_input_dir, image_annotation_option)

# Stage 2: post-process the markdown folder. According to the original note on
# this call, process_markdown_folder always writes its results to 'outputs'.
markdown_dir = 'output_md'
docling_util.process_markdown_folder(markdown_dir)

In [None]:
# markdown files to vector db
import glob
import os

def find_markdown_files(folder):
    """Return the paths of all ``*.md`` files directly inside ``folder``, sorted.

    ``glob.glob`` yields matches in arbitrary, filesystem-dependent order, so
    the result is sorted to keep the printed listing and the order in which
    files are handed to the vector store stable between runs.

    Args:
        folder: Directory to search (not searched recursively).

    Returns:
        A sorted list of matching paths; empty if the folder is missing or
        contains no markdown files.
    """
    return sorted(glob.glob(os.path.join(folder, "*.md")))


# Get all markdown files from output_md folder
md_files = find_markdown_files("output_md")
print(f"Found {len(md_files)} markdown files:")
for file in md_files:
    print(f"  - {file}")

if md_files:
    docling_util.create_chroma_vectordb(file_paths=md_files)
else:
    # Refer to the cell by what it does: cells here carry no execution count,
    # and the processing cell is not the second code cell of the notebook.
    print("No markdown files found. Please run the PDF processing cell (process_documents_to_md) first.")


In [None]:
# Load credentials from .env, reload the query helper module, and build the
# QA chain from the persisted Chroma store.
import importlib
import os

from dotenv import load_dotenv

import query_util_exp as query_util

# Load environment variables from .env file before the chain is built.
load_dotenv()

# Reload the module the `query_util` alias actually refers to, so edits to it
# are picked up without restarting the kernel. The previous version imported a
# second module named `query_util` here, which silently replaced the alias
# above and reloaded a different module from the one the later cells use.
importlib.reload(query_util)

# Report only whether each credential is present. Printing the values would
# store the secrets in the notebook's saved output.
for env_name in ("WATSONX_APIKEY", "WATSONX_PROJECT_ID"):
    env_status = "set" if os.environ.get(env_name) else "MISSING"
    print(f"{env_name}: {env_status}")

qa_chain_from_path = query_util.setup_qa_chain(local_vector_store_path="./chroma_db")

In [None]:
# Retrieval and agent: build the QA chain from the persisted Chroma store.
import query_util_exp as query_util
import os

# Confirm the API key is available without echoing it: printing the value
# would store the secret in the notebook's saved output.
print("WATSONX_APIKEY:", "set" if os.environ.get("WATSONX_APIKEY") else "MISSING")
qa_chain_from_path = query_util.setup_qa_chain(local_vector_store_path="./chroma_db")

In [None]:
## EXAMPLE ##
# Single ad-hoc query; edit the placeholder text before running this cell.
question = "Replace your questions here"
print(
    query_util.ask_question(qa_chain_from_path, question)
)


import query_util_exp as query_util

# Question set that is run against the QA chain one question at a time.
# NOTE(review): two questions below read ungrammatically ("was the company was
# incorporated", "Describe markets or regions does ...") and the revenue-change
# question compares against FY2023 while the other questions ask for FY2023
# figures — confirm the intended wording and year.
questions = [
    # Company profile, governance and strategy
    "Who is the CEO/Managing Director of the company?",
    "What is the company’s primary business activity?",
    "In which year was the company was incorporated?",
    "Describe markets or regions does the company operate in?",
    "What are the company’s three main business segments?",
    "Who is the Chairman of the Board?",
    "What is the company’s mission or vision statement?",
    "What were the key highlights mentioned in the Chairman’s statement?",
    "Who are the Board Members and describe their responsibilities",
    "What major strategic priority was announced for the upcoming financial year?",
    # Financial figures
    "What is the total revenue for FY2023?",
    "What is the net profit after tax for FY2023?",
    "What is the Earnings Per Share (EPS) for FY2023?",
    "What is the Total Assets value for FY2023?",
    "What is the Total Equity attributable to shareholders?",
    "What is the Net Asset Value (NAV) per share?",
    "What is the debt-to-equity ratio listed in the Key Financial Ratios table?",
    "Which business segment contributed the highest revenue?",
    "What is the percentage increase/decrease in revenue compared to FY2023?",
    "What is the total dividend declared for the reporting year?",
    # Questions that refer to charts
    "According to the revenue trend chart, which year recorded the highest revenue?",
    "Based on the profit margin chart, what is the approximate net profit margin for FY2023?",
    "From the segment revenue bar chart, which segment experienced the largest year-on-year growth?",
    "According to the geographical sales pie chart, which region contributes the largest share?",
    "Based on the cost breakdown chart, which cost category increased the most year-on-year?",
    # Summaries of narrative sections
    "Summarise the main points from the CEO’s message in three sentences.",
    "Summarise the company’s sustainability/ESG commitments for the reporting year.",
    "What key risks are highlighted in the Risk Management section?",
    "Summarise the business strategy outlined for the next 2–3 years.",
    "What operational achievements are described in the Operations Review section?",
]

# Print each question followed by its answer, with a blank line between pairs.
for q in questions:
    print(f"Q: {q}")
    answer = query_util.ask_question(qa_chain_from_path, q)
    print(f"A: {answer}\n")

In [None]:
# Singapura Finance question set; answers are printed in the order listed.
singapura_finance_questions = [
    "What were Singapura Finance’s main sources of revenue this year?",
    "How did Singapura Finance’s net profit change compared to the previous year?",
    "What are the key risks highlighted in Singapura Finance’s annual report?",
    "How much debt does Singapura Finance hold, and has it increased or decreased?",
    "What were Singapura Finance’s major expenditures or investments this year?",
    "How did each of Singapura Finance’s business segments perform financially?",
    "What strategic initiatives did Singapura Finance implement this year?",
    "What is Singapura Finance’s outlook or guidance for the next financial year?",
    "How did Singapura Finance’s cash flow from operations compare to its net income?",
    "What sustainability or ESG commitments did Singapura Finance report?",
]

for sf_question in singapura_finance_questions:
    print(query_util.ask_question(qa_chain_from_path, sf_question))

In [None]:
# Ad-hoc query about the symbol of Singapore Airlines.
print(
    query_util.ask_question(
        qa_chain_from_path,
        "what is the symbol of Singapore airlines?",
    )
)

In [None]:
# Ad-hoc query that is a bare phrase rather than a full question.
print(
    query_util.ask_question(
        qa_chain_from_path,
        "General purpose food additives",
    )
)

In [None]:
# Ad-hoc query that needs figures for two carriers to be combined.
# NOTE(review): the query text contains the typo "numnber"; it is kept as-is
# here because it is sent to the chain verbatim — confirm whether to correct it.
print(
    query_util.ask_question(
        qa_chain_from_path,
        "What is the total numnber of passengers carried by Scoot and Singapore Airlines combined",
    )
)

In [None]:
# Ad-hoc query about the logos of Scoot and Singapore Airlines.
print(
    query_util.ask_question(
        qa_chain_from_path,
        "Why is there difference between the logos of Scoot and Singapore Airlines?",
    )
)

In [None]:
# Ad-hoc query asking for a comparison between two airlines.
print(
    query_util.ask_question(
        qa_chain_from_path,
        "Compare the performance of the Cathay Pacific and Singapore Airlines",
    )
)