In [None]:
import fitz  # PyMuPDF
import re
import os
import pandas as pd
import openai



# --- Financial patterns for Emirates NBD Q1 format ---
# Each pair is (metric label, regex). The first capture group of every regex
# holds the figure that follows the line-item caption; the G&A figure is
# expected inside parentheses, and EPS is the only one captured with a decimal
# point rather than thousands separators.
patterns = dict((
    ("Interest and Similar Income", r"Interest and similar income\s+([\d,]+)"),
    ("Net Interest Income", r"Net interest income\s+([\d,]+)"),
    ("Islamic Financing Income", r"Income from Islamic financing and investment products\s+([\d,]+)"),
    ("Total Operating Income", r"Total operating income\s+([\d,]+)"),
    ("General and Administrative Expenses", r"General and administrative expenses\s+\(([\d,]+)\)"),
    ("Operating Profit Before Impairment", r"Operating profit before impairment\s+([\d,]+)"),
    ("Profit for the Period", r"Profit for the period\s+([\d,]+)"),
    ("Earnings Per Share (AED)", r"Earnings per share \(AED\)\s+([\d.]+)"),
))

def extract_metrics(text, pattern_map=None):
    """Extract the configured financial figures from statement text.

    Each regex is searched case-insensitively; the first capture group of the
    first match is stripped of thousands separators and converted to float.

    Args:
        text: Plain text of the financial statement.
        pattern_map: Optional mapping of metric label -> regex whose first
            capture group holds the number. When omitted, the module-level
            ``patterns`` table is used.

    Returns:
        dict with one entry per label: the parsed float, or None when the
        pattern does not match or the captured text is not a valid number.
    """
    if pattern_map is None:
        pattern_map = patterns

    results = {}
    for label, pattern in pattern_map.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match is None:
            results[label] = None
            continue
        try:
            results[label] = float(match.group(1).replace(",", ""))
        except ValueError:
            # Captured text such as "," or "1.2.3" is not a number. Only this
            # failure is treated as "not found"; a bare except would also
            # swallow KeyboardInterrupt and SystemExit.
            results[label] = None
    return results

def parse_pdf(file_path):
    """Return the metrics extracted from the PDF at ``file_path``.

    The text of every page is collected, joined with newlines, and handed to
    ``extract_metrics``; its result is returned unchanged.
    """
    with fitz.open(file_path) as doc:
        page_texts = [page.get_text() for page in doc]
    return extract_metrics("\n".join(page_texts))

def safe_div(a, b):
    """Return ``a / b`` rounded to 4 decimal places, or None if it cannot be computed.

    None is returned when the numerator is None, or when the denominator is
    None or zero.
    """
    if a is None:
        return None
    if b is None or b == 0:
        return None
    return round(a / b, 4)

# --- Main execution ---
# Statement to analyse, plus the reporting period it covers. The period label is
# quoted to the LLM in the prompt below, so change both together when switching
# files (the prompt previously said "Q1 2025" while this path is the Q2 report).
pdf_path = "emirates_nbd_financial_statements_q2_2025_english.pdf"  # Change to your PDF file path
report_period = "Q2 2025"  # Change together with pdf_path

metrics = parse_pdf(pdf_path)

# Metrics whose pattern did not match come back as None and would otherwise
# appear in the prompt as the literal text "None"; flag them before the LLM call.
missing_metrics = [label for label, value in metrics.items() if value is None]
if missing_metrics:
    print("Warning: could not extract: " + ", ".join(missing_metrics))

# Compute financial ratios
# All three ratios share Total Operating Income as the denominator, so each one
# is described by (name, formula text, numerator metric label).
ratio_specs = [
    ("Net Profit Margin",
     "Profit for the Period / Total Operating Income",
     "Profit for the Period"),
    ("Operating Efficiency Ratio",
     "G&A Expenses / Total Operating Income",
     "General and Administrative Expenses"),
    ("Return on Operating Income",
     "Operating Profit Before Impairment / Total Operating Income",
     "Operating Profit Before Impairment"),
]
total_operating_income = metrics["Total Operating Income"]
ratios = [
    {
        "name": name,
        "formula": formula,
        # Human-readable working shown to the LLM next to the result.
        "calc": f"{metrics[numerator]} / {total_operating_income}",
        # None when either operand is missing or the denominator is zero.
        "value": safe_div(metrics[numerator], total_operating_income),
    }
    for name, formula, numerator in ratio_specs
]

# Prepare metrics and ratios strings (one "label: value" line per entry)
metrics_str = "\n".join(f"{k}: {v}" for k, v in metrics.items())
ratios_str = "\n".join(f"{r['name']}: {r['calc']} = {r['value']}" for r in ratios)

# --- User prompt ---
user_prompt = input("Enter your question or instruction for the LLM (e.g., 'Summarize the financial health and give recommendations'): ")

# Compose the final prompt for OpenAI: extracted data first, then the user's request
prompt = f"""
You are a financial analyst. Here are extracted metrics and ratios from Emirates NBD {report_period} financial statement:

Metrics:
{metrics_str}

Ratios:
{ratios_str}

{user_prompt}
"""

# Get recommendation from OpenAI
response = openai.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful financial analyst."},
        {"role": "user", "content": prompt}
    ],
    max_tokens=400
)

print("\nLLM-Generated Response:\n")
print(response.choices[0].message.content)

In [None]:
import fitz  # PyMuPDF
import re
import os
from llama_cpp import Llama

# --- Financial patterns for Emirates NBD Q1 format ---
# Maps a metric label to the regex used to find it. In every regex the first
# capture group is the number that follows the caption.
patterns = {}
# Income lines: figures written with thousands separators.
patterns["Interest and Similar Income"] = r"Interest and similar income\s+([\d,]+)"
patterns["Net Interest Income"] = r"Net interest income\s+([\d,]+)"
patterns["Islamic Financing Income"] = r"Income from Islamic financing and investment products\s+([\d,]+)"
patterns["Total Operating Income"] = r"Total operating income\s+([\d,]+)"
# Expense line: the figure is expected inside parentheses.
patterns["General and Administrative Expenses"] = r"General and administrative expenses\s+\(([\d,]+)\)"
# Profit lines.
patterns["Operating Profit Before Impairment"] = r"Operating profit before impairment\s+([\d,]+)"
patterns["Profit for the Period"] = r"Profit for the period\s+([\d,]+)"
# Per-share figure: captured with a decimal point instead of commas.
patterns["Earnings Per Share (AED)"] = r"Earnings per share \(AED\)\s+([\d.]+)"

def extract_metrics(text, pattern_map=None):
    """Extract the configured financial figures from statement text.

    Each regex is searched case-insensitively; the first capture group of the
    first match is stripped of thousands separators and converted to float.

    Args:
        text: Plain text of the financial statement.
        pattern_map: Optional mapping of metric label -> regex whose first
            capture group holds the number. When omitted, the module-level
            ``patterns`` table is used.

    Returns:
        dict with one entry per label: the parsed float, or None when the
        pattern does not match or the captured text is not a valid number.
    """
    if pattern_map is None:
        pattern_map = patterns

    results = {}
    for label, pattern in pattern_map.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match is None:
            results[label] = None
            continue
        try:
            results[label] = float(match.group(1).replace(",", ""))
        except ValueError:
            # Captured text such as "," or "1.2.3" is not a number. Only this
            # failure is treated as "not found"; a bare except would also
            # swallow KeyboardInterrupt and SystemExit.
            results[label] = None
    return results

def parse_pdf(file_path):
    """Open the PDF at ``file_path`` and return its extracted metrics.

    Page texts are gathered in page order, joined with newlines, and passed to
    ``extract_metrics``.
    """
    page_texts = []
    with fitz.open(file_path) as doc:
        for page in doc:
            page_texts.append(page.get_text())
    combined = "\n".join(page_texts)
    return extract_metrics(combined)

def safe_div(a, b):
    """Divide ``a`` by ``b`` and round to 4 decimals; None when not computable.

    "Not computable" means ``a`` is None, or ``b`` is None or equal to zero.
    """
    computable = a is not None and b is not None and b != 0
    if computable:
        quotient = a / b
        return round(quotient, 4)
    return None

# --- Main execution ---
pdf_path = "Emirates_NBD_Q1_2025.pdf"  # Change to your PDF file path

metrics = parse_pdf(pdf_path)

# Compute financial ratios
# Each ratio divides one extracted metric by Total Operating Income, so the
# table below only lists (name, formula text, numerator label).
_ratio_specs = (
    ("Net Profit Margin",
     "Profit for the Period / Total Operating Income",
     "Profit for the Period"),
    ("Operating Efficiency Ratio",
     "G&A Expenses / Total Operating Income",
     "General and Administrative Expenses"),
    ("Return on Operating Income",
     "Operating Profit Before Impairment / Total Operating Income",
     "Operating Profit Before Impairment"),
)
ratios = [
    {
        "name": _name,
        "formula": _formula,
        "calc": f"{metrics[_numerator]} / {metrics['Total Operating Income']}",
        "value": safe_div(metrics[_numerator], metrics["Total Operating Income"]),
    }
    for _name, _formula, _numerator in _ratio_specs
]

# Prepare prompt for local LLM: one "label: value" line per metric / ratio
metrics_str = "\n".join(f"{label}: {value}" for label, value in metrics.items())
ratios_str = "\n".join(
    f"{entry['name']}: {entry['calc']} = {entry['value']}" for entry in ratios
)

prompt = f"""
You are a financial analyst. Here are extracted metrics and ratios from Emirates NBD Q1 2025 financial statement:

Metrics:
{metrics_str}

Ratios:
{ratios_str}

Based on these, provide a concise, insightful analysis and actionable recommendations for the bank's management. Highlight strengths, weaknesses, and any notable trends.
"""

# --- Run local LLM (Llama 2 or compatible GGUF model) ---
# A GGUF model file must be available locally, e.g. downloaded from
# https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF; point model_path at it.
llm = Llama(model_path="llama-2-7b-chat.Q4_K_M.gguf", n_ctx=2048, n_threads=4)

# Generation settings passed through to the model call.
_generation_args = {
    "max_tokens": 400,
    "stop": ["</s>"],
    "echo": False,
    "temperature": 0.7,
}
output = llm(prompt, **_generation_args)

print("LLM-Generated Financial Analysis & Recommendations:\n")
print(output["choices"][0]["text"])

In [None]:
# import fitz  # PyMuPDF
# import re
# from sentence_transformers import SentenceTransformer
# from sklearn.metrics.pairwise import cosine_similarity
# from llama_cpp import Llama

# # 1. Load and split PDF into chunks
# def extract_chunks(pdf_path, chunk_size=800):
#     with fitz.open(pdf_path) as doc:
#         text = "\n".join(page.get_text() for page in doc)
#     # Simple split on line breaks: every non-empty line is treated as a paragraph
#     paras = [p.strip() for p in text.split('\n') if p.strip()]
#     # Merge paragraphs into chunks
#     chunks = []
#     chunk = ""
#     for para in paras:
#         if len(chunk) + len(para) < chunk_size:
#             chunk += " " + para
#         else:
#             chunks.append(chunk.strip())
#             chunk = para
#     if chunk:
#         chunks.append(chunk.strip())
#     return chunks

# pdf_path = "emirates_nbd_financial_statements_q2_2025_english.pdf"  # Change to your PDF file
# chunks = extract_chunks(pdf_path)

# # 2. Embed chunks
# embedder = SentenceTransformer("all-MiniLM-L6-v2")
# chunk_embeddings = embedder.encode(chunks)

# # 3. User question
# user_query = input("Ask a question about the financial statement: ")
# query_embedding = embedder.encode([user_query])

# # 4. Retrieve top-k relevant chunks
# sims = cosine_similarity(query_embedding, chunk_embeddings)[0]
# top_k = 3
# top_indices = sims.argsort()[-top_k:][::-1]
# retrieved = "\n\n".join([chunks[i] for i in top_indices])

# # 5. Compose prompt for Llama
# prompt = f"""You are a financial analyst. Use the following context from a financial statement to answer the question.

# Context:
# {retrieved}

# Question: {user_query}

# Answer:"""

# # 6. Run local Llama (no OpenAI key needed)
# llm = Llama(model_path="llama-2-7b-chat.Q4_K_M.gguf", n_ctx=2048, n_threads=4)  # Update path to your GGUF model

# output = llm(
#     prompt,
#     max_tokens=400,
#     stop=["</s>"],
#     echo=False,
#     temperature=0.7,
# )

# print("\nLLM-Generated Answer:\n")
# print(output["choices"][0]["text"])

In [None]:
#!pip install keras==2.11.0
#!pip install tf-keras
#!pip install --upgrade pip setuptools urllib3 certifi
#!pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org tf-keras

In [None]:
import fitz  # PyMuPDF
import re
import os
import pandas as pd
import openai

# Set your OpenAI API key
# Read the key from the OPENAI_API_KEY environment variable rather than pasting
# it into the notebook, where it is easily leaked when the file is shared.
# The earlier empty-string placeholder also overwrote any key already configured.
# If the variable is unset this assigns None.
# NOTE(review): the client library is expected to report a missing key itself - confirm.
openai.api_key = os.environ.get("OPENAI_API_KEY")

# --- Financial patterns for Emirates NBD Q1 format ---
# (label, regex) pairs in report order. Group 1 of each regex captures the
# figure after the caption: comma-separated digits for amounts (inside
# parentheses for G&A expenses) and digits with a decimal point for EPS.
_pattern_pairs = [
    ("Interest and Similar Income", r"Interest and similar income\s+([\d,]+)"),
    ("Net Interest Income", r"Net interest income\s+([\d,]+)"),
    ("Islamic Financing Income", r"Income from Islamic financing and investment products\s+([\d,]+)"),
    ("Total Operating Income", r"Total operating income\s+([\d,]+)"),
    ("General and Administrative Expenses", r"General and administrative expenses\s+\(([\d,]+)\)"),
    ("Operating Profit Before Impairment", r"Operating profit before impairment\s+([\d,]+)"),
    ("Profit for the Period", r"Profit for the period\s+([\d,]+)"),
    ("Earnings Per Share (AED)", r"Earnings per share \(AED\)\s+([\d.]+)"),
]
patterns = dict(_pattern_pairs)

def extract_metrics(text, pattern_map=None):
    """Extract the configured financial figures from statement text.

    Each regex is searched case-insensitively; the first capture group of the
    first match is stripped of thousands separators and converted to float.

    Args:
        text: Plain text of the financial statement.
        pattern_map: Optional mapping of metric label -> regex whose first
            capture group holds the number. When omitted, the module-level
            ``patterns`` table is used.

    Returns:
        dict with one entry per label: the parsed float, or None when the
        pattern does not match or the captured text is not a valid number.
    """
    if pattern_map is None:
        pattern_map = patterns

    results = {}
    for label, pattern in pattern_map.items():
        match = re.search(pattern, text, re.IGNORECASE)
        if match is None:
            results[label] = None
            continue
        try:
            results[label] = float(match.group(1).replace(",", ""))
        except ValueError:
            # Captured text such as "," or "1.2.3" is not a number. Only this
            # failure is treated as "not found"; a bare except would also
            # swallow KeyboardInterrupt and SystemExit.
            results[label] = None
    return results

def parse_pdf(file_path):
    """Extract metrics from the PDF located at ``file_path``.

    The document is opened with PyMuPDF, the text of all pages is concatenated
    with newline separators, and the result of ``extract_metrics`` on that text
    is returned.
    """
    with fitz.open(file_path) as doc:
        full_text = "\n".join([page.get_text() for page in doc])
    extracted = extract_metrics(full_text)
    return extracted

def safe_div(a, b):
    """Return ``a / b`` rounded to four decimal places.

    Yields None instead of dividing when ``a`` is None or when ``b`` is None
    or zero.
    """
    return None if (a is None or b in (None, 0)) else round(a / b, 4)

# --- Main execution ---
# Statement to analyse, plus the reporting period it covers. The period label is
# quoted to the LLM in every prompt, so change both together when switching
# files (the prompt previously said "Q1 2025" while this path is the Q2 report).
pdf_path = "emirates_nbd_financial_statements_q2_2025_english.pdf"  # Change to your PDF file path
report_period = "Q2 2025"  # Change together with pdf_path

metrics = parse_pdf(pdf_path)

# Metrics whose pattern did not match come back as None and would otherwise
# appear in the prompt as the literal text "None"; flag them up front.
missing_metrics = [label for label, value in metrics.items() if value is None]
if missing_metrics:
    print("Warning: could not extract: " + ", ".join(missing_metrics))

# Compute financial ratios
# All three ratios share Total Operating Income as the denominator, so each one
# is described by (name, formula text, numerator metric label).
ratio_specs = [
    ("Net Profit Margin",
     "Profit for the Period / Total Operating Income",
     "Profit for the Period"),
    ("Operating Efficiency Ratio",
     "G&A Expenses / Total Operating Income",
     "General and Administrative Expenses"),
    ("Return on Operating Income",
     "Operating Profit Before Impairment / Total Operating Income",
     "Operating Profit Before Impairment"),
]
total_operating_income = metrics["Total Operating Income"]
ratios = [
    {
        "name": name,
        "formula": formula,
        # Human-readable working shown to the LLM next to the result.
        "calc": f"{metrics[numerator]} / {total_operating_income}",
        # None when either operand is missing or the denominator is zero.
        "value": safe_div(metrics[numerator], total_operating_income),
    }
    for name, formula, numerator in ratio_specs
]

# Prepare metrics and ratios strings (one "label: value" line per entry)
metrics_str = "\n".join(f"{k}: {v}" for k, v in metrics.items())
ratios_str = "\n".join(f"{r['name']}: {r['calc']} = {r['value']}" for r in ratios)

# --- Loop for user prompt ---
# Each question is sent as an independent request; no conversation history is kept.
while True:
    user_prompt = input("\nEnter your question or instruction for the LLM (type 'q' to quit): ")
    if user_prompt.strip().lower() == 'q':
        print("Exiting. Goodbye!")
        break
    if not user_prompt.strip():
        # Nothing was asked; don't spend an API call on an empty question.
        continue

    prompt = f"""
You are a financial analyst. Here are extracted metrics and ratios from Emirates NBD {report_period} financial statement:

Metrics:
{metrics_str}

Ratios:
{ratios_str}

{user_prompt}
"""

    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a helpful financial analyst."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=400
    )

    print("\nLLM-Generated Response:\n")
    print(response.choices[0].message.content)