In [6]:
import fitz  # PyMuPDF
import re
import os
import openai
from dotenv import load_dotenv

# Load .env file from the specified path
load_dotenv("C:\\EUacademy\\.env")

# -------- Helpers --------
def to_float(s):
    try:
        return float(s.replace(",", ""))
    except Exception:
        return None

def safe_div(a, b):
    try:
        return round(a / b, 4) if a is not None and b not in (None, 0) else None
    except Exception:
        return None

def fmt_pct(x):
    return f"{x*100:.2f}%" if x is not None else "N/A"

# -------- Patterns --------
patterns_dual = {
    "Interest and Similar Income": r"Interest and similar income\s+([\d,]+)\s+([\d,]+)",
    "Interest and Similar Expense": r"Interest and similar expense\s+\(([\d,]+)\)\s+\(([\d,]+)\)",
    "Net Interest Income": r"Net interest income\s+([\d,]+)\s+([\d,]+)",
    "Islamic Financing Income": r"Income from Islamic financing and investment products\s+([\d,]+)\s+([\d,]+)",
    "Distribution on Islamic Deposits": r"Distribution on Islamic deposits and profit paid to Sukuk holders\s+\(([\d,]+)\)\s+\(([\d,]+)\)",
    "Net Income from Islamic": r"Net income from Islamic financing and investment products\s+([\d,]+)\s+([\d,]+)",
    "Net Fees and Commission": r"Net fee and commission income\s+([\d,]+)\s+([\d,]+)",
    "Net Gain on Trading Securities": r"Net gain on trading securities\s+([\d,]+)\s+([\d,]+)",
    "Other Operating Income": r"Other operating income.*?\s+([\d,]+)\s+([\d,]+)",
    "Total Operating Income": r"Total operating income\s+([\d,]+)\s+([\d,]+)",
    "General and Administrative Expenses": r"General and administrative expenses\s+\(([\d,]+)\)\s+\(([\d,]+)\)",
    "Operating Profit Before Impairment": r"Operating profit before impairment\s+([\d,]+)\s+([\d,]+)",
    "Net Impairment Reversal": r"Net impairment (?:reversal|loss)\s+([\-\d,]+)\s+([\-\d,]+)",
    "Profit Before Tax": r"Profit for the period before taxation\s+([\d,]+)\s+([\d,]+)",
    "Taxation Charge": r"Taxation charge\s+\(([\d,]+)\)\s+\(([\d,]+)\)",
    "Profit for the Period": r"Profit for the period\s+([\d,]+)\s+([\d,]+)",
    "Earnings Per Share (AED)": r"Earnings per share\s*\(AED\)\s+([\d\.]+)\s+([\d\.]+)",
}

patterns_single = {
    "Gross Loans and Receivables": r"Gross loans and receivables\s+([\d,]+)\s+[\d,]+",
    "Expected Credit Losses (Loans)": r"Less:\s*Expected credit losses\s+\(([\d,]+)\)\s+\([\d,]+\)",
    "Net Loans and Receivables": r"Net loans and receivables\s+([\d,]+)\s+[\d,]+",
    "Credit-Impaired Loans (NPLs)": r"Total of credit impaired loans and receivables\s+([\d,]+)\s+[\d,]+",
    "Total Assets": r"Segment Assets[\s\S]*?(\d{1,3}(?:,\d{3})+)\s*\n\s*Segment Liabilities",
    "Fee and Commission Income": r"Fee and commission income\s+([\d,]+)\s+[\d,]+",
    "Fee and Commission Expense": r"Fee and commission expense\s+\(([\d,]+)\)\s+\([\d,]+\)",
    "FX & Derivative Income": r"Foreign exchange and derivative income.*?\s+([\d,]+)\s+[\d,]+",
}

def extract_dual(text):
    out = {}
    for label, pat in patterns_dual.items():
        m = re.search(pat, text, re.IGNORECASE)
        if m:
            curr = to_float(m.group(1))
            prior = to_float(m.group(2)) if m.lastindex and m.lastindex >= 2 else None
            out[label] = {"current": curr, "prior": prior}
        else:
            out[label] = {"current": None, "prior": None}
    return out

def extract_single(text):
    out = {}
    for label, pat in patterns_single.items():
        m = re.search(pat, text, re.IGNORECASE)
        out[label] = to_float(m.group(1)) if m else None
    return out

def parse_pdf(file_path):
    with fitz.open(file_path) as doc:
        full_text = "\n".join(page.get_text() for page in doc)
    dual = extract_dual(full_text)
    single = extract_single(full_text)
    return dual, single

def get_ratios_and_metrics(dual, single):
    def cur(k): return (dual.get(k) or {}).get("current")
    def prv(k): return (dual.get(k) or {}).get("prior")

    toi_c = cur("Total Operating Income")
    toi_p = prv("Total Operating Income")
    ga_c  = cur("General and Administrative Expenses")
    opb_c = cur("Operating Profit Before Impairment")
    pbt_c = cur("Profit Before Tax")
    tax_c = cur("Taxation Charge")
    pat_c = cur("Profit for the Period")
    eps_c = cur("Earnings Per Share (AED)")
    eps_p = prv("Earnings Per Share (AED)")

    nfee_c = cur("Net Fees and Commission")
    trading_c = cur("Net Gain on Trading Securities")
    other_c = cur("Other Operating Income")
    fx_c = single.get("FX & Derivative Income")

    gross_loans = single.get("Gross Loans and Receivables")
    ecl_loans   = single.get("Expected Credit Losses (Loans)")
    net_loans   = single.get("Net Loans and Receivables")
    npls        = single.get("Credit-Impaired Loans (NPLs)")

    total_assets = single.get("Total Assets")

    ratios = []
    ratios.append({
        "name": "Cost-to-Income",
        "formula": "G&A Expenses / Total Operating Income",
        "calc": f"{ga_c} / {toi_c}",
        "value": safe_div(ga_c, toi_c)
    })
    ratios.append({
        "name": "Net Profit Margin",
        "formula": "Profit for the Period / Total Operating Income",
        "calc": f"{pat_c} / {toi_c}",
        "value": safe_div(pat_c, toi_c)
    })
    ratios.append({
        "name": "Pre-Impairment Operating Margin",
        "formula": "Operating Profit Before Impairment / Total Operating Income",
        "calc": f"{opb_c} / {toi_c}",
        "value": safe_div(opb_c, toi_c)
    })
    ratios.append({
        "name": "Fee Income Mix",
        "formula": "Net Fee & Commission / Total Operating Income",
        "calc": f"{nfee_c} / {toi_c}",
        "value": safe_div(nfee_c, toi_c)
    })
    mix_numer = fx_c if fx_c is not None else ((trading_c or 0) + (other_c or 0))
    ratios.append({
        "name": "Markets & Other Income Mix",
        "formula": "(FX & Derivatives OR Trading + Other) / Total Operating Income",
        "calc": f"{mix_numer} / {toi_c}",
        "value": safe_div(mix_numer, toi_c)
    })
    ratios.append({
        "name": "NPL Ratio",
        "formula": "Credit-Impaired Loans / Gross Loans",
        "calc": f"{npls} / {gross_loans}",
        "value": safe_div(npls, gross_loans)
    })
    ratios.append({
        "name": "Coverage Ratio",
        "formula": "Loan Loss Provisions (ECL) / NPLs",
        "calc": f"{ecl_loans} / {npls}",
        "value": safe_div(ecl_loans, npls)
    })
    ratios.append({
        "name": "ECL / Gross Loans",
        "formula": "Total ECL (Loans) / Gross Loans",
        "calc": f"{ecl_loans} / {gross_loans}",
        "value": safe_div(ecl_loans, gross_loans)
    })
    ratios.append({
        "name": "Effective Tax Rate",
        "formula": "Taxation Charge / Profit Before Tax",
        "calc": f"{tax_c} / {pbt_c}",
        "value": safe_div(tax_c, pbt_c)
    })
    ratios.append({
        "name": "ROA (Quarter, Approx.)",
        "formula": "Profit for the Period / Total Assets",
        "calc": f"{pat_c} / {total_assets}",
        "value": safe_div(pat_c, total_assets)
    })
    eps_yoy = safe_div((eps_c - eps_p) if (eps_c is not None and eps_p is not None) else None, eps_p)
    ratios.append({
        "name": "EPS YoY Change",
        "formula": "(EPS 2025 - EPS 2024) / EPS 2024",
        "calc": f"{eps_c} - {eps_p} over {eps_p}",
        "value": eps_yoy
    })

    metrics_table = []
    for k, v in dual.items():
        metrics_table.append((k, v.get("current"), v.get("prior")))
    for k in [
        "Gross Loans and Receivables", "Expected Credit Losses (Loans)", "Net Loans and Receivables",
        "Credit-Impaired Loans (NPLs)", "Total Assets", "Fee and Commission Income",
        "Fee and Commission Expense", "FX & Derivative Income"
    ]:
        metrics_table.append((k, single.get(k), None))

    return ratios, metrics_table

def build_context(ratios, metrics_table):
    context = "Metrics:\n"
    for k, curv, prv in metrics_table:
        context += f"{k}: {curv} (2025), {prv} (2024)\n"
    context += "\nKey Ratios:\n"
    for r in ratios:
        context += f"{r['name']}: {fmt_pct(r['value'])} (Formula: {r['formula']})\n"
    return context

def ask_openai(context, user_query, api_key):
    openai.api_key = api_key
    prompt = (
        f"{context}\n\n"
        f"User question: {user_query}\n"
        "Answer:"
    )
    response = openai.chat.completions.create(
        model="gpt-4",
        messages=[
            {"role": "system", "content": "You are a financial analyst assistant."},
            {"role": "user", "content": prompt}
        ],
        max_tokens=400,
        temperature=0.3
    )
    return response.choices[0].message.content.strip()

if __name__ == "__main__":
    pdf_path = input("Enter PDF filename (in current directory): ").strip()
    if not os.path.exists(pdf_path):
        print("File not found.")
        exit(1)
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        print("OPENAI_API_KEY environment variable not set. Exiting.")
        exit(1)
    dual, single = parse_pdf(pdf_path)
    ratios, metrics_table = get_ratios_and_metrics(dual, single)
    context = build_context(ratios, metrics_table)
    print("\nMetrics and ratios extracted. You can now ask questions (type 'q' to quit):\n")
    while True:
        user_query = input("Your question (or 'q' to quit): ").strip()
        if user_query.lower() == "q":
            print("Goodbye!")
            break
        try:
            answer = ask_openai(context, user_query, api_key)
            print("\nOpenAI Answer:\n", answer, "\n")
        except Exception as e:
            print("Error with OpenAI API:", e)

Enter PDF filename (in current directory):  emirates_nbd_financial_statements_q1_2025_english.pdf



Metrics and ratios extracted. You can now ask questions (type 'q' to quit):



Your question (or 'q' to quit):  comment on profitability 



OpenAI Answer:
 The profitability of the company has shown a mixed trend. The Net Profit Margin has increased from 2024 to 2025, indicating that the company has been more efficient in converting its revenue into actual profit. This is a positive sign as it shows the company is managing its costs effectively. 

However, the Profit for the Period has decreased from 6716.0 in 2024 to 6219.0 in 2025. This could be due to a variety of factors such as increased costs, decreased revenue, or both. It's worth noting that despite this decrease, the company still managed to maintain a high net profit margin.

The Earnings Per Share (EPS) has seen a significant increase, from 0.96 in 2024 to 18.0 in 2025. This is a very positive sign as it indicates that the company's profitability per share has greatly improved. The increase in EPS could be due to increased net income, a decrease in the number of outstanding shares, or a combination of both. 

The company's Pre-Impairment Operating Margin has al

Your question (or 'q' to quit):  q


Goodbye!
