In [1]:
import subprocess
import sys
import os

def install_packages():
    """Install or upgrade the notebook's third-party dependencies with pip.

    Runs ``python -m pip install --upgrade --quiet <packages>`` using the
    interpreter that is executing this notebook, so the packages land in the
    kernel's own environment.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status.
    """
    required = (
        "langchain",
        "langchain-core",
        "langchain-community",
        "langchain-openai",
        "langchain-groq",
        "langchain-google-genai",
        "pypdf",
        "tiktoken",
        "gradio",
        "plotly",
        "pandas",
        "pydantic",
    )
    pip_command = [sys.executable, "-m", "pip", "install", "--upgrade", "--quiet", *required]

    print("Installing/Updating libraries... (Please wait)")
    subprocess.check_call(pip_command)
    print("Installation complete! If imports fail below, please restart the runtime.")

# Make sure every top-level module imported by the following cells is
# available. Probing only a few of them would skip the installation on a
# runtime where, for example, gradio is preinstalled but one of the LangChain
# provider packages is missing, and the import cell would then fail.
import importlib
import importlib.util

REQUIRED_MODULES = (
    "langchain_core",
    "langchain_community",
    "langchain_openai",
    "langchain_groq",
    "langchain_google_genai",
    "pypdf",
    "gradio",
    "plotly",
    "pandas",
    "pydantic",
)

# find_spec() returns None for a missing top-level module without importing it.
missing_modules = [name for name in REQUIRED_MODULES if importlib.util.find_spec(name) is None]

if missing_modules:
    install_packages()
    # Packages installed while the interpreter is running are not visible to
    # the import system until its finder caches are invalidated.
    importlib.invalidate_caches()
    import site
    site.main()

Installing/Updating libraries... (Please wait)
Installation complete! If imports fail below, please restart the runtime.


In [2]:

import gradio as gr
import pandas as pd
import plotly.express as px
from typing import List, Optional
from pydantic import BaseModel, Field


from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import PydanticOutputParser
from langchain_community.document_loaders import PyPDFLoader


from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_google_genai import ChatGoogleGenerativeAI



In [3]:

class LatvianFinancials(BaseModel):
    """Schema for extracting financial data from Latvian Annual Reports.

    The field descriptions are sent to the LLM as part of the parser's format
    instructions, so they carry the Latvian line-item names the model should
    look for. Company name and reporting year are required; every monetary
    figure is optional and defaults to ``None`` so that a report which does
    not contain a given line (or a model reply that omits the key) still
    validates instead of rejecting the whole document.
    """

    # --- Identification (required) ---
    uznemuma_nosaukums: str = Field(description="Name of the company (Uzņēmuma nosaukums).")
    parskata_gads: int = Field(description="The reporting year (Pārskata gads/periods).")

    # --- Profit and loss statement ---
    # default=None is explicit: without it an Optional[...] annotation alone
    # still leaves the field required under Pydantic v2.
    neto_apgrozijums: Optional[float] = Field(default=None, description="Net turnover (Neto apgrozījums). Remove spaces.")
    bruto_pelna: Optional[float] = Field(default=None, description="Gross profit (Bruto peļņa). Remove spaces.")
    parskata_gada_pelna: Optional[float] = Field(default=None, description="Profit/loss for the year (Pārskata gada peļņa). Remove spaces.")

    # --- Balance sheet ---
    ilgtermina_ieguldijumi: Optional[float] = Field(default=None, description="Total Long-term investments (Ilgtermiņa ieguldījumi).")
    apgrozamie_lidzekli: Optional[float] = Field(default=None, description="Total Current assets (Apgrozāmie līdzekļi).")
    nauda: Optional[float] = Field(default=None, description="Cash (Nauda).")
    pasu_kapitals: Optional[float] = Field(default=None, description="Total Equity (Pašu kapitāls).")
    istermina_kreditori: Optional[float] = Field(default=None, description="Total Current liabilities (Īstermiņa kreditori).")
    ilgtermina_kreditori: Optional[float] = Field(default=None, description="Total Long-term liabilities (Ilgtermiņa kreditori).")


In [4]:


def get_llm(provider, api_key):
    """Build the chat model that matches the provider label chosen in the UI.

    Args:
        provider: One of "OpenAI (GPT-4o)", "Groq (Llama 3)" or "Gemini (Pro)".
        api_key: API key for the chosen provider.

    Returns:
        A ChatOpenAI, ChatGroq or ChatGoogleGenerativeAI instance created with
        temperature 0.

    Raises:
        ValueError: if ``api_key`` is empty/None, or ``provider`` is not one of
            the supported labels.
    """
    if not api_key:
        raise ValueError("Please enter an API Key.")

    # Each builder is lazy so that only the selected client is constructed.
    builders = (
        ("OpenAI (GPT-4o)",
         lambda: ChatOpenAI(model="gpt-4o", api_key=api_key, temperature=0)),
        ("Groq (Llama 3)",
         lambda: ChatGroq(model_name="llama-3.3-70b-versatile", api_key=api_key, temperature=0)),
        ("Gemini (Pro)",
         lambda: ChatGoogleGenerativeAI(model="gemini-1.5-pro", google_api_key=api_key, temperature=0)),
    )

    for label, build in builders:
        if provider == label:
            return build()

    raise ValueError("Invalid Provider")

def extract_data(file_path, llm, max_pages=8):
    """Load a PDF, extract its text, and parse it into a LatvianFinancials object.

    Args:
        file_path: Path of the PDF file to read.
        llm: Chat model placed in the middle of the prompt | llm | parser chain.
        max_pages: Number of leading pages whose text is sent to the model
            (default 8). Pass ``None`` to send every page.

    Returns:
        Whatever the chain produces (the parser is configured for
        LatvianFinancials) on success. On any failure a string of the form
        ``"Error extracting <file name>: <reason>"`` is returned instead of
        raising, so callers must check for a ``str`` result.
    """
    try:
        pages = PyPDFLoader(file_path).load()

        full_text = "\n".join(page.page_content for page in pages[:max_pages])

        # Keep only characters below code point 0x10000 (the Basic Multilingual
        # Plane). NOTE(review): presumably this strips emoji/astral symbols
        # that caused trouble downstream -- confirm the original motivation.
        full_text = "".join(c for c in full_text if ord(c) < 0x10000)

        # A scanned PDF without a text layer yields no text; calling the model
        # with an empty document only wastes a request and invites made-up
        # numbers, so report it through the normal error path instead.
        if not full_text.strip():
            raise ValueError("no extractable text found (is the PDF a scanned image?)")

        parser = PydanticOutputParser(pydantic_object=LatvianFinancials)

        prompt = PromptTemplate(
            template="""
            You are an expert Accountant for Baltic companies.
            Extract the following financial figures from the Latvian Annual Report text below.

            IMPORTANT RULES:
            1. Latvian reports use spaces as thousand separators (e.g., "29 052 268"). You MUST remove spaces and return a pure number (29052268).
            2. If a value is in brackets (), it is negative.
            3. Look for "Rindas kods" to identify correct rows if names are ambiguous.
            4. Extract data for the "Reporting Year" (Pārskata gads), not the previous year.

            RAW TEXT:
            {text}

            {format_instructions}
            """,
            input_variables=["text"],
            partial_variables={"format_instructions": parser.get_format_instructions()}
        )

        chain = prompt | llm | parser
        return chain.invoke({"text": full_text})

    except Exception as e:
        # str() keeps the error message itself from failing on a non-string path.
        return f"Error extracting {os.path.basename(str(file_path))}: {str(e)}"

def calculate_ratios(data: "LatvianFinancials") -> dict:
    """Derive the key financial ratios from one extracted report.

    Args:
        data: Extracted figures; any monetary attribute may be ``None``.

    Returns:
        A dict with the keys "Company", "Year", "Revenue (€)",
        "Net Profit (€)", "Current Ratio", "Net Margin (%)" and "Debt/Equity",
        in that order. Ratios are rounded to two decimals. A ratio whose
        numerator or denominator is missing or zero is reported as 0.0.
    """

    def _ratio(numerator, denominator, scale=1):
        # Missing (None) or zero operands fall back to 0.0; this also avoids
        # dividing by zero and never yields a negative zero.
        if not numerator or not denominator:
            return 0.0
        return round(numerator / denominator * scale, 2)

    # Missing liability figures count as zero debt.
    total_debt = (data.istermina_kreditori or 0) + (data.ilgtermina_kreditori or 0)

    return {
        "Company": data.uznemuma_nosaukums,
        "Year": data.parskata_gads,
        "Revenue (€)": data.neto_apgrozijums,
        "Net Profit (€)": data.parskata_gada_pelna,
        # Liquidity: current assets / current liabilities.
        "Current Ratio": _ratio(data.apgrozamie_lidzekli, data.istermina_kreditori),
        # Profitability: profit for the year as a percentage of net turnover.
        "Net Margin (%)": _ratio(data.parskata_gada_pelna, data.neto_apgrozijums, scale=100),
        # Solvency: (current + long-term liabilities) / equity.
        "Debt/Equity": _ratio(total_debt, data.pasu_kapitals),
    }

def generate_peer_review(df, language, llm):
    """Ask the LLM for a comparative (peer) analysis of the calculated ratios.

    Args:
        df: DataFrame of per-company figures and ratios; it is rendered as a
            table inside the prompt.
        language: "English" selects an English answer; any other value asks
            for the answer in Latvian.
        llm: Chat model to query. Only ChatOpenAI, ChatGroq and
            ChatGoogleGenerativeAI instances are accepted.

    Returns:
        The ``content`` of the model's response, or the string "LLM Error"
        when ``llm`` is not one of the accepted model types.
    """
    try:
        data_summary = df.to_markdown(index=False)
    except ImportError:
        # DataFrame.to_markdown depends on the optional 'tabulate' package;
        # fall back to a plain-text table when it is not installed.
        data_summary = df.to_string(index=False)

    lang_instruction = "Write the response in English." if language == "English" else "Raksti atbildi Latviešu valodā."

    prompt_text = f"""
    You are a Senior Financial Analyst.
    Review the following financial data for Baltic companies:

    {data_summary}

    Perform a Peer Review:
    1. Compare the *Profitability* (Net Margin). Which company is more efficient?
    2. Analyze *Liquidity* (Current Ratio). Are any companies at risk (ratio < 1.0)?
    3. Evaluate *Solvency* (Debt/Equity). Who is more leveraged?
    4. Provide a concluding recommendation.

    {lang_instruction}
    """

    if isinstance(llm, (ChatOpenAI, ChatGroq, ChatGoogleGenerativeAI)):
        response = llm.invoke(prompt_text)
        return response.content
    return "LLM Error"

In [5]:


def process_reports(files, provider, api_key, language):
    """Gradio callback: extract figures from each PDF, chart the ratios, and request a peer review.

    Args:
        files: List of uploaded PDF file paths (may be empty or None).
        provider: Provider label passed to get_llm.
        api_key: API key passed to get_llm.
        language: Output language passed to generate_peer_review.

    Returns:
        A tuple ``(analysis_text, figure, dataframe)``. When nothing could be
        processed, the figure and dataframe are ``None`` and the text explains
        why. Files that could not be extracted are listed at the end of the
        text so the user can see them, not only the server log.
    """
    if not files:
        return "No files uploaded", None, None

    try:
        llm = get_llm(provider, api_key)
    except Exception as e:
        return f"API Error: {e}", None, None

    results = []
    skipped = []  # user-facing messages for files that produced no data

    for file in files:
        extracted = extract_data(file, llm)

        # extract_data signals failure by returning a string instead of raising.
        if isinstance(extracted, str):
            if extracted.startswith("Error extracting"):
                print(f"Skipping file due to error: {extracted}")
                skipped.append(extracted)
            else:
                print(f"Unexpected string response for {file}")
                skipped.append(f"Unexpected string response for {file}")
            continue

        results.append(calculate_ratios(extracted))

    skipped_note = ""
    if skipped:
        skipped_note = "\n\n**Skipped files:**\n" + "\n".join(f"- {message}" for message in skipped)

    if not results:
        return ("Could not extract data. Check if PDFs are readable or if API Key is valid."
                + skipped_note), None, None

    df = pd.DataFrame(results)

    # Long format: one row per (company, ratio) so the bars can be grouped by ratio.
    df_melted = df.melt(id_vars=["Company", "Year"],
                        value_vars=["Current Ratio", "Net Margin (%)", "Debt/Equity"],
                        var_name="Ratio", value_name="Value")

    fig = px.bar(df_melted, x="Company", y="Value", color="Ratio", barmode="group",
                 title="Peer Comparison: Key Financial Ratios",
                 text_auto=True)

    # The chart and table are already computed; a failing review request
    # (network error, rate limit, ...) must not discard them.
    try:
        analysis_text = generate_peer_review(df, language, llm)
    except Exception as e:
        analysis_text = f"Peer review could not be generated: {e}"

    if skipped_note:
        analysis_text = f"{analysis_text}{skipped_note}"

    return analysis_text, fig, df

In [None]:


# Gradio front end: settings and upload on the left, results on the right.
with gr.Blocks(title="Baltic Financial AI Agent") as app:
    gr.Markdown("# 🇱🇻 Baltic Financial AI Agent")
    gr.Markdown("Upload Latvian Annual Reports (PDF). The Agent will act as an accountant to extract data, calculate ratios, and perform a peer review.")

    with gr.Row():
        # Left column: credentials, model/language choice and the PDF upload.
        with gr.Column(scale=1):
            api_key_input = gr.Textbox(label="API Key", type="password", placeholder="sk-...")
            provider_input = gr.Dropdown(["OpenAI (GPT-4o)", "Groq (Llama 3)", "Gemini (Pro)"],
                                         label="LLM Provider", value="OpenAI (GPT-4o)")
            language_input = gr.Radio(["English", "Latvian"], label="Output Language", value="English")
            file_input = gr.File(file_count="multiple", label="Upload PDFs (Bilance/PZA)", type="filepath")
            analyze_btn = gr.Button("🚀 Analyze & Peer Review", variant="primary")

        # Right column: chart, LLM commentary and the underlying table.
        with gr.Column(scale=2):
            plot_output = gr.Plot(label="Financial Ratios Visualization")
            analysis_output = gr.Markdown(label="AI Peer Review Insights")
            data_table = gr.DataFrame(label="Extracted Data & Ratios")

    # The order of `outputs` must match the tuple returned by process_reports:
    # (analysis text, figure, dataframe).
    analyze_btn.click(
        process_reports,
        inputs=[file_input, provider_input, api_key_input, language_input],
        outputs=[analysis_output, plot_output, data_table]
    )


# share=True publishes a temporary public URL; debug=True keeps the cell
# running so errors and logs stay visible in the notebook.
app.launch(debug=True, share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://7e3555fdebb944f20e.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
