In [None]:
import pandas as pd
import json

# Load the two income statements from their JSON files.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
with open('income-2024.json', 'r', encoding='utf-8') as file:
    data_2024 = json.load(file)

with open('income-2025.json', 'r', encoding='utf-8') as file:
    data_2025 = json.load(file)

# Flatten the nested JSON into a DataFrame: one row per loaded file
# (assuming each file holds a single JSON object — TODO confirm), with
# nested keys joined by "." in the column names.
df = pd.json_normalize([data_2024, data_2025])

# Convert column names to use underscores instead of dots and spaces.
# regex=False makes "." a literal dot; interpreted as a regular expression it
# would match every character, and the default for `regex` differs between
# pandas versions.
df.columns = (
    df.columns
    .str.replace('.', '_', regex=False)
    .str.replace(' ', '_', regex=False)
)

# Print the DataFrame
print(df)

In [118]:
from llama_index.core import Settings
from llama_index.llms.fireworks import Fireworks
import os
# Provide the Fireworks API key through the FIREWORKS_API_KEY environment
# variable. A key that is already exported is left untouched; otherwise it is
# requested interactively so the secret never has to be written into (and
# saved with) the notebook.
if not os.environ.get("FIREWORKS_API_KEY"):
    from getpass import getpass

    os.environ["FIREWORKS_API_KEY"] = getpass("Fireworks API key: ")

# Configure LlamaIndex to use Fireworks as the global LLM.
Settings.llm = Fireworks(
    model="accounts/fireworks/models/mixtral-8x7b-instruct",
    max_tokens=512,
    temperature=0.1,  # low temperature: presumably to keep generated code stable
)

In [None]:
# Summarise the DataFrame as text, one "column_name (dtype)" entry per line.
df_schema = "\n".join(
    "{} ({})".format(name, df[name].dtype) for name in df.columns
)
print(df_schema)

In [142]:
from llama_index.experimental.query_engine import PandasQueryEngine

# Build the natural-language query engine over `df`.
# The instruction text is an f-string so that {df_schema} is replaced by the
# column/dtype listing built earlier; as a plain string the literal text
# "{df_schema}" would be sent to the model instead of the schema.
query_engine = PandasQueryEngine(
    df=df,
    verbose=True,
    instruction_str=f"""You are a financial data analysis assistant working with a pandas DataFrame. Your goal is to generate accurate Python pandas code to answer user queries.

DataFrame Schema:
{df_schema}
### IMPORTANT RESPONSE FORMAT RULES:
1. Provide ONLY the Python code, no explanations or markdown.
2. Do not include backticks (```) or language indicators.
3. Do not include any text before or after the code.
4. The response should be a single line of Python code that can be directly evaluated.
"""
)

In [None]:
# Ask the engine about 2024 total revenue and print whatever it returns.
response = query_engine.query(
    "What is the total revenue in 2024?"
)
print(response)

In [None]:
# Ask the engine about 2025 total operating expenses and print whatever it returns.
response = query_engine.query(
    "What is the total operating expenses in 2025?"
)
print(response)

In [None]:
# Ask the engine about 2024 total cost of sales and print whatever it returns.
response = query_engine.query(
    "What is the total cost of sales in 2024?"
)
print(response)