# 🤖 LLM-Based Report Generator
This notebook reads report metadata, uses an OpenAI chat model (currently `gpt-4.1-nano`) to generate a SQL query for each report, and then executes the generated SQL with Spark.

In [0]:
# Updated for openai>=1.0.0
import json
import os
import re

from openai import OpenAI
from pyspark.sql import SparkSession

# Obtain the Spark session handle used by the later cells
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

# Read the OpenAI key from the "llm" secret scope and build the API client with it
OPENAI_API_KEY = dbutils.secrets.get(key="openai_api_key", scope="llm")
client = OpenAI(api_key=OPENAI_API_KEY)

In [0]:
# Path to metadata file (DBFS or ADLS)
metadata_path = "abfss://kyc-data@reprotingfactorydl.dfs.core.windows.net/finance/kyc/metadata/report_definitions_prompt.json"

# Fields every report definition must carry; later cells index both of them directly
REQUIRED_REPORT_FIELDS = ("report_name", "prompt")

# Load metadata: one dict per report definition
try:
    metadata_df = spark.read.option("multiline", "true").json(metadata_path)
    # The definition list is small, so collect the JSON strings and parse them on
    # the driver instead of shipping json.loads to the executors via RDD.map.
    report_defs = [json.loads(row_json) for row_json in metadata_df.toJSON().collect()]
except Exception as e:
    # Chain explicitly so the original Spark/IO traceback stays attached
    raise RuntimeError(f"Failed to load metadata file: {metadata_path}. Error: {e}") from e

# An empty file is not treated as fatal (the generation loop simply has nothing to do),
# but it is almost certainly a misconfiguration, so say so.
if not report_defs:
    print(f"Warning: no report definitions found in {metadata_path}")

# Fail fast on incomplete definitions, before any API call is made or table is dropped.
# NOTE(review): Spark's JSON reader is permissive by default, so a malformed file can
# load without raising and only show up here as rows lacking the expected fields.
for position, report_def in enumerate(report_defs):
    missing_fields = [name for name in REQUIRED_REPORT_FIELDS if not report_def.get(name)]
    if missing_fields:
        raise ValueError(
            f"Report definition #{position} in {metadata_path} is missing "
            f"required field(s): {', '.join(missing_fields)}"
        )

In [0]:
# Column listing injected into every prompt (simplified for this demo)
schema = """step (int), type (string), amount (double), isFraud (int), date (date)"""

# Chat model used for SQL generation
SQL_MODEL = "gpt-4.1-nano"

# One to three dot-separated name parts, each either plain or backtick-quoted.
# Used to keep report_name from smuggling extra SQL into the DROP TABLE statement.
_TABLE_NAME_RE = re.compile(
    r"(?:[A-Za-z0-9_]+|`[^`]+`)(?:\.(?:[A-Za-z0-9_]+|`[^`]+`)){0,2}"
)

# A reply that is wrapped, in its entirety, in one markdown code fence (```sql ... ```)
_CODE_FENCE_RE = re.compile(r"```[A-Za-z0-9_+-]*[ \t]*\r?\n(.*?)```", re.DOTALL)


def _clean_sql(raw):
    """Return the model reply as bare SQL text.

    Args:
        raw: The message content returned by the chat completion; may be None.

    Returns:
        The reply stripped of surrounding whitespace and, if the whole reply is
        wrapped in a single markdown code fence, of that fence. Returns an empty
        string when there is no content.
    """
    text = (raw or "").strip()
    fenced = _CODE_FENCE_RE.fullmatch(text)
    if fenced:
        text = fenced.group(1).strip()
    return text


# Generate one SQL statement per report definition and execute it
for report in report_defs:
    report_name = str(report['report_name'])
    if not _TABLE_NAME_RE.fullmatch(report_name):
        raise ValueError(f"Refusing to use unsafe table name from metadata: {report_name!r}")

    prompt = f"""
You are a data analyst. Generate a SQL query for the following report request:
Prompt: {report['prompt']}
{schema}
Use the table: finance.kyc_ml.customer_enriched.
Return only valid SQL without explanation or markdown.
"""
    print(f"Generating SQL for: {report_name}")

    response = client.chat.completions.create(
        model=SQL_MODEL,
        messages=[{"role": "user", "content": prompt}],
    )

    # The prompt asks for bare SQL, but the reply can still be empty or fenced;
    # normalise it and stop before touching any table if nothing usable came back.
    sql_code = _clean_sql(response.choices[0].message.content)
    if not sql_code:
        raise ValueError(f"Model returned no SQL for {report_name}")
    print(f"\n{sql_code}\n")

    # NOTE(review): sql_code is model-generated and runs unreviewed with this
    # session's privileges; nothing here restricts what kind of statement it is.
    # Consider an allow-list or a dry run (e.g. EXPLAIN) before executing.
    # NOTE(review): the table named report_name is dropped first; presumably each
    # report prompt asks the model to (re)create it — confirm, nothing here enforces it.
    try:
        spark.sql(f"DROP TABLE IF EXISTS {report_name}")
        spark.sql(sql_code)
    except Exception as e:
        # Chain explicitly so the underlying Spark error and traceback are preserved
        raise RuntimeError(f"Error executing SQL for {report_name}: {e}") from e