In [1]:
from pyspark.sql import SparkSession, Row
from pyspark.sql.functions import expr

# -----------------------
# Spark session
# -----------------------
# Module-level session shared by the functions below; getOrCreate() returns
# the already-active session when one exists instead of building a new one.
spark = (
    SparkSession.builder
    .appName("LLM_Parse_Model_Output")
    .getOrCreate()
)

# -----------------------
# Function to parse & summarize
# -----------------------
def llm_summarize_model_output(file_path: str):
    """Summarize a raw model-output text file with an LLM.

    Reads the whole file at ``file_path``, embeds its text in an instruction
    prompt, places that prompt in a one-row DataFrame, and evaluates the
    ``query_model`` SQL expression over the ``prompt`` column.

    Relies on the module-level ``spark`` session.

    Args:
        file_path: Path to the text file holding the raw model output
            (e.g. output.txt from model training).

    Returns:
        The value of the ``summary`` column from the single result row
        (presumably the model's text response -- confirm against the
        ``query_model`` contract).

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the text sent to the model does not
    # depend on the platform's locale encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        raw_text = f.read()

    # Prompt for LLM
    prompt = f"""
You are an expert data scientist.
Here is the raw model output from a statistical model (could include coefficients, odds ratios, accuracy, ROC, etc.):

{raw_text}

Your task:
- Parse this output.
- Summarize key outcomes in 5–7 bullet points.
- Cover model quality (AUC, accuracy, etc.), strongest predictors, weak predictors, and interpretation.
- Write in clear business-friendly English.
"""

    # The prompt travels as a column value rather than being spliced into the
    # SQL text, so quotes inside the file cannot break the expression.
    prompt_df = spark.createDataFrame([Row(prompt=prompt)])
    out_df = prompt_df.select(
        expr("query_model('default.oci_ai_models.xai.grok-4', prompt) AS summary")
    )

    # prompt_df has exactly one row, so the result has exactly one row too.
    summary = out_df.collect()[0]["summary"]
    return summary

# -----------------------
# Example Usage
# -----------------------
if __name__ == "__main__":
    # Summarize the example training output and print it under a banner.
    model_output_path = "/Workspace/output.txt"
    summary = llm_summarize_model_output(model_output_path)
    print("\n=== Model Outcome Summary ===\n")
    print(summary)



=== Model Outcome Summary ===

- **Model Performance Overview**: The logistic regression model demonstrates strong predictive capability for customer churn, with an Area Under the ROC Curve (AUC) ranging from 0.8467 to 0.8493 across outputs, indicating good discrimination between churners and non-churners. Accuracy is approximately 80%, with precision and recall also around 79-80%, suggesting reliable overall performance on the dataset of about 4,923 observations.

- **Convergence and Fit**: The model converged after 26 iterations, with the objective function stabilizing, reflecting a well-fitted binomial logit model that balances complexity and accuracy without overfitting.

- **Strongest Predictors of Churn**: Features like two-year contracts (odds ratio ~0.24-0.31, strong negative coefficient) and no internet service (odds ratio ~0.02-0.35, highly negative coefficient) are the most influential in reducing churn risk, implying long-term commitments and lack of internet bundles keep 