In [0]:
%pip install openai


In [0]:
from pyspark.sql import functions as F
from pyspark.sql import Window
import uuid
import json

# Fully qualified names of the tables this notebook reads
metrics_table = "nlp_dev.agents.model_metrics"
confusion_table = "nlp_dev.gold_gold.fact_sentiment_confusion"
summary_table = "nlp_dev.gold_gold.fact_sentiment_summary"

# Table name for the generated ops reports
ops_reports_table = "nlp_dev.agents.ops_reports"

# ADLS location where the ops reports are stored
ops_reports_path = "abfss://nlp@nlplakeadls001.dfs.core.windows.net/agents/ops_reports"

In [0]:
df_metrics = spark.table(metrics_table)

# Keep only the most recent metrics row (largest created_at).
# Sorting descending and taking one row yields the same result as numbering
# rows over an un-partitioned window, but without forcing Spark to move the
# whole table into a single partition just to rank it.
df_latest_metrics = df_metrics.orderBy(F.col("created_at").desc()).limit(1)

display(df_latest_metrics)

In [0]:
# Load the confusion and summary fact tables, then show each one
df_conf, df_sum = map(spark.table, (confusion_table, summary_table))

display(df_conf)
display(df_sum)

In [0]:
# Collect the latest metrics row to the driver as a plain dict
metrics_records = df_latest_metrics.limit(1).toPandas().to_dict(orient="records")
if not metrics_records:
    # Fail with an explicit message instead of an opaque IndexError on [0]
    raise ValueError("No model metrics rows found; cannot build an ops report.")
metrics = metrics_records[0]

# Collect confusion and summary as lists of row dicts.
# Both tables are pulled to the driver in full, so they are expected to be small.
confusion_rows = df_conf.toPandas().to_dict(orient="records")
summary_rows   = df_sum.toPandas().to_dict(orient="records")

context = {
    "metrics": metrics,
    "confusion": confusion_rows,
    "summary": summary_rows,
}

# default=str stringifies values json cannot encode natively; preview trimmed to 2000 chars
print(json.dumps(context, indent=2, default=str)[:2000])


In [0]:
def build_ops_summary(context: dict) -> dict:
    """Derive a plain-text ops summary and retraining flags from model metrics.

    Args:
        context: Mapping with a "metrics" entry (a dict). The keys read from it
            are "overall_accuracy", "model_name", "model_version" and
            "metric_scope"; other entries of ``context`` are not used.

    Returns:
        A dict with two keys:
            "summary_text": space-joined sentences naming the model, its
                accuracy, its performance tier and the retraining advice.
            "flags": {"needs_retrain": bool, "performance_tier": str}, where
                the tier is "excellent" (accuracy >= 0.9), "concerning"
                (accuracy < 0.8) or "good" otherwise, and needs_retrain is
                True when accuracy < 0.85.

    Raises:
        KeyError: if ``context`` has no "metrics" entry.
    """
    import math

    m = context["metrics"]

    # A missing, null or NaN accuracy is treated as 0.0 so that an absent metric
    # is flagged for attention. Previously None raised TypeError, and NaN slipped
    # through every comparison and was rated "good" with no retrain.
    raw_acc = m.get("overall_accuracy")
    overall_acc = 0.0 if raw_acc is None else float(raw_acc)
    if math.isnan(overall_acc):
        overall_acc = 0.0

    # basic flags
    needs_retrain = overall_acc < 0.85
    performance_tier = "good"

    if overall_acc >= 0.9:
        performance_tier = "excellent"
    elif overall_acc < 0.8:
        performance_tier = "concerning"

    # Simple human-readable summary
    summary_lines = [
        f"Model '{m.get('model_name')}' (version {m.get('model_version')}) "
        f"has overall accuracy of {overall_acc:.4f} on scope '{m.get('metric_scope')}'.",
        f"Performance tier: {performance_tier}.",
    ]
    if needs_retrain:
        summary_lines.append(
            "Accuracy is below 0.85. Consider retraining the model on more recent data."
        )
    else:
        # This branch also covers accuracy == 0.85, hence "at or above"
        summary_lines.append(
            "Accuracy is at or above 0.85. No immediate retraining required."
        )

    summary_text = " ".join(summary_lines)

    flags = {
        "needs_retrain": needs_retrain,
        "performance_tier": performance_tier,
    }

    return {
        "summary_text": summary_text,
        "flags": flags,
    }

# Build the summary from the collected context and echo the text, then the flags
agent_output = build_ops_summary(context)
print(agent_output["summary_text"], agent_output["flags"], sep="\n")




In [0]:
from pyspark.sql import Row
from pyspark.sql import functions as F
import uuid
import json

# Assemble the report fields, coercing each metric to a plain Python type;
# missing or falsy metric values fall back to "" / 0 / 0.0.
report_fields = {
    "report_id": str(uuid.uuid4()),
    "model_name": str(metrics.get("model_name") or ""),
    "model_version": int(metrics.get("model_version") or 0),
    "run_id": str(metrics.get("run_id") or ""),
    "metric_scope": str(metrics.get("metric_scope") or ""),
    "overall_accuracy": float(metrics.get("overall_accuracy") or 0.0),
    "summary_text": agent_output["summary_text"],
    "flags_json": json.dumps(agent_output["flags"]),
}
report_row = Row(**report_fields)

# One-row DataFrame; generated_at is filled in by Spark via current_timestamp()
df_report = spark.createDataFrame([report_row]).withColumn(
    "generated_at", F.current_timestamp()
)

display(df_report)


In [0]:
# Append the report row, in Delta format, to the data stored at ops_reports_path
delta_writer = df_report.write.format("delta").mode("append")
delta_writer.save(ops_reports_path)
print("Appended ops report to Delta path:", ops_reports_path)


In [0]:
%sql
-- Register the Delta data at the ops-reports location as a table, unless it already exists
CREATE TABLE IF NOT EXISTS nlp_dev.agents.ops_reports
  USING DELTA
  LOCATION 'abfss://nlp@nlplakeadls001.dfs.core.windows.net/agents/ops_reports';


In [0]:
%sql
-- List every stored ops report, newest first
SELECT *
  FROM nlp_dev.agents.ops_reports
 ORDER BY generated_at DESC;
