# Databricks notebook: 06_publish_export_notify_reports
## Purpose:
For each APPROVED report in finance.kyc_ml.report_candidates:
- Move DRAFT files to final (remove "_DRAFT" in file name)
- Update final_paths, report_url  
- Set status='published', published_at=now()
- Notify stakeholders (Slack webhook or email stub)
## Assumptions: 
- DRAFT file paths are stored as dbfs:/FileStore/... (or convertible from /files/...)
- Running inside a Databricks cluster (dbutils available) for file moves


In [0]:
import os, json, requests
from datetime import datetime

# ---------------- Widgets / Config ----------------
# Notebook-level configuration. The values below are plain constants; no
# widget is read in the visible code.

# Catalog and schema that hold the report candidates table.
# NOTE(review): the notebook header names finance.kyc_ml.report_candidates,
# but SCHEMA below makes REPORT_CAND resolve to
# finance.kyc_gold.report_candidates — confirm which schema is intended.
CATALOG        = "finance"
SCHEMA         = "kyc_gold"
# META_FILE and VIEW_DUMP_ROOT are not referenced in the visible part of this
# notebook; presumably shared with sibling notebooks — verify before removing.
META_FILE      = "dbfs:/FileStore/kyc/report_metadata/report_definitions.json"
VIEW_DUMP_ROOT = "dbfs:/FileStore/kyc/reports/views"
# Fully qualified name of the table read and updated by the publish steps.
REPORT_CAND = f"{CATALOG}.{SCHEMA}.report_candidates"

# Fallback recipient used by notify_stakeholders when a report row carries no
# `notify` value; an empty string disables the fallback.
DEFAULT_NOTIFY_EMAIL = ""

# Make the catalog/schema above the session defaults. Order matters: the
# catalog is selected first, then the schema.
spark.sql(f"USE CATALOG {CATALOG}")
spark.sql(f"USE SCHEMA {SCHEMA}")

# ---------------- Helpers ----------------
def to_dbfs_path(p: str) -> str:
    """Normalize a stored report location to a ``dbfs:/`` path.

    Accepts '/files/...', 'http(s)://<host>/files/...' (optionally followed by
    a query string or fragment such as '?o=<workspace-id>'), or any
    'dbfs:/...' path.

    Args:
        p: Location as stored on the report row. ``None`` and blank strings
            are tolerated.

    Returns:
        The ``dbfs:/`` form of the location, or an empty string when ``p`` is
        empty. '/files/...' maps to 'dbfs:/FileStore/...', an existing
        'dbfs:/...' path is returned unchanged, and anything else is treated
        as relative to 'dbfs:/FileStore/'.
    """
    q = (p or "").strip()
    if not q:
        return q
    if q.startswith(("http://", "https://")):
        # Keep only the path after the host: 'https://host/a/b' -> '/a/b'.
        parts = q.split("/", 3)
        q = "/" + parts[3] if len(parts) >= 4 else "/"
        # A query string or fragment belongs to the URL, not to the file
        # name, so it must not leak into the DBFS path.
        q = q.split("?", 1)[0].split("#", 1)[0]
    if q.startswith("/files/"):
        return "dbfs:/FileStore/" + q[len("/files/"):]
    if q.startswith("dbfs:/"):
        # Already a DBFS path (under FileStore or elsewhere).
        return q
    # Last resort: treat as relative under FileStore.
    return "dbfs:/FileStore/" + q.lstrip("/")

def strip_draft_suffix(dbfs_path: str) -> str:
    """Return ``dbfs_path`` with the draft marker removed from its file name.

    Only the last '_DRAFT.' inside the final path component is rewritten
    ('..._DRAFT.ext' -> '....ext'). Directory names that happen to contain
    '_DRAFT.' are left alone, so the file stays in its original folder.

    Args:
        dbfs_path: Path whose last component may carry a '_DRAFT' marker.

    Returns:
        The path with the marker removed, or ``dbfs_path`` unchanged when the
        file name carries no marker.
    """
    directory, slash, filename = dbfs_path.rpartition("/")
    stem, marker, extension = filename.rpartition("_DRAFT.")
    if not marker:
        return dbfs_path
    return f"{directory}{slash}{stem}.{extension}"

def dbfs_to_files_url(
    dbfs_path: str,
    host: str = "https://adb-4274438097098742.2.azuredatabricks.net",
) -> str:
    """Build the workspace download URL for a file stored under FileStore.

    'dbfs:/FileStore/<rest>' becomes '<host>/files/<rest>'.

    Args:
        dbfs_path: DBFS path of the file, normally under 'dbfs:/FileStore/'.
        host: Workspace base URL; a trailing '/' is tolerated.

    Returns:
        The '/files/' URL for the path. A path that is not under
        'dbfs:/FileStore/' has no '/files/' equivalent and is simply appended
        to the host.
    """
    prefix = "dbfs:/FileStore/"
    base = host.rstrip("/")
    if dbfs_path.startswith(prefix):
        # Strip only the leading prefix so a later 'dbfs:/FileStore/' inside
        # the path is left untouched.
        return f"{base}/files/{dbfs_path[len(prefix):]}"
    return f"{base}/{dbfs_path.lstrip('/')}"

def notify_stakeholders(report_row: dict, report_url: str):
    """Email a report's stakeholders that the published report is available.

    Sends one message through the MailerSend HTTP API. HTTP and network
    failures are printed rather than raised, so a failed notification does
    not undo or block publishing.

    Args:
        report_row: Mapping with optional keys ``report_name`` (used in the
            subject and body) and ``notify`` (recipient addresses: a list, or
            a single string with addresses separated by ',' or ';'). Other
            keys, such as ``owner``, are accepted but not used.
        report_url: Link placed in the message body.

    Raises:
        Whatever ``dbutils.secrets.get`` raises when the API key cannot be
        read.
    """
    from html import escape

    title = report_row.get("report_name") or "Report"
    notify = report_row.get("notify") or []
    if isinstance(notify, str):
        # A bare string would otherwise be iterated character by character.
        notify = notify.replace(";", ",").split(",")

    recipients = [str(addr).strip() for addr in notify if addr and str(addr).strip()]
    if not recipients and DEFAULT_NOTIFY_EMAIL:
        recipients = [DEFAULT_NOTIFY_EMAIL]
    if not recipients:
        print("No email recipients configured (notify empty & no DEFAULT_NOTIFY_EMAIL).")
        return

    # NOTE(review): the secret is named GRAPH_CLIENT_SECRET but is sent as the
    # MailerSend bearer token (the original comment hints at
    # MAILERSEND_API_KEY) — confirm the secret scope/key.
    api_key = dbutils.secrets.get("email-secrets", "GRAPH_CLIENT_SECRET")

    safe_title = escape(str(title))
    safe_url = escape(str(report_url), quote=True)
    payload = {
        "from": {"email": "MS_Wp3pH3@test-nrw7gymdorog2k8e.mlsender.net"},
        "to": [{"email": addr} for addr in recipients],
        "subject": f"Report '{title}' is ready",
        # Plain-text and HTML variants: markup in the text part would be shown
        # literally by mail clients.
        "text": f"Hi,\n\nReport '{title}' is available to download at {report_url}",
        "html": (
            f"<p>Hi,</p><p>Report '{safe_title}' is available to download at "
            f"<a href=\"{safe_url}\">{safe_url}</a></p>"
        ),
    }

    sent_to = ", ".join(recipients)
    try:
        resp = requests.post(
            "https://api.mailersend.com/v1/email",
            headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            data=json.dumps(payload),
            timeout=30,  # never let a stalled HTTP call hang the notebook
        )
        # Any 4xx/5xx answer means the mail was not accepted.
        resp.raise_for_status()
    except requests.RequestException as exc:
        print(f"❌ Email notification failed for {sent_to}: {exc}")
        return

    print(f"Email notified → {sent_to} :: {report_url}")

# ---------------- 1) Pick APPROVED reports ----------------
import ast  # only needed to decode array columns that arrive as strings

approved = spark.sql(f"""
  SELECT id, report_name, view_name, report_owner, export_format, report_url, notify
  FROM {REPORT_CAND}
  WHERE status = 'APPROVED'
""").collect()

print(f"Approved reports to publish: {len(approved)}")
if not approved:
    dbutils.notebook.exit("No approved reports to publish.")  # graceful exit


def _sql_literal(value) -> str:
    """Render ``value`` as a single-quoted Spark SQL string literal."""
    # Backslash first, then the quote, so the escapes added for quotes are
    # not themselves doubled.
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return f"'{escaped}'"


def _as_path_list(value) -> list:
    """Coerce the ``report_url`` column value to a list of non-empty paths."""
    if not value:
        return []
    if isinstance(value, str):
        # Some drivers serialize array<STRING> as its textual form.
        try:
            parsed = ast.literal_eval(value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            parsed = value  # a plain path, not a serialized list
        if isinstance(parsed, (list, tuple, set)):
            value = list(parsed)
        elif isinstance(parsed, str):
            value = [parsed]
        else:
            # Decoded to a number, dict, ...: keep the raw text as one path.
            value = [value]
    return [p for p in value if p]


def _publish_file(src_dbfs: str, dst_dbfs: str) -> None:
    """Move one draft file to its final path, replacing a previous final file.

    Raises if the draft does not exist or the move fails.
    """
    # Listing a missing path raises. Checking first guarantees an existing
    # final file is never deleted without a draft to put in its place.
    dbutils.fs.ls(src_dbfs)  # type: ignore
    if dst_dbfs == src_dbfs:
        # No "_DRAFT" marker to strip: the file already sits at its final
        # path, and rm + mv would delete the only copy.
        return

    # Ensure parent exists (best-effort; a real problem surfaces in mv).
    parent = dst_dbfs.rpartition("/")[0]
    if parent:
        try:
            dbutils.fs.mkdirs(parent)  # type: ignore
        except Exception as e:
            print(f"⚠️ Could not create {parent}: {e}")

    # Overwrite if exists: delete, then move.
    try:
        dbutils.fs.rm(dst_dbfs, recurse=False)  # type: ignore
    except Exception as e:
        print(f"⚠️ Could not remove existing {dst_dbfs}: {e}")
    dbutils.fs.mv(src_dbfs, dst_dbfs, True)  # type: ignore


# ---------------- 2) Finalize drafts → publish & update table ----------------
published_count = 0
for r in approved:
    rid = r["id"]
    report_name = r["report_name"]
    draft_paths = _as_path_list(r["report_url"])

    if not draft_paths:
        print(f"⚠️ No report_url for report {report_name} (id={rid}); skipping.")
        continue

    final_paths = []
    for p in draft_paths:
        src_dbfs = to_dbfs_path(p)
        dst_dbfs = strip_draft_suffix(src_dbfs)
        try:
            _publish_file(src_dbfs, dst_dbfs)
        except Exception as e:
            # Continue to next file, do not stop whole run.
            print(f"❌ Failed to publish {src_dbfs}: {e}")
            continue
        final_paths.append(dst_dbfs)
        print(f"Published: {src_dbfs} → {dst_dbfs}")

    if not final_paths:
        print(f"⚠️ No files published for report {report_name} (id={rid}); leaving status approved.")
        continue
    if len(final_paths) < len(draft_paths):
        print(
            f"⚠️ Only {len(final_paths)} of {len(draft_paths)} files published "
            f"for report {report_name} (id={rid})."
        )

    # Canonical URL for the first final file
    report_url = dbfs_to_files_url(final_paths[0])

    # Update row → published. Values are escaped because file names (and
    # therefore the URL) may contain quotes.
    spark.sql(f"""
      UPDATE {REPORT_CAND}
      SET
          report_url = {_sql_literal(report_url)},
          status = 'published',
          published_at = current_timestamp()
      WHERE id = {_sql_literal(rid)}
    """)
    published_count += 1
    print(f"✅ Report published: {report_name} (id={rid}) → {report_url}")

    # Notify stakeholders. The report is already published at this point, so
    # a notification problem must not abort the remaining reports.
    try:
        notify_stakeholders({
            "report_name": report_name,
            "owner": r["report_owner"],
            "notify": r["notify"],
        }, report_url)
    except Exception as e:
        print(f"⚠️ Report {report_name} (id={rid}) was published but notification failed: {e}")

print(
    f"🎯 Done: published {published_count} of {len(approved)} APPROVED reports "
    "and sent notifications where configured."
)