In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import os

# Read the processed API logs CSV into a DataFrame.
# The path is relative, so it resolves against the current working directory.
processed_logs_csv = "../data/processed_api_logs.csv"
df = pd.read_csv(processed_logs_csv)

# Create the chart output folder; exist_ok=True keeps re-runs from failing
# when the folder is already there.
dashboards_dir = "../dashboards"
os.makedirs(dashboards_dir, exist_ok=True)

# -------------------------------
# 1. API Latency Distribution
# -------------------------------
# Mean latency_ms per api_name, ordered from slowest to fastest.
mean_latency = df.groupby("api_name")["latency_ms"].mean()
latency_by_api = mean_latency.sort_values(ascending=False)

# Draw on an explicit figure/axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(10, 5))
latency_by_api.plot(kind="bar", ax=ax)
ax.set_title("Average Latency by API")
ax.set_xlabel("API")
ax.set_ylabel("Latency (ms)")
fig.tight_layout()
fig.savefig("../dashboards/api_latency_distribution.png")
plt.close(fig)  # release the figure once it has been written to disk

# -------------------------------
# 2. Success Rate by Region
# -------------------------------
# Flag rows whose status equals "success"; the mean of that boolean column
# per region is the fraction of successful rows.
with_success_flag = df.assign(is_success=df["status"] == "success")
success_fraction = with_success_flag.groupby("region")["is_success"].mean()
# Order from highest to lowest, then scale the fraction to a percentage.
success_rate = success_fraction.sort_values(ascending=False) * 100

# Draw on an explicit figure/axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(6, 4))
success_rate.plot(kind="bar", ax=ax)
ax.set_title("Success Rate by Region (%)")
ax.set_xlabel("Region")
ax.set_ylabel("Success Rate (%)")
fig.tight_layout()
fig.savefig("../dashboards/success_rate_by_region.png")
plt.close(fig)  # release the figure once it has been written to disk

# -------------------------------
# 3. Failure Rate by Document Type
# -------------------------------
# Flag rows whose status equals "fail"; the mean of that boolean column
# per document_type is the fraction of failed rows.
# NOTE(review): only an exact "fail" value is counted - confirm this matches
# the status values actually present in the data.
with_fail_flag = df.assign(is_fail=df["status"] == "fail")
fail_fraction = with_fail_flag.groupby("document_type")["is_fail"].mean()
# Order from highest to lowest, then scale the fraction to a percentage.
failure_rate = fail_fraction.sort_values(ascending=False) * 100

# Draw on an explicit figure/axes pair rather than the implicit pyplot state.
fig, ax = plt.subplots(figsize=(6, 4))
failure_rate.plot(kind="bar", ax=ax)
ax.set_title("Failure Rate by Document Type (%)")
ax.set_xlabel("Document Type")
ax.set_ylabel("Failure Rate (%)")
fig.tight_layout()
fig.savefig("../dashboards/failure_rate_by_document.png")
plt.close(fig)  # release the figure once it has been written to disk

# Report completion after the chart-saving steps above have run.
completion_message = "Dashboards generated in /dashboards"
print(completion_message)


Dashboards generated in /dashboards
