In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# -----------------------------------------------------
# 1. Columns to evaluate in each source
#    (adjust the names if they change in your DB)
# -----------------------------------------------------

# Columns looked up in df_wp.
wp_cols = [
    "journey_id", "capture_time",
    "latitude", "longitude",
    "speed_mph", "local_time",
]

# Columns looked up in df_tp.
tp_cols = [
    "SegmentId",
    "CrossingStartDateLocal", "CrossingEndDateLocal",
    "CrossingSpeedMph",
]

# -----------------------------------------------------
# 2. Completeness helper (% of non-null values)
# -----------------------------------------------------

def completeness(series):
    """Return the percentage of non-null entries in ``series``.

    The result is the mean of ``series.notna()`` scaled by 100, so a
    series with no nulls yields 100.0 and an all-null series yields 0.0.
    """
    present = series.notna()
    return present.mean() * 100.0

# One record per (source, column) pair that exists in its frame.
records = []

# (source label, frame name shown in warnings, frame, columns to check)
sources = [
    ("waypoint", "df_wp", df_wp, wp_cols),  # map-matched / cleaned
    ("trajs", "df_tp", df_tp, tp_cols),     # cleaned + mapped
]

for source, frame_name, frame, cols in sources:
    for col in cols:
        if col not in frame.columns:
            # Best effort: report the missing column and keep going.
            print(f"WARNING: columna {col} no existe en {frame_name}")
            continue
        comp = completeness(frame[col])
        records.append({"column": col, "source": source, "completeness": comp})

# Naming the columns explicitly keeps the frame well-formed even when no
# record was collected; without it df_comp["column"] raises KeyError.
df_comp = pd.DataFrame(records, columns=["column", "source", "completeness"])

# Bar order in the chart: wp_cols first, then tp_cols.
# Categorical categories must be unique, so repeated names are dropped
# while keeping first-seen order.
order = list(dict.fromkeys(wp_cols + tp_cols))
df_comp["column"] = pd.Categorical(df_comp["column"], categories=order, ordered=True)
# Stable sort so rows that share a column name keep their insertion order.
df_comp = df_comp.sort_values("column", kind="stable")

# =====================================================
# 3. Plot completeness bars
# =====================================================

import os

from matplotlib.patches import Patch

fig, ax = plt.subplots(figsize=(10, 6))

x = np.arange(len(df_comp))
heights = np.asarray(df_comp["completeness"], dtype=float)

# One color per source; the same mapping feeds the legend below so the
# bars and the legend cannot drift apart.
source_colors = {
    "waypoint": "lightcoral",
    "trajs": "goldenrod",
}
colors = df_comp["source"].map(source_colors).values

bars = ax.bar(x, heights, color=colors)

# Threshold line (e.g. 99%). The label is derived from the value so that
# changing `threshold` updates the legend text as well.
threshold = 99.0
threshold_line = ax.axhline(
    threshold,
    color="red",
    linestyle="--",
    linewidth=1,
    label=f"{threshold:g}% Threshold",
)

# Default window is 95-101. If any bar is lower than the window floor it
# would be clipped out of view entirely, so extend the floor downwards.
y_min, y_max = 95.0, 101.0
finite_heights = heights[np.isfinite(heights)]
if finite_heights.size and finite_heights.min() < y_min:
    y_min = max(0.0, np.floor(finite_heights.min()) - 1.0)
ax.set_ylim(y_min, y_max)

ax.set_ylabel("Completeness (%)")
ax.set_xlabel("Column Name")
ax.set_title("Data Completeness by Source and Column")

ax.set_xticks(x)
ax.set_xticklabels(df_comp["column"].astype(str), rotation=45, ha="right")

# Manual legend: one patch per source plus the actual threshold line
# artist, so the threshold is drawn as a dashed line, not as a box.
legend_handles = [
    Patch(facecolor=color, label=source) for source, color in source_colors.items()
]
legend_handles.append(threshold_line)
ax.legend(handles=legend_handles, loc="lower left")

fig.tight_layout()

# Save the figure under the current working directory.
PROJECT_ROOT = os.getcwd()
out_dir = os.path.join(PROJECT_ROOT, "figures_from_tool_db_only")
os.makedirs(out_dir, exist_ok=True)
out_path = os.path.join(out_dir, "data_completeness_waypoint_trajs.png")
# Save before show(): some backends release the figure once it is shown.
fig.savefig(out_path, dpi=300)
plt.show()

print("Figure saved to:", out_path)
