In [None]:
import pandas as pd
import os

# ====================================================
# --- 1️⃣ Source and destination file paths ---
# ====================================================
# Paths are assembled with os.path.join instead of backslash literals.
# In a regular (non-raw) string "\f" is the form-feed escape, so the former
# "..\data\csv\fomc_..." literals silently produced a corrupted file name,
# and "\d" / "\c" are invalid escape sequences. Joining the components also
# keeps the script portable between Windows and POSIX systems.
ruta_origen = os.path.join("..", "data", "csv", "fomc_sentiment_finbert.csv")
ruta_destino = os.path.join("..", "data", "csv", "fomc_sentiment_clean.csv")

# ====================================================
# --- 2️⃣ Load and clean ---
# ====================================================
df = pd.read_csv(ruta_origen)

# Detect the date and sentiment columns by name: the first column whose name
# contains "date", and the first whose name contains both "sentiment" and
# "score" (case-insensitive).
fecha_col = next((c for c in df.columns if "date" in c.lower()), None)
sent_col = next(
    (c for c in df.columns if "sentiment" in c.lower() and "score" in c.lower()),
    None,
)

if fecha_col is None or sent_col is None:
    raise ValueError("❌ No se encontraron las columnas esperadas de fecha o sentimiento.")

# Keep only the two relevant columns under normalized names.
df_clean = df[[fecha_col, sent_col]].copy()
df_clean.columns = ["release_date", "sentiment_score"]

# Parse dates first (unparseable values become NaT), then drop every row that
# is missing either a valid date or a sentiment score in a single pass.
df_clean["release_date"] = pd.to_datetime(df_clean["release_date"], errors="coerce")
df_clean = df_clean.dropna(subset=["release_date", "sentiment_score"])

# ====================================================
# --- 3️⃣ Save the cleaned dataset ---
# ====================================================
# Make sure the destination directory exists before writing.
os.makedirs(os.path.dirname(ruta_destino), exist_ok=True)
df_clean.to_csv(ruta_destino, index=False)

print(f"✅ Dataset limpio guardado en:\n{ruta_destino}")
print(df_clean.head())
