In [1]:
import pandas as pd
import numpy as np
from ta.momentum import RSIIndicator
from ta.trend import MACD, SMAIndicator
from ta.volatility import BollingerBands

In [2]:
# Load the raw SPLV price history, parsing "Date" as datetimes.
# The features built from this frame (pct_change, shift, rolling windows and the
# next-row target) are all positional, so ascending chronological order is
# enforced here instead of being assumed from the CSV. mergesort is stable, so
# rows sharing a date keep their file order; ignore_index restores a 0..n-1 index.
df = (
    pd.read_csv("../../data/raw/SPLV_raw.csv", parse_dates=["Date"])
    .sort_values("Date", kind="mergesort", ignore_index=True)
)

In [3]:
# Calendar features derived from the "Date" column
date_parts = df["Date"].dt
df["day_of_week"] = date_parts.dayofweek
# Boolean month-end marker stored as 0/1
df["is_month_end"] = date_parts.is_month_end.astype(int)
df["month"] = date_parts.month

In [4]:
# Price differences and returns
# Open-to-close move of the row, in absolute terms and relative to the open
df["price_diff"] = df["Close"].sub(df["Open"])
df["pct_diff"] = df["price_diff"].div(df["Open"])
# Close-to-close percentage change versus the previous row
df["return_daily"] = df["Close"].pct_change()
# The same return observed 1 to 5 rows earlier
for lag in (1, 2, 3, 4, 5):
    df[f"return_lag_{lag}"] = df["return_daily"].shift(periods=lag)

In [5]:
# Moving averages and return dispersion, each lagged by one row
for sma_window in (5, 10):
    sma = SMAIndicator(close=df["Close"], window=sma_window)
    df[f"sma_{sma_window}"] = sma.sma_indicator().shift(1)

# Standard deviation of the daily return over a 5-row window, lagged by one row
return_roll = df["return_daily"].rolling(window=5)
df["rolling_std_return_5"] = return_roll.std().shift(1)

In [6]:
# Technical indicators computed on the close; every series is lagged by one row
rsi = RSIIndicator(close=df["Close"], window=5)
df["RSI_5"] = rsi.rsi().shift(1)

# MACD line and its signal line, using the library's default windows
macd = MACD(close=df["Close"])
for column, series in (("MACD", macd.macd()), ("MACD_signal", macd.macd_signal())):
    df[column] = series.shift(1)

# Bollinger bands over a 5-row window with a deviation multiplier of 2
bb = BollingerBands(close=df["Close"], window=5, window_dev=2)
bands = {
    "bb_middle": bb.bollinger_mavg(),
    "bb_upper": bb.bollinger_hband(),
    "bb_lower": bb.bollinger_lband(),
}
for column, series in bands.items():
    df[column] = series.shift(1)

In [7]:
# High-volume (outlier) flag
# Mean and standard deviation of volume over a 5-row window, lagged by one row
# so the row's own volume is not part of its reference statistics.
volume_roll = df["Volume"].rolling(window=5)
df["rolling_mean_vol"] = volume_roll.mean().shift(1)
df["rolling_std_vol"] = volume_roll.std().shift(1)

# 1 when the row's volume exceeds mean + 2 standard deviations, else 0.
# Where the statistics are still NaN the comparison is False, hence 0.
outlier_threshold = df["rolling_mean_vol"] + 2 * df["rolling_std_vol"]
df["volume_outlier"] = df["Volume"].gt(outlier_threshold).astype(int)

In [8]:
# Binary technical flags

# price_above_SMA50: 1 if the previous row's close was above its 50-row SMA.
# While the SMA is still NaN (warm-up), `Close > NaN` evaluates to False, and a
# plain int cast would record that as a genuine 0 ("below the average") even
# though the answer is unknown. sma50 itself is not an exported column, so
# those rows would not be caught by the dropna() at export. They are therefore
# kept as <NA> in a nullable Int64 column, which dropna() does remove.
sma50 = SMAIndicator(close=df["Close"], window=50).sma_indicator()
above_sma50 = (df["Close"] > sma50).astype("Int64").mask(sma50.isna())
df["price_above_SMA50"] = above_sma50.shift(1)

# RSI_5, MACD and MACD_signal are exported columns and are NaN during their own
# warm-up, so rows where these comparisons are undefined are already dropped at
# export; a plain 0/1 int cast is sufficient here.
df["RSI_overbought"] = (df["RSI_5"] > 70).astype(int)
df["MACD_above_signal"] = (df["MACD"] > df["MACD_signal"]).astype(int)

In [9]:
# Target: 1 if the next row's close is higher than this row's close, else 0.
# The final row has no next close; `NaN > Close` is False, so a plain int cast
# would give it a fabricated label of 0. Rows without a next close are kept as
# <NA> (nullable Int64) so that dropna() at export removes them instead.
next_close = df["Close"].shift(-1)
df["target"] = (next_close > df["Close"]).astype("Int64").mask(next_close.isna())

In [10]:
# Final column selection (updated), grouped by the kind of feature
feature_cols = [
    # calendar
    "day_of_week", "month", "is_month_end",
    # price action and daily return of the row
    "price_diff", "pct_diff", "return_daily",
    # daily return lagged by 1 to 5 rows
    *(f"return_lag_{n}" for n in range(1, 6)),
    # moving averages and return dispersion
    "sma_5", "sma_10", "rolling_std_return_5",
    # momentum / trend indicators
    "RSI_5", "MACD", "MACD_signal",
    # Bollinger bands
    "bb_middle", "bb_upper", "bb_lower",
    # binary flags
    "volume_outlier", "price_above_SMA50",
    "RSI_overbought", "MACD_above_signal",
    # label column, listed here so it is selected together with the features
    "target",
]

# %%
# Clean and export
# Keep the date, the OHLC prices and the selected columns; drop every row that
# has a missing value in any of them, then write the result without the index.
df_features = (
    df.loc[:, ["Date", "Open", "High", "Low", "Close", *feature_cols]]
    .dropna()
    .copy()
)
df_features.to_csv(path_or_buf="../../data/processed/SPLV_clean.csv", index=False)
print("✅ Features listas y guardadas en data/processed/SPLV_clean.csv")


✅ Features listas y guardadas en data/processed/SPLV_clean.csv
