In [16]:
import pandas as pd
from pathlib import Path

# === LOAD THE PROCESSED DATASET ===
# The data folder is looked up two levels above the current working
# directory; adjust this if the project layout changes.
project_root = Path.cwd().parent.parent
data_dir = project_root / "data"
csv_path = data_dir / "dataset_final.csv"
df = pd.read_csv(csv_path)

# === FEATURES / TARGET ===
# y is the single target column; X is every other column of the frame.
TARGET = "BTC-Close_next_day"
y = df[TARGET]
X = df.drop(columns=TARGET)

# Quick sanity report: feature-matrix shape and the first ten column names.
preview_cols = X.columns[:10].tolist()
print("‚úÖ Dataset cargado correctamente")
print(f"Shape: {X.shape}")
print(f"Columnas: {preview_cols} ...")



✅ Dataset cargado correctamente
Shape: (2044, 286)
Columnas: ['WTI-Close', 'WTI-Open', 'WTI-High', 'WTI-Low', 'WTI-Volume', 'BRENT-Close', 'BRENT-Open', 'BRENT-High', 'BRENT-Low', 'BRENT-Volume'] ...


In [17]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# --- Split: 70% train, 15% validation, 15% test ---
# shuffle=False keeps the original row order, so validation and test are the
# trailing slices of the dataset (assumes the rows are already sorted by
# date -- TODO confirm). random_state is intentionally not passed:
# scikit-learn only uses it to seed the shuffle, so it has no effect when
# shuffle=False and only suggested randomness that was never there.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, shuffle=False
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, shuffle=False
)

# --- Preprocessing ---
# Every feature column goes through StandardScaler (assumes all columns are
# numeric -- TODO confirm). Because the scaler lives inside the pipeline it is
# fitted on the training rows only.
num_cols = list(X_train.columns)
preprocessor = ColumnTransformer([
    ("num", StandardScaler(), num_cols)
])

# --- Pipeline with Random Forest ---
rf_pipeline = Pipeline(steps=[
    ("scaler", preprocessor),
    ("model", RandomForestRegressor(
        n_estimators=200,
        max_depth=None,   # no explicit depth limit
        random_state=42,  # fixed seed so repeated runs build the same forest
        n_jobs=-1         # use all available cores
    ))
])

# --- Training ---
rf_pipeline.fit(X_train, y_train)

# --- Evaluation ---
y_val_pred  = rf_pipeline.predict(X_val)
y_test_pred = rf_pipeline.predict(X_test)


def _report_metrics(header, y_true, y_pred):
    """Print a header line followed by MAE and R-squared for one data split.

    Parameters
    ----------
    header : str
        Line printed before the metrics (printed exactly as given).
    y_true : array-like
        Observed target values for the split.
    y_pred : array-like
        Model predictions aligned with ``y_true``.
    """
    print(header)
    print(f"MAE: {mean_absolute_error(y_true, y_pred):.4f}")
    print(f"R¬≤ : {r2_score(y_true, y_pred):.4f}")


_report_metrics("üß™ VALIDATION SET RESULTS", y_val, y_val_pred)
_report_metrics("\nüßæ TEST SET RESULTS", y_test, y_test_pred)


🧪 VALIDATION SET RESULTS
MAE: 1664.4746
R² : 0.2449

🧾 TEST SET RESULTS
MAE: 1270.1432
R² : 0.6845


In [18]:
# === NEXT-DAY PREDICTION (cannot be evaluated yet) ===
# Feed the most recent feature row to the fitted pipeline.
# NOTE(review): with the unshuffled split used for training, this row is also
# the last row of X_test -- confirm it really represents a day whose target
# is still unknown.
x_last_row = X.iloc[-1:].copy()
next_day_preds = rf_pipeline.predict(x_last_row)
y_next_pred = next_day_preds[0]
print(f"üîÆ Predicci√≥n del cierre de BTC para el pr√≥ximo d√≠a: {y_next_pred:,.2f} USD")


🔮 Predicción del cierre de BTC para el próximo día: 7,688.36 USD


In [19]:
# === SPOT CHECK: first row of the TEST split ===
# Compare the model's prediction with the observed target for the first
# test row and report the absolute error.
x_first_test = X_test.iloc[[0]]   # double brackets keep a one-row DataFrame
y_true_first = y_test.iloc[0]
y_pred_first = rf_pipeline.predict(x_first_test)[0]
abs_error_first = abs(y_true_first - y_pred_first)

# X_test is a pandas object (the .iloc access above already requires it), so
# it always has an index; the former hasattr() fallback could never run.
# The value printed is the row's index label, which is only a date if the
# frame is date-indexed.
print("üìÖ Primera fecha en TEST (√≠ndice):", X_test.index[0])
print(f"Real : {y_true_first:,.2f}")
print(f"Pred : {y_pred_first:,.2f}")
print(f"Error: {abs_error_first:,.2f} USD")


📅 Primera fecha en TEST (índice): 1737
Real : 6,387.09
Pred : 7,705.45
Error: 1,318.36 USD
