In [35]:
import pandas as pd
from pathlib import Path


data_dir = Path.cwd().parent.parent / "data"
df = pd.read_csv(data_dir / "dataset_final.csv")


TARGET = "BTC-Close_next_day"
X = df.drop(columns=[TARGET])
y = df[TARGET]

print("✅ Dataset cargado correctamente")
print("Shape:", X.shape)
print("Columnas:", list(X.columns[:10]), "...")


✅ Dataset cargado correctamente
Shape: (2044, 286)
Columnas: ['WTI-Close', 'WTI-Open', 'WTI-High', 'WTI-Low', 'WTI-Volume', 'BRENT-Close', 'BRENT-Open', 'BRENT-High', 'BRENT-Low', 'BRENT-Volume'] ...


In [36]:

x_last_row = X.head(1).copy()
y_last_row = y.head(1).copy()   

X = X.iloc[:-1]
y = y.iloc[:-1]

print("✅ Última fila separada para predicción.")
print("x_last_row shape:", x_last_row.shape)
print("y_last_row:", y_last_row.values)


✅ Última fila separada para predicción.
x_last_row shape: (1, 286)
y_last_row: [122391.]


In [37]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# DIVIDIMOS EL DATASET 70% TRAIN 15% TEST 15% VALIDATION
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, random_state=42, shuffle=False
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, shuffle=False
)

# PREPROCESAMIENTO
num_cols = [c for c in X_train.columns] #if c != "Date"
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols)
    ],
)

# PIPELINE
rf_pipeline = Pipeline(steps=[
    ("scaler", preprocessor),
    ("model", RandomForestRegressor(
        n_estimators=200,
        max_depth=None,
        random_state=42,
        n_jobs=-1
    ))
])

# ENTRENAMOS
rf_pipeline.fit(X_train, y_train)

# EVALUAMOS
y_val_pred = rf_pipeline.predict(X_val)
mae_val = mean_absolute_error(y_val, y_val_pred)
r2_val = r2_score(y_val, y_val_pred)
print("1- RANDOM FOREST VALIDATION RESULTS")
print(f"MAE: {mae_val:.4f}")
print(f"R2: {r2_val:.4f}")

y_test_pred = rf_pipeline.predict(X_test)
mae_test = mean_absolute_error(y_test, y_test_pred)
r2_test = r2_score(y_test, y_test_pred)
print("2- RANDOM FOREST TEST RESULTS")
print(f"MAE: {mae_test:.4f}")
print(f"R2: {r2_test:.4f}")

1- RANDOM FOREST VALIDATION RESULTS
MAE: 1665.9348
R2: 0.2455
2- RANDOM FOREST TEST RESULTS
MAE: 1262.4482
R2: 0.6873


In [38]:
X_train_scaled = rf_pipeline.named_steps["scaler"].transform(X_train)
scaled_feature_names = num_cols
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=scaled_feature_names)
X_train_scaled_df.head()


Unnamed: 0,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,...,BTC-High,BTC-Low,BTC-Close,BTC-Volume,Day_Of_Week,Month,Year,BTC_Close_MA7,BTC_Close_MA30,BTC_Close_diff1
0,-0.540182,-0.469279,-0.487439,-0.481914,0.543137,-0.535311,-0.465947,-0.484069,-0.484033,1.748444,...,2.553936,2.540481,2.564266,-0.595527,0.700612,1.071145,1.564922,2.548276,2.497087,0.04615
1,-0.468349,-0.431722,-0.468406,-0.4257,0.424855,-0.470506,-0.427787,-0.46248,-0.428149,1.26827,...,2.459378,2.507615,2.507445,-0.641543,-0.015525,1.071145,1.564922,2.519957,2.469025,-0.949484
2,-0.435747,-0.394166,-0.448286,-0.390285,0.405178,-0.383228,-0.358786,-0.40388,-0.33288,-1.96433,...,2.381275,2.36134,2.442891,-0.6376,-0.731662,0.771802,1.564922,2.489068,2.438417,-1.084977
3,-0.37607,-0.28757,-0.331914,-0.336882,0.570788,-0.333579,-0.254239,-0.290793,-0.296689,-1.252686,...,2.254324,2.316895,2.29118,-0.691457,-1.447799,0.771802,1.564922,2.435818,2.385651,-2.612182
4,-0.250638,-0.28039,-0.276447,-0.242442,0.502759,-0.220693,-0.252148,-0.2471,-0.212065,-1.230216,...,2.088174,2.195067,2.143906,-0.794401,1.416749,0.771802,1.564922,2.374508,2.324898,-2.534421


In [39]:
# === PREDICCIÓN DEL SIGUIENTE DÍA ===
y_predicted_next = rf_pipeline.predict(x_last_row)
print(f" Predicción del precio de BTC para el siguiente día: {y_predicted_next[0]:,.2f} USD")

# (opcional) Comparar con el valor real de esa última fila
print(f" Valor real anterior: {y_last_row.values[0]:,.2f} USD")
print(f" Diferencia: {abs(y_predicted_next[0] - y_last_row.values[0]):,.2f} USD")


 Predicción del precio de BTC para el siguiente día: 121,422.64 USD
 Valor real anterior: 122,391.00 USD
 Diferencia: 968.36 USD
