In [1]:
import pandas as pd
from pathlib import Path


# Name of the supervised target column that is derived below.
TARGET = "BTC-Close_next_day"

# The CSV is looked up in a "data" folder two levels above the current
# working directory, so the result depends on where the kernel was started.
data_dir = Path.cwd().parent.parent / "data"
df = pd.read_csv(data_dir / "dataset_final.csv")

# shift(1) gives every row the BTC-Close value of the row directly above it.
# NOTE(review): that is only the *next* day's close if the file is sorted
# newest-first -- TODO confirm the row order of dataset_final.csv.
df[TARGET] = df["BTC-Close"].shift(periods=1)

# The very first row has no row above it, hence no target; discard it.
df = df.dropna(subset=[TARGET])

# Features are every column except the target (BTC-Close itself stays in X).
y = df[TARGET]
X = df.drop(columns=[TARGET])

print("✅ Dataset cargado correctamente")
print("Shape:", X.shape)
print("Columnas:", list(X.columns[:10]), "...")


✅ Dataset cargado correctamente
Shape: (2056, 286)
Columnas: ['WTI-Close', 'WTI-Open', 'WTI-High', 'WTI-Low', 'WTI-Volume', 'BRENT-Close', 'BRENT-Open', 'BRENT-High', 'BRENT-Low', 'BRENT-Volume'] ...


In [2]:

# Hold out the first row of the prepared dataset for the single-row
# prediction demo; every remaining row goes on to the train/val/test split.
#
# Both pieces are re-derived from `df` rather than slicing `X`/`y` in place:
# the previous `X = X.iloc[1:]` form dropped one more row (and changed the
# held-out sample) every time this cell was re-run. On a fresh kernel the
# result is identical to the old code.
#
# NOTE(review): the variables are called "last row" but take the FIRST row;
# presumably the data is sorted newest-first so this is the most recent
# sample -- TODO confirm.
features_all = df.drop(columns=[TARGET])
target_all = df[TARGET]

x_last_row = features_all.iloc[:1].copy()
y_last_row = target_all.iloc[:1].copy()

X = features_all.iloc[1:]
y = target_all.iloc[1:]

print("✅ Última fila separada para predicción.")
print("x_last_row shape:", x_last_row.shape)
print("y_last_row:", y_last_row.values)


✅ Última fila separada para predicción.
x_last_row shape: (1, 286)
y_last_row: [110763.28]


In [3]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Split the data 70% train / 15% validation / 15% test.
# shuffle=False keeps the rows in file order, so the three parts are
# contiguous slices (first 70%, next 15%, last 15%); random_state is passed
# through unchanged but should have no effect without shuffling.
# NOTE(review): if the rows are sorted newest-first (which the shift(1)
# target construction would imply), the model is trained on the most recent
# period and evaluated on older data -- TODO confirm this is intended.
split_options = dict(random_state=42, shuffle=False)
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.3, **split_options
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, **split_options
)

# Preprocessing: every feature column is standardised.
num_cols = list(X_train.columns)
preprocessor = ColumnTransformer(
    transformers=[("num", StandardScaler(), num_cols)],
)

# Pipeline: preprocessing step (registered as "scaler") followed by the forest.
forest = RandomForestRegressor(
    n_estimators=200,
    max_depth=None,
    random_state=42,
    n_jobs=-1,
)
rf_pipeline = Pipeline(steps=[("scaler", preprocessor), ("model", forest)])

# Fit on the training slice only.
rf_pipeline.fit(X_train, y_train)


def _evaluate_split(model, features, target):
    """Predict on one split and return (predictions, MAE, R2)."""
    predictions = model.predict(features)
    mae = mean_absolute_error(target, predictions)
    r2 = r2_score(target, predictions)
    return predictions, mae, r2


# Evaluate on the validation slice, then on the test slice.
y_val_pred, mae_val, r2_val = _evaluate_split(rf_pipeline, X_val, y_val)
print("1- RANDOM FOREST VALIDATION RESULTS")
print(f"MAE: {mae_val:.4f}")
print(f"R2: {r2_val:.4f}")

y_test_pred, mae_test, r2_test = _evaluate_split(rf_pipeline, X_test, y_test)
print("2- RANDOM FOREST TEST RESULTS")
print(f"MAE: {mae_test:.4f}")
print(f"R2: {r2_test:.4f}")

1- RANDOM FOREST VALIDATION RESULTS
MAE: 1644.1328
R2: 0.2528
2- RANDOM FOREST TEST RESULTS
MAE: 1249.7137
R2: 0.6975


In [4]:
# Push the training features through the already-fitted preprocessing step
# of the pipeline to inspect what the model actually receives.
fitted_preprocessor = rf_pipeline.named_steps["scaler"]
X_train_scaled = fitted_preprocessor.transform(X_train)

# The preprocessor was configured with exactly `num_cols`, so those names are
# reused as column labels. No index is passed, so the preview frame gets a
# fresh default index rather than X_train's original one.
scaled_feature_names = num_cols
X_train_scaled_df = pd.DataFrame(
    data=X_train_scaled,
    columns=scaled_feature_names,
)
X_train_scaled_df.head()


Unnamed: 0,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,...,BTC-High,BTC-Low,BTC-Close,BTC-Volume,Day_Of_Week,Month,Year,BTC_Close_MA7,BTC_Close_MA30,BTC_Close_diff1
0,-0.628797,-0.486275,-0.535688,-0.605937,0.899381,-0.608021,-0.475555,-0.524873,-0.591153,2.290662,...,2.153254,2.160175,2.126101,-0.467742,1.41254,1.066235,1.543364,2.121334,2.076961,-1.276565
1,-0.484141,-0.440846,-0.470236,-0.435083,0.313807,-0.477534,-0.446725,-0.461991,-0.433174,0.663079,...,2.4524,1.907467,2.196139,-0.15745,0.697317,1.066235,1.543364,2.134257,2.08978,-5.197063
2,-0.426501,-0.45525,-0.467509,-0.389973,0.417471,-0.423557,-0.447774,-0.464053,-0.390477,0.502567,...,2.491722,2.497378,2.488376,-0.617784,-0.017905,1.066235,1.543364,2.191507,2.146573,-0.994325
3,-0.471948,-0.472979,-0.511689,-0.464969,0.210876,-0.465481,-0.464548,-0.500133,-0.456124,0.245698,...,2.505802,2.544656,2.542418,-0.666901,-0.733128,1.066235,1.543364,2.240108,2.194787,1.103853
4,-0.474165,-0.505665,-0.511144,-0.446925,0.06113,-0.464433,-0.49495,-0.503741,-0.443848,0.293112,...,2.53591,2.52824,2.477544,-0.616977,-1.448351,1.066235,1.543364,2.298938,2.222915,-0.654622


In [5]:
# === NEXT-DAY PREDICTION ===
# Run the fitted pipeline on the single held-out row.
y_predicted_next = rf_pipeline.predict(x_last_row)
predicted_price = y_predicted_next[0]
print(f" Predicción del precio de BTC para el siguiente día: {predicted_price:,.2f} USD")

# (optional) Compare against the known target value of that held-out row.
actual_price = y_last_row.values[0]
print(f" Valor real anterior: {actual_price:,.2f} USD")
print(f" Diferencia: {abs(predicted_price - actual_price):,.2f} USD")


 Predicción del precio de BTC para el siguiente día: 112,740.83 USD
 Valor real anterior: 110,763.28 USD
 Diferencia: 1,977.55 USD
