In [1]:
import requests
import json
import pandas as pd
import numpy as np
from datetime import datetime
import time
import numpy as np
import sklearn

In [2]:
def format_data(df, ticker):
    renamed_columns = {
    "Fecha": 'Date', 
    'Último': f'{ticker}-Close', 
    "Apertura": f'{ticker}-Open', 
    "Máximo": f"{ticker}-High", 
    "Mínimo": f"{ticker}-Low", 
    "Vol.": f"{ticker}-Volume", 
    "% var.": f"{ticker}-% var"
    }
    df = df.rename(columns=renamed_columns)
    df["Date"] = pd.to_datetime(df["Date"], format="%d.%m.%Y")
    return df


In [3]:
startTime = 1502928000000
limit = time.time() * 1000

OHLC_dataframes = []

while (startTime < limit):
    url = 'https://api.binance.com/api/v3/klines?symbol=BTCUSDT&interval=1d&limit=1000&startTime={startTime}'.format(startTime=startTime)
    res = requests.get(url)
    data = json.loads(res.text)
    df = pd.DataFrame(data, columns=[
        "Open time", "BTC-Open", "BTC-High", "BTC-Low", "BTC-Close", "BTC-Volume",
        "Close time", "Quote asset volume", "Number of trades",
        "Taker buy base asset volume", "Taker buy quote asset volume", "Ignore"
    ])
    df["Date"] = pd.to_datetime(df["Open time"], unit='ms') - pd.Timedelta(days=1)
    startTime = df['Close time'].iloc[-1]
    df = df[["Date", "BTC-Open", "BTC-High", "BTC-Low", "BTC-Close", "BTC-Volume"]]
    df[["BTC-Open", "BTC-High", "BTC-Low", "BTC-Close", "BTC-Volume"]] = df[["BTC-Open", "BTC-High", "BTC-Low", "BTC-Close", "BTC-Volume"]].astype(float)
    OHLC_dataframes.append(df)
    
OHLC_BTC = pd.concat(df for df in OHLC_dataframes)
OHLC_BTC

Unnamed: 0,Date,BTC-Open,BTC-High,BTC-Low,BTC-Close,BTC-Volume
0,2017-08-16,4261.48,4485.39,4200.74,4285.08,795.150377
1,2017-08-17,4285.08,4371.52,3938.77,4108.37,1199.888264
2,2017-08-18,4108.37,4184.69,3850.00,4139.98,381.309763
3,2017-08-19,4120.98,4211.08,4032.62,4086.29,467.083022
4,2017-08-20,4069.13,4119.62,3911.79,4016.00,691.743060
...,...,...,...,...,...,...
978,2025-10-11,110644.40,115770.00,109565.06,114958.80,32255.302720
979,2025-10-12,114958.81,115963.81,113616.50,115166.00,22557.240330
980,2025-10-13,115166.00,115409.96,109866.00,113028.14,31870.329740
981,2025-10-14,113028.13,113612.35,110164.00,110763.28,22986.488110


# OHLC del petróleo (WTI) desde la fecha más vieja en la que pudimos obtener datos del BTC
https://es.investing.com/commodities/crude-oil-historical-data

In [4]:
OHLC_WTI = pd.read_csv("../../data/WTI.csv")
OHLC_WTI = format_data(OHLC_WTI, "WTI")

for feature in OHLC_WTI.columns.values[1:-2]:
    OHLC_WTI[feature] = OHLC_WTI[feature].str.replace(",",".")
OHLC_WTI[["WTI-Close", "WTI-Open", "WTI-High", "WTI-Low"]] = OHLC_WTI[["WTI-Close", "WTI-Open", "WTI-High", "WTI-Low"]].astype(float)

hay_sin_k = OHLC_WTI["WTI-Volume"].str.endswith("K", na=False).all()
if hay_sin_k:
    print("✅ Todos los valores terminan en K")
else:
    print("⚠️ Hay valores que NO terminan en K")

OHLC_WTI.head(25)

⚠️ Hay valores que NO terminan en K


Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,WTI-% var
0,2025-10-16,57.62,58.75,59.1,57.48,"131,42K","-1,81%"
1,2025-10-15,58.68,58.6,59.42,58.2,"1,14K","-0,03%"
2,2025-10-14,58.7,59.58,59.82,57.68,"259,20K","-1,33%"
3,2025-10-13,59.49,59.0,60.17,59.0,"0,64K","-0,23%"
4,2025-10-12,59.63,59.43,59.82,59.23,"25,36K","1,24%"
5,2025-10-10,58.9,61.49,61.67,58.22,"339,10K","-4,24%"
6,2025-10-09,61.51,62.31,62.87,61.25,"259,17K","-1,66%"
7,2025-10-08,62.55,62.05,62.92,62.05,"273,32K","1,33%"
8,2025-10-07,61.73,61.73,62.11,60.72,"245,12K","0,06%"
9,2025-10-06,61.69,61.14,62.12,61.04,"224,68K","1,33%"


In [5]:
print("NaN en la columna WTI-Volume:")
print(OHLC_WTI["WTI-Volume"].isna().sum())
print(OHLC_WTI[OHLC_WTI["WTI-Volume"].isna()])
print("Valores en la columna WTI-Volume sin una K")
filtro = OHLC_WTI["WTI-Volume"].notna() & ~OHLC_WTI["WTI-Volume"].str.endswith("K", na=False)
print(filtro.sum())
print(OHLC_WTI[filtro])

NaN en la columna WTI-Volume:
88
           Date  WTI-Close  WTI-Open  WTI-High  WTI-Low WTI-Volume WTI-% var
35   2025-08-31      63.96     63.98     64.01    63.92        NaN    -0,08%
106  2025-05-25      61.93     61.73     62.15    61.58        NaN     0,65%
290  2024-09-02      73.78     73.00     74.39    72.89        NaN     1,10%
291  2024-09-01      72.98     73.33     73.42    72.97        NaN     0,45%
333  2024-07-04      83.94     83.61     84.20    83.03        NaN     1,11%
...         ...        ...       ...       ...      ...        ...       ...
2049 2018-01-01      60.24     60.26     60.28    60.15        NaN    -0,33%
2054 2017-12-25      58.59     58.41     58.62    58.38        NaN     0,09%
2076 2017-11-23      58.38     57.97     58.58    57.76        NaN     0,62%
2134 2017-09-04      47.41     47.31     47.66    47.16        NaN     0,19%
2135 2017-09-03      47.32     47.31     47.42    47.30        NaN     0,06%

[88 rows x 7 columns]
Valores en la column

In [6]:
def parse_value(x):
    if pd.isna(x):  # mantenemos los NaN por ahora
        return np.nan
    x = str(x).strip()
    
    factor = 1
    if x.endswith("K"):
        factor = 1000
        x = x[:-1]
    elif x.endswith("M"):
        factor = 1000000
        x = x[:-1]
    
    # reemplazamos coma decimal por punto
    x = x.replace(",", ".")
    
    try:
        return float(x) * factor
    except ValueError:
        return np.nan  # en caso de algún valor raro

OHLC_WTI["WTI-Volume"] = OHLC_WTI["WTI-Volume"].apply(parse_value)
OHLC_WTI[OHLC_WTI["WTI-Volume"] >= 1000000]

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,WTI-% var
1441,2020-04-21,11.57,21.32,22.58,6.5,2290000.0,"-43,37%"
1442,2020-04-20,20.43,24.76,24.92,20.19,1320000.0,"-18,38%"
1462,2020-03-20,22.63,25.59,28.49,22.39,1130000.0,"-12,66%"
1463,2020-03-19,25.91,22.82,28.28,21.77,1190000.0,"24,39%"
1464,2020-03-18,20.83,27.3,27.6,20.52,1000000.0,"-23,78%"
2133,2017-09-05,48.66,47.28,48.98,47.15,1030000.0,"2,64%"


In [7]:
OHLC_WTI.head(25)

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,WTI-% var
0,2025-10-16,57.62,58.75,59.1,57.48,131420.0,"-1,81%"
1,2025-10-15,58.68,58.6,59.42,58.2,1140.0,"-0,03%"
2,2025-10-14,58.7,59.58,59.82,57.68,259200.0,"-1,33%"
3,2025-10-13,59.49,59.0,60.17,59.0,640.0,"-0,23%"
4,2025-10-12,59.63,59.43,59.82,59.23,25360.0,"1,24%"
5,2025-10-10,58.9,61.49,61.67,58.22,339100.0,"-4,24%"
6,2025-10-09,61.51,62.31,62.87,61.25,259170.0,"-1,66%"
7,2025-10-08,62.55,62.05,62.92,62.05,273320.0,"1,33%"
8,2025-10-07,61.73,61.73,62.11,60.72,245120.0,"0,06%"
9,2025-10-06,61.69,61.14,62.12,61.04,224680.0,"1,33%"


# OHLC del petróleo (Brent) desde la fecha más vieja en la que pudimos obtener datos del BTC
https://es.investing.com/commodities/brent-oil-historical-data

In [8]:
OHLC_BRENT = pd.read_csv("../../data/BRENT.csv")
OHLC_BRENT = format_data(OHLC_BRENT, "BRENT")

for feature in OHLC_BRENT.columns.values[1:-2]:
    OHLC_BRENT[feature] = OHLC_BRENT[feature].str.replace(",",".")
OHLC_BRENT[["BRENT-Close", "BRENT-Open", "BRENT-High", "BRENT-Low"]] = OHLC_BRENT[["BRENT-Close", "BRENT-Open", "BRENT-High", "BRENT-Low"]].astype(float)

hay_sin_k = OHLC_BRENT["BRENT-Volume"].str.endswith("K", na=False).all()
if hay_sin_k:
    print("✅ Todos los valores terminan en K")
else:
    print("⚠️ Hay valores que NO terminan en K")

OHLC_BRENT.head(25)

⚠️ Hay valores que NO terminan en K


Unnamed: 0,Date,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,BRENT-% var
0,2025-10-16,61.19,62.44,62.74,61.07,"349,15K","-1,16%"
1,2025-10-15,61.91,62.33,63.04,61.79,"331,20K","-0,77%"
2,2025-10-14,62.39,63.49,63.63,61.5,"423,16K","-1,47%"
3,2025-10-13,63.32,62.9,63.95,62.9,"346,53K","0,94%"
4,2025-10-10,62.73,65.27,65.36,62.0,"547,99K","-3,82%"
5,2025-10-09,65.22,65.82,66.58,64.96,"345,80K","-1,55%"
6,2025-10-08,66.25,65.8,66.54,65.76,"325,86K","1,22%"
7,2025-10-07,65.45,65.48,65.84,64.53,"293,95K","-0,03%"
8,2025-10-06,65.47,64.9,65.77,64.76,"299,84K","1,46%"
9,2025-10-03,64.53,64.38,65.02,64.2,"311,95K","0,66%"


In [9]:
print("NaN en la columna BRENT-Volume:")
print(OHLC_BRENT["BRENT-Volume"].isna().sum())
print("Valores en la columna BRENT-Volume sin una K")
filtro = ~OHLC_BRENT["BRENT-Volume"].str.endswith("K", na=False)
print(filtro.sum())
print(OHLC_BRENT[filtro])

NaN en la columna BRENT-Volume:
0
Valores en la columna BRENT-Volume sin una K
1
         Date  BRENT-Close  BRENT-Open  BRENT-High  BRENT-Low BRENT-Volume  \
89 2025-06-13        74.23        70.5        78.5      70.41        1,24M   

   BRENT-% var  
89       7,02%  


In [10]:
OHLC_BRENT["BRENT-Volume"] = OHLC_BRENT["BRENT-Volume"].apply(parse_value)
OHLC_BRENT[OHLC_BRENT["BRENT-Volume"] >= 1000000]

Unnamed: 0,Date,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,BRENT-% var
89,2025-06-13,74.23,70.5,78.5,70.41,1240000.0,"7,02%"


In [11]:
OHLC_BRENT.head(25)

Unnamed: 0,Date,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,BRENT-% var
0,2025-10-16,61.19,62.44,62.74,61.07,349150.0,"-1,16%"
1,2025-10-15,61.91,62.33,63.04,61.79,331200.0,"-0,77%"
2,2025-10-14,62.39,63.49,63.63,61.5,423160.0,"-1,47%"
3,2025-10-13,63.32,62.9,63.95,62.9,346530.0,"0,94%"
4,2025-10-10,62.73,65.27,65.36,62.0,547990.0,"-3,82%"
5,2025-10-09,65.22,65.82,66.58,64.96,345800.0,"-1,55%"
6,2025-10-08,66.25,65.8,66.54,65.76,325860.0,"1,22%"
7,2025-10-07,65.45,65.48,65.84,64.53,293950.0,"-0,03%"
8,2025-10-06,65.47,64.9,65.77,64.76,299840.0,"1,46%"
9,2025-10-03,64.53,64.38,65.02,64.2,311950.0,"0,66%"


# OHLC del SPX (ticker del S&P 500) desde la fecha más vieja en la que pudimos obtener datos del BTC
https://es.investing.com/indices/us-spx-500-historical-data

In [12]:
OHLC_SPX = pd.read_csv("../../data/BRENT.csv")
OHLC_SPX = format_data(OHLC_SPX, "SPX")

OHLC_SPX = OHLC_SPX[['Date', 'SPX-Close', 'SPX-Open', 'SPX-High', 'SPX-Low', 'SPX-% var']]
for feature in OHLC_SPX.columns.values[1:-1]:
    OHLC_SPX[feature] = OHLC_SPX[feature].str.replace(".","")
    OHLC_SPX[feature] = OHLC_SPX[feature].str.replace(",",".")
OHLC_SPX

Unnamed: 0,Date,SPX-Close,SPX-Open,SPX-High,SPX-Low,SPX-% var
0,2025-10-16,61.19,62.44,62.74,61.07,"-1,16%"
1,2025-10-15,61.91,62.33,63.04,61.79,"-0,77%"
2,2025-10-14,62.39,63.49,63.63,61.50,"-1,47%"
3,2025-10-13,63.32,62.90,63.95,62.90,"0,94%"
4,2025-10-10,62.73,65.27,65.36,62.00,"-3,82%"
...,...,...,...,...,...,...
2105,2017-08-22,51.58,51.37,51.93,51.08,"0,59%"
2106,2017-08-21,51.28,52.63,52.67,50.96,"-2,16%"
2107,2017-08-18,52.41,50.61,52.64,50.54,"3,27%"
2108,2017-08-17,50.75,50.22,50.84,49.81,"1,30%"


In [13]:
OHLC_SPX[["SPX-Close", "SPX-Open", "SPX-High", "SPX-Low"]] = OHLC_SPX[["SPX-Close", "SPX-Open", "SPX-High", "SPX-Low"]].astype(float)
OHLC_SPX.dtypes


Date         datetime64[ns]
SPX-Close           float64
SPX-Open            float64
SPX-High            float64
SPX-Low             float64
SPX-% var            object
dtype: object

In [14]:
OHLC_SPX

Unnamed: 0,Date,SPX-Close,SPX-Open,SPX-High,SPX-Low,SPX-% var
0,2025-10-16,61.19,62.44,62.74,61.07,"-1,16%"
1,2025-10-15,61.91,62.33,63.04,61.79,"-0,77%"
2,2025-10-14,62.39,63.49,63.63,61.50,"-1,47%"
3,2025-10-13,63.32,62.90,63.95,62.90,"0,94%"
4,2025-10-10,62.73,65.27,65.36,62.00,"-3,82%"
...,...,...,...,...,...,...
2105,2017-08-22,51.58,51.37,51.93,51.08,"0,59%"
2106,2017-08-21,51.28,52.63,52.67,50.96,"-2,16%"
2107,2017-08-18,52.41,50.61,52.64,50.54,"3,27%"
2108,2017-08-17,50.75,50.22,50.84,49.81,"1,30%"


# OHLC del XAU/USD (precio del oro en USD) desde la fecha más vieja en la que pudimos obtener datos del BTC
https://es.investing.com/currencies/xau-usd-historical-data

In [15]:
OHLC_XAUUSD = pd.read_csv("../../data/XAU-USD.csv")
OHLC_XAUUSD = format_data(OHLC_XAUUSD, "XAUUSD")

OHLC_XAUUSD = OHLC_XAUUSD[['Date', 'XAUUSD-Close', 'XAUUSD-Open', 'XAUUSD-High', 'XAUUSD-Low', 'XAUUSD-% var']]
for feature in OHLC_XAUUSD.columns.values[1:-1]:
    OHLC_XAUUSD[feature] = OHLC_XAUUSD[feature].str.replace(".","")
    OHLC_XAUUSD[feature] = OHLC_XAUUSD[feature].str.replace(",",".")
OHLC_XAUUSD

Unnamed: 0,Date,XAUUSD-Close,XAUUSD-Open,XAUUSD-High,XAUUSD-Low,XAUUSD-% var
0,2025-10-16,4295.38,4209.43,4296.40,4203.68,"2,04%"
1,2025-10-15,4209.51,4144.40,4218.42,4140.45,"1,62%"
2,2025-10-14,4142.31,4108.98,4180.39,4090.49,"0,77%"
3,2025-10-13,4110.70,4018.79,4117.14,4008.80,"1,87%"
4,2025-10-12,4035.12,4014.43,4059.81,4008.75,"0,42%"
...,...,...,...,...,...,...
2120,2017-08-22,1284.72,1291.18,1292.69,1282.11,"-0,50%"
2121,2017-08-21,1291.22,1283.60,1293.58,1280.60,"0,52%"
2122,2017-08-18,1284.50,1288.20,1301.20,1283.64,"-0,27%"
2123,2017-08-17,1288.01,1283.61,1290.47,1282.20,"0,38%"


In [16]:
OHLC_XAUUSD[["XAUUSD-Close", "XAUUSD-Open", "XAUUSD-High", "XAUUSD-Low"]] = OHLC_XAUUSD[["XAUUSD-Close", "XAUUSD-Open", "XAUUSD-High", "XAUUSD-Low"]].astype(float)
OHLC_XAUUSD.dtypes

Date            datetime64[ns]
XAUUSD-Close           float64
XAUUSD-Open            float64
XAUUSD-High            float64
XAUUSD-Low             float64
XAUUSD-% var            object
dtype: object

In [17]:
OHLC_XAUUSD

Unnamed: 0,Date,XAUUSD-Close,XAUUSD-Open,XAUUSD-High,XAUUSD-Low,XAUUSD-% var
0,2025-10-16,4295.38,4209.43,4296.40,4203.68,"2,04%"
1,2025-10-15,4209.51,4144.40,4218.42,4140.45,"1,62%"
2,2025-10-14,4142.31,4108.98,4180.39,4090.49,"0,77%"
3,2025-10-13,4110.70,4018.79,4117.14,4008.80,"1,87%"
4,2025-10-12,4035.12,4014.43,4059.81,4008.75,"0,42%"
...,...,...,...,...,...,...
2120,2017-08-22,1284.72,1291.18,1292.69,1282.11,"-0,50%"
2121,2017-08-21,1291.22,1283.60,1293.58,1280.60,"0,52%"
2122,2017-08-18,1284.50,1288.20,1301.20,1283.64,"-0,27%"
2123,2017-08-17,1288.01,1283.61,1290.47,1282.20,"0,38%"


# Consumer Price Index for All Urban Consumers: All Items in U.S. City Average (CPIAUCSL) 
Buscamos el CPI para consumidores urbanos en lo que respecta a todos los artículos (de Estados Unidos), que es el índice de precios de una canasta de bienes y servicios pagados por consumidores urbanos, cuyos cambios porcentuales miden la tasa de inflación entre cualquier intervalo de tiempo.
Este índice tiene relación con el bitcoin porque si los precios de los bienes y servicios suben, el dinero pierde valor, entonces, si el IPC es muy alto, más personas pueden querer comprar BTC para asegurar su dinero y así su precio sube (esto no está garantizado).
Este índice incluye aproximadamente al 88% de la población estadounidense, y se basa en precios de comida, ropa, alojamiento, combustibles, tarifas de transporte, tarifas de servicios (agua, alcantarillado, impuestos de ventas).

https://fred.stlouisfed.org/series/CPIAUCSL

In [18]:
CPIAUCSL = pd.read_csv("../../data/CPIAUCSL to 8.2025.csv")
CPIAUCSL.dtypes

observation_date     object
CPIAUCSL            float64
dtype: object

In [19]:
CPIAUCSL["observation_date"] = pd.to_datetime(CPIAUCSL["observation_date"], format="%Y-%m-%d")
CPIAUCSL.dtypes

observation_date    datetime64[ns]
CPIAUCSL                   float64
dtype: object

In [20]:
CPIAUCSL = CPIAUCSL.rename(columns={'observation_date':'Date'})
CPIAUCSL

Unnamed: 0,Date,CPIAUCSL
0,2017-08-01,245.183
1,2017-09-01,246.435
2,2017-10-01,246.626
3,2017-11-01,247.284
4,2017-12-01,247.805
...,...,...
92,2025-04-01,320.321
93,2025-05-01,320.580
94,2025-06-01,321.500
95,2025-07-01,322.132


# CBOE Volatility Index (VIX)
El índice de volatilidad CBOE (VIX) mide la expectativa del mercado sobre la futura volatilidad del índice S&P 500. Cuando el VIX sube, por lo general indica que los inversores esperan una mayor turbulencia en el mercado, lo cual puede estar asociado a caídas significativas o incertidumbre sobre el mismo, mientras que si baja, se suele asociar a un período de estabilidad.
Decidimos incluirlo porque en principio el VIX y el valor del BTC se creía que eran totalmente independientes uno del otro, pero luego de un tiempo se han observado patrones en los que se parece encontrar una cierta correlación negativa entre ambos.

https://es.investing.com/indices/volatility-s-p-500

In [21]:
VIX = pd.read_csv("../../data/VIX.csv")
VIX.dtypes

Fecha        object
Último       object
Apertura     object
Máximo       object
Mínimo       object
Vol.        float64
% var.       object
dtype: object

In [22]:
VIX = format_data(VIX, "VIX")
VIX = VIX[["Date","VIX-Close","VIX-Open","VIX-High","VIX-Low","VIX-% var"]]
VIX

Unnamed: 0,Date,VIX-Close,VIX-Open,VIX-High,VIX-Low,VIX-% var
0,2025-10-16,2266,2049,2418,1985,"9,79%"
1,2025-10-15,2064,2002,2244,1911,"-0,82%"
2,2025-10-14,2081,2146,2294,1918,"9,35%"
3,2025-10-13,1903,1945,2077,1861,"-12,14%"
4,2025-10-10,2166,1636,2244,1623,"31,83%"
...,...,...,...,...,...,...
2087,2017-08-22,1135,1260,1294,1135,"-13,95%"
2088,2017-08-21,1319,1459,1474,1307,"-7,50%"
2089,2017-08-18,1426,1538,1604,1332,"-8,30%"
2090,2017-08-17,1555,1181,1577,1154,"32,45%"


In [23]:
for feature in VIX.columns.values[1:-1]:
    VIX[feature] = VIX[feature].str.replace(",",".")
VIX[["VIX-Close","VIX-Open","VIX-High","VIX-Low"]] = VIX[["VIX-Close","VIX-Open","VIX-High","VIX-Low"]].astype(float)
VIX.dtypes

Date         datetime64[ns]
VIX-Close           float64
VIX-Open            float64
VIX-High            float64
VIX-Low             float64
VIX-% var            object
dtype: object

In [24]:
VIX

Unnamed: 0,Date,VIX-Close,VIX-Open,VIX-High,VIX-Low,VIX-% var
0,2025-10-16,22.66,20.49,24.18,19.85,"9,79%"
1,2025-10-15,20.64,20.02,22.44,19.11,"-0,82%"
2,2025-10-14,20.81,21.46,22.94,19.18,"9,35%"
3,2025-10-13,19.03,19.45,20.77,18.61,"-12,14%"
4,2025-10-10,21.66,16.36,22.44,16.23,"31,83%"
...,...,...,...,...,...,...
2087,2017-08-22,11.35,12.60,12.94,11.35,"-13,95%"
2088,2017-08-21,13.19,14.59,14.74,13.07,"-7,50%"
2089,2017-08-18,14.26,15.38,16.04,13.32,"-8,30%"
2090,2017-08-17,15.55,11.81,15.77,11.54,"32,45%"


In [25]:
dataset=pd.merge(OHLC_WTI, OHLC_BRENT, on='Date', how='inner').merge(OHLC_SPX, on='Date', how='inner').merge(OHLC_XAUUSD, on='Date', how='inner').merge(VIX, on='Date', how='inner')

In [26]:
dataset.dtypes

Date            datetime64[ns]
WTI-Close              float64
WTI-Open               float64
WTI-High               float64
WTI-Low                float64
WTI-Volume             float64
WTI-% var               object
BRENT-Close            float64
BRENT-Open             float64
BRENT-High             float64
BRENT-Low              float64
BRENT-Volume           float64
BRENT-% var             object
SPX-Close              float64
SPX-Open               float64
SPX-High               float64
SPX-Low                float64
SPX-% var               object
XAUUSD-Close           float64
XAUUSD-Open            float64
XAUUSD-High            float64
XAUUSD-Low             float64
XAUUSD-% var            object
VIX-Close              float64
VIX-Open               float64
VIX-High               float64
VIX-Low                float64
VIX-% var               object
dtype: object

In [27]:
dataset
dataset = dataset.drop(columns=[col for col in dataset.columns if "% var" in col])
dataset = dataset.dropna()
dataset

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,...,SPX-High,SPX-Low,XAUUSD-Close,XAUUSD-Open,XAUUSD-High,XAUUSD-Low,VIX-Close,VIX-Open,VIX-High,VIX-Low
0,2025-10-16,57.62,58.75,59.10,57.48,131420.0,61.19,62.44,62.74,61.07,...,62.74,61.07,4295.38,4209.43,4296.40,4203.68,22.66,20.49,24.18,19.85
1,2025-10-15,58.68,58.60,59.42,58.20,1140.0,61.91,62.33,63.04,61.79,...,63.04,61.79,4209.51,4144.40,4218.42,4140.45,20.64,20.02,22.44,19.11
2,2025-10-14,58.70,59.58,59.82,57.68,259200.0,62.39,63.49,63.63,61.50,...,63.63,61.50,4142.31,4108.98,4180.39,4090.49,20.81,21.46,22.94,19.18
3,2025-10-13,59.49,59.00,60.17,59.00,640.0,63.32,62.90,63.95,62.90,...,63.95,62.90,4110.70,4018.79,4117.14,4008.80,19.03,19.45,20.77,18.61
4,2025-10-10,58.90,61.49,61.67,58.22,339100.0,62.73,65.27,65.36,62.00,...,65.36,62.00,4018.30,3977.58,4022.96,3946.80,21.66,16.36,22.44,16.23
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2075,2017-08-22,47.64,47.45,48.03,47.20,38400.0,51.58,51.37,51.93,51.08,...,51.93,51.08,1284.72,1291.18,1292.69,1282.11,11.35,12.60,12.94,11.35
2076,2017-08-21,47.37,48.72,48.75,47.03,129190.0,51.28,52.63,52.67,50.96,...,52.67,50.96,1291.22,1283.60,1293.58,1280.60,13.19,14.59,14.74,13.07
2077,2017-08-18,48.51,46.93,48.74,46.78,253070.0,52.41,50.61,52.64,50.54,...,52.64,50.54,1284.50,1288.20,1301.20,1283.64,14.26,15.38,16.04,13.32
2078,2017-08-17,47.09,46.80,47.19,46.46,608310.0,50.75,50.22,50.84,49.81,...,50.84,49.81,1288.01,1283.61,1290.47,1282.20,15.55,11.81,15.77,11.54


In [28]:
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(2, include_bias=False)
numeric_dataset = dataset.drop(columns=["Date"]).copy()
poly_array = poly.fit_transform(numeric_dataset)
poly_features = poly.get_feature_names_out(numeric_dataset.columns)

poly_df = pd.DataFrame(poly_array, columns=poly_features, index=numeric_dataset.index)

poly_df['Date'] = dataset['Date'].values

cols = ['Date'] + [c for c in poly_df.columns if c != 'Date']
poly_df = poly_df[cols]

poly_df

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,...,VIX-Close^2,VIX-Close VIX-Open,VIX-Close VIX-High,VIX-Close VIX-Low,VIX-Open^2,VIX-Open VIX-High,VIX-Open VIX-Low,VIX-High^2,VIX-High VIX-Low,VIX-Low^2
0,2025-10-16,57.62,58.75,59.10,57.48,131420.0,61.19,62.44,62.74,61.07,...,513.4756,464.3034,547.9188,449.8010,419.8401,495.4482,406.7265,584.6724,479.9730,394.0225
1,2025-10-15,58.68,58.60,59.42,58.20,1140.0,61.91,62.33,63.04,61.79,...,426.0096,413.2128,463.1616,394.4304,400.8004,449.2488,382.5822,503.5536,428.8284,365.1921
2,2025-10-14,58.70,59.58,59.82,57.68,259200.0,62.39,63.49,63.63,61.50,...,433.0561,446.5826,477.3814,399.1358,460.5316,492.2924,411.6028,526.2436,439.9892,367.8724
3,2025-10-13,59.49,59.00,60.17,59.00,640.0,63.32,62.90,63.95,62.90,...,362.1409,370.1335,395.2531,354.1483,378.3025,403.9765,361.9645,431.3929,386.5297,346.3321
4,2025-10-10,58.90,61.49,61.67,58.22,339100.0,62.73,65.27,65.36,62.00,...,469.1556,354.3576,486.0504,351.5418,267.6496,367.1184,265.5228,503.5536,364.2012,263.4129
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2075,2017-08-22,47.64,47.45,48.03,47.20,38400.0,51.58,51.37,51.93,51.08,...,128.8225,143.0100,146.8690,128.8225,158.7600,163.0440,143.0100,167.4436,146.8690,128.8225
2076,2017-08-21,47.37,48.72,48.75,47.03,129190.0,51.28,52.63,52.67,50.96,...,173.9761,192.4421,194.4206,172.3933,212.8681,215.0566,190.6913,217.2676,192.6518,170.8249
2077,2017-08-18,48.51,46.93,48.74,46.78,253070.0,52.41,50.61,52.64,50.54,...,203.3476,219.3188,228.7304,189.9432,236.5444,246.6952,204.8616,257.2816,213.6528,177.4224
2078,2017-08-17,47.09,46.80,47.19,46.46,608310.0,50.75,50.22,50.84,49.81,...,241.8025,183.6455,245.2235,179.4470,139.4761,186.2437,136.2874,248.6929,181.9858,133.1716


In [29]:
poly_df = pd.merge(poly_df, OHLC_BTC, on="Date", how="inner")
poly_df

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,...,VIX-Open VIX-High,VIX-Open VIX-Low,VIX-High^2,VIX-High VIX-Low,VIX-Low^2,BTC-Open,BTC-High,BTC-Low,BTC-Close,BTC-Volume
0,2025-10-15,58.68,58.60,59.42,58.20,1140.0,61.91,62.33,63.04,61.79,...,449.2488,382.5822,503.5536,428.8284,365.1921,110763.28,111982.45,107577.57,108790.12,24183.338040
1,2025-10-14,58.70,59.58,59.82,57.68,259200.0,62.39,63.49,63.63,61.50,...,492.2924,411.6028,526.2436,439.9892,367.8724,113028.13,113612.35,110164.00,110763.28,22986.488110
2,2025-10-13,59.49,59.00,60.17,59.00,640.0,63.32,62.90,63.95,62.90,...,403.9765,361.9645,431.3929,386.5297,346.3321,115166.00,115409.96,109866.00,113028.14,31870.329740
3,2025-10-10,58.90,61.49,61.67,58.22,339100.0,62.73,65.27,65.36,62.00,...,367.1184,265.5228,503.5536,364.2012,263.4129,112774.49,113322.39,109561.59,110644.40,35448.516520
4,2025-10-09,61.51,62.31,62.87,61.25,259170.0,65.22,65.82,66.58,64.96,...,281.6737,265.2006,298.2529,280.8102,264.3876,121662.41,122550.00,102000.00,112774.50,64171.939270
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2053,2017-08-22,47.64,47.45,48.03,47.20,38400.0,51.58,51.37,51.93,51.08,...,163.0440,143.0100,167.4436,146.8690,128.8225,4040.00,4265.80,4013.89,4114.01,1001.136565
2054,2017-08-21,47.37,48.72,48.75,47.03,129190.0,51.28,52.63,52.67,50.96,...,215.0566,190.6913,217.2676,192.6518,170.8249,4016.00,4104.82,3400.00,4040.00,966.684858
2055,2017-08-18,48.51,46.93,48.74,46.78,253070.0,52.41,50.61,52.64,50.54,...,246.6952,204.8616,257.2816,213.6528,177.4224,4108.37,4184.69,3850.00,4139.98,381.309763
2056,2017-08-17,47.09,46.80,47.19,46.46,608310.0,50.75,50.22,50.84,49.81,...,186.2437,136.2874,248.6929,181.9858,133.1716,4285.08,4371.52,3938.77,4108.37,1199.888264


In [30]:
# DEFINIMOS TARGET Y FEATURES

poly_df["Day_Of_Week"] = poly_df["Date"].dt.dayofweek     # 0=Lunes, 6=Domingo
poly_df["Month"] = poly_df["Date"].dt.month       
poly_df["Year"] = poly_df["Date"].dt.year
poly_df = poly_df.drop('Date', axis=1)
poly_df["BTC_Close_MA7"] = poly_df["BTC-Close"].rolling(window=7, min_periods=1).mean()
poly_df["BTC_Close_MA30"] = poly_df["BTC-Close"].rolling(window=30, min_periods=1).mean()
poly_df["BTC_Close_diff1"] = poly_df["BTC-Close"].diff(-1).fillna(0)

poly_df.head()

Unnamed: 0,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,...,BTC-High,BTC-Low,BTC-Close,BTC-Volume,Day_Of_Week,Month,Year,BTC_Close_MA7,BTC_Close_MA30,BTC_Close_diff1
0,58.68,58.6,59.42,58.2,1140.0,61.91,62.33,63.04,61.79,331200.0,...,111982.45,107577.57,108790.12,24183.33804,2,10,2025,108790.12,108790.12,-1973.16
1,58.7,59.58,59.82,57.68,259200.0,62.39,63.49,63.63,61.5,423160.0,...,113612.35,110164.0,110763.28,22986.48811,1,10,2025,109776.7,109776.7,-2264.86
2,59.49,59.0,60.17,59.0,640.0,63.32,62.9,63.95,62.9,346530.0,...,115409.96,109866.0,113028.14,31870.32974,0,10,2025,110860.513333,110860.513333,2383.74
3,58.9,61.49,61.67,58.22,339100.0,62.73,65.27,65.36,62.0,547990.0,...,113322.39,109561.59,110644.4,35448.51652,4,10,2025,110806.485,110806.485,-2130.1
4,61.51,62.31,62.87,61.25,259170.0,65.22,65.82,66.58,64.96,345800.0,...,122550.0,102000.0,112774.5,64171.93927,3,10,2025,111200.088,111200.088,-8887.9


In [31]:
X = poly_df.copy()
X

Unnamed: 0,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,...,BTC-High,BTC-Low,BTC-Close,BTC-Volume,Day_Of_Week,Month,Year,BTC_Close_MA7,BTC_Close_MA30,BTC_Close_diff1
0,58.68,58.60,59.42,58.20,1140.0,61.91,62.33,63.04,61.79,331200.0,...,111982.45,107577.57,108790.12,24183.338040,2,10,2025,108790.120000,108790.120000,-1973.16
1,58.70,59.58,59.82,57.68,259200.0,62.39,63.49,63.63,61.50,423160.0,...,113612.35,110164.00,110763.28,22986.488110,1,10,2025,109776.700000,109776.700000,-2264.86
2,59.49,59.00,60.17,59.00,640.0,63.32,62.90,63.95,62.90,346530.0,...,115409.96,109866.00,113028.14,31870.329740,0,10,2025,110860.513333,110860.513333,2383.74
3,58.90,61.49,61.67,58.22,339100.0,62.73,65.27,65.36,62.00,547990.0,...,113322.39,109561.59,110644.40,35448.516520,4,10,2025,110806.485000,110806.485000,-2130.10
4,61.51,62.31,62.87,61.25,259170.0,65.22,65.82,66.58,64.96,345800.0,...,122550.00,102000.00,112774.50,64171.939270,3,10,2025,111200.088000,111200.088000,-8887.90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2053,47.64,47.45,48.03,47.20,38400.0,51.58,51.37,51.93,51.08,221020.0,...,4265.80,4013.89,4114.01,1001.136565,1,8,2017,4416.521429,4163.520333,74.01
2054,47.37,48.72,48.75,47.03,129190.0,51.28,52.63,52.67,50.96,172770.0,...,4104.82,3400.00,4040.00,966.684858,0,8,2017,4318.680000,4157.900667,-99.98
2055,48.51,46.93,48.74,46.78,253070.0,52.41,50.61,52.64,50.54,186530.0,...,4184.69,3850.00,4139.98,381.309763,4,8,2017,4259.371429,4152.233333,31.61
2056,47.09,46.80,47.19,46.46,608310.0,50.75,50.22,50.84,49.81,231590.0,...,4371.52,3938.77,4108.37,1199.888264,3,8,2017,4190.927143,4143.228667,-176.71


In [32]:
X = X.iloc[1:]
X

Unnamed: 0,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,BRENT-Close,BRENT-Open,BRENT-High,BRENT-Low,BRENT-Volume,...,BTC-High,BTC-Low,BTC-Close,BTC-Volume,Day_Of_Week,Month,Year,BTC_Close_MA7,BTC_Close_MA30,BTC_Close_diff1
1,58.70,59.58,59.82,57.68,259200.0,62.39,63.49,63.63,61.50,423160.0,...,113612.35,110164.00,110763.28,22986.488110,1,10,2025,109776.700000,109776.700000,-2264.86
2,59.49,59.00,60.17,59.00,640.0,63.32,62.90,63.95,62.90,346530.0,...,115409.96,109866.00,113028.14,31870.329740,0,10,2025,110860.513333,110860.513333,2383.74
3,58.90,61.49,61.67,58.22,339100.0,62.73,65.27,65.36,62.00,547990.0,...,113322.39,109561.59,110644.40,35448.516520,4,10,2025,110806.485000,110806.485000,-2130.10
4,61.51,62.31,62.87,61.25,259170.0,65.22,65.82,66.58,64.96,345800.0,...,122550.00,102000.00,112774.50,64171.939270,3,10,2025,111200.088000,111200.088000,-8887.90
5,62.55,62.05,62.92,62.05,273320.0,66.25,65.80,66.54,65.76,325860.0,...,123762.94,119651.47,121662.40,21559.360070,2,10,2025,112943.806667,112943.806667,-1643.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2053,47.64,47.45,48.03,47.20,38400.0,51.58,51.37,51.93,51.08,221020.0,...,4265.80,4013.89,4114.01,1001.136565,1,8,2017,4416.521429,4163.520333,74.01
2054,47.37,48.72,48.75,47.03,129190.0,51.28,52.63,52.67,50.96,172770.0,...,4104.82,3400.00,4040.00,966.684858,0,8,2017,4318.680000,4157.900667,-99.98
2055,48.51,46.93,48.74,46.78,253070.0,52.41,50.61,52.64,50.54,186530.0,...,4184.69,3850.00,4139.98,381.309763,4,8,2017,4259.371429,4152.233333,31.61
2056,47.09,46.80,47.19,46.46,608310.0,50.75,50.22,50.84,49.81,231590.0,...,4371.52,3938.77,4108.37,1199.888264,3,8,2017,4190.927143,4143.228667,-176.71


In [33]:
from pathlib import Path
data_dir = Path.cwd().parent.parent / "data"
X.to_csv(data_dir / "dataset_final.csv", index=False)
print("✅ Dataset procesado guardado en:", data_dir / "dataset_final.csv")


✅ Dataset procesado guardado en: D:\IA\ML_Bitcoin\data\dataset_final.csv
