In [None]:
import requests
import json
import pandas as pd
import numpy as np
from datetime import datetime
import time
import numpy as np
import sklearn

In [None]:
def format_data(df, ticker):
    """Rename Spanish OHLC column headers to English and parse the date column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame using the Spanish headers "Fecha", "Último", "Apertura",
        "Máximo", "Mínimo", "Vol." and "% var.". Headers that are absent are
        simply not renamed, but a "Fecha" (or already-renamed "Date") column
        must exist or a KeyError is raised.
    ticker : str
        Prefix for every renamed column except the date, e.g. "WTI" gives
        "WTI-Close", "WTI-Open", ...

    Returns
    -------
    pandas.DataFrame
        A renamed copy whose "Date" column has been parsed from
        day.month.year text ("%d.%m.%Y") into datetimes.
    """
    # Spanish header -> English suffix that follows the ticker prefix.
    suffix_by_header = {
        'Último': 'Close',
        "Apertura": 'Open',
        "Máximo": "High",
        "Mínimo": "Low",
        "Vol.": "Volume",
        "% var.": "% var",
    }
    column_map = {"Fecha": 'Date'}
    for header, suffix in suffix_by_header.items():
        column_map[header] = f"{ticker}-{suffix}"

    renamed = df.rename(columns=column_map)
    renamed["Date"] = pd.to_datetime(renamed["Date"], format="%d.%m.%Y")
    return renamed


In [None]:
# Download the daily BTCUSDT candle history from the Binance klines endpoint,
# one page (up to 1000 candles) per request, and stack the pages into a single
# frame.
startTime = 1502928000000  # first requested open time, ms since epoch (2017-08-17 00:00 UTC)
limit = time.time() * 1000  # "now" in ms; paging stops once startTime passes it

url = 'https://api.binance.com/api/v3/klines'
btc_value_columns = ["BTC-Open", "BTC-High", "BTC-Low", "BTC-Close", "BTC-Volume"]

OHLC_dataframes = []

while startTime < limit:
    res = requests.get(
        url,
        params={
            "symbol": "BTCUSDT",
            "interval": "1d",
            "limit": 1000,
            "startTime": int(startTime),
        },
        timeout=30,  # never hang the notebook on a stalled connection
    )
    # Fail loudly on HTTP errors (rate limiting, bad request) instead of trying
    # to build a DataFrame out of an error payload.
    res.raise_for_status()
    data = res.json()
    if not data:
        # No more candles: without this guard iloc[-1] below raises IndexError.
        break

    df = pd.DataFrame(data, columns=[
        "Open time", "BTC-Open", "BTC-High", "BTC-Low", "BTC-Close", "BTC-Volume",
        "Close time", "Quote asset volume", "Number of trades",
        "Taker buy base asset volume", "Taker buy quote asset volume", "Ignore"
    ])
    # NOTE(review): each candle is dated one day before its open time —
    # presumably to align with the other series; confirm this is intended.
    df["Date"] = pd.to_datetime(df["Open time"], unit='ms') - pd.Timedelta(days=1)
    # The next page starts at the close time of the last candle received.
    startTime = int(df['Close time'].iloc[-1])
    # Select and cast in one step so we never assign into a slice of the raw
    # frame (the original triggered SettingWithCopyWarning here).
    df = df[["Date"] + btc_value_columns].astype(
        {column: float for column in btc_value_columns}
    )
    OHLC_dataframes.append(df)

# ignore_index avoids repeating row labels 0..999 once per downloaded page.
OHLC_BTC = pd.concat(OHLC_dataframes, ignore_index=True)
OHLC_BTC

# OHLC del petróleo (WTI) desde la fecha más antigua para la que pudimos obtener datos de BTC
Fuente: https://es.investing.com/commodities/crude-oil-historical-data

In [None]:
# Load the WTI history from CSV and give it the notebook's English column names.
OHLC_WTI = pd.read_csv("CSVs/WTI.csv")
OHLC_WTI = format_data(OHLC_WTI, "WTI")

# Price columns hold text with a decimal comma ("61,43"): swap in a decimal
# point and cast to float. The columns are listed by name rather than sliced by
# position, so a reordered or extended CSV cannot silently convert the wrong ones.
wti_price_columns = ["WTI-Close", "WTI-Open", "WTI-High", "WTI-Low"]
for feature in wti_price_columns:
    OHLC_WTI[feature] = OHLC_WTI[feature].str.replace(",", ".", regex=False)
OHLC_WTI[wti_price_columns] = OHLC_WTI[wti_price_columns].astype(float)

# True only when every volume entry ends in "K"; missing values count as
# "does not end in K" because of na=False.
todos_terminan_en_k = OHLC_WTI["WTI-Volume"].str.endswith("K", na=False).all()
if todos_terminan_en_k:
    print("✅ Todos los valores terminan en K")
else:
    print("⚠️ Hay valores que NO terminan en K")

OHLC_WTI.head(25)

⚠️ Hay valores que NO terminan en K


Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,WTI-% var
0,2025-10-05,61.43,61.46,61.79,61.36,"6,92K","0,90%"
1,2025-10-03,60.88,60.7,61.38,60.55,"209,16K","0,66%"
2,2025-10-02,60.48,61.78,62.54,60.4,"290,51K","-2,10%"
3,2025-10-01,61.78,62.46,62.89,61.4,"274,34K","-0,95%"
4,2025-09-30,62.37,63.14,63.26,62.03,"271,65K","-1,70%"
5,2025-09-29,63.45,65.07,65.4,62.98,"294,29K","-2,59%"
6,2025-09-28,65.14,65.0,65.18,64.88,"8,23K","-0,88%"
7,2025-09-26,65.72,65.2,66.42,64.66,"284,99K","1,14%"
8,2025-09-25,64.98,64.8,65.34,64.06,"258,35K","-0,02%"
9,2025-09-24,64.99,63.64,65.05,63.25,"282,72K","2,49%"


In [None]:
# Inspect the raw volume column: count and show the missing entries, then the
# non-missing entries whose text does not end in "K".
volumen = OHLC_WTI["WTI-Volume"]
volumen_nulo = volumen.isna()

print("NaN en la columna WTI-Volume:")
print(volumen_nulo.sum())
print(OHLC_WTI[volumen_nulo])

print("Valores en la columna WTI-Volume sin una K")
# Present values only; na=False keeps missing entries out of the suffix test.
filtro = ~volumen_nulo & ~volumen.str.endswith("K", na=False)
print(filtro.sum())
print(OHLC_WTI[filtro])

NaN en la columna WTI-Volume:
88
           Date  WTI-Close  WTI-Open  WTI-High  WTI-Low WTI-Volume WTI-% var
27   2025-08-31      63.96     63.98     64.01    63.92        NaN    -0,08%
98   2025-05-25      61.93     61.73     62.15    61.58        NaN     0,65%
282  2024-09-02      73.78     73.00     74.39    72.89        NaN     1,10%
283  2024-09-01      72.98     73.33     73.42    72.97        NaN     0,45%
325  2024-07-04      83.94     83.61     84.20    83.03        NaN     1,11%
...         ...        ...       ...       ...      ...        ...       ...
2041 2018-01-01      60.24     60.26     60.28    60.15        NaN    -0,33%
2046 2017-12-25      58.59     58.41     58.62    58.38        NaN     0,09%
2068 2017-11-23      58.38     57.97     58.58    57.76        NaN     0,62%
2126 2017-09-04      47.41     47.31     47.66    47.16        NaN     0,19%
2127 2017-09-03      47.32     47.31     47.42    47.30        NaN     0,06%

[88 rows x 7 columns]
Valores en la column

In [None]:
def parse_value(x):
    """Convert a volume string such as "6,92K" or "1,03M" into a float.

    A trailing "K" multiplies the number by 1000 and a trailing "M" by
    1000000; any other text is parsed as-is. A decimal comma is accepted.

    Parameters
    ----------
    x : object
        Raw cell value; non-strings are converted with ``str``.

    Returns
    -------
    float
        The numeric value, or NaN when ``x`` is missing or cannot be parsed.
    """
    # Missing values are passed through unchanged for now.
    if pd.isna(x):
        return np.nan

    multipliers = {"K": 1000, "M": 1000000}

    text = str(x).strip()
    suffix = text[-1:]
    if suffix in multipliers:
        factor = multipliers[suffix]
        text = text[:-1]
    else:
        factor = 1

    # Decimal comma -> decimal point before handing the text to float().
    text = text.replace(",", ".")

    try:
        return float(text) * factor
    except ValueError:
        # Anything that still is not a number is treated as missing.
        return np.nan

# Replace the textual volumes with their numeric equivalents, then show the
# rows whose volume is at least one million.
volumen_numerico = OHLC_WTI["WTI-Volume"].apply(parse_value)
OHLC_WTI["WTI-Volume"] = volumen_numerico
OHLC_WTI.loc[volumen_numerico >= 1_000_000]

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,WTI-% var
1433,2020-04-21,11.57,21.32,22.58,6.5,2290000.0,"-43,37%"
1434,2020-04-20,20.43,24.76,24.92,20.19,1320000.0,"-18,38%"
1454,2020-03-20,22.63,25.59,28.49,22.39,1130000.0,"-12,66%"
1455,2020-03-19,25.91,22.82,28.28,21.77,1190000.0,"24,39%"
1456,2020-03-18,20.83,27.3,27.6,20.52,1000000.0,"-23,78%"
2125,2017-09-05,48.66,47.28,48.98,47.15,1030000.0,"2,64%"


In [None]:
# Preview the first 25 rows of the WTI frame.
OHLC_WTI.head(n=25)

Unnamed: 0,Date,WTI-Close,WTI-Open,WTI-High,WTI-Low,WTI-Volume,WTI-% var
0,2025-10-05,61.43,61.46,61.79,61.36,6920.0,"0,90%"
1,2025-10-03,60.88,60.7,61.38,60.55,209160.0,"0,66%"
2,2025-10-02,60.48,61.78,62.54,60.4,290510.0,"-2,10%"
3,2025-10-01,61.78,62.46,62.89,61.4,274340.0,"-0,95%"
4,2025-09-30,62.37,63.14,63.26,62.03,271650.0,"-1,70%"
5,2025-09-29,63.45,65.07,65.4,62.98,294290.0,"-2,59%"
6,2025-09-28,65.14,65.0,65.18,64.88,8230.0,"-0,88%"
7,2025-09-26,65.72,65.2,66.42,64.66,284990.0,"1,14%"
8,2025-09-25,64.98,64.8,65.34,64.06,258350.0,"-0,02%"
9,2025-09-24,64.99,63.64,65.05,63.25,282720.0,"2,49%"
