In [18]:
import pandas as pd
from datetime import datetime, timedelta
import os
import asyncio
from binance.client import AsyncClient, HistoricalKlinesType


In [2]:
client = await AsyncClient.create()

<h1>DOWNLOADER F()</h1>

In [3]:
async def get_batch_historical_candles(symbols, days, interval = "1m", klines_type=HistoricalKlinesType.SPOT, limit_per_second = 2):
  new_dir = "Binance_Historical_%s_%s_%i_days_%s" % (interval, klines_type.name, days, str(datetime.utcnow().replace(microsecond=0).isoformat()))
  os.mkdir(new_dir)
  now = datetime.utcnow()
  start = str(now - timedelta(days = days))
  
  chunks = []
  for idx, _ in enumerate(symbols):
    if idx % limit_per_second == 0:
      symbols_chunk = symbols[idx:idx+limit_per_second]
      futures_chunk = []
      
      for symbol in symbols_chunk:
        futures_chunk.append(client.get_historical_klines(symbol = symbol, interval = interval,
                                      start_str = start, end_str = None, limit = 1000, klines_type=klines_type))
        
      chunks.append(futures_chunk)
      
  for index, chunk in enumerate(chunks):
    print("Fetching data for ↓ %s %s" % (klines_type.name, interval), "%i / %i" % (index, len(chunks)))
    results = await asyncio.gather(*chunk)
    
    for i, bars in enumerate(results):
      print(symbols[index*limit_per_second+i])
      try:
        df = bars_to_df(bars)
        df.to_csv("%s/%s_%s_%s.csv" % (new_dir, symbols[index*limit_per_second+i], klines_type.name, interval))
      except:
        print("raw: ", bars)
        print("Couldn't construct DataFrame from raw data for %s" % symbols[index*limit_per_second+i])
    
def bars_to_df(bars):
  df = pd.DataFrame(bars)
  df["Date"] = pd.to_datetime(df.iloc[:,0], unit = "ms")
  df.columns = ["Open Time", "Open", "High", "Low", "Close", "Volume",
                "Close Time", "Quote Asset Volume", "Number of Trades",
                "Taker Buy Base Asset Volume", "Taker Buy Quote Asset Volume", "Ignore", "Date"]
  df = df[["Date", "Open", "High", "Low", "Close", "Volume"]].copy()
  df.set_index("Date", inplace = True)
  for column in df.columns:
      df[column] = pd.to_numeric(df[column], errors = "coerce")
      
  return df
      

In [4]:
futures_info = await client.futures_exchange_info()
spot_info = await client.get_exchange_info()

In [5]:
symbols_futures = list(map(lambda x: x['symbol'], futures_info['symbols']))
symbols_spot = list(map(lambda x: x['symbol'], spot_info['symbols']))


In [None]:
await get_batch_historical_candles(symbols=symbols_futures, days=9, interval="1m", limit_per_second=6, klines_type=HistoricalKlinesType.FUTURES)
await get_batch_historical_candles(symbols=symbols_spot, days=9, interval="1m", limit_per_second=6, klines_type=HistoricalKlinesType.SPOT)


- downloaded data is placed in new directory in the root, so it can be easily cleaned
- target data sets that we want to preserve should be manually moved to `./raw_data` dir

<h1>Raw Data Processing </h1>

<h3>Merging time series</h3>
<table>
<tr>
<th>[BTCUSDT]</th>
<th>[ETHUSDT]</th>
</tr>
<tr>
<td>[Close]</td>
<td>[Close]</td>
</tr>
<table>

In [2]:
import os

In [5]:
directories = ["Binance_Historical_1m_FUTURES_9_days_2022-06-06T13:56:51", "Binance_Historical_1m_SPOT_9_days_2022-06-06T14:02:09"]

In [36]:
df_closings = pd.DataFrame()
with os.scandir('raw_data/%s' % directories[0]) as entries:
    for idx, entry in enumerate(entries):
        instrument = "_".join(entry.name.split("_")[0:2])
        df = pd.read_csv('raw_data/%s/%s' % (directories[0], entry.name), index_col="Date")
        df = df[["Close"]].copy()
        df.columns = [instrument]
        df_closings = pd.concat([df_closings, df], axis=1)
        
df_closings.index = pd.to_datetime(df_closings.index)       

In [38]:
df_closings.loc["2022-05-29":"2022-06-02"]

Unnamed: 0_level_0,DEFIUSDT_FUTURES,JASMYUSDT_FUTURES,SRMUSDT_FUTURES,BATUSDT_FUTURES,BTCSTUSDT_FUTURES,VETUSDT_FUTURES,ATAUSDT_FUTURES,RLCUSDT_FUTURES,AVAXUSDT_FUTURES,ADAUSDT_FUTURES,...,KAVAUSDT_FUTURES,MTLUSDT_FUTURES,GALBUSD_FUTURES,WOOUSDT_FUTURES,DYDXUSDT_FUTURES,ETHUSDT_FUTURES,DOGEUSDT_FUTURES,ICPUSDT_FUTURES,1INCHUSDT_FUTURES,BELUSDT_FUTURES
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-05-29 00:00:00,746.0,0.01172,1.026,0.3678,319.408,0.02961,0.1481,0.7984,24.66,0.4634,...,2.3700,1.2917,4.623,0.16118,1.715,1790.87,0.08175,7.22,0.9153,0.3745
2022-05-29 00:01:00,744.9,0.01167,1.024,0.3676,319.408,0.02961,0.1476,0.7967,24.58,0.4625,...,2.3669,1.2826,4.610,0.16080,1.713,1789.78,0.08169,7.18,0.9150,0.3737
2022-05-29 00:02:00,747.5,0.01171,1.027,0.3691,319.408,0.02973,0.1482,0.8005,24.76,0.4640,...,2.3747,1.2912,4.628,0.16177,1.721,1792.29,0.08180,7.22,0.9165,0.4025
2022-05-29 00:03:00,746.8,0.01169,1.025,0.3685,319.408,0.02968,0.1480,0.7987,24.73,0.4633,...,2.3729,1.2883,4.609,0.16116,1.717,1790.76,0.08175,7.22,0.9156,0.4187
2022-05-29 00:04:00,747.4,0.01167,1.025,0.3688,319.408,0.02962,0.1479,0.7992,24.71,0.4628,...,2.3685,1.2835,4.609,0.16105,1.717,1790.00,0.08169,7.22,0.9153,0.4364
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-06-02 23:55:00,838.9,0.01195,1.084,0.3999,319.408,0.03154,0.1623,0.9233,24.66,0.5868,...,2.6288,1.4065,4.600,0.16356,1.860,1833.50,0.08273,9.55,0.9313,0.7227
2022-06-02 23:56:00,838.9,0.01198,1.084,0.3998,319.408,0.03151,0.1622,0.9233,24.66,0.5875,...,2.6267,1.4055,4.600,0.16353,1.861,1833.84,0.08275,9.54,0.9315,0.7221
2022-06-02 23:57:00,838.8,0.01196,1.085,0.3996,319.408,0.03148,0.1619,0.9212,24.63,0.5880,...,2.6249,1.4044,4.600,0.16352,1.859,1833.42,0.08272,9.53,0.9311,0.7207
2022-06-02 23:58:00,838.1,0.01195,1.085,0.3998,319.408,0.03145,0.1619,0.9216,24.59,0.5873,...,2.6230,1.4036,4.594,0.16333,1.858,1832.50,0.08271,9.44,0.9303,0.7220
