### Spot Market Data Loading

In [1]:
import ccxt

f = open("./APIKEY.txt", 'r')
lines = f.readlines()
f.close()

api_key = lines[0].strip()
secret  = lines[1].strip()

binance = ccxt.binance(config={
    'apiKey': api_key, 
    'secret': secret,
    'enableRateLimit': True,
    'options': {
        'defaultType': 'spot'
    }
})

In [2]:
import time
import pandas as pd

from stockstats import StockDataFrame as Sdf

def get_historical_data(coin_pair, timeframe):
    """Get Historical data (ohlcv) from a coin_pair
    """
    # optional: exchange.fetch_ohlcv(coin_pair, '1h', since)
    since = binance.parse8601('2021-01-01T00:00:00Z')
    data = binance.fetch_ohlcv(since=since, symbol=coin_pair, timeframe=timeframe, limit=30000)
    # update timestamp to human readable timestamp
    data = [[binance.iso8601(candle[0])] + candle[1:] for candle in data]
    header = ['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume']
    df = pd.DataFrame(data, columns=header)
    return df


def create_stock(historical_data):
    """Create StockData from historical data 
    """
    stock  = Sdf.retype(historical_data)
    return stock

In [45]:
import calendar
from datetime import datetime, date, timedelta
import numpy as np

def min_ohlcv(dt, pair, limit):
    # UTC native object
    since = calendar.timegm(dt.utctimetuple())*1000
    ohlcv1 = binance.fetch_ohlcv(symbol=pair, timeframe='1m', since=since, limit=limit)
    ohlcv2 = binance.fetch_ohlcv(symbol=pair, timeframe='1m', since=since, limit=limit)
    ohlcv = ohlcv1 + ohlcv2
    return ohlcv

def ohlcv(dt, pair, period='1d'):
    startDate, endDate = dt[0], dt[1]
    print(startDate, endDate)
    ohlcv = []
    limit = 1000
    if period == '1m':
        td = 1
        limit = 1440
    elif period == '1d':
        td = 1
        limit = 365 
    elif period == '1h':
        td = 60
        limit = 24 
    elif period == '5m':
        td = 5
        limit = 288 
        
    start_dt = datetime.strptime(startDate, "%Y%m%d")
    end_dt = datetime.strptime(endDate, "%Y%m%d")
    while (end_dt - start_dt).days > 0:
        diffDays = (end_dt - start_dt).days
        if diffDays < 61:
            td = diffDays
        since = calendar.timegm(start_dt.utctimetuple())*1000
        if period == '1m':
            ohlcv.extend(min_ohlcv(start_dt, pair, limit))
        else:
            ohlcv.extend(binance.fetch_ohlcv(symbol=pair, timeframe=period, since=since, limit=limit*td))
        start_dt = start_dt + timedelta(days = td)
        

    df = pd.DataFrame(ohlcv, columns = ['Time', 'Open', 'High', 'Low', 'Close', 'Volume'])
    time = [datetime.fromtimestamp(float(time)/1000) for time in df['Time']]
    df_t = pd.DataFrame([[t.timetuple().tm_yday,t.timetuple().tm_wday,t.timetuple().tm_hour] for t in time]
                        , columns = ['yDay', 'wDay', 'Hour'])

    del df ['Time']
    df['Open'] = df['Open'].astype(np.float64)
    df['High'] = df['High'].astype(np.float64)
    df['Low'] = df['Low'].astype(np.float64)
    df['Close'] = df['Close'].astype(np.float64)
    df['Volume'] = df['Volume'].astype(np.float64)

    df = pd.concat([df_t, df], axis = 1)
    df.insert(0, 'Pair', pair)
    return df

In [46]:
start_day = "20170101"
end_day = "20211225"
symbols = ['BTC/USDT']

res = []
for symbol in symbols:
    res.append(ohlcv([start_day, end_day], symbol, '1h'))
    
df = pd.concat(res, axis = 0)

20170101 20211225


In [47]:
df

Unnamed: 0,Pair,yDay,wDay,Hour,Open,High,Low,Close,Volume
0,BTC/USDT,229,3,13,4261.48,4313.62,4261.32,4308.83,47.181009
1,BTC/USDT,229,3,14,4308.83,4328.69,4291.37,4315.32,23.234916
2,BTC/USDT,229,3,15,4330.29,4345.45,4309.37,4324.35,7.229691
3,BTC/USDT,229,3,16,4316.62,4349.99,4287.41,4349.99,4.443249
4,BTC/USDT,229,3,17,4333.32,4377.85,4333.32,4360.69,0.972807
...,...,...,...,...,...,...,...,...,...
30451,BTC/USDT,359,5,4,51600.47,51810.00,51083.58,51126.40,1935.071560
30452,BTC/USDT,359,5,5,51126.40,51236.15,50773.90,51109.18,1640.570910
30453,BTC/USDT,359,5,6,51109.17,51135.26,50847.84,50876.44,598.850440
30454,BTC/USDT,359,5,7,50876.44,50955.53,50570.16,50760.55,1001.761550


In [5]:
data = create_stock(df)

In [48]:
df.to_csv('./dataset/BTCUSDT_1h.csv', index = False)

### Monday = 0 Sunday = 6

In [33]:
((data['close']-data['open'])/data['open'] * 100).abs().mean()

0.6089043146693095

In [16]:
[(((data['close']-data['close'].shift(n))/data['close'])*100).abs().mean() for n in range (24)]

[0.0,
 0.9787048904000825,
 1.540184717468691,
 2.030538998071487,
 2.4865877568769017,
 2.929156208202759,
 3.3577865846513006,
 3.769365871183702,
 4.179326034157048,
 4.5798074765284085,
 4.9721191852893485,
 5.36517282694626,
 5.752945727540854,
 6.139999265171288,
 6.525956004039234,
 6.914976991212183,
 7.302813768804734,
 7.684520235374529,
 8.061388431713741,
 8.443901872842718,
 8.824117925559811,
 9.201054559679873,
 9.582938733538457,
 9.96628300104373]