In [1]:
import csv
import datetime
import json

import ccxt
import pandas as pd
import pandas_ta as ta # needed for processing chart data 
from tqdm import tqdm

In [2]:
# Read the raw candle rows from disk. The rename below maps integer labels
# 0..5, so the rows are presumably positional sequences (TODO confirm
# against the file that produced this JSON).
with open("BTC_USDT-4h_interval.json") as candle_file:
    raw_candles = json.load(candle_file)

# Give the first six positional columns their OHLCV names; any other columns
# are left as they are.
ohlcv_names = ["timestamp", "open", "high", "low", "close", "volume"]
chart_df = pd.DataFrame(raw_candles).rename(columns=dict(enumerate(ohlcv_names)))

def process(df):
    """Replace the epoch-millisecond ``timestamp`` column with a ``datetime`` string column.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing a ``timestamp`` column of Unix epoch values in
        milliseconds.

    Returns
    -------
    pandas.DataFrame
        A new frame without ``timestamp`` and with a trailing ``datetime``
        column of ``YYYY-MM-DD HH:MM:SS`` strings (UTC, sub-second part
        dropped). The frame passed in is not modified.
    """
    # Work on whole milliseconds; the cast also rejects missing values early.
    epoch_ms = df['timestamp'].astype('int64')
    formatted = pd.to_datetime(epoch_ms, unit='ms', utc=True).dt.strftime('%Y-%m-%d %H:%M:%S')

    # Build the result on the dropped copy so the caller's frame keeps its
    # original columns.
    out = df.drop(columns=['timestamp'])
    # Positional assignment (plain array) so no index alignment is involved.
    out['datetime'] = formatted.to_numpy()
    return out

chart_df = process(chart_df)

# Calendar features, taken from the parsed 'datetime' strings in one
# vectorized pass. The int64 cast keeps the column dtype the same as when
# the columns are built from plain Python ints.
candle_time = pd.to_datetime(chart_df['datetime'])
chart_df['years'] = candle_time.dt.year.astype('int64')
chart_df['months'] = candle_time.dt.month.astype('int64')
chart_df['days'] = candle_time.dt.day.astype('int64')
chart_df['hours'] = candle_time.dt.hour.astype('int64')

# Relative move from this candle's close to the NEXT candle's high / low.
# shift(-1) pulls the following row up by one, so the last row has no
# successor and gets NaN.
close = chart_df['close']
chart_df['high_delta'] = (chart_df['high'].shift(-1) - close) / close
chart_df['low_delta'] = (chart_df['low'].shift(-1) - close) / close

# Drops the final row (no next candle) along with any other row that
# contains a missing value.
chart_df.dropna(inplace=True)

chart_df

100%|████████████████████████████████████████████████████████████| 10560/10560 [00:00<00:00, 12189.14it/s]


Unnamed: 0,open,high,low,close,volume,datetime,years,months,days,hours,high_delta,low_delta
0,4261.48,4349.99,4261.32,4349.99,82.088865,2017-08-17 04:00:00,2017,8,17,4,0.031127,-0.003832
1,4333.32,4485.39,4333.32,4427.30,63.619882,2017-08-17 08:00:00,2017,8,17,8,0.013121,-0.021205
2,4436.06,4485.39,4333.42,4352.34,174.562001,2017-08-17 12:00:00,2017,8,17,12,0.000574,-0.034832
3,4352.33,4354.84,4200.74,4325.23,225.109716,2017-08-17 16:00:00,2017,8,17,16,0.010279,-0.015414
4,4307.56,4369.69,4258.56,4285.08,249.769913,2017-08-17 20:00:00,2017,8,17,20,0.012961,-0.035115
...,...,...,...,...,...,...,...,...,...,...,...,...
10554,22485.27,22796.71,20846.00,22127.91,64139.981750,2022-06-14 00:00:00,2022,6,14,0,0.055811,-0.016459
10555,22127.92,23362.88,21763.71,22877.52,43892.535610,2022-06-14 04:00:00,2022,6,14,4,0.004478,-0.050368
10556,22877.52,22979.96,21725.22,21994.56,22494.024070,2022-06-14 08:00:00,2022,6,14,8,0.036620,-0.008345
10557,21994.56,22800.00,21811.01,22680.38,29165.257260,2022-06-14 12:00:00,2022,6,14,12,0.004263,-0.024426


In [3]:
# Index the frame by candle time before calling the `.ta` accessor.
# NOTE(review): presumably required by the VWAP indicator, which anchors on
# the datetime index -- confirm against the pandas_ta documentation.
chart_df.set_index(pd.DatetimeIndex(chart_df['datetime']), inplace=True)

# Indicators from pandas_ta, plus simple intra-candle price ratios.
chart_df['bop'] = chart_df.ta.bop(lookahead=False)
chart_df['ebsw'] = chart_df.ta.ebsw(lookahead=False)
chart_df['cmf'] = chart_df.ta.cmf(lookahead=False)
chart_df['rsi/100'] = chart_df.ta.rsi(lookahead=False) / 100
chart_df['vwap'] = chart_df.ta.vwap(lookahead=False)
chart_df['high/low'] = chart_df['high'] / chart_df['low']
chart_df['close/open'] = chart_df['close'] / chart_df['open']
chart_df['high/open'] = chart_df['high'] / chart_df['open']
chart_df['low/open'] = chart_df['low'] / chart_df['open']

# Moving-average style indicators, expressed relative to the close.
chart_df['hwma'] = chart_df.ta.hwma(lookahead=False)
chart_df['linreg'] = chart_df.ta.linreg(lookahead=False)
chart_df['hwma/close'] = chart_df['hwma'] / chart_df['close']
chart_df['linreg/close'] = chart_df['linreg'] / chart_df['close']

# Ratio of each series to its own value 1, 2 and 3 candles earlier.
# The first `lag` rows have no earlier value and stay NaN; a zero
# denominator yields a neutral ratio of 1 instead of inf/NaN.
for lag in tqdm(range(1, 4)):
    for col in ['open', 'high', 'low', 'close', 'volume', 'vwap']:
        current = chart_df[col]
        previous = current.shift(lag)
        ratio = (current / previous).where(previous.ne(0), 1.0)
        # Assign positionally (plain array) so no index alignment is involved.
        chart_df['{}_change_{}'.format(col, lag)] = ratio.to_numpy()

# Remove indicator warm-up rows (and any other row with a missing value),
# then drop the raw inputs so only the derived features remain.
chart_df.dropna(inplace=True)
chart_df.drop(
    columns=['datetime', 'open', 'high', 'low', 'close', 'volume', 'vwap', 'hwma', 'linreg', 'years'],
    inplace=True,
)

100%|███████████████████████████████████████████████████████████████████████| 3/3 [00:00<00:00, 12.74it/s]


In [4]:
# Preview the first two rows of the feature frame.
chart_df.head(n=2)

Unnamed: 0_level_0,months,days,hours,high_delta,low_delta,bop,ebsw,cmf,rsi/100,high/low,...,low_change_2,close_change_2,volume_change_2,vwap_change_2,open_change_3,high_change_3,low_change_3,close_change_3,volume_change_3,vwap_change_3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-08-23 16:00:00,8,23,16,0.014957,-0.011481,-0.703979,0.0,0.121501,0.524823,1.038033,...,1.011464,0.971468,1.293382,1.009008,1.034863,1.034916,1.016898,1.010354,1.231233,1.016728
2017-08-23 20:00:00,8,23,20,0.022008,-0.007049,-0.206431,0.57735,0.09759,0.522657,1.026746,...,0.980521,0.980215,0.642973,0.999909,1.013438,0.984992,1.003155,0.970746,1.130291,1.008057


In [5]:
# Write the feature frame, including its index, to disk as CSV.
chart_df.to_csv(path_or_buf='default.csv')