In [1]:
import psycopg2
import pandas as pd
import numpy as np
import re
from datetime import datetime
import pytz
import re
from operator import itemgetter

In [2]:
# local postgres connection only
# `hidden` is presumably the local hidden.py named in the message below; it supplies
# the connection settings, and hidden.psycopg2(...) builds the string that is handed
# to psycopg2.connect() further down.
import hidden
sql_string = hidden.psycopg2(hidden.secrets())
print('PostgreSQL connection data taken from hidden.py')

# Make the connection (no cursor is created here; `conn` is passed directly to
# pandas by get_candle_data). connect_timeout=3 is forwarded to the driver --
# presumably seconds; confirm against the libpq connection parameters.
conn = psycopg2.connect(sql_string, connect_timeout=3)

PostgreSQL connection data taken from hidden.py


In [3]:
# These are only used for caching
import hashlib
import pickle

def get_candle_data(base_coin, quote_coin):
    """Return the 15-minute candlestick rows for one trading pair, with a disk cache.

    The cache file ``./cache_data/<base_coin><quote_coin>.pkl`` is tried first.
    If it is missing or cannot be read, the rows are queried through the
    module-level ``conn`` connection and the resulting frame is pickled to that
    same path for the next call.

    Parameters
    ----------
    base_coin : str
        Value matched against ``pairs.coin1``.
    quote_coin : str
        Value matched against ``pairs.coin2``.

    Returns
    -------
    pandas.DataFrame
        On a cache miss: ``candlestick_15m`` rows joined to the matching pair id,
        limited to rows with a non-null ``close_time`` and ``open_time`` before
        2021-08-07, ordered by ``open_time``. On a cache hit: whatever object
        was previously pickled at the cache path.
    """
    cache_path = f'./cache_data/{base_coin}{quote_coin}.pkl'

    try:
        with open(cache_path, 'rb') as fp:
            print(f"Using cache file: {cache_path}")
            # NOTE: pickle.load can execute arbitrary code; only point this at
            # cache files this notebook wrote itself.
            return pickle.load(fp)
    except FileNotFoundError:
        print("No cache found")
    except Exception as exc:
        # A corrupt or incompatible cache file is treated like a miss (best
        # effort), but Ctrl-C / SystemExit are no longer swallowed.
        print(f"Cache file {cache_path} unreadable ({exc!r}); refetching")

    # Coin names are passed as bound parameters (psycopg2 ``%s`` placeholders)
    # instead of being spliced into the SQL text.
    sql = """
select *
from
    (select id as the_pair from pairs p where p.coin1=%s and p.coin2=%s) z
inner join
    candlestick_15m on the_pair=pair_id
where close_time notnull and open_time < '2021-08-07'
order by open_time
"""
    df = pd.read_sql_query(sql, conn, params=(base_coin, quote_coin))
    with open(cache_path, 'wb') as fp:
        print(f"Saving cache to: {cache_path}")
        pickle.dump(df, fp, protocol=4)

    return df

In [4]:
# Fetch every ordered pairing of ETH / BTC / USDT (from cache when available),
# in a fixed order, then stack them into one frame keyed by pair and candle open time.
dfs = [
    get_candle_data(base, quote)
    for base, quote in (
        ('ETH', 'BTC'),
        ('ETH', 'USDT'),
        ('USDT', 'BTC'),
        ('USDT', 'ETH'),
        ('BTC', 'ETH'),
        ('BTC', 'USDT'),
    )
]
df = pd.concat(dfs).set_index(['pair_id', 'open_time'])

No cache found
Saving cache to: ./cache_data/ETHBTC.pkl
No cache found
Saving cache to: ./cache_data/ETHUSDT.pkl
No cache found
Saving cache to: ./cache_data/USDTBTC.pkl
No cache found
Saving cache to: ./cache_data/USDTETH.pkl
No cache found
Saving cache to: ./cache_data/BTCETH.pkl
No cache found
Saving cache to: ./cache_data/BTCUSDT.pkl


In [5]:
def y_ewm(x, spans=(8, 24, 48, 96)):
    """Add forward-looking EWM-of-close ratio columns to a frame.

    For each ``span`` in ``spans`` a column ``close__ewm_<span>`` is added. It
    holds the exponentially weighted mean of ``close`` (``adjust=False``) taken
    ``span`` rows later in index order, divided by the row's own ``close``.
    The last ``span`` rows have no later value and are therefore NaN.

    Parameters
    ----------
    x : pandas.DataFrame
        Frame with a ``close`` column. It is sorted by index first; the
        caller's frame is not modified.
    spans : iterable of int, optional
        EWM spans, each also used as the look-ahead row count. Defaults to
        ``(8, 24, 48, 96)``.

    Returns
    -------
    pandas.DataFrame
        Index-sorted copy of ``x`` with the extra ``close__ewm_*`` columns.
    """
    x = x.sort_index()
    close = x['close']
    for span in spans:
        # shift(-span) pulls the EWM value from `span` rows ahead onto this row.
        future_ewm = close.ewm(span=span, adjust=False).mean().shift(-span)
        x[f'close__ewm_{span}'] = future_ewm / close
    return x

In [6]:
# Compute the close__ewm_* columns independently for every pair.
# group_keys=False: y_ewm already returns each group with its (pair_id, open_time)
# index, so the group key must not be prepended again -- without this, pandas >= 2.0
# yields a duplicated pair_id index level.
df = df.groupby('pair_id', group_keys=False).apply(y_ewm)

In [7]:
# Keep only the columns whose name begins with 'close__'.
y_df = df.loc[:, df.columns.str.startswith('close__')]

In [8]:
# Persist the targets, re-centred around zero and scaled by 20: (ratio - 1) * 20.
scaled_targets = (y_df - 1) * 20
with open('./cache_data/targets.pkl', 'wb') as targets_file:
    pickle.dump(scaled_targets, targets_file, protocol=4)

In [9]:
# Show the combined frame, now carrying the close__ewm_* columns added by y_ewm.
df

Unnamed: 0_level_0,Unnamed: 1_level_0,the_pair,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,created_at,updated_at,column1,close__ewm_8,close__ewm_24,close__ewm_48,close__ewm_96
pair_id,open_time,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0,2017-07-14 04:00:00,0,0.080000,0.086400,0.080000,0.086400,8.752000,2017-07-14 04:14:59.999,7.282741e-01,26,3.268000,2.823552e-01,2021-06-30 14:24:31.900306+00:00,2021-07-01 11:09:34.291214+00:00,,0.995701,1.015724,1.011158,1.029471
0,2017-07-14 04:15:00,0,0.085289,0.086000,0.085128,0.085811,61.042000,2017-07-14 04:29:59.999,5.217079e+00,33,2.738000,2.350271e-01,2021-06-30 14:24:31.900306+00:00,2021-07-01 10:57:33.052002+00:00,,1.004748,1.023623,1.017379,1.037132
0,2017-07-14 04:30:00,0,0.085811,0.086380,0.085811,0.086314,53.769000,2017-07-14 04:44:59.999,4.631740e+00,41,21.225000,1.829643e+00,2021-06-30 14:24:31.900306+00:00,2021-07-01 10:57:33.052002+00:00,,1.001206,1.018605,1.011097,1.031644
0,2017-07-14 04:45:00,0,0.086314,0.086380,0.086309,0.086347,42.818000,2017-07-14 04:59:59.999,3.697253e+00,61,7.948000,6.862948e-01,2021-06-30 14:24:31.900306+00:00,2021-07-01 10:57:33.052002+00:00,,1.002836,1.018833,1.010478,1.031839
0,2017-07-14 05:00:00,0,0.085874,0.086205,0.084608,0.084680,16.520000,2017-07-14 05:14:59.999,1.404228e+00,15,1.176000,1.009137e-01,2021-06-30 14:24:31.900306+00:00,2021-07-01 10:57:33.052002+00:00,,1.025165,1.039666,1.030114,1.052657
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,2021-08-06 22:45:00,1,42643.900000,42785.710000,42571.310000,42780.070000,213.598415,2021-08-06 22:59:59.000,9.117914e+06,7798,112.209132,4.790198e+06,2021-08-06 22:45:04.250384+00:00,2021-08-06 23:00:04.697706+00:00,,,,,
1,2021-08-06 23:00:00,1,42780.070000,42789.000000,42656.290000,42678.950000,213.887211,2021-08-06 23:14:59.000,9.133697e+06,7844,106.336888,4.541276e+06,2021-08-06 23:00:04.697706+00:00,2021-08-06 23:15:04.533388+00:00,,,,,
1,2021-08-06 23:15:00,1,42678.950000,42752.530000,42571.430000,42719.500000,223.398210,2021-08-06 23:29:59.000,9.531670e+06,8041,118.922631,5.074128e+06,2021-08-06 23:15:04.533388+00:00,2021-08-06 23:30:04.413748+00:00,,,,,
1,2021-08-06 23:30:00,1,42719.500000,42841.690000,42699.010000,42801.360000,424.432578,2021-08-06 23:44:59.000,1.816148e+07,9422,225.516483,9.649508e+06,2021-08-06 23:30:04.413748+00:00,2021-08-06 23:45:04.585229+00:00,,,,,
