In [2]:
%%bash 
catalyst ingest-exchange -x binance -i xrp_usdt -f minute
catalyst ingest-exchange -x binance -i btc_usdt -f minute


Trying to ingest exchange bundle binance...
Trying to ingest exchange bundle binance...


In [None]:
# start terminal from anaconda/environments/catalyst
# in terminal the catalyst environment should be active, which can be checked by conda info --all
# jupyter lab work doesn't work as it doesn't switch to the catalyst environment
# start jupyter notebook by: jupyter notebook
%load_ext catalyst
# required to activate catalyst magic words 


# Setup matplotlib to display graphs inline in this Notebook
%matplotlib inline


# Get Catalyst historic data 

In [3]:
import os
import pandas as pd
import pickle


from catalyst.api import symbol, symbols
from catalyst.protocol import BarData
cur_cand = ['xrp_usdt', 'btc_usdt'] 
data_keys = ['open', 'high', 'low', 'close', 'volume'] # , 'price'


def catalyst2picklepandas(context, data: BarData):
    "reads catalyst data for trading pairs and stores them in a testfile for subsequent usage"
    test = currencies = dict()
    filename = os.getcwd() + '/df-test.pydata'

    for pair in cur_cand:
        current = data.history(symbol(pair), data_keys, 239*24*60, '1T')
        currencies[pair] = current
        print(current.head())
        print(current.tail())

    print("got catalyst history data")
    df_f = open(filename, 'wb')
    pickle.dump(currencies, df_f)
    df_f.close()
    print("data frame is written")
#    df_f = open(filename, 'rb')
#    test = pickle.load(df_f)
#    df_f.close()
#    print(test)
    return None

def feature_normalize(filename: str):
    currencies = dict()
    df_f = open(filename, 'rb')
    currencies = pickle.load(df_f)
    df_f.close()
#    combined_curr = combine_catalyst_data(currencies)
    aggregate_currencies = pair_aggregation(currencies)
    return None


# Features and Labels

In [37]:
import os
import pandas as pd
import pickle


time_aggregations = {'1T':4, '2T':4} # , '4T':4
vol_base_period = '1D'


def check_tag(tg, ltg):
    return abs(tg) == abs(ltg)
    
def add_target_labels(df):
    "target = achieved if improvement > 1% with intermediate Close loss not lower than start Close"
    # df.tg = 0 means not yet checked; < 0 is negative tg of delta < 0; > 0 is tg with best improvement > 1%
    df['tg'] = ltg = 0
    df['change'] = 0.
    df['other'] = df.close
    delta = 0.
    for ltg in range(-1, -5, -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        loss_check = (df.close.tshift(ltg) - df.other) / df.other * 1000 # delta in per mille: 1% == 10
        delta = (df.close.tshift(ltg) - df.close) / df.close * 1000 # delta in per mille: 1% == 10
        df.loc[(loss_check > 0) & (df.tg == 0), 'other'] = df.close.tshift(ltg) # note new high in other
        df.loc[(loss_check < -2) & (df.tg == 0), 'tg'] = ltg # equals < -0.2% loss from last high

        df.loc[(delta < 0) & (df.tg == 0), 'tg'] = ltg # equals any loss from start
# doesn't work: df.loc[(delta > 1) & (df.close.tshift(ltg) > df.close.tshift(df.tg)) & (df.tg >= 0), 'tg'] = ltg
        df.loc[(delta > 10) & (df.tg == 0), 'tg'] = -ltg
        df.loc[check_tag(df.tg, int(ltg)), 'change'] = delta


def derive_features(df):
    "calc derived candle features in relation to price based on the provided time aggregated dataframe df"
    # price changes in 1/1000
    df['delta'] = (df['close'] - df['close_prev']) / df['close'] * 1000
    df['height'] = (df['high'] - df['low']) / df['close'] * 1000
    df.loc[df['close'] > df['open'], 'top'] = (df['high'] - df['close']) / df['close'] * 1000
    df.loc[df['close'] <= df['open'], 'top'] = (df['high'] - df['open']) / df['close'] * 1000
    df.loc[df['close'] > df['open'], 'bottom'] = (df['open'] - df['low']) / df['close'] * 1000
    df.loc[df['close'] <= df['open'], 'bottom'] = (df['close'] - df['low']) / df['close'] * 1000
    return None

def time_aggregation(minute_data):
    """in: dataframe of minute data of a currency pair; 
       out: dict of dataframes of aggregations with features and targets"""
    aggregations = dict()
    keys = list(time_aggregations.keys())
    for time_agg in keys:
        if time_agg is '1T':
            df = minute_data
            df['close_prev'] = df.close.tshift(1) # surprisingly open != previous close 
            df['volume_change'] = df['volume'] / df.volume.rolling(vol_base_period).mean() * 100 # in %
        else :
            df = pd.DataFrame()
            df['close'] = minute_data.close.resample(time_agg).last()
            df['close_prev'] = minute_data.close_prev.resample(time_agg).first()
            df['high'] = minute_data.high.resample(time_agg).max()
            df['low'] = minute_data.low.resample(time_agg).min()
            df['open'] = minute_data.open.resample(time_agg).first()
            df['volume_change'] = minute_data.volume_change.resample(time_agg).mean()
        print(time_agg)
        print(df.dtypes)
        derive_features(df)
        add_target_labels(df)
#        print(time_agg)
        print(df)
        aggregations[time_agg] = df
    return aggregations


def pair_aggregation(currencies):
    "transform dict of currency dataframes to dict of currency dicts with all time aggregations"
    for pair in currencies:
        cur = currencies[pair] # take 1T currency data
#        print(cur.head())
#        print(cur.tail())
        currencies[pair] = time_aggregation(cur) # exchange by all required time aggregations
    return currencies


    
def test_features_labels():
    "tests creation of features and labels with artificial data"
    df_len = 10
    df = pd.DataFrame(index = pd.date_range('2018-12-28 01:10:00', periods=df_len, freq='T'))
    cl = 100.
    cl_delta = 1.1 / 5
    df['open'] = 0.
    df['high'] = 0.
    df['low'] = 0.
    df['close'] = 0.
    df['volume'] = 10.
    
    for tf in range( 0, df_len) : 
        df.iloc[tf] = [cl- 1., cl + 0.5, cl - 2., cl, 10.]
        if tf <= 4 :
            cl += cl_delta
        elif tf <= 5 :
            cl -= cl_delta
            df.iloc[tf, 4] = 20.
        elif tf <= 9 :
            cl += cl_delta
                

    print(df)
    currencies = dict()
    currencies['tst_usdt'] = df
    return currencies


aggregate_currencies = pair_aggregation(test_features_labels())


                       open    high    low   close  volume
2018-12-28 01:10:00   99.00  100.50  98.00  100.00    10.0
2018-12-28 01:11:00   99.22  100.72  98.22  100.22    10.0
2018-12-28 01:12:00   99.44  100.94  98.44  100.44    10.0
2018-12-28 01:13:00   99.66  101.16  98.66  100.66    10.0
2018-12-28 01:14:00   99.88  101.38  98.88  100.88    10.0
2018-12-28 01:15:00  100.10  101.60  99.10  101.10    20.0
2018-12-28 01:16:00   99.88  101.38  98.88  100.88    10.0
2018-12-28 01:17:00  100.10  101.60  99.10  101.10    10.0
2018-12-28 01:18:00  100.32  101.82  99.32  101.32    10.0
2018-12-28 01:19:00  100.54  102.04  99.54  101.54    10.0
1T
open             float64
high             float64
low              float64
close            float64
volume           float64
close_prev       float64
volume_change    float64
dtype: object
                       open    high    low   close  volume  close_prev  \
2018-12-28 01:10:00   99.00  100.50  98.00  100.00    10.0         NaN   
2018-12-28 

ValueError: Must have equal len keys and value when setting with an iterable

# Catalyst Frame


In [None]:
import pytz
import pandas as pd
from datetime import datetime

from catalyst.utils.run_algo import run_algorithm
from catalyst.protocol import BarData

def initialize(context):
    context.handle_count = 0
    print("init")


def handle_data(context, data: BarData):
    
    if (context.handle_count < 1):
        catalyst2picklepandas(context, data)
#        feature_normalize(fn)

        context.handle_count = context.handle_count + 1
    return None
        

def analyze(context=None, results=None):
    pass

start = datetime(2018, 12, 18, 0, 0, 0, 0, pytz.utc)
# end = datetime(2018, 9, 24, 0, 0, 0, 0, pytz.utc)
end = datetime(2018, 12, 18, 0, 0, 0, 0, pytz.utc)
results = run_algorithm(initialize=initialize,
                        handle_data=handle_data,
                        analyze=analyze,
                        start=start,
                        end=end,
                        exchange_name='binance',
                        data_frequency='minute',
                        quote_currency ='usdt',
                        capital_base=10000 )

# Unused

In [None]:
def combine_catalyst_data(currencies):
    "unused: receive a dictionary of dataframes and returns a single multiindex dataframe"
    combined_curr = None
    cindex = []
    for pair in currencies:
        cindex.clear()
        datakeys = [dkey for dkey in currencies[pair].keys()]
        currkeys = [pair for x in datakeys]
        cindex = [currkeys, datakeys]
        
        #simply set the column attribute to the new index ti get a multilevel index
        currencies[pair].columns = pd.MultiIndex.from_arrays(cindex, names=['currency', 'candle'])
#        print(currencies[pair])
    combined_curr = currencies['xrp_usdt'].merge(currencies['btc_usdt'], how='outer', 
                                                 left_index=True, right_index=True)
    print(combined_curr)
    return combined_curr



# To be investigated

In [None]:
def add_target_labels(df):
    "target = achieved if improvement > 1% with intermediate Close loss not lower than start Close"
    # df.tg = 0 means not yet checked; < 0 is negative tg of delta < 0; > 0 is tg with best improvement > 1%
    df['tg'] = ltg = 0
    df['change'] = 0.
    df['other'] = df.close
    delta = 0.
    for ltg in range(-1, -5, -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        loss_check = (df.close.tshift(ltg) - df.other) / df.other * 1000 # delta in per mille: 1% == 10
        delta = (df.close.tshift(ltg) - df.close) / df.close * 1000 # delta in per mille: 1% == 10
        df.loc[(loss_check > 0) & (df.tg == 0), 'other'] = df.close.tshift(ltg) # note new high in other
        df.loc[(loss_check < -2) & (df.tg == 0), ['tg', 'change']] = [ltg, delta] # equals < -0.2% loss from last high
# reports error: ValueError: setting an array element with a sequence.
# code snippet shows it should work

        df.loc[(delta < 0) & (df.tg == 0), ['tg', 'change']] = [ltg, delta] # equals any loss from start
# doesn't work: df.loc[(delta > 1) & (df.close.tshift(ltg) > df.close.tshift(df.tg)) & (df.tg >= 0), 'tg'] = ltg
        df.loc[(delta > 10) & (df.tg == 0), ['tg', 'change']] = [-ltg, delta]

        df.loc[(df.tg == ltg) | (df.tg == -ltg), 'change'] = delta
# reports error: ValueError: Must have equal len keys and value when setting with an iterable
# although it works in a previos iteration with 1T

