In [None]:
%%bash 
catalyst ingest-exchange -x binance -i xrp_usdt -f minute
catalyst ingest-exchange -x binance -i btc_usdt -f minute


In [None]:
# start terminal from anaconda/environments/catalyst
# in terminal the catalyst environment should be active, which can be checked by conda info --all
# jupyter lab work doesn't work as it doesn't switch to the catalyst environment
# start jupyter notebook by: jupyter notebook
%load_ext catalyst
# required to activate catalyst magic words 


# Setup matplotlib to display graphs inline in this Notebook
%matplotlib inline


# Get Catalyst historic data 

In [None]:
import os
import pandas as pd
import pickle


from catalyst.api import symbol, symbols
from catalyst.protocol import BarData
cur_cand = ['xrp_usdt', 'btc_usdt'] 
data_keys = ['open', 'high', 'low', 'close', 'volume'] # , 'price'


def catalyst2picklepandas(context, data: BarData):
    "reads catalyst data for trading pairs and stores them in a testfile for subsequent usage"
    test = currencies = dict()
    filename = os.getcwd() + '/df-test.pydata'

    for pair in cur_cand:
        current = data.history(symbol(pair), data_keys, 239*24*60, '1T')
        currencies[pair] = current
        print(current.head())
        print(current.tail())

    print("got catalyst history data")
    df_f = open(filename, 'wb')
    pickle.dump(currencies, df_f)
    df_f.close()
    print("data frame is written")
#    df_f = open(filename, 'rb')
#    test = pickle.load(df_f)
#    df_f.close()
#    print(test)
    return None

def feature_normalize(filename: str):
    currencies = dict()
    df_f = open(filename, 'rb')
    currencies = pickle.load(df_f)
    df_f.close()
#    combined_curr = combine_catalyst_data(currencies)
    aggregate_currencies = pair_aggregation(currencies)
    return None


# Features and Labels

In [129]:
import os
import pandas as pd
import pickle


time_aggregations = {'1T':4, '2T':4} # , '4T':4
vol_base_period = '1D'


def check_tag(tg, ltg):
    return abs(tg) == abs(ltg)
    
def add_period_specific_labels(df: pd.DataFrame):
    "target = achieved if improvement > 1% without intermediate loss of more than 0.2%"

    df['sell_tg'] = df['dip_tg'] = df['buy_tg'] = 0
    df['loc_max'] = df['buy_max'] = df.close
    for ltg in range(-1, -(20), -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        df['loss_check'] = (df.close.tshift(ltg) - df.loc_max) / df.loc_max * 1000 #  in per mille: 1% == 10
        df['max_profit'] = (df.close.tshift(ltg) - df.close) / df.close * 1000 #  in per mille: 1% == 10
        
        df.loc[(df.max_profit > 0) & (df.sell_tg == 0), 'sell_tg'] = -ltg # equals no loss from start 
        df.loc[(df.max_profit < -2) & (df.sell_tg == 0), 'sell_tg'] = ltg # equals < -0.2% loss from start 

        df.loc[(df.loss_check > 0) & (df.dip_tg == 0), 'loc_max'] = df.close.tshift(ltg) # note new high in other
        df.loc[(df.loss_check < -2) & (df.dip_tg == 0), 'dip_tg'] = ltg # equals < -0.2% loss from last high
        df.loc[(df.max_profit > 10) & (df.max_profit > ((df.buy_max - df.close) / df.close * 1000)) & (df.dip_tg == 0), 'buy_tg'] = ltg
        df.loc[(df.max_profit > 10) & (df.max_profit > ((df.buy_max - df.close) / df.close * 1000)) & (df.dip_tg == 0), 'buy_max'] = df.close.tshift(ltg)
        
    df['cpc_label'] = "hold"
    df.loc[(df.sell_tg < 0), 'cpc_label'] = "sell"
    df.loc[(df.buy_tg < 0), 'cpc_label'] = "buy"
    while df.loc[((df.cpc_label == "hold") & (df.cpc_label.tshift(1) == "buy"))] :
        df.loc[((df.cpc_label == "hold") & (df.cpc_label.tshift(1) == "buy")), 'cpc_label'] = "buy"
    df['performance'] = 0.
    df.loc[((df.cpc_label == "buy") & (df.cpc_label.tshift(1) != "buy")), 'performance'] = - df.close.tshift(1) * 0.001 + df.close.tshift - df.close.tshift(1) # buy fee + performance
    df.loc[((df.cpc_label == "buy") & (df.cpc_label.tshift(1) == "buy")), 'performance'] =  df.close.tshift - df.close.tshift(1) # performance
    df.loc[((df.cpc_label == "sell") & (df.cpc_label.tshift(1) == "buy")), 'performance'] = - df.close.tshift(1) * 0.001 + df.close.tshift - df.close.tshift(1) # buy fee + performance
    df['cpc_label'] = "hold"
    df.loc[(df.sell_tg < 0), 'cpc_label'] = "sell"
    df.loc[(df.buy_tg < 0), 'cpc_label'] = "buy"
        
    df['max_profit'] = 0.
#    df.loc[(df.dip_tg < df.buy_tg) & (df.buy_tg != 0), 'dip_tg'] = 0 # if sell event happens later than buy event then remove sell signal
    df.loc[(df.buy_tg != 0), 'max_profit'] = ((df.buy_max - df.close) / df.close * 1000) # in per mille: 1% == 10
    df.pop('loss_check')
    df.pop('loc_max')
    df.pop('buy_max')
    df.pop('dip_tg')

def derive_features(df: pd.DataFrame):
    "calc derived candle features in relation to price based on the provided time aggregated dataframe df"
    # price changes in 1/1000
    df['height'] = (df['high'] - df['low']) / df['close'] * 1000
    df.loc[df['close'] > df['open'], 'top'] = (df['high'] - df['close']) / df['close'] * 1000
    df.loc[df['close'] <= df['open'], 'top'] = (df['high'] - df['open']) / df['close'] * 1000
    df.loc[df['close'] > df['open'], 'bottom'] = (df['open'] - df['low']) / df['close'] * 1000
    df.loc[df['close'] <= df['open'], 'bottom'] = (df['close'] - df['low']) / df['close'] * 1000
    return None

def time_aggregation(minute_data: pd.DataFrame):
    """in: dataframe of minute data of a currency pair; 
       out: dict of dataframes of aggregations with features and targets"""
    aggregations = dict()
    time_aggs = list(time_aggregations.keys())
    for time_agg in time_aggs:
        print(time_agg)
        if time_agg is '1T':
            df = minute_data
            df['volume_change'] = (df['volume']  - df.volume.rolling(vol_base_period).median()) / df.volume.rolling(vol_base_period).median() * 100 # in %
        else :
            df = pd.DataFrame()
            df['close'] = minute_data.close.resample(time_agg, label='right', closed='right').last()
            df['high'] = minute_data.high.resample(time_agg, label='right', closed='right').max()
            df['low'] = minute_data.low.resample(time_agg, label='right', closed='right').min()
            df['open'] = minute_data.open.resample(time_agg, label='right', closed='right').first()
            df['volume_change'] = minute_data.volume_change.resample(time_agg, label='right', closed='right').mean()
        derive_features(df)
        add_period_specific_labels(df)
#        print(df)
#        print(df[['close', 'high', 'low', 'open', 'volume_change']])
#        print(df[['close', 'sell_tg', 'buy_tg', 'loc_max', 'buy_max', 'max_profit']])
        print(df[['close', 'sell_tg', 'buy_tg', 'max_profit']])
        aggregations[time_agg] = df
    aggregations['asset_summary'] = add_asset_summary_labels(aggregations)
    return aggregations

def add_asset_summary_labels(aggregations: dict):
    "target = achieved if improvement > 1% without intermediate loss of more than 0.2%"

    time_aggs = list(aggregations.keys())
    time_agg = '1T'
    print(time_agg)
    df = aggregations[time_agg]
    if True :
        
        ldf = df[['buy_tg', 'sell_tg']]

    #    ldf['cpc_label'] = pd.Series(["sell", "hold", "buy", "inconsistent"], dtype = "category", index = ldf.index)
        ldf['cpc_label'] = "hold"
        ldf.loc[ldf['buy_tg'] < 0, 'cpc_label'] = "buy"
        ldf.loc[ldf['sell_tg'] < 0, 'cpc_label'] = "sell"
        ldf.loc[(ldf['buy_tg'] < 0) & (ldf['sell_tg'] < 0), 'cpc_label'] = "inconsistent"

        for time_agg in time_aggs:
            if time_agg != '1T':
                print(time_agg)
                df = aggregations[time_agg]
                df_extract  = df[['buy_tg', 'sell_tg']]
                df_extract = df_extract.resample('1T').bfill()

                ldf = ldf.merge(df_extract, how='left', left_index=True, right_index=True)
                ldf.loc[(ldf['buy_tg'] < 0)  & (ldf['cpc_label'] != "sell"), 'cpc_label'] = "buy"
                ldf.loc[(ldf['sell_tg'] < 0) & (ldf['cpc_label'] != "buy"), 'cpc_label'] = "sell"
                ldf.loc[(ldf['buy_tg'] < 0)  & (ldf['sell_tg'] < 0), 'cpc_label'] = "inconsistent"
            print(ldf)
            ldf.pop('buy_tg')
            ldf.pop('sell_tg')
    else :
        
        ldf = df[['buy_tg', 'sell_tg', 'max_profit']]
        ldf.columns=['buy_tg_' + time_agg, 'sell_tg_' + time_agg, 'max_profit_' + time_agg]
        for time_agg in time_aggs:
            if time_agg != '1T':
                print(time_agg)
                df = aggregations[time_agg]
                df_extract  = df[['buy_tg', 'sell_tg', 'max_profit']]
                df_extract = df_extract.resample('1T').bfill()
                df_extract.columns=['buy_tg_' + time_agg, 'sell_tg_' + time_agg, 'max_profit_' + time_agg]
                ldf = ldf.merge(df_extract, how='left', left_index=True, right_index=True)
            print(ldf)
    return ldf

def pair_aggregation(currencies):
    "transform dict of currency dataframes to dict of currency dicts with all time aggregations"
    for pair in currencies:
        cur = currencies[pair] # take 1T currency data
        currencies[pair] = time_aggregation(cur) # exchange by all required time aggregations
    return currencies


    
def test_features_labels():
    "tests creation of features and labels with artificial data"
    df_len = 21
    df = pd.DataFrame(index = pd.date_range('2018-12-28 01:10:00', periods=df_len, freq='T'))
    cl = 100.
    cl_delta = 1.1 / 5
    df['open'] = 0.
    df['high'] = 0.
    df['low'] = 0.
    df['close'] = 0.
    df['volume'] = 10.
    
    for tf in range( 0, df_len) : 
        df.iloc[tf] = [cl- 1., cl + 0.5, cl - 2., cl, 10.]
        if tf <= 4 : #raise above 1% to trigger buy
            cl += cl_delta
        elif tf <= 5 : # fall -0.2% to trigger sell but only on minute basis
            cl -= cl_delta
            df.iloc[tf, 4] = 20.
        elif tf <= 9 : # raise above 1% with dip above -0.2% to not raise a trigger
            cl += cl_delta
        elif tf <= 13 : # raise above 1% with dip above -0.2% to not raise a trigger
            cl -= cl_delta / 4
        elif tf <= 30 : # raise above 1% with dip above -0.2% to not raise a trigger
            cl += cl_delta
                
    currencies = dict()
    currencies['tst_usdt'] = df
    return currencies


aggregate_currencies = pair_aggregation(test_features_labels())


1T


ValueError: The truth value of a DataFrame is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

# Catalyst Frame


In [None]:
import pytz
import pandas as pd
from datetime import datetime

from catalyst.utils.run_algo import run_algorithm
from catalyst.protocol import BarData

def initialize(context):
    context.handle_count = 0
    print("init")


def handle_data(context, data: BarData):
    
    if (context.handle_count < 1):
        catalyst2picklepandas(context, data)
#        feature_normalize(fn)

        context.handle_count = context.handle_count + 1
    return None
        

def analyze(context=None, results=None):
    pass

start = datetime(2018, 12, 18, 0, 0, 0, 0, pytz.utc)
# end = datetime(2018, 9, 24, 0, 0, 0, 0, pytz.utc)
end = datetime(2018, 12, 18, 0, 0, 0, 0, pytz.utc)
results = run_algorithm(initialize=initialize,
                        handle_data=handle_data,
                        analyze=analyze,
                        start=start,
                        end=end,
                        exchange_name='binance',
                        data_frequency='minute',
                        quote_currency ='usdt',
                        capital_base=10000 )

# Unused

In [None]:
def combine_catalyst_data(currencies):
    "unused: receive a dictionary of dataframes and returns a single multiindex dataframe"
    combined_curr = None
    cindex = []
    for pair in currencies:
        cindex.clear()
        datakeys = [dkey for dkey in currencies[pair].keys()]
        currkeys = [pair for x in datakeys]
        cindex = [currkeys, datakeys]
        
        #simply set the column attribute to the new index ti get a multilevel index
        currencies[pair].columns = pd.MultiIndex.from_arrays(cindex, names=['currency', 'candle'])
#        print(currencies[pair])
    combined_curr = currencies['xrp_usdt'].merge(currencies['btc_usdt'], how='outer', 
                                                 left_index=True, right_index=True)
    print(combined_curr)
    return combined_curr


def add_asset_summary_labels(aggregations: dict):
    "target = achieved if improvement > 1% without intermediate loss of more than 0.2%"

    time_aggs = list(aggregations.keys())
    time_agg = '1T'
    print(time_agg)
    df = aggregations[time_agg]
    labeldf = df[['buy_tg', 'sell_tg', 'max_profit']]
    labeldf.columns=['buy_tg_' + time_agg, 'sell_tg_' + time_agg, 'max_profit_' + time_agg]
    for time_agg in time_aggs:
        if time_agg != '1T':
            print(time_agg)
            df = aggregations[time_agg]
            df_extract  = df[['buy_tg', 'sell_tg', 'max_profit']]
            df_extract = df_extract.resample('1T').bfill()
            df_extract.columns=['buy_tg_' + time_agg, 'sell_tg_' + time_agg, 'max_profit_' + time_agg]
            labeldf = labeldf.merge(df_extract, how='left', left_index=True, right_index=True)
        print(labeldf)
    return labeldf


# To be investigated

In [None]:
def add_target_labels(df):
    "target = achieved if improvement > 1% with intermediate Close loss not lower than start Close"
    # df.tg = 0 means not yet checked; < 0 is negative tg of delta < 0; > 0 is tg with best improvement > 1%
    df['tg'] = ltg = 0
    df['change'] = 0.
    df['other'] = df.close
    delta = 0.
    for ltg in range(-1, -5, -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        loss_check = (df.close.tshift(ltg) - df.other) / df.other * 1000 # delta in per mille: 1% == 10
        delta = (df.close.tshift(ltg) - df.close) / df.close * 1000 # delta in per mille: 1% == 10
        df.loc[(loss_check > 0) & (df.tg == 0), 'other'] = df.close.tshift(ltg) # note new high in other
        df.loc[(loss_check < -2) & (df.tg == 0), ['tg', 'change']] = [ltg, delta] # equals < -0.2% loss from last high
# reports error: ValueError: setting an array element with a sequence.
# code snippet shows it should work

        df.loc[(delta < 0) & (df.tg == 0), ['tg', 'change']] = [ltg, delta] # equals any loss from start
# doesn't work: df.loc[(delta > 1) & (df.close.tshift(ltg) > df.close.tshift(df.tg)) & (df.tg >= 0), 'tg'] = ltg
        df.loc[(delta > 10) & (df.tg == 0), ['tg', 'change']] = [-ltg, delta]

        df.loc[(df.tg == ltg) | (df.tg == -ltg), 'change'] = delta
# reports error: ValueError: Must have equal len keys and value when setting with an iterable
# although it works in a previos iteration with 1T

