In [None]:
%%bash 
# Ingest minute-frequency (-f minute) data for the ltc_usdt pair (-i) from the binance exchange (-x) via the catalyst CLI.
catalyst ingest-exchange -x binance -i ltc_usdt -f minute


In [None]:
%%bash 
# Ingest minute-frequency binance data for every USDT pair listed below, one catalyst CLI call per pair.
# NOTE(review): ltc_usdt is also ingested by the single-pair cell above.
catalyst ingest-exchange -x binance -i xrp_usdt -f minute
catalyst ingest-exchange -x binance -i btc_usdt -f minute
catalyst ingest-exchange -x binance -i eth_usdt -f minute
catalyst ingest-exchange -x binance -i bnb_usdt -f minute
catalyst ingest-exchange -x binance -i eos_usdt -f minute
catalyst ingest-exchange -x binance -i neo_usdt -f minute
catalyst ingest-exchange -x binance -i trx_usdt -f minute
catalyst ingest-exchange -x binance -i ltc_usdt -f minute


In [None]:
# Start a terminal from anaconda/environments/catalyst.
# In that terminal the catalyst environment should be active; check with: conda info --all
# JupyterLab doesn't work here because it doesn't switch to the catalyst environment.
# Start the classic notebook server with: jupyter notebook
%load_ext catalyst
# required to activate catalyst magic words 


# Setup matplotlib to display graphs inline in this Notebook
%matplotlib inline


In [None]:
import pixiedust
pixiedust.optOut()

# magic line to be inserted as first line of cell
# %%pixie_debugger


# Get Catalyst historic data 

In [None]:
import os
import pandas as pd
import pickle
import pixiedust

from catalyst.api import symbol, symbols
from catalyst.protocol import BarData
# Trading pairs whose history is fetched and pickled by catalyst2picklepandas.
cur_cand = ['xrp_usdt', 'btc_usdt'] 
# Candle fields requested for every pair ('price' is currently left out).
data_keys = ['open', 'high', 'low', 'close', 'volume'] # , 'price'


def catalyst2picklepandas(context, data: BarData):
    """Read the Catalyst history of every pair in ``cur_cand`` and pickle it to disk.

    For each pair the fields listed in ``data_keys`` are requested through
    ``data.history`` at '1T' frequency, the head and tail of the result are
    printed, and the result is stored in a dict keyed by pair name. The dict is
    then pickled to 'df-test.pydata' in the current working directory.

    Args:
        context: algorithm context handed in by the caller; not used here.
        data: Catalyst ``BarData`` object providing ``history``.

    Returns:
        None.
    """
    filename = os.path.join(os.getcwd(), 'df-test.pydata')
    bar_count = 239*24*60  # 239 days expressed in minute bars
    currencies = dict()

    for pair in cur_cand:
        current = data.history(symbol(pair), data_keys, bar_count, '1T')
        currencies[pair] = current
        print(current.head())
        print(current.tail())

    print("got catalyst history data")
    # the context manager closes the file even if pickling fails
    with open(filename, 'wb') as df_f:
        pickle.dump(currencies, df_f)
    print("data frame is written")
    return None

def feature_normalize(filename: str):
    """Load a pickled dict of per-pair data from ``filename`` and run ``pair_aggregation`` on it.

    Args:
        filename: path of a pickle file holding a dict keyed by pair name.

    Returns:
        None; the aggregation result is currently discarded.
    """
    # SECURITY: pickle.load executes arbitrary code from the file - only open files this notebook wrote itself.
    with open(filename, 'rb') as df_f:
        currencies = pickle.load(df_f)
#    combined_curr = combine_catalyst_data(currencies)
    pair_aggregation(currencies)
    return None


# Features and Labels

In [None]:
import os
import pandas as pd
import pickle

# Aggregation widths in minutes; only referenced by the commented-out construction below in this view.
agg_minutes = [1, 2] # minutes = T
# Keys are the pandas frequency strings iterated by time_aggregation.
# NOTE(review): the meaning of the value 4 is not visible in this notebook - confirm.
time_aggregations = {'1T': 4, '2T': 4} # , '4T':4
# Module-level placeholder; add_asset_summary_labels rebinds it so calc_performance can read closes inside DataFrame.apply.
minute_data = pd.DataFrame() # required to use minute data in apply
#time_aggregations = dict(zip(zip([str(n) for n in agg_minutes] + ['T' for x in agg_minutes]), [4 for x in agg_minutes])) # , '4T':4
print(time_aggregations)
# Rolling window used for the volume median in time_aggregation.
vol_base_period = '1D'
# Accumulated loss below this value triggers a "sell" label.
sell_threshold = -2 # in per mille
# Accumulated gain above this value triggers a "buy" label.
buy_threshold = 10 # in per mille
# Subtracted twice per buy/sell pair when a performance is computed.
transaction_fee = 1 # in per mille, i.e. 0.1%
# Number of best-performing pairs requested in add_asset_summary_labels.
best_n = 10

def check_tag(tg, ltg):
    """Return True when ``tg`` and ``ltg`` have the same magnitude, regardless of sign."""
    magnitude_tg = abs(tg)
    magnitude_ltg = abs(ltg)
    return magnitude_tg == magnitude_ltg
    
def add_period_specific_labels(df: pd.DataFrame):
    """Add per-bar 'change' and 'label' columns to ``df`` in place.

    'change' holds the close-to-close change versus the previous bar in per mille.
    'label' starts as "-" everywhere. Two running sums are kept while walking the
    bars: ``loss`` (negative while a loss monitor is running) and ``win`` (positive
    while a win monitor is running). When ``loss`` falls below ``sell_threshold`` a
    "sell" is written at the bar tracked by ``lossix``; when ``win`` rises above
    ``buy_threshold`` a "buy" is written at the bar tracked by ``winix``. The same
    label is never written twice in a row.

    Args:
        df: candle frame with at least a 'close' column; modified in place.

    Returns:
        None. The 'close', 'label' and 'change' columns are printed.
    """

#            df.apply(lambda x: integrate_f(x['a'], x['b'], x['N']), axis=1)
#            s = pd.concat([ (pd.Series(vwap(df.iloc[i:i+window]), index=[df.index[i+window]])) for i in range(len(df)) ]);
#            a = df.iloc[1, df.columns.get_loc('Name')]
#            df1.iat[[1, 3, 5], [1, 3]]

    df['change'] = 0.
    df['label'] = lastlabel = "-"
    # positional column indices for fast scalar access through .iat
    pix = df.columns.get_loc('change') # performance column index
    lix = df.columns.get_loc('label')
    cix = df.columns.get_loc('close')
    win = loss = 0.
    for tix in range(1, len(df)) : # tix = time index
        last_close = df.iat[tix-1, cix]
        delta = (df.iat[tix, cix] - last_close) / last_close * 1000 #  in per mille: 1% == 10
        df.iat[tix, pix] = delta 
        if delta < 0 :
            if loss < 0 : # loss monitoring is running
                loss += delta
            else : # first time bar of decrease period
                lossix = tix
                loss = delta
            if loss < sell_threshold : # reset win monitor because dip exceeded threshold
                win = 0.
                if lastlabel != "sell" : # only one signal without further repeat
                    df.iat[lossix, lix] = lastlabel = "sell"
                    last_close = df.iat[lossix-1, cix]
                    df.iat[lossix, pix] = (df.iat[lossix, cix] - last_close) / last_close * 1000 # - transaction_fee
                # advance the candidate bar for the next sell signal of this loss period
                lossix += 1
            if win > 0 : # win monitoring is running
                win += delta
                if win < 0 : # reset win monitor because it is below start price
                    win = 0.
        elif delta > 0 :
            if win > 0 : # win monitoring is running
                win += delta
            else : # first time bar of increase period
                winix = tix
                win = delta
            if win > buy_threshold : # accumulated gain exceeded the buy threshold
                if lastlabel != "buy" : # only one signal without further repeat
                    df.iat[winix, lix] = lastlabel = "buy"
                    last_close = df.iat[winix-1, cix]
                    df.iat[winix, pix] = (df.iat[winix, cix] - last_close) / last_close * 1000 # - transaction_fee
                # advance the candidate bar for the next buy signal of this win period
                winix += 1
            if loss < 0 : # loss monitoring is running
                loss += delta
                if loss > 0 :
                    loss = 0. # reset loss monitor as it recovered before it triggered the sell threshold
        # bars with delta == 0 leave both monitors untouched

    print(df[['close', 'label', 'change']])

                
        

def derive_features(df: pd.DataFrame):
    """Add the candle-shape columns 'height', 'top' and 'bottom' to ``df`` in place.

    All three are expressed in per mille of the close price:
    'height' is the high-low range, 'top' is the distance from the high down to
    the larger of open/close, and 'bottom' is the distance from the smaller of
    open/close down to the low. Returns None.
    """
    open_, high, low, close = df['open'], df['high'], df['low'], df['close']
    rising = close > open_
    not_rising = close <= open_

    # price changes in 1/1000
    df['height'] = (high - low) / close * 1000
    df.loc[rising, 'top'] = (high - close) / close * 1000
    df.loc[not_rising, 'top'] = (high - open_) / close * 1000
    df.loc[rising, 'bottom'] = (open_ - low) / close * 1000
    df.loc[not_rising, 'bottom'] = (close - low) / close * 1000
    return None

def time_aggregation(minute_data: pd.DataFrame):
    """Build one feature/label frame per key of ``time_aggregations`` from minute candles.

    For the '1T' key the minute frame itself is used (and modified in place): a
    'volume_change' column is added holding the deviation of the volume from its
    rolling ``vol_base_period`` median, in percent. For every other key the
    minute candles are resampled (right-labelled, right-closed) into close/high/
    low/open and the mean 'volume_change'. Each frame is then passed through
    ``derive_features`` and ``add_period_specific_labels``.

    The non-'1T' branch reads ``minute_data.volume_change``, so '1T' has to be
    processed before any other key.

    Args:
        minute_data: minute candles of one currency pair.

    Returns:
        dict mapping each aggregation key to its frame, plus the key 'CPC' holding
        the result of ``add_asset_summary_labels``.
    """
    aggregations = dict()
    for time_agg in list(time_aggregations.keys()):
        print(time_agg)
        # compare by value; identity comparison against a string literal only works by interning accident
        if time_agg == '1T':
            df = minute_data
            vol_median = df.volume.rolling(vol_base_period).median()
            df['volume_change'] = (df['volume'] - vol_median) / vol_median * 100 # in %
        else :
            df = pd.DataFrame()
            df['close'] = minute_data.close.resample(time_agg, label='right', closed='right').last()
            df['high'] = minute_data.high.resample(time_agg, label='right', closed='right').max()
            df['low'] = minute_data.low.resample(time_agg, label='right', closed='right').min()
            df['open'] = minute_data.open.resample(time_agg, label='right', closed='right').first()
            df['volume_change'] = minute_data.volume_change.resample(time_agg, label='right', closed='right').mean()
        derive_features(df)
        add_period_specific_labels(df)
        aggregations[time_agg] = df
    aggregations['CPC'] = add_asset_summary_labels(aggregations)
    return aggregations

def next_pairs_level(pairs: pd.DataFrame, level):
    """Recursively combine buy/sell pairs into higher-level pairs of pairs.

    Every row of ``pairs`` with ``lvl < level`` is a candidate. A candidate
    ``fp`` is combined with each candidate ``sp`` whose buy timestamp lies after
    the sell timestamp of ``fp``, unless a candidate row with exactly these two
    children already exists. The combined row takes 'bts' from ``fp``, 'sts' from
    ``sp``, the sum of both 'perf' values, ``level`` as 'lvl' and the two index
    labels as 'child1'/'child2'. As long as a level produced new rows, the next
    level is built.

    Args:
        pairs: frame with at least the columns 'bts', 'sts', 'lvl' and 'perf'.
        level: level number assigned to rows created by this call.

    Returns:
        ``pairs`` unchanged when nothing could be combined, otherwise a new frame
        with the combined rows appended and the index renumbered from 0.
    """
    first = pairs.loc[pairs.lvl < level] # childs can be on different levels!
    if first.empty :
        return pairs

    # before the first combination there are no child columns, hence nothing can already exist
    has_children = ('child1' in first.columns) and ('child2' in first.columns)
    new_rows = []
    for fp in first.index :
        second = first.loc[(first.bts > first.at[fp, 'sts'])]
        for sp in second.index :
            if has_children and not first.loc[(first.child1 == fp) & (first.child2 == sp)].empty :
                continue # pair already exists
            new_rows.append({'bts': first.at[fp, 'bts'], 'sts': second.at[sp, 'sts'], 'lvl': level,
                             'perf': first.at[fp, 'perf'] + second.at[sp, 'perf'],
                             'child1': fp, 'child2': sp})

    if new_rows :
        # a single concat replaces the row-by-row DataFrame.append (quadratic, removed in pandas 2.0)
        pairs = pd.concat([pairs, pd.DataFrame(new_rows)], ignore_index=True)
        pairs = next_pairs_level(pairs, level + 1)
    return pairs
    
def calc_performance(row) :
    """Return the per-mille performance of one buy/sell pair, net of two transaction fees.

    ``row.bts`` and ``row.sts`` are looked up in the index of the module-level
    ``minute_data`` frame to obtain the buy and sell close prices.
    """
    sell_close = minute_data.at[row.sts, 'close']
    buy_close = minute_data.at[row.bts, 'close']
    gross = (sell_close - buy_close) / buy_close * 1000
    return gross - 2 * transaction_fee
                     
def mark_childs(pairs, bix) :
    """Set 'best' to True for row ``bix`` of ``pairs`` and, recursively, for all its children.

    Rows with ``lvl > 0`` reference their two children through the index labels
    stored in 'child1' and 'child2'. ``pairs`` is modified in place.

    Args:
        pairs: frame with the columns 'best', 'lvl', 'child1' and 'child2'.
        bix: index label of the row to mark.
    """
    pairs.at[bix, 'best'] = True
    if pairs.at[bix, 'lvl'] > 0 :
        for child_col in ('child1', 'child2') :
            # scalar lookup via .at; pairs[bix, col] would look for a column named (bix, col)
            child = pairs.at[bix, child_col]
            # child columns padded with NaN hold floats; turn whole-number floats back into int labels
            if isinstance(child, float) and child.is_integer() :
                child = int(child)
            mark_childs(pairs, child)


def add_asset_summary_labels(aggregations: dict):
    """Derive one combined buy/sell labelling on minute level from all time aggregations.

    Buy/sell signals of every aggregation are turned into (buy, sell) pairs, the
    pairs are combined into higher-level pairs by ``next_pairs_level``, the path
    with the largest 'perf' is selected and its level-0 pairs are written as
    'buy'/'sell' labels into a copy of the minute closes.

    Side effects: rebinds the module-level ``minute_data`` to ``aggregations['1T']``
    and prints intermediate frames.

    Args:
        aggregations: dict of aggregation key -> frame with 'close' and 'label'
            columns; must contain the key '1T'.

    Returns:
        DataFrame indexed like the minute data with the columns 'close' and
        'label' ('buy', 'sell' or '-').
    """

    global minute_data
    minute_data = aggregations['1T']
    # find the aggregation whose first index timestamp reports the largest frequency
    # NOTE(review): relies on Timestamp.freq being set - confirm for the pandas version in use
    max_period = '1T'
    for p in iter(aggregations.keys()) :
        if aggregations[max_period].index[0].freq < aggregations[p].index[0].freq :
            max_period = p
#    print('max period'+ aggregations[max_period].index[0].freq)
    start_ts = minute_data.index[0]
    # sell signals of the coarsest aggregation delimit the windows processed below
    sell_ixs = aggregations[max_period].loc[aggregations[max_period].label == 'sell']
#    pairs = pd.DataFrame(columns=['bts','sts','lvl','best','perf'])
    pairs = pd.DataFrame()
    for end_ts in sell_ixs.index :
        for p in iter(aggregations.keys()) :
            # buy signals inside [start_ts, end_ts)
            buy_sigs = aggregations[p].loc[(aggregations[p].label == 'buy') & (aggregations[p].index >= start_ts) & (aggregations[p].index < end_ts)]
#            print (buy_sigs)
            for bs in buy_sigs.index :
                # sell signals after the buy, up to and including end_ts
                sell_sigs = aggregations[p].loc[(aggregations[p].label == 'sell') & (aggregations[p].index <= end_ts) & (aggregations[p].index > bs)]
#                print (sell_sigs)
#                print ('^^^')
#                pairs
#                print (dict([('bts', [bs for x in sell_sigs.index]), ('sts', [sell_sigs.index]), ('lvl', [0 for x in sell_sigs.index])]))
                for s in sell_sigs.index :
#                    pairs = pairs.append(dict([('bts', [bs for x in sell_sigs.index]), ('sts', [s for s in sell_sigs.index]), ('lvl', [0 for x in sell_sigs.index])]), ignore_index=True)
                    pairs = pairs.append(dict([('bts', bs), ('sts', s), ('lvl', 0)]), ignore_index=True)
#        print('---')
#        print(minute_data[['close', 'label']])
        # performance is (re)computed for every row, including rows kept from earlier windows
        pairs['perf'] = pairs.apply(calc_performance, axis=1)

        # 1st level of pairs created 
        pairs['best'] = False
        pairs = next_pairs_level(pairs, 1) # recursively create all pair levels
#        print('-pairs created-')
#        print(pairs)
#        print(pairs.dtypes)
        
        # now select only those pairs that are part of the best n paths and continue to work with those
        # NOTE(review): max(best_n, len(pairs.index)) is never smaller than the row count, so every row is selected - was min intended?
        best_perf = pairs.nlargest(max(best_n, len(pairs.index)), 'perf')
        # the earliest sell timestamp among the selected rows becomes the start of the next window
        st = best_perf.nsmallest(1, 'sts')
        start_ts = st.at[st.index[0], 'sts']
#        print('-st-')
#        print(st)
#        print(start_ts)
        for bix in best_perf.index :
            mark_childs(pairs, bix)
#        print('-best_perf-')
#        print(pairs)
        pairs = pairs.loc[pairs.best] # reduce pairs to best_n path pairs
    # NOTE(review): if no pair was found, best_perf is empty and index[0] below fails - confirm inputs always yield pairs
    best_perf = pairs.nlargest(1, 'perf')
    pairs['best'] = False
    mark_childs(pairs, best_perf.index[0])
    print('-pairs marked-')
    print(pairs)
    pairs = pairs.loc[pairs.best & (pairs.lvl == 0)] # reduce pairs to best_n path pairs on level 0
    check_result_consistency(aggregations, pairs)
    print('-pairs reduced-')
    print(pairs)
    # result frame: minute closes plus a label column that defaults to '-'
    cpc_labels = pd.DataFrame(minute_data, columns=['close'])
    cpc_labels['label']= '-'
    for bix in pairs.index :
        # a timestamp may only receive one label; a second assignment is reported and skipped
        if cpc_labels.at[pairs.at[bix, 'bts'], 'label'] != '-' :
            print('error buy: inconsistency due to unexpected value instead of hold at timestamp')
            print(pairs.at[bix, 'bts'])
        else:
            cpc_labels.at[pairs.at[bix, 'bts'], 'label'] = 'buy'

        if cpc_labels.at[pairs.at[bix, 'sts'], 'label'] != '-' :
            print('error sell: inconsistency due to unexpected value instead of hold at timestamp')
            print(pairs.at[bix, 'sts'])
        else:
            cpc_labels.at[pairs.at[bix, 'sts'], 'label'] = 'sell'
    print(cpc_labels)
    return cpc_labels

def check_result_consistency(aggregations, pairs) :
    """Print diagnostics about the selected buy/sell pairs and the per-aggregation signals.

    Three groups of checks are run, all reporting through ``print``:

    1. Per pair (visited ordered by 'lvl', then 'bts'): buy before sell; level-0
       pairs must not overlap and must not reuse a buy or a sell timestamp;
       higher-level pairs must start at the buy of 'child1' and end at the sell
       of 'child2'.
    2. Per aggregation: walk the buy/sell signals in order, record the
       performance of the most recent completed buy->sell sequence and report
       signals that could not be matched at the start or the end.
    3. Report when an aggregation performance exceeds the best single pair
       performance.

    Args:
        aggregations: dict of aggregation key -> frame with 'close' and 'label'
            columns; must contain the key '1T'.
        pairs: frame with the columns 'bts', 'sts' and 'lvl' (plus 'child1' and
            'child2' for rows with ``lvl > 0``). Not modified.

    Returns:
        None.
    """
    # one performance slot per aggregation, starting at zero
    perf = dict.fromkeys(list(time_aggregations.keys()) + list(aggregations.keys()), 0.)
    # independent lists: a shared list would mix buy and sell timestamps
    buy_list = list()
    sell_list = list()
    best_perf = 0.
    tdf = aggregations['1T']
    # sort_values returns a new frame; rebinding keeps the caller's frame untouched
    pairs = pairs.sort_values(by=['lvl','bts'])
    laststs = None # sell timestamp of the previous level-0 pair
    for p in pairs.index :
        bts = pairs.at[p, 'bts']
        sts = pairs.at[p, 'sts']
        lvl = pairs.at[p, 'lvl']
        check_perf = (tdf.at[sts, 'close'] - tdf.at[bts, 'close']) / tdf.at[bts, 'close'] * 1000 - 2 * transaction_fee
        if check_perf > best_perf :
            best_perf = check_perf

        if bts >= sts :
            print("error: intra pair sequence incorrect")

        if lvl == 0 :
            if laststs is not None and bts <= laststs :
                print("error: level 0 pairs sequence between pairs incorrect")
            laststs = sts

            if bts in buy_list :
                print("error: double buy in pairs")
            else :
                buy_list.append(bts)
            if sts in sell_list :
                print("error: double sell in pairs")
            else :
                sell_list.append(sts)
        elif lvl > 0 :
            child1 = pairs.at[p, 'child1'] if 'child1' in pairs.columns else None
            child2 = pairs.at[p, 'child2'] if 'child2' in pairs.columns else None
            consistent = (child1 in pairs.index) and (child2 in pairs.index) \
                and (bts == pairs.at[child1, 'bts']) and (sts == pairs.at[child2, 'sts'])
            if not consistent :
                print("error: can't find consistent childs")
        else :
            print("error: unexpected level in pairs")
    for agg in iter(aggregations.keys()) :
        tdf = aggregations[agg]
        lastclose = 0. # close of the pending buy; 0. means no buy is pending
        sigs = tdf.loc[(tdf.label == 'buy') | (tdf.label == 'sell')]
        missed_sell_start = 0
        missed_buy_end = -1 # -1 until the first buy signal was seen
        for sig in sigs.index :
            if sigs.at[sig, 'label'] == 'buy' :
                if lastclose == 0. :
                    lastclose = sigs.at[sig, 'close']
                    missed_buy_end = 1
                else :
                    missed_buy_end += 1 # buy is following buy
            elif sigs.at[sig, 'label'] == 'sell' :
                if lastclose > 0. :
                    perf[agg] = (sigs.at[sig, 'close'] - lastclose) / lastclose * 1000 - 2 * transaction_fee
                    lastclose = 0.
                    missed_buy_end = 0
                else :
                    if missed_buy_end < 0 : # no buy signal yet seen
                        missed_sell_start += 1
                    else :
                        pass # sell is following sell
            else :
                print("error: unexpected signal - neither buy nor sell")
        if missed_buy_end > 0 :
            print('info: missed buy signals at the end')
        if missed_sell_start > 0 :
            print('info: missed sell signals at the start')

#        bsigs = tdf.loc[(tdf.label == 'buy')]
#        check = bsigs.loc[buy_list]
#        if (len(bsigs.index) - len(check.index)) > missed_buy_end :
#            print("error: missing buy signals in pairs")

#        ssigs = tdf.loc[(tdf.label == 'sell')]
#        check = ssigs.loc[sell_list]
#        if (len(ssigs.index) - len(check.index)) > missed_sell_start :
#            print("error: missing sell signals in pairs")

    print('performances')
    print(best_perf)
    for agg in iter(aggregations.keys()) :
        print(agg)
        print(perf[agg])
        if perf[agg] > best_perf :
            print('error: single time aggrgation performance exceeds global best performance')


def pair_aggregation(currencies):
    """Replace every minute frame in ``currencies`` by its dict of time aggregations.

    The dict is updated in place (each value becomes the result of
    ``time_aggregation``) and the same dict object is returned.
    """
    for pair, minute_frame in list(currencies.items()):
        # swap the 1T frame for all required time aggregations of this pair
        currencies[pair] = time_aggregation(minute_frame)
    return currencies


    
def test_features_labels():
    """Build artificial minute candles for exercising the feature and label creation.

    Returns a dict with the single key 'tst_usdt' holding 21 one-minute candles
    starting at 2018-12-28 01:10:00. The close starts at 100 and moves in steps
    of 0.22 (1.1 / 5); open, high and low are close - 1, close + 0.5 and close - 2.
    """
    df_len = 21
    cl_delta = 1.1 / 5
    cl = 100.
    candles = []
    for tf in range(df_len) :
        bar_close = cl
        bar_volume = 10.
        if tf <= 4 : # bars 0-4: close rises by one step per bar
            cl += cl_delta
        elif tf <= 5 : # bar 5: close falls by one step, volume is doubled
            cl -= cl_delta
            bar_volume = 20.
        elif tf <= 9 : # bars 6-9: close rises by one step per bar
            cl += cl_delta
        elif tf <= 13 : # bars 10-13: close falls by a quarter step per bar
            cl -= cl_delta / 4
        elif tf <= 30 : # remaining bars: close rises by one step per bar
            cl += cl_delta
        candles.append([bar_close - 1., bar_close + 0.5, bar_close - 2., bar_close, bar_volume])

    minute_index = pd.date_range('2018-12-28 01:10:00', periods=df_len, freq='T')
    df = pd.DataFrame(candles, index=minute_index, columns=['open', 'high', 'low', 'close', 'volume'])
    return {'tst_usdt': df}


# Run the feature/label pipeline on the artificial candles returned by test_features_labels().
aggregate_currencies = pair_aggregation(test_features_labels())


# Catalyst Frame


In [None]:
import pytz
import pandas as pd
from datetime import datetime

from catalyst.utils.run_algo import run_algorithm
from catalyst.protocol import BarData

def initialize(context):
    """Reset the ``handle_count`` counter on ``context`` to 0 and print "init"."""
    setattr(context, 'handle_count', 0)
    print("init")


def handle_data(context, data: BarData):
    """Dump the Catalyst history once, on the first call only.

    While ``context.handle_count`` is below 1, ``catalyst2picklepandas`` is
    called and the counter is incremented; later calls do nothing.
    """
    if not (context.handle_count < 1):
        return None

    catalyst2picklepandas(context, data)
#    feature_normalize(fn)
    context.handle_count += 1
    return None
        

def analyze(context=None, results=None):
    """Placeholder analysis hook; ignores both arguments and returns None."""
    return None

# Run the algorithm on binance minute data with usdt as quote currency.
# handle_data dumps the history on its first invocation only.
# NOTE(review): start and end are the same instant (2018-12-18 00:00 UTC) - confirm this is intended.
start = datetime(2018, 12, 18, 0, 0, 0, 0, pytz.utc)
# end = datetime(2018, 9, 24, 0, 0, 0, 0, pytz.utc)
end = datetime(2018, 12, 18, 0, 0, 0, 0, pytz.utc)
results = run_algorithm(initialize=initialize,
                        handle_data=handle_data,
                        analyze=analyze,
                        start=start,
                        end=end,
                        exchange_name='binance',
                        data_frequency='minute',
                        quote_currency ='usdt',
                        capital_base=10000 )

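# Unused (legacy drafts: running the next cell redefines add_asset_summary_labels and add_period_specific_labels)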

In [None]:
def add_asset_summary_labels(aggregations: dict):
    """Unused draft: collect buy/sell index pairs per aggregation while walking the minute index.

    For every minute timestamp, each aggregation that has a row at the same
    timestamp is inspected: a "buy" label opens a new result row, a "sell" label
    closes all open rows of that aggregation. Only closed rows are returned.

    Args:
        aggregations: dict of aggregation key -> frame with a 'label' column;
            must contain the key '1T'.

    Returns:
        DataFrame with the columns 'freqkey', 'buyix', 'sellix', 'performance'
        and 'status'.
    """

    df_1t = aggregations['1T']
    ix_1t = aggregations['1T'].index
    res_keys = ['freqkey', 'buyix', 'sellix', 'performance', 'status']
    res = pd.DataFrame(columns=res_keys)
    # NOTE(review): this seeds the result with a dummy row ('a', 'b', 'c', 50., True) - looks like leftover experimentation
    res = res.append(pd.DataFrame(dict(zip(res.columns, [['a'], ['b'], ['c'], [50.], [True]]))), ignore_index=True)

    # per aggregation: position of the next row not yet matched against the minute index
    index_set = dict(zip(aggregations.keys(), [0 for x in aggregations.keys()]))
    for ix1t in range(0, len(ix_1t)) : # tix = timestamp index of 1T index
        for tkey in aggregations.keys() :
            df = aggregations[tkey]
            if index_set[tkey] < len(df.index) :
                if ix_1t[ix1t] == df.index[index_set[tkey]] : # common timestamp
                    rdf = df.iloc[index_set[tkey]] # work with that row
                    if rdf.label == "buy" :
                        res = res.append(pd.DataFrame({'freqkey':[tkey], 'buyix':[index_set[tkey]], 'status':['open']}), ignore_index=True)
                    elif rdf.label == "sell" :
                        if not res.loc[(res.freqkey == tkey) & (res.status == 'open')].empty :
                            res.loc[(res.freqkey == tkey) & (res.status == 'open'), ['sellix', 'status']] = [index_set[tkey], 'close']
                    index_set[tkey]+= 1
    #now go through the list of pairs and remove all open orphans and create for closed pairs a pair of pair list that can be combined from an sequence perspective
    # NOTE(review): tkey is the last key of the loop above, so only that aggregation survives this filter - confirm intent
    res = res.loc[(res.freqkey == tkey) & (res.status == 'close')]
    res_buy = res.sort_values(by=['buyix'])
    res_sell = res.sort_values(by=['sellix'])
    # calculate performance only for possible pathes
    # NOTE(review): requires a 'performance' column in the 1T frame; the value is computed but not used yet
    perf = df_1t.iloc[ix1t]['performance'] # open: stepwise adapt performance or calculate at sell with roll up call?
    perf -= transaction_fee
    # open: add 'res' to aggregations                 
    return res


def combine_catalyst_data(currencies):
    """Unused: merge the 'xrp_usdt' and 'btc_usdt' frames into one frame with two-level columns.

    The columns of every frame in ``currencies`` are replaced in place by a
    MultiIndex with the levels 'currency' (the pair name) and 'candle' (the
    original column name). The two named frames are then outer-joined on their
    index; the result is printed and returned.
    """
    for pair in currencies:
        frame = currencies[pair]
        candle_keys = list(frame.keys())
        pair_keys = [pair] * len(candle_keys)
        # assigning the columns attribute is enough to obtain a multilevel column index
        frame.columns = pd.MultiIndex.from_arrays([pair_keys, candle_keys], names=['currency', 'candle'])

    xrp_frame = currencies['xrp_usdt']
    btc_frame = currencies['btc_usdt']
    combined_curr = xrp_frame.merge(btc_frame, how='outer', left_index=True, right_index=True)
    print(combined_curr)
    return combined_curr


def add_asset_summary_labels(aggregations: dict):
    """Unused draft: join the 'buy_tg', 'sell_tg' and 'max_profit' columns of all aggregations.

    The three columns of the '1T' frame form the base; every other aggregation
    is resampled to '1T' with backfill and left-joined on the index. Each column
    name gets the aggregation key as suffix. The intermediate result is printed
    once per aggregation key and finally returned.
    """
    label_cols = ['buy_tg', 'sell_tg', 'max_profit']

    def _suffixed(agg_key):
        # 'buy_tg' -> 'buy_tg_<key>' and so on
        return [col + '_' + agg_key for col in label_cols]

    time_aggs = list(aggregations.keys())
    time_agg = '1T'
    print(time_agg)
    labeldf = aggregations[time_agg][label_cols]
    labeldf.columns = _suffixed(time_agg)
    for time_agg in time_aggs:
        if time_agg != '1T':
            print(time_agg)
            df_extract = aggregations[time_agg][label_cols].resample('1T').bfill()
            df_extract.columns = _suffixed(time_agg)
            labeldf = labeldf.merge(df_extract, how='left', left_index=True, right_index=True)
        print(labeldf)
    return labeldf

def add_period_specific_labels(df: pd.DataFrame):
    """Unused draft: vectorised labelling based on time-shifted closes.

    Adds the columns 'sell_tg', 'buy_tg', 'max_profit', 'change' and 'label' to
    ``df`` in place and prints 'close', 'sell_tg', 'buy_tg' and 'max_profit'.
    'label' is "hold" by default, "sell" where ``sell_tg < 0`` and "buy" where
    ``buy_tg < 0`` (buy wins when both apply). Returns None.
    """

    df['sell_tg'] = df['dip_tg'] = df['buy_tg'] = 0
    df['loc_max'] = df['buy_max'] = df.close
    # NOTE(review): the range covers the gaps -1 .. -19, not the 4h mentioned in the original comment
    for ltg in range(-1, -(20), -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        # presumably tshift(ltg) aligns each row with the close |ltg| bars later - TODO confirm
        df['loss_check'] = (df.close.tshift(ltg) - df.loc_max) / df.loc_max * 1000 #  in per mille: 1% == 10
        df['max_profit'] = (df.close.tshift(ltg) - df.close) / df.close * 1000 #  in per mille: 1% == 10

        # calculate sell signals
        # sell_tg is written only while it is still 0, i.e. the first matching gap wins
        df.loc[(df.max_profit > 0) & (df.sell_tg == 0), 'sell_tg'] = -ltg # equals no loss from start 
        df.loc[(df.max_profit < sell_threshold) & (df.sell_tg == 0), 'sell_tg'] = ltg # equals < -0.2% loss from start 

        # calculate buy signals
        df.loc[(df.loss_check > 0) & (df.dip_tg == 0), 'loc_max'] = df.close.tshift(ltg) # note new high in other
        df.loc[(df.loss_check < sell_threshold) & (df.dip_tg == 0), 'dip_tg'] = ltg # equals < -0.2% loss from last high
        # as long as no dip was recorded, remember the gap and the close of the best profit above the buy threshold
        df.loc[(df.max_profit > buy_threshold) & (df.max_profit > ((df.buy_max - df.close) / df.close * 1000)) & (df.dip_tg == 0), 'buy_tg'] = ltg
        df.loc[(df.max_profit > buy_threshold) & (df.max_profit > ((df.buy_max - df.close) / df.close * 1000)) & (df.dip_tg == 0), 'buy_max'] = df.close.tshift(ltg)
    # keep max_profit only for rows with a buy signal, measured against the recorded buy_max
    df['max_profit'] = 0.
#    df.loc[(df.dip_tg < df.buy_tg) & (df.buy_tg != 0), 'dip_tg'] = 0 # if sell event happens later than buy event then remove sell signal
    df.loc[(df.buy_tg != 0), 'max_profit'] = ((df.buy_max - df.close) / df.close * 1000) # in per mille: 1% == 10

    # now cleanup    
    df.pop('loss_check')
    df.pop('loc_max')
    df.pop('buy_max')
    df.pop('dip_tg')
#    df.pop('max_profit') # max_profit is not needed anymore but handy to cross check results

    # absolute (not per-mille) difference to the time-shifted close
    df['change'] =  df.close - df.close.tshift(1) # performance without fees
    df['label'] = "hold"
    df.loc[(df.sell_tg < 0), 'label'] = "sell"
    df.loc[(df.buy_tg < 0), 'label'] = "buy"

    #        print(df[['close', 'sell_tg', 'buy_tg', 'loc_max', 'buy_max', 'max_profit']])
    print(df[['close', 'sell_tg', 'buy_tg', 'max_profit']])



# To be investigated

In [None]:
def add_target_labels(df):
    """Work in progress: tag each row of ``df`` with a time gap 'tg' and the matching 'change'.

    Adds the columns 'tg', 'change' and 'other' to ``df`` in place and returns
    None. The inline notes below record errors observed by the author that are
    still unresolved.
    """
    # df.tg = 0 means not yet checked; < 0 is negative tg of delta < 0; > 0 is tg with best improvement > 1%
    df['tg'] = ltg = 0
    df['change'] = 0.
    df['other'] = df.close
    delta = 0.
    # NOTE(review): the range covers the gaps -1 .. -4, not the 4h mentioned in the original comment
    for ltg in range(-1, -5, -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        loss_check = (df.close.tshift(ltg) - df.other) / df.other * 1000 # delta in per mille: 1% == 10
        delta = (df.close.tshift(ltg) - df.close) / df.close * 1000 # delta in per mille: 1% == 10
        df.loc[(loss_check > 0) & (df.tg == 0), 'other'] = df.close.tshift(ltg) # note new high in other
        # NOTE(review): the right-hand side mixes the scalar ltg with the Series delta in one list, which may be the cause of the reported error
        df.loc[(loss_check < -2) & (df.tg == 0), ['tg', 'change']] = [ltg, delta] # equals < -0.2% loss from last high
# reports error: ValueError: setting an array element with a sequence.
# code snippet shows it should work

        df.loc[(delta < 0) & (df.tg == 0), ['tg', 'change']] = [ltg, delta] # equals any loss from start
# doesn't work: df.loc[(delta > 1) & (df.close.tshift(ltg) > df.close.tshift(df.tg)) & (df.tg >= 0), 'tg'] = ltg
        # gains above 10 per mille are tagged with the positive gap -ltg
        df.loc[(delta > 10) & (df.tg == 0), ['tg', 'change']] = [-ltg, delta]

        df.loc[(df.tg == ltg) | (df.tg == -ltg), 'change'] = delta
# reports error: ValueError: Must have equal len keys and value when setting with an iterable
# although it works in a previous iteration with 1T

