In [1]:
%%bash 
# Ingest Binance exchange data at minute frequency for the two trading pairs
# this notebook works with (xrp_usdt and btc_usdt); one ingest call per pair.
catalyst ingest-exchange -x binance -i xrp_usdt -f minute
catalyst ingest-exchange -x binance -i btc_usdt -f minute


Trying to ingest exchange bundle binance...
Trying to ingest exchange bundle binance...


In [1]:
# Start a terminal from anaconda/environments/catalyst.
# In that terminal the catalyst environment should be active; this can be checked with: conda info --all
# JupyterLab doesn't work here because it doesn't switch to the catalyst environment,
# so start the classic notebook instead with: jupyter notebook
%load_ext catalyst
# (the %load_ext line above is required to enable the catalyst magic commands)


# Setup matplotlib to display graphs inline in this Notebook
%matplotlib inline


In [2]:
import os
import csv
import pytz
import pandas as pd
import pickle

from datetime import datetime

from catalyst.api import record, symbol, symbols
from catalyst.utils.run_algo import run_algorithm
from catalyst.protocol import BarData
# Trading pairs whose minute history is fetched from catalyst and pickled.
cur_cand = ['xrp_usdt', 'btc_usdt'] 
# Candle fields requested from data.history for every pair.
data_keys = ['open', 'high', 'low', 'close', 'volume'] # , 'price'
# Keys are the aggregation periods handled by time_aggregation: '1T' is the raw
# minute data, every other key is passed to resample() as the rule.
# NOTE(review): the values (4) are not read anywhere in the visible code - confirm their meaning.
time_aggregations = {'1T':4, '2T':4} # , '4T':4
# Rolling window over which the mean volume is taken for the 'volume_change' feature.
vol_base_period = '1D'

def initialize(context):
    """Catalyst start-up hook: announce the start and reset the bar counter.

    context.handle_count counts how many times handle_data has done its
    one-off work; it starts at 0 so the first bar triggers the data export.
    """
    print("init")
    context.handle_count = 0


def catalyst2picklepandas(context, data: BarData, filename: str):
    """Read catalyst minute history for all candidate pairs and pickle it.

    For every pair in cur_cand the fields listed in data_keys are requested
    from data.history at '1T' frequency, the head and tail of each frame are
    printed, and the resulting {pair: DataFrame} dict is pickled to filename
    for subsequent usage.

    context  -- catalyst algorithm context (not used here)
    data     -- catalyst BarData giving access to the price history
    filename -- path of the pickle file to (over)write

    Returns None.
    """
    bar_count = 239*24*60  # number of minute bars requested = 239 days of minutes
    currencies = dict()

    for pair in cur_cand:
        current = data.history(symbol(pair), data_keys, bar_count, '1T')
        currencies[pair] = current
        print(current.head())
        print(current.tail())

    print("got catalyst history data")
    # The context manager closes the file even if pickling fails half-way.
    with open(filename, 'wb') as df_f:
        pickle.dump(currencies, df_f)
    print("data frame is written")
    return None


def combine_catalyst_data(currencies):
    """unused: receive a dictionary of dataframes and return a single multiindex dataframe.

    currencies -- dict mapping a pair name to its candle DataFrame; all frames
                  are expected to share the same kind of index.

    Every frame gets a two-level column index ('currency', 'candle') and the
    frames are outer-merged on their index in dict iteration order, so any
    number of pairs is supported. The input frames are left untouched (the
    relabelling is done on copies).

    Returns the combined DataFrame, or None when currencies is empty.
    """
    combined_curr = None
    for pair, frame in currencies.items():
        datakeys = list(frame.keys())
        currkeys = [pair] * len(datakeys)

        # relabel a copy so the caller's frame keeps its flat column index
        labelled = frame.copy()
        labelled.columns = pd.MultiIndex.from_arrays([currkeys, datakeys],
                                                     names=['currency', 'candle'])
        if combined_curr is None:
            combined_curr = labelled
        else:
            combined_curr = combined_curr.merge(labelled, how='outer',
                                                left_index=True, right_index=True)
    print(combined_curr)
    return combined_curr

def add_target_labels(df):
    """target = achieved if improvement > 1% with intermediate Close loss not lower than start Close

    Adds the label columns 'tg', 'other' and 'change' to df in place and
    returns nothing. df needs a 'close' column and an index that tshift can
    shift (a time index with a frequency).

    For tg = -1, -2, -3, -4 the close shifted by tg periods is compared with
    the current close (delta, in per mille). A row that is still unlabelled
    (tg == 0) becomes
      * tg  (negative) as soon as a shifted close is below the current close,
      * -tg (positive) as soon as delta exceeds the threshold.
    Once a row is labelled it is not touched by later shifts, so rows whose
    delta stays within [0, threshold] for all four shifts keep tg == 0.

    NOTE(review): the threshold in the code is `delta > 1`, i.e. 1 per mille
    (0.1%), while the summary line above speaks of 1% (which would be 10) -
    confirm which one is intended.
    """
    # df.tg = 0 means not yet checked; < 0 is negative tg of delta < 0; > 0 is tg with best improvement > 1%
    df['tg'] = 0
    # NOTE(review): the loop covers 4 shifts of one index period each, not 4h of minutes - confirm intent
    for tg in range(-1, -5, -1) : # tg = time gap; max time gap 4h = 60*4 T(minutes)
        # relative change between the shifted close and the current close
        delta = (df.close.tshift(tg) - df.close) / df.close * 1000 # delta in per mille: 1% == 10
        # remember the shifted close for rows that are still unlabelled
        df.loc[(df.tg == 0), 'other'] = df.close.tshift(tg) # delta
        # unlabelled rows with a loss get the (negative) time gap
        df.loc[(delta < 0) & (df.tg == 0), 'tg'] = tg
# doesn't work: df.loc[(delta > 1) & (df.close.tshift(tg) > df.close.tshift(df.tg)) & (df.tg >= 0), 'tg'] = tg
        # unlabelled rows with a gain above the threshold get the positive time gap
        df.loc[(delta > 1) & (df.tg == 0), 'tg'] = -tg
        # store the delta for every row labelled with this time gap (either sign)
        df.loc[(df.tg == tg) | (df.tg == -tg), 'change'] = delta


def derive_features(df):
    """Add candle features relative to the close price to df (in place).

    df is a time aggregated dataframe with the columns 'open', 'high', 'low',
    'close' and 'close_prev'. All features are expressed in 1/1000 of 'close':
      delta  -- change from the previous close to this close
      height -- full candle range (high - low)
      top    -- distance from high down to the upper end of the candle body
      bottom -- distance from the lower end of the candle body down to low
    Returns None.
    """
    close = df['close']
    per_mille = 1000
    # candle body direction; both masks are False where close or open is NaN
    rising = close > df['open']
    not_rising = close <= df['open']

    df['delta'] = (close - df['close_prev']) / close * per_mille
    df['height'] = (df['high'] - df['low']) / close * per_mille

    # upper end of the body is close for rising candles, open otherwise
    df.loc[rising, 'top'] = (df['high'] - close) / close * per_mille
    df.loc[not_rising, 'top'] = (df['high'] - df['open']) / close * per_mille

    # lower end of the body is open for rising candles, close otherwise
    df.loc[rising, 'bottom'] = (df['open'] - df['low']) / close * per_mille
    df.loc[not_rising, 'bottom'] = (close - df['low']) / close * per_mille
    return None

def time_aggregation(minute_data):
    """in: dataframe of minute data of a currency pair;
       out: dict of dataframes of aggregations with features and targets

    One entry is produced per key of time_aggregations. The '1T' entry is
    minute_data itself, extended in place; every other key is used as the
    resample rule to build a new frame from the minute data. Each frame is
    passed through derive_features and add_target_labels, and its tail is
    printed.

    Note: minute_data is modified in place (helper, feature and label columns
    are added to it).
    """
    aggregations = dict()
    keys = list(time_aggregations.keys())
    print(keys)

    # Minute-level helper columns are needed by every aggregation, so they are
    # computed up front instead of relying on '1T' being the first key.
    minute_data['close_prev'] = minute_data.close.tshift(1) # surprisingly open != previous close
    minute_data['volume_change'] = (minute_data['volume']
                                    / minute_data.volume.rolling(vol_base_period).mean()
                                    * 100) # in %

    for time_agg in keys:
        # compare by value: `is` on a string literal only works by interning accident
        if time_agg == '1T':
            df = minute_data
        else:
            df = pd.DataFrame()
            df['close'] = minute_data.close.resample(time_agg).last()
            df['close_prev'] = minute_data.close_prev.resample(time_agg).first()
            df['high'] = minute_data.high.resample(time_agg).max()
            df['low'] = minute_data.low.resample(time_agg).min()
            df['open'] = minute_data.open.resample(time_agg).first()
            df['volume_change'] = minute_data.volume_change.resample(time_agg).mean()
        derive_features(df)
        add_target_labels(df)
        print(df.tail())
        aggregations[time_agg] = df
    return aggregations


def pair_aggregation(currencies):
    """Replace each pair's minute dataframe by its dict of time aggregations.

    currencies maps a pair name to its 1T dataframe. The dict is updated in
    place: afterwards every value is the result of time_aggregation for that
    pair. The same dict object is returned.
    """
    for pair, minute_frame in list(currencies.items()):
        # swap the 1T data for all required time aggregations
        currencies[pair] = time_aggregation(minute_frame)
    return currencies


    
def feature_normalize(filename: str):
    """Load the pickled pair data from filename and build all time aggregations.

    filename -- path of a pickle file holding a {pair: minute DataFrame} dict,
                as written by catalyst2picklepandas.

    The aggregations are computed by pair_aggregation (which prints its
    progress); nothing is returned yet.
    """
    # SECURITY: pickle.load can execute arbitrary code contained in the file -
    # only load files this notebook has written itself.
    with open(filename, 'rb') as df_f:
        currencies = pickle.load(df_f)
    pair_aggregation(currencies)
    return None

def handle_data(context, data: BarData):
    """Catalyst per-bar hook: export the price history once, on the first bar.

    context.handle_count acts as a run-once guard; on every later bar the
    function returns immediately. The history is pickled to 'df-test.pydata'
    in the current working directory.
    """
    if not (context.handle_count < 1):
        return None

    fn = os.getcwd() + '/df-test.pydata'
    catalyst2picklepandas(context, data, fn)
    # feature_normalize(fn) is the follow-up step; it is currently disabled

    context.handle_count += 1
    return None
        

def analyze(context=None, results=None):
    """Catalyst post-run hook; intentionally does nothing."""
    return None

# Backtest window: start and end are the same day, which is enough because
# handle_data only exports the history on its first invocation.
start = datetime(2018, 12, 18, tzinfo=pytz.utc)
# end = datetime(2018, 9, 24, 0, 0, 0, 0, pytz.utc)
end = datetime(2018, 12, 18, tzinfo=pytz.utc)

results = run_algorithm(
    initialize=initialize,
    handle_data=handle_data,
    analyze=analyze,
    start=start,
    end=end,
    exchange_name='binance',
    data_frequency='minute',
    quote_currency='usdt',
    capital_base=10000,
)

[2018-12-30 19:56:33.061244] INFO: run_algo: Catalyst version 0.5.21
[2018-12-30 19:56:36.064117] INFO: run_algo: running algo in backtest mode
[2018-12-30 19:56:36.765051] INFO: exchange_algorithm: initialized trading algorithm in backtest mode


init
                             close     high      low     open    volume
2018-05-05 00:00:00+00:00  0.88800  0.88980  0.88800  0.88980   4605.73
2018-05-05 00:01:00+00:00  0.88800  0.88894  0.88800  0.88894    244.65
2018-05-05 00:02:00+00:00  0.88890  0.88890  0.88800  0.88800    363.30
2018-05-05 00:03:00+00:00  0.88889  0.88890  0.88888  0.88890   4102.78
2018-05-05 00:04:00+00:00  0.89055  0.89055  0.88889  0.88889  30208.12
                             close     high      low     open   volume
2018-12-17 23:55:00+00:00  0.32712  0.32712  0.32658  0.32659  64859.7
2018-12-17 23:56:00+00:00  0.32712  0.32713  0.32679  0.32707  38881.4
2018-12-17 23:57:00+00:00  0.32714  0.32716  0.32697  0.32702  24553.6
2018-12-17 23:58:00+00:00  0.32769  0.32776  0.32688  0.32715  77926.8
2018-12-17 23:59:00+00:00  0.32721  0.32787  0.32719  0.32769  42909.8
                             close     high      low     open     volume
2018-04-23 00:00:00+00:00  8771.00  8786.99  8769.00  8785.70  2

[2018-12-30 19:56:40.071316] INFO: Performance: Simulated 1 trading days out of 1.
[2018-12-30 19:56:40.071993] INFO: Performance: first open: 2018-12-18 00:00:00+00:00
[2018-12-30 19:56:40.072648] INFO: Performance: last close: 2018-12-18 23:59:00+00:00
