In [1]:
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
import numpy as np
import backtrader as bt
import datetime  # For datetime objects
import os.path  # To manage paths
import sys  # To find out the script name (in argv[0])
import matplotlib.pylab as pylab
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.layers import LSTM
from keras.utils import np_utils
from backtrader.indicators import EMA
import backtrader.analyzers as btanalyzers
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import seaborn as sns
from collections import deque
from pandas.tseries.offsets import BDay
from IPython.display import display
from sklearn.ensemble import GradientBoostingClassifier
from sklearn import metrics
from sklearn.model_selection import cross_val_score

pylab.rcParams['figure.figsize'] = 40, 20  # that's default image size for this interactive session
pylab.rcParams['font.family'] = 'sans-serif'
pylab.rcParams['font.sans-serif'] = ['Bitstream Vera Sans']
pylab.rcParams['font.serif'] = ['Bitstream Vera Sans']
pylab.rcParams["font.size"] = "40"

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def compute_hurst_exponent(arr):
    num_lag = 18
    lag_arr = np.arange(2,2+num_lag)
    tau_arr = np.zeros(num_lag)
    #  Step through the different lags
    for i in range(num_lag):
        lag = lag_arr[i]
        #  produce price difference with lag
        arr_diff = np.diff(arr,lag)
        #  Calculate the variance of the differnce vector
        tau_arr[i] = np.sqrt(np.std(arr_diff))
    #  linear fit to double-log graph (gives power)
    m = np.polyfit(np.log10(lag_arr),np.log10(tau_arr),1)
    # calculate hurst
    hurst = m[0]*2
    return hurst

def compute_vol(arr):
    log_arr = np.log(arr)
    return_arr = np.diff(log_arr)
    vol = np.float(np.sqrt(np.cov(return_arr)))
    return vol

def compute_long_short_scale_vol_ratio(arr, long_scale ,short_scale=1):
    arr_long_scale = arr[::long_scale]
    arr_short_scale = arr[::short_scale]
    
    return compute_vol(arr_short_scale)/compute_vol(arr_long_scale)

def compute_autocorr(arr, lag):
    log_arr = np.log(arr)
    return_arr = np.diff(log_arr)
    mat_autocov = np.cov(return_arr[lag:],return_arr[:-lag])
    autocorr = mat_autocov[1,0]/np.sqrt(mat_autocov[0,0]*mat_autocov[1,1])
    return autocorr

def compute_sharpe(arr):
    ret = np.divide(np.diff(arr),arr[:-1])
    return(np.mean(ret)/np.std(ret))

In [6]:
def run_backtest(aStrategy,  startdate ='2017-01-01', fromdate = '2013-01-01', todate = '2014-01-01', duration = 30, label="s", plot = False):
    # Create a cerebro entity
    cerebro = bt.Cerebro()

    startdate = pd.to_datetime(startdate)
    enddate = startdate + BDay(duration)
    
    fromdate=startdate - BDay(100)
    todate=enddate + BDay(50)
    
    # Add a strategy
    cerebro.addstrategy(aStrategy, start_date = startdate, duration = duration, end_date=enddate)

    datapath = os.path.join('../../../datas/spx-2013-2018.txt')
    
    
    
    # Create a Data Feed
    
    data = bt.feeds.YahooFinanceCSVData(
        dataname=datapath,
        # Do not pass values before this date
        fromdate=pd.to_datetime(fromdate),
        # Do not pass values before this date
        todate=pd.to_datetime(todate),
        # Do not pass values after this date
        reverse=False)

    # Add the Data Feed to Cerebro
    cerebro.adddata(data)

    # Set our desired cash start
    cerebro.broker.setcash(100000.0)

    # Write output
    #cerebro.addwriter(bt.WriterFile, out='%s.csv'%label,csv=True)

    # Print out the starting conditions
    #print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
    start_value = cerebro.broker.getvalue()
    # Run over everything
    cerebro.run()
    
    if plot == True:
        cerebro.plot()

    # Print out the final result
    #print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
  
    
    return cerebro.broker.getvalue() - start_value

In [7]:
class MomentumStrategy(bt.Strategy):
    params = (('start_date','2010-01-01'),
              ('end_date','2018-01-01'),
             ('duration', 30),
             ('macd1', 12),
             ('macd2', 26),
             ('macd3', 9),
             ('ema1',8),
             ('ema2',28))
    
  
    def log(self, txt, dt=None):
        ''' Logging function fot this strategy'''
        dt = dt or self.datas[0].datetime.date(0)
        #print('%s, %s' % (dt.isoformat(), txt))
    
    def __init__(self):
        # Keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].high
        self.datadate = self.datas[0].datetime.date(0)
        self.order = None
        self.ema_short =  bt.indicators.EMA(self.datas[0], period = self.params.ema1)
        self.ema_long =  bt.indicators.EMA(self.datas[0], period = self.params.ema2)
        self.crossover = bt.indicators.CrossOver(self.ema_short, self.ema_long)
        self.diff = self.ema_short - self.ema_long
        
        self.long_q = 0
        self.short_q = 0
        self.long_p = 0
        self.short_p = 0
        
    
    def start(self):
        self.order = None
        
    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return
       
        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Write down: no pending order
        self.order = None
        
    def next(self):

        cur_date = pd.to_datetime(self.datetime.date(ago=0))
        if pd.to_datetime(self.params.start_date) <=  cur_date and pd.to_datetime(self.params.end_date) > cur_date:
            
            # Always long or short $10000 securities
            size = 10000./self.dataclose[0]

            # Simply log the closing price of the series from the reference
            self.log('Close, %.2f ; Position %f' % (self.dataclose[0],self.position.size))

            # Check if an order is pending ... if yes, we cannot send a 2nd one
            if self.order:
                return

            # Check if we are in the market
            if not self.position or (self.long_p<=2 and self.short_p <= 2):
                #Open position

                # BUY if:
                # short line cross long line from below 
                if  self.crossover[0] == 1 :
                    # BUY, BUY, BUY!!! (with all possible default parameters)
                    self.log('BUY CREATE, %.2f' % self.dataclose[0])
                    #print (self.histo.get(ago=-1, size=3))
                    #print (self.histo.get(ago=0, size=3))
                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.buy(size=size)
                    self.long_q += size
                    self.long_p += 1

                # short tend to cross long
                if self.diff.get(ago=-1, size=10) > self.diff.get(ago=-2, size=10)  :
                    # BUY, BUY, BUY!!! (with all possible default parameters)
                    self.log('BUY CREATE, %.2f' % self.dataclose[0])
                    #print (self.histo.get(ago=-1, size=3))
                    #print (self.histo.get(ago=0, size=3))
                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.buy(size=size)
                    self.long_q += size
                    self.long_p += 1
                # SELL if:
                 # short line cross long line from above
                if  self.crossover[0] == -1 :

                    # SELL, SELL, SELL!!! (with all possible default parameters)
                    self.log('SELL CREATE, %.2f' % self.dataclose[0])
                    #print (self.histo.get(ago=-1, size=3))
                    #print (self.histo.get(ago=0, size=3))
                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.sell(size=size)
                    self.short_q += size
                    self.short_p += 1
               # short tend to diverge from long
                if self.diff.get(ago=-1, size=10) < self.diff.get(ago=-2, size=10):
                    # BUY, BUY, BUY!!! (with all possible default parameters)
                    self.log('SELL CREATE, %.2f' % self.dataclose[0])
                    #print (self.histo.get(ago=-1, size=3))
                    #print (self.histo.get(ago=0, size=3))
                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.sell(size=size)
                    self.short_q += size
                    self.short_p += 1

            #Exit position
            elif self.position.size>0 :
                #Exit the long position
                if len(self) > self.bar_executed+40 or np.all(np.abs(self.diff.get(ago=-1, size=10)) < np.abs(self.diff.get(ago=-2, size=10))):
                    # SELL, SELL, SELL!!! (with all possible default parameters)
                    self.log('[Exit]SELL CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.sell(size= self.long_q)
                    self.long_q = 0
                    self.long_p = 0
            elif self.position.size<0:
                #Exit the short position
                if len(self) > self.bar_executed+40 or np.all(np.abs(self.diff.get(ago=-1, size=10)) < np.abs(self.diff.get(ago=-2, size=10))) :

                    self.log('[Exit]BUY CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.buy(size = self.short_q)
                    self.short_q = 0
                    self.short_p = 0
        if  pd.to_datetime(self.params.end_date) == cur_date:
            if self.position.size != 0:
                self.order = self.close()


In [8]:
# test case
pnl = run_backtest(MomentumStrategy, startdate = '2013-05-10', duration = 30, label='mo')     

In [9]:
# Create a Stratey
class MeanReversionStrategy(bt.Strategy):
    params = (
        ('start_date','2010-01-01'),
        ('end_date','2018-01-01'),
        ('duration', 30),
        ('period_BB', 15),
        ('devfactor', 1.2)
    )

    def log(self, txt, dt=None):
        ''' Logging function fot this strategy'''
        dt = dt or self.datas[0].datetime.date(0)
        #print('%s, %s' % (dt.isoformat(), txt))

    def __init__(self):
        # Keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].close
        
        
        # Add a MovingAverageSimple indicator
        self.midband = ma = bt.indicators.SimpleMovingAverage(self.datas[0], period=self.params.period_BB)
        stddev = self.params.devfactor * bt.indicators.StdDev(self.datas[0], ma, period=self.params.period_BB,
                                           movav=bt.indicators.SimpleMovingAverage)
        self.topband = ma + stddev
        self.botband = ma - stddev
        
        # To keep track of pending orders
        self.order = None
        
    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Write down: no pending order
        self.order = None


    def next(self):
        cur_date = pd.to_datetime(self.datetime.date(ago=0))

        if pd.to_datetime(self.params.start_date) <=  cur_date and pd.to_datetime(self.params.end_date) > cur_date:
            
            # Always long or short $10000 securities
            size = 10000./self.dataclose[0]

            # Simply log the closing price of the series from the reference
            self.log('Close, %.2f ; Position %f' % (self.dataclose[0],self.position.size))

            # Check if an order is pending ... if yes, we cannot send a 2nd one
            if self.order:
                return

            # Check if we are in the market
            if not self.position:
                #Open position
                if self.dataclose[0] <= self.botband[0]:

                    # BUY, BUY, BUY!!! (with all possible default parameters)
                    self.log('{mr}BUY CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.buy(size=size)

                if self.dataclose[0] >= self.topband[0]:
                    # SELL, SELL, SELL!!! (with all possible default parameters)
                    self.log('{mr}SELL CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.sell(size=size)

            #Exit position
            elif self.position.size>0:
                #Reverse the long position
                if self.dataclose[0] >= self.topband[0]:
                    # SELL, SELL, SELL!!! (with all possible default parameters)
                    self.log('{mr}[Reverse]SELL CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.close()
                    self.order = self.sell(size=size)

                #Exit the long position
                elif self.dataclose[0] > self.midband[0]:
                    # SELL, SELL, SELL!!! (with all possible default parameters)
                    self.log('{mr}[Exit]SELL CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.close()

            elif self.position.size<0:
                if self.dataclose[0] <= self.botband[0]:

                    # BUY, BUY, BUY!!! (with all possible default parameters)
                    self.log('{mr}[Reverse]BUY CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.close()
                    self.order = self.buy(size=size)


                #Exit the short position
                elif self.dataclose[0] < self.midband[0]:

                    self.log('{mr}[Exit]BUY CREATE, %.2f' % self.dataclose[0])

                    # Keep track of the created order to avoid a 2nd order
                    self.order = self.close()
        if  pd.to_datetime(self.params.end_date) == cur_date:
            if self.position.size != 0:
                self.order = self.close() 
 

In [10]:
# test cas
pnl = run_backtest(MeanReversionStrategy, startdate = '2013-05-05', duration = 10, label='mr')     

training while testing
get data from training, and get label from runing the function
features:
1.Close.pct_change

2.Open.pct_change

3.HURST EXPO

4.SHARPE

5.volume

6.ema_diff

7.MACD

8.MACD_LINE

9.MACD_HIST

before the start of test_date:
    for each date:
        record indicators and generate label
at the start of test_date:
    train the model with indicators
    apply the model to each day and generate decision
    
1: mean reversion
0: momentum

In [11]:
class combined_train_test(bt.Strategy):
    params = (
        ('start_date','2010-01-01'),
        ('end_date','2010-01-01'),
        ('collecting_data_date','2018-01-01'),
        ('duration', 30),
        ('period_BB', 15),
        ('devfactor', 1.2),
        ('macd1', 12),
        ('macd2', 26),
        ('macd3', 9),
        ('ema1',8),
        ('ema2',28)
    )
    
    
    def compute_hurst_exponent(arr):
        num_lag = 18
        lag_arr = np.arange(2,2+num_lag)
        tau_arr = np.zeros(num_lag)
                #  Step through the different lags
        for i in range(num_lag):
            lag = lag_arr[i]
                    #  produce price difference with lag
            arr_diff = np.diff(arr,lag)
                    #  Calculate the variance of the differnce vector
            tau_arr[i] = np.sqrt(np.std(arr_diff))
                #  linear fit to double-log graph (gives power)
        m = np.polyfit(np.log10(lag_arr),np.log10(tau_arr),1)
                # calculate hurst
        hurst = m[0]*2
        return hurst
            
    def compute_sharpe(arr):
        ret = np.divide(np.diff(arr),arr[:-1])
        return(np.mean(ret)/np.std(ret))
    
    def log(self, txt, dt=None):
        ''' Logging function fot this strategy'''
        dt = dt or self.datas[0].datetime.date(0)
        #print('%s, %s' % (dt.isoformat(), txt))
        
    def __init__(self):
        # Keep a reference to the "close" line in the data[0] dataseries
        self.dataclose = self.datas[0].close
        self.dataopen = self.datas[0].open
        self.datahigh = self.datas[0].high
        self.datalow = self.datas[0].low
        self.datavolume = self.datas[0].volume
        
        # Add a LSTM
        self.model = Sequential()
        
        self.model.add(LSTM(32, return_sequences=True, activation='sigmoid',
               input_shape=(6, 11)))  # returns a sequence of vectors of dimension 32
        self.model.add(Dropout(0.2))
        self.model.add(LSTM(32, return_sequences=True, activation='sigmoid'))  # returns a sequence of vectors of dimension 32
        self.model.add(Dropout(0.2))
        self.model.add(LSTM(32))  # return a single vector of dimension 32
        self.model.add(Dense(1, activation='sigmoid'))

        self.model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

        self.estimator = GradientBoostingClassifier(n_estimators=500)
        self.scaler = None
        
        # Add a MovingAverageSimple indicator
        self.midband = ma = bt.indicators.SimpleMovingAverage(self.datas[0], period=self.params.period_BB)
        stddev = self.params.devfactor * bt.indicators.StdDev(self.datas[0], ma, period=self.params.period_BB,
                                           movav=bt.indicators.SimpleMovingAverage)
        self.topband = ma + stddev
        self.botband = ma - stddev
        
        
        # Add Momentum indicators
        self.ema_short =  bt.indicators.EMA(self.datas[0], period = self.params.ema1)
        self.ema_long =  bt.indicators.EMA(self.datas[0], period = self.params.ema2)
        self.macd = bt.indicators.MACD(self.data,
                                       period_me1=self.params.macd1,
                                       period_me2=self.params.macd2,
                                       period_signal=self.params.macd3)
        
        self.histo = self.macd.macd - self.macd.signal
        self.crossover = bt.indicators.CrossOver(self.ema_short, self.ema_long)
        self.diff = self.ema_short - self.ema_long
        
        self.long_q = 0
        self.short_q = 0
        self.long_p = 0
        self.short_p = 0
        
        # containers:
        self.date = []
        self.close = []
        self.open = []
        self.high = []
        self.low = []
        self.volume = []
        self.ema1 = []
        self.ema2 = []
        self.macd_line = []
        self.macd_signal = []
        self.macd_hist = []
        self.pnl = []
        
        # define building model_day
        self.model_building_day = pd.to_datetime(self.params.collecting_data_date)+BDay(10)
        self.trade_day = pd.to_datetime(self.params.collecting_data_date)+BDay(30)
        # To keep track of pending orders
        self.total_pnl = 0
        self.mo_value = 0
        self.mr_value = 0
        self.order = None 
    
    def notify_order(self, order):
        if order.status in [order.Submitted, order.Accepted]:
            # Buy/Sell order submitted/accepted to/by broker - Nothing to do
            return

        # Check if an order has been completed
        # Attention: broker could reject order if not enough cash
        if order.status in [order.Completed]:
            if order.isbuy():
                self.log('BUY EXECUTED, %.2f' % order.executed.price)
            elif order.issell():
                self.log('SELL EXECUTED, %.2f' % order.executed.price)

            self.bar_executed = len(self)

        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log('Order Canceled/Margin/Rejected')

        # Write down: no pending order
        self.order = None

    def next(self):
     
        cur_date = pd.to_datetime(self.datetime.date(ago=0))
        if cur_date <= pd.to_datetime(self.params.collecting_data_date):
            
            pnl_mr = run_backtest(MeanReversionStrategy, startdate = cur_date, duration = 30, label='mr') 
            pnl_mo = run_backtest(MomentumStrategy, startdate = cur_date, duration = 30, label='mo')
            if pnl_mr > pnl_mo:
                self.pnl.append(int(1))
            if pnl_mr <= pnl_mo:
                self.pnl.append(int(0))
            self.date.append(cur_date)
            self.close.append(self.dataclose[0])
            self.open.append(self.dataopen[0])
            self.high.append(self.datahigh[0]) 
            self.low.append(self.datalow[0]) 
            self.volume.append(int(self.datavolume[0])) 
            self.ema1.append(self.ema_short[0]) 
            self.ema2.append(self.ema_long[0]) 
            self.macd_line.append(self.macd.macd[0])
            self.macd_signal.append(self.macd.signal[0])
            self.macd_hist.append(self.histo[0])
       
        if cur_date == pd.to_datetime(self.model_building_day):
            # build the framework and start training
            df_date = np.array(self.date)
            df_close = np.array(self.close)
            df_open = np.array(self.open)
            df_high = np.array(self.high)
            df_low = np.array(self.low)
            df_vol = np.array(self.volume)
            df_ema_diff = np.array(self.ema1) - np.array(self.ema2)
            df_macd = np.array(self.macd_line)
            df_signal = np.array(self.macd_signal)
            df_hist = np.array(self.macd_hist)
            df_label = np.array(self.pnl)
            df = pd.DataFrame({'date':df_date, 'close':df_close, 'open':df_open, 'high':df_high,
                            'low':df_low, 'volumn':df_vol, 'ema_diff':df_ema_diff, 'macd':df_macd, 
                            'macd_signal':df_signal, 'macd_hist':df_hist,'label':df_label, }, 
                              
                           columns= ['date','close','open','high','low','volumn','ema_diff','macd',
                                     'macd_signal','macd_hist','label',],
                             )
    
            # define hurst exponent and Sharpe ratio
            
            
            window_indicators = 30
            Indicators = pd.DataFrame(columns=["hurst_exponent",
                                               "sharpe_ratio",
                                                "Volume_pct",
                                                'Open_pct',
                                                'Close_pct',
                                                'High_pct',
                                                'Low_pct',
                                                'ema_diff',
                                                'macd',
                                                'macd_signal',
                                                'macd_hist',
                                                'label'])
            Indicators.loc[:,"hurst_exponent"] =df['close'].rolling(window=window_indicators).apply(compute_hurst_exponent)
            Indicators.loc[:,"sharpe_ratio"] = df['close'].rolling(window=window_indicators).apply(compute_sharpe)
            Indicators.loc[:,"Volume_pct"] = df['volumn'].pct_change()
            Indicators.loc[:,"Low_pct"] = df['low'].pct_change()
            Indicators.loc[:,"High_pct"] = df['high'].pct_change()
            Indicators.loc[:,"Close_pct"] = df['close'].pct_change()
            Indicators.loc[:,"Open_pct"] = df['open'].pct_change()
            Indicators.loc[:,"ema_diff"] = df['ema_diff']
            Indicators.loc[:,"macd"] = df['macd']
            Indicators.loc[:,"macd_signal"] = df['macd_signal']
            Indicators.loc[:,"macd_hist"] = df['macd_hist']
            Indicators.loc[:,'label'] = df['label']
            print (len(Indicators))
            # skip first 35
            offset = 35
            length = 6
            # prepare the data
            features = []
            labels = []
            for i in range(offset, len(Indicators)):
                feature =Indicators[["hurst_exponent",
                                               "sharpe_ratio",
                                                "Volume_pct",
                                                'Open_pct',
                                                'Close_pct',
                                                'High_pct',
                                                'Low_pct',
                                                'ema_diff',
                                                'macd',
                                                'macd_signal',
                                                'macd_hist']][i-length:i].values
                features.append(feature)
                label = Indicators['label'][i]
                labels.append(label)   
   
            features = np.array(features)
            labels= (np.array(labels))
            

            train_len = 200
            test_len = 50
            train_x = np.array(features[:train_len])
            train_y = np.array(labels[:train_len])
            test_x = np.array(features[train_len+30:train_len+30+test_len])
            test_y = np.array(labels[train_len+30:train_len+30+test_len])
            

            def scale(train, test):
                # fit scaler
                a,b,c = train.shape
                train = train.reshape(a ,b * c)
                scaler = MinMaxScaler(feature_range=(-1, 1))
                scaler = scaler.fit(train)
                # transform train
                #train = train.reshape(train.shape[0], train.shape[1])
                train_scaled = scaler.transform(train)
                #train_scaled = train_scaled.reshape(a,b,c)
           
                e,f,g = test.shape
                test = test.reshape(e ,f * g)
                test_scaled = scaler.transform(test)
               # test_scaled = test_scaled.reshape(e,f,g)
         
                return scaler, train_scaled, test_scaled
       
 
            self.scaler, train_x, test_x= scale(train_x, test_x)
            #self.model.fit(train_x, train_y,  batch_size=1, epochs=50)
            self.estimator.fit(train_x, train_y)
            prediction_boost = self.estimator.predict(test_x)
            print('resuls for SGB')
            print('true_label',test_y)
            print('predicted_label', prediction_boost )
            print (metrics.accuracy_score(prediction_boost, test_y))
        # start trading
        if cur_date >= pd.to_datetime(self.trade_day):
            # collect the data first
            close_data = self.dataclose.get(ago=0, size=40)
            open_data = self.dataopen.get(ago=0, size=40)
            high_data = self.datahigh.get(ago=0, size=40)
            low_data = self.datalow.get(ago=0, size=40)
            vol_data = self.datavolume.get(ago=0, size=40)
            ema1_data = self.ema_short.get(ago=0, size=40)
            ema2_data = self.ema_long.get(ago=0, size=40)
            ema_diff_data = np.array(ema1_data) - np.array(ema2_data)
            macd_data = self.macd.macd.get(ago=0, size=40)
            signal_data = self.macd.signal.get(ago=0, size=40)
            hist_data = self.histo.get(ago=0, size=40)
          
            df = pd.DataFrame({ 'close':close_data, 'open':open_data, 'high':high_data,
                            'low':low_data, 'volumn':vol_data, 'ema_diff':ema_diff_data, 'macd':macd_data, 
                            'macd_signal':signal_data, 'macd_hist':hist_data }, 
                              
                           columns= ['close','open','high','low','volumn','ema_diff','macd',
                                     'macd_signal','macd_hist'],
                             )
            
            
            window_indicators = 30
            Indicators = pd.DataFrame(columns=["hurst_exponent",
                                               "sharpe_ratio",
                                                "Volume_pct",
                                                'Open_pct',
                                                'Close_pct',
                                                'High_pct',
                                                'Low_pct',
                                                'ema_diff',
                                                'macd',
                                                'macd_signal',
                                                'macd_hist',
                                                ])
            Indicators.loc[:,"hurst_exponent"] =df['close'].rolling(window=window_indicators).apply(compute_hurst_exponent)
            Indicators.loc[:,"sharpe_ratio"] = df['close'].rolling(window=window_indicators).apply(compute_sharpe)
            Indicators.loc[:,"Volume_pct"] = df['volumn'].pct_change()
            Indicators.loc[:,"Low_pct"] = df['low'].pct_change()
            Indicators.loc[:,"High_pct"] = df['high'].pct_change()
            Indicators.loc[:,"Close_pct"] = df['close'].pct_change()
            Indicators.loc[:,"Open_pct"] = df['open'].pct_change()
            Indicators.loc[:,"ema_diff"] = df['ema_diff']
            Indicators.loc[:,"macd"] = df['macd']
            Indicators.loc[:,"macd_signal"] = df['macd_signal']
            Indicators.loc[:,"macd_hist"] = df['macd_hist']
            features = Indicators.iloc[-6:].values
            b,c = features.shape
            features = features.reshape(1 , b * c)
            features = self.scaler.transform(features)
            regime = self.estimator.predict(features)
            
            #print(regime,'0 is mo, 1 is mr')
            #print('mr makes:', run_backtest(MeanReversionStrategy, startdate = cur_date, duration = 30, label='mr'))
            #print('mo makes:', run_backtest(MomentumStrategy, startdate = cur_date, duration = 30, label='mo'))
            
            # now executing strategy
            # momentum
            self.mo_value += run_backtest(MomentumStrategy, startdate = cur_date, duration = 30, label='mo')
            self.mr_value += run_backtest(MeanReversionStrategy, startdate = cur_date, duration = 30, label='mr') 
            
            if regime == 0:
                self.total_pnl += run_backtest(MomentumStrategy, startdate = cur_date, duration = 30, label='mo') 
                
            if regime == 1:
                self.total_pnl += run_backtest(MeanReversionStrategy, startdate = cur_date, duration = 30, label='mr')
            print(self.trade_day)
            print(self.mo_value)
            print(self.mr_value)
            print(self.total_pnl)
            print('################################')
cerebro = bt.Cerebro()

    
    
cerebro.addstrategy(combined_train_test, collecting_data_date = '2017-06-06')

datapath = os.path.join('../../../datas/spx-2013-2018.txt')
    
    
    
    # Create a Data Feed
    
data = bt.feeds.YahooFinanceCSVData(
        dataname=datapath,
        # Do not pass values before this date
        fromdate=pd.to_datetime('2016-01-01'),
        # Do not pass values before this date
        todate=pd.to_datetime('2018-01-01'),
        # Do not pass values after this date
        reverse=False)

    # Add the Data Feed to Cerebro
cerebro.adddata(data)

    # Set our desired cash start
cerebro.broker.setcash(100000.0)

    # Write output
    #cerebro.addwriter(bt.WriterFile, out='%s.csv'%label,csv=True)

    # Print out the starting conditions
#print('Starting Portfolio Value: %.2f' % cerebro.broker.getvalue())
start_value = cerebro.broker.getvalue()
    # Run over everything
cerebro.run()
    


    # Print out the final result
#print('Final Portfolio Value: %.2f' % cerebro.broker.getvalue())
  
    
    


326
resuls for SGB
true_label [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 0 0 0 0 0 0 0 0 0 0 0]
predicted_label [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 0 0 0 0 0 1 0 1 0 1 1]
0.88
2017-07-18 00:00:00
144.2469261676015
14.370904284529388
14.370904284529388
################################
2017-07-18 00:00:00
157.4305841071182
144.15226426057052
144.15226426057052
################################
2017-07-18 00:00:00
127.84852247260278
240.9495886956429
240.9495886956429
################################
2017-07-18 00:00:00
97.95228505415434
372.98277872522885
372.98277872522885
################################
2017-07-18 00:00:00
964.4870184176398
84.04261389475141
84.04261389475141
################################
2017-07-18 00:00:00
883.0021499754075
274.41788612050004
274.41788612050004
################################
2017-07-18 00:00:00
787.6437741947302
441.059482473589
179.05951033982274
##################

[<__main__.combined_train_test at 0x7fcadf6a1ac8>]