In [1]:
import backtrader as bt
from backtrader import Indicator
import pyfolio as pf
import alphalens
from alphalens import performance as perf
from alphalens import plotting, utils
import mlfinlab as ml
import ta
import pandas as pd
import numpy as np
import itertools
from datetime import datetime
import arrow
import pandas_datareader.data as web
import yfinance as yf
import pickle
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.calibration import CalibratedClassifierCV

from utilities import (
    getIndicator, 
    getForwardReturns, getCleanFactor, addGroupingFactor,
    plotFactorQuantileBars, plotCumulativeReturns, plotQuantileCumulativeReturns,
    InformationTable)

from factor_library import trend, volume, volatility, momentum

  ' to position notionals.'


In [2]:
# alphalens.utils.get_clean_factor()

In [37]:
# Load the cleaned per-symbol price frames (dict: symbol -> DataFrame).
# NOTE: pickle executes arbitrary code on load — only point this at a file you produced yourself.
with open('/home/ubuntu/projects/trading/data/Prices_clean.pkl', 'rb') as prices_file:
    prices = pickle.load(prices_file)

In [38]:
# Trading universe: JSE tickers (Yahoo-style '.JO' suffix), order preserved.
symbols = [
    'DTC.JO', 'MSM.JO', 'SPG.JO', 'NPN.JO', 'PPC.JO',
    'SNT.JO', 'NFTRCI.JO', 'ABSP.JO', 'MTN.JO', 'LEW.JO',
    'HDC.JO', 'TKG.JO', 'BVT.JO', 'TWR.JO', 'SNH.JO',
    'EMI.JO', 'GNDP.JO', 'EQU.JO', 'DSY.JO', 'RBP.JO',
    'NEWUSD.JO', 'PGR.JO', 'COM.JO', 'CML.JO', 'MRF.JO',
    'ZED.JO', 'RMI.JO', 'GLD.JO', 'MRP.JO', 'NEWGBP.JO',
    'MCZ.JO', 'BAW.JO', 'EPS.JO', 'DRD.JO', 'MUR.JO',
    'PAN.JO', 'EXX.JO', 'TSG.JO', 'VOD.JO',
]

In [39]:
# Training window; both bounds are used for label-based slicing with .loc.
start = '2013-01-01'
end = '2017-12-31'

# Keep only the symbols in the universe, each cut down to the training window.
train_prices = {}
for symbol, data in prices.items():
    if symbol not in symbols:
        continue
    data = data.loc[start:end]
    train_prices[symbol] = data

In [40]:
# Spot-check the sliced frame of a single symbol (rendered as this cell's output).
train_prices['BVT.JO']

Unnamed: 0_level_0,high,low,open,close,volume,adj_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-01-01,5838.069824,5838.069824,5838.069824,5838.069824,0.0,3561.535400
2013-01-02,6015.790039,5878.979980,5933.160156,6014.169922,1673542.0,3668.965332
2013-01-03,6074.580078,5947.250000,5998.990234,6074.580078,2305048.0,3705.819336
2013-01-04,6065.370117,5990.319824,5990.319824,6014.439941,2284499.0,3669.130371
2013-01-07,6083.520020,5943.729980,6083.520020,5957.000000,2594465.0,3634.089355
...,...,...,...,...,...,...
2017-12-25,21300.000000,21300.000000,21300.000000,21300.000000,0.0,19721.218750
2017-12-26,21300.000000,21300.000000,21300.000000,21300.000000,0.0,19721.218750
2017-12-27,21737.000000,21293.000000,21293.000000,21399.000000,325953.0,19812.882812
2017-12-28,21523.000000,21251.000000,21401.000000,21470.000000,356574.0,19878.619141


In [41]:
def getLabels(datas,
              lookback = 5,
              num_days_ahead = 1,
              pt_sl = [1, 1],
              min_ret = 0.005,
              at_open = True):
    """Flag CUSUM events and attach triple-barrier labels for every symbol.

    Parameters
    ----------
    datas : dict
        Maps symbol -> price DataFrame. Each frame needs 'close' and 'open'
        columns and an index named 'date' (the index is reset and then looked
        up under that column name).
    lookback : int
        Forwarded to ``ml.util.get_daily_vol``. Symbols with ``lookback`` rows
        or fewer are skipped.
    num_days_ahead : int
        ``num_days`` for ``ml.labeling.add_vertical_barrier``.
    pt_sl : sequence of two numbers
        Profit-taking / stop-loss multipliers forwarded to
        ``ml.labeling.get_events``. A copy is passed on, so the shared default
        list is never exposed to library code.
    min_ret : float
        Forwarded to ``ml.labeling.get_events``.
    at_open : bool
        Forwarded, together with the open series, to ``ml.labeling.get_bins``.

    Returns
    -------
    dict
        symbol -> DataFrame holding the input columns plus 'event',
        'vertical_barriers', 'profit_taking', 'stop_loss', 'holding_period',
        'expected_return' and 'label'; rows whose label equals 0 are removed.
        Symbols that are too short, whose daily volatility has a single
        distinct value, or that raise during processing are absent.

    Notes
    -----
    Any exception raised while processing one symbol is printed and that
    symbol is skipped, so one bad series does not abort the whole batch.
    """
    output = {}
    for symbol, data in datas.items():

        try:
            close = data['close']
            # Named open_prices so the builtin open() is not shadowed.
            open_prices = data['open']

            # Not enough history to estimate volatility.
            if len(close) <= lookback:
                continue

            daily_vol = ml.util.get_daily_vol(
                            close,
                            lookback = lookback)

            # A volatility series with a single distinct value is not usable as a threshold.
            if daily_vol.nunique() == 1:
                continue

            cusum_events = ml.filters.cusum_filter(
                                close,
                                threshold = daily_vol)

            # Mark the bars on which the CUSUM filter fired.
            data = data.reset_index()
            data['event'] = np.where(data['date'].isin(cusum_events), 1, 0)
            data.set_index('date', inplace = True)

            vertical_barriers = ml.labeling.add_vertical_barrier(
                                    t_events = cusum_events,
                                    close = close,
                                    num_days = num_days_ahead)

            data['vertical_barriers'] = vertical_barriers

            triple_barrier_events = ml.labeling.get_events(
                            close = close,
                            t_events = cusum_events,
                            pt_sl = list(pt_sl),
                            target = daily_vol,
                            min_ret = min_ret,
                            vertical_barrier_times = vertical_barriers,
                            num_threads = 1,
                            verbose = False)

            # 'trgt' is dropped here because the labels frame joined below
            # brings its own 'trgt' column.
            data = pd.concat(
                [data, triple_barrier_events],
                axis = 1).drop('trgt', axis = 1)

            labels = ml.labeling.get_bins(
                triple_barrier_events, close, open = open_prices, at_open = at_open)

            data = pd.concat([data, labels], axis = 1)
            data.index.name = 'date'

            # Barrier widths in return space: multiplier times target.
            data['profit_taking'] = data['pt'].mul(data['trgt'])
            data['stop_loss'] = data['sl'].mul(data['trgt']).mul(-1)

            data.reset_index(inplace = True)

            # Calendar days between the event date and the first barrier touch ('t1').
            data['holding_period'] = pd.to_datetime(data['t1'])\
                .sub(pd.to_datetime(data['date']))\
                .map(lambda x: x.days)
            data.drop(['trgt', 'pt', 'sl'], axis = 1, inplace = True)
            data.set_index('date', inplace = True)

            data.rename(columns =
                        {'ret':'expected_return',
                         'bin':'label'
                        }, inplace = True)
            output[symbol] = data[data['label'] != 0]

        except Exception as e:
            # Best effort: report the failure and continue with the next symbol.
            print(f"{symbol}: {e}")
    return output

In [42]:
def getTarget(cusumTrainPrices):
    """Stack the event labels of all symbols into one (date, asset) panel.

    Parameters
    ----------
    cusumTrainPrices : dict
        Maps symbol -> DataFrame indexed by 'date' with at least the columns
        'event' and 'label' (the shape produced by ``getLabels``).

    Returns
    -------
    pandas.DataFrame
        A single 'label' column indexed by a ('date', 'asset') MultiIndex and
        sorted by date. Only rows with ``event == 1`` are kept. An empty
        frame with the same column and index names is returned when there is
        nothing to stack.
    """
    frames = []
    for symbol, data in cusumTrainPrices.items():
        # .loc with a column list returns a new frame and .assign returns a
        # copy, so no write ever lands on a slice of the caller's data.
        events = data.loc[data['event'] == 1, ['label']]
        frames.append(
            events.assign(asset = symbol)
                  .reset_index()
                  .set_index(['date', 'asset']))

    if not frames:
        return pd.DataFrame(
            columns = ['label'],
            index = pd.MultiIndex.from_arrays([[], []], names = ['date', 'asset']))

    # One concat at the end instead of growing the frame inside the loop.
    return pd.concat(frames).sort_index(level = 'date')

In [43]:
def modelFit(clf, X, y):
    """Fit ``clf`` on (X, y) and return it wrapped in a prefit probability calibrator.

    The classifier is trained on private copies of ``X`` and ``y``; the
    calibrator (``CalibratedClassifierCV`` with ``cv = 'prefit'``) is then
    fitted on those same copies.

    NOTE(review): calibration uses the same samples the classifier was
    trained on — confirm this is intended rather than a held-out split.
    """
    features, labels = X.copy(), y.copy()

    clf.fit(features, labels)

    calibrated = CalibratedClassifierCV(clf, cv = 'prefit')
    calibrated.fit(features, labels)
    return calibrated

In [44]:
def allIndicators(cusumTrainPrices, trailing_volume_window = 20):
    """Compute the six model indicators and return them side by side.

    Every indicator is produced by ``getIndicator`` over ``cusumTrainPrices``
    with ``trailing_volume_n = trailing_volume_window``. Only the 'dcwband'
    column is taken from the Donchian-channel result; the other results are
    concatenated whole. The final frame is relabelled to the fixed column
    names 'macd', 'volume_sma', 'obv', 'dochian_channel', 'williams_r', 'mfi',
    so each of those other results must contribute exactly one column.
    """
    def compute(indicator, **settings):
        # Indicator-specific settings first, then the shared trailing window.
        return getIndicator(
            cusumTrainPrices, indicator,
            **settings, trailing_volume_n = trailing_volume_window)

    macd = compute(trend.macdDiff, n_slow = 6, n_fast = 4, n_sign = 3)
    volume_osc = compute(volume.getVolumeOsc, periods = 10)
    obv = compute(volume.obv)
    donchian = compute(volatility.dc, n = 20)
    williams = compute(momentum.williamsR, lbp = 50)
    mfi = compute(volume.mfi, n = 5)

    pieces = [macd, volume_osc, obv, donchian[['dcwband']], williams, mfi]
    train = pd.concat(pieces, axis = 1)
    train.columns = ['macd', 'volume_sma', 'obv', 'dochian_channel', 'williams_r', 'mfi']
    return train

In [45]:
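# class testStrategy2(bt.Strategy):   # header commented out: the indented body below cannot run as-is (IndentationError); a class with this name is defined in the following cell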
    
    # Strategy parameters. Within this cell only `vol_p` is read (as the SMA
    # period for the trailing traded-value filter in __init__). `min_trail_vol`
    # appears only in a commented-out filter inside next(), and `long`, `short`
    # and `macd_roll` are not referenced at all.
    params = (
        ('long', 26),
        ('short', 12),
        ('macd_roll', 9),
        ('vol_p', 200),
        ('min_trail_vol', 2500000 * 16.5) #
    )
    
    def log(self, txt, dt = None):
        """Print ``txt`` prefixed with an ISO-formatted date.

        ``dt`` is used when supplied and truthy; otherwise the date of the
        current bar on ``self.data`` is used.
        """
        stamp = dt if dt else self.data.datetime.date(0)
        print(f"{stamp.isoformat()}, {txt}")
        
    def __init__(self):
        """Initialise bookkeeping fields and build the per-symbol filters and factor.

        For every data feed name this creates:
        * ``self.trailing_volume[symbol]`` — SMA (period ``self.p.vol_p``) of close * volume,
        * ``self.prices[symbol]`` / ``self.date[symbol]`` — line references read in next(),
        * ``self.factor[symbol]`` — position of close within 20-period, 2-std bands, negated.
        """
        
        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None
        self.symbol = None
        self.factor = None
        self.current_position = None
        # Running CUSUM accumulators used by next().
        # NOTE(review): these are single scalars, yet next() updates them inside
        # its per-symbol loop, so all symbols share one pair of accumulators.
        self.s_pos = 0
        self.s_neg = 0
        
        
        ### FILTERS
        # volume filter
        self.trailing_volume = {}
        for symbol in self.getdatanames():
            # The self.* temporaries are overwritten on every iteration; only
            # the per-symbol dict entry is kept.
            self.dataclose = self.getdatabyname(symbol).close
            self.datavolume = self.getdatabyname(symbol).volume
            self.d_volume = self.dataclose * self.datavolume
            self.vol = bt.indicators.SMA(self.d_volume, period = self.p.vol_p)
            
            self.trailing_volume[symbol] = self.vol
            
        # event filter
        self.dailyVolatility = {}
        self.prices = {}
        self.date = {}
        for symbol in self.getdatanames():
            self.dataclose = self.getdatabyname(symbol).close
            
            self.prices[symbol] = self.dataclose 
            
            # NOTE(review): this stores self.data.datetime (not the symbol's own
            # feed) for every symbol, so all entries reference the same line —
            # confirm this is intended.
            self.date[symbol] = self.data.datetime
        
        
        ### FACTORS
        # Bollinger bands
        self.devfactor = 2
        self.factor = {}
        for symbol in self.getdatanames():
            self.dataclose = self.getdatabyname(symbol).close

            self.midband = bt.indicators.SimpleMovingAverage(self.dataclose, period = 20)
            self.topband = self.midband + self.devfactor * bt.indicators.StdDev(self.dataclose, period = 20)
            self.botband = self.midband - self.devfactor * bt.indicators.StdDev(self.dataclose, period = 20)
            
            # (close - lower band) / band width, sign-flipped; the 0.00001 keeps
            # the denominator away from zero when the bands collapse.
            self.factor[symbol] = (self.dataclose - self.botband) / ((self.topband - self.botband) + 0.00001) * -1
            
    def notify_order(self, order):
        """Log order completions and failures, then clear the pending-order slot.

        Completed orders are logged as BUY or SELL and the bar count at
        execution is stored in ``self.bar_executed``. Cancelled, Margin and
        Rejected orders get a single log line. Every notification — including
        Submitted and Accepted, which log nothing — resets ``self.order`` to None.
        """
        status = order.status

        if status == order.Completed:
            if order.isbuy():
                self.log(f"BUY EXECUTED ({self.symbol}): {order.executed.price}, {order.executed.value}, {order.executed.comm}")
            elif order.issell():
                self.log(f"SELL EXECUTED ({self.symbol}): {order.executed.price}")
            self.bar_executed = len(self)
        elif status in (order.Cancelled, order.Margin, order.Rejected):
            self.log(f"({self.symbol}) Order Cancelled, Margin, Rejected")

        # Write down: no pending order
        self.order = None
        
    def prenext(self):
        """Delegate to next() so bars are processed before all minimum periods are met."""
        self.next()
        
    def next(self):
        """Per-bar logic: collect per-symbol signals, rank the factor and rebalance.

        Active only once ``len(self) >= 200``. For every feed with factor data
        it records the bar date, the latest factor value, the trailing
        traded-value SMA, a volatility threshold (last value of
        ``ml.util.get_daily_vol`` over a 150-bar close window), the last log
        return and a CUSUM-style event flag. The collected values are printed;
        when more than three symbols are present the factor is split into
        terciles labelled -1 / 0 / 1, weights are the demeaned factor divided
        by the sum of absolute demeaned values over the +/-1 names, and orders
        are issued according to the current position and the tercile.
        """
        
        if len(self) >= 200: # and (len(self) % 10) == 0 
        
            # Per-symbol collectors for this bar.
            volume = {}
            volatility = {}
            
            log_returns = {}
            eventTracker = {}
            dates = {}
            
            factor_values = {}
            
            for symbol in self.getdatanames():

                data_length = len(self.factor[symbol])
                if data_length != 0:

                    date = self.date[symbol].date(0)
                    dates[symbol] = date

                    factor_value = self.factor[symbol][0]
                    factor_values[symbol] = factor_value

                    trail_vol = self.trailing_volume[symbol][0]
                    volume[symbol] = trail_vol

                    # -------------------------------------
                    # Rebuild a dated close series from the last 150 bars.
                    raw_prices = self.prices[symbol].get(size = 150)
                    
                    date_index = pd.Series(np.array(self.date[symbol].get(size = 150)))
                    date_index = [bt.utils.date.num2date(date) for date in date_index]
                    
                    # Skip the symbol when the price window and the date window differ in length.
                    if len(raw_prices) == len(date_index):
                    
                        prices = pd.Series(np.array(raw_prices), index = date_index)
                    
                        daily_vol = ml.util.get_daily_vol(
                                        prices, 
                                        lookback = 5)
                        threshold = daily_vol.iloc[-1]
                    
                        volatility[symbol] = threshold
                    
                        # log returns
                        raw_time_series = pd.DataFrame(prices)  # Convert to DataFrame
                        raw_time_series.columns = ['price']
                        log_ret = raw_time_series.price.apply(np.log).diff().iloc[-1]
                        log_returns[symbol] = log_ret
                        # -------------------------------------

                        # CUSUM update: accumulate the return into the positive and
                        # negative running sums, each clamped at zero.
                        # NOTE(review): self.s_pos / self.s_neg are scalars shared by
                        # every symbol in this loop, so one symbol's return feeds the
                        # next symbol's accumulator — confirm whether per-symbol state
                        # was intended.
                        pos = float(self.s_pos + log_ret)
                        neg = float(self.s_neg + log_ret)
                        self.s_pos = max(0.0, pos)
                        self.s_neg = min(0.0, neg)

                        # An event fires, and the accumulator resets, when either sum
                        # crosses the volatility threshold.
                        if self.s_neg < -threshold:
                            self.s_neg = 0
                            event = True

                        elif self.s_pos > threshold:
                            self.s_pos = 0
                            event = True

                        else:
                            event = False

                        eventTracker[symbol] = event
                
            # One row per symbol, one column per collected quantity.
            out = pd.concat([
                        pd.DataFrame.from_dict(
                            dates, orient = 'index', columns = ['date']),
                        pd.DataFrame.from_dict(
                            factor_values, orient = 'index', columns = ['factor']),
                        pd.DataFrame.from_dict(
                            volume, orient = 'index', columns = ['volume']),
                        pd.DataFrame.from_dict(
                            volatility, orient = 'index', columns = ['threshold']),
                        pd.DataFrame.from_dict(
                            log_returns, orient = 'index', columns = ['log_returns']),
                        pd.DataFrame.from_dict(
                            eventTracker, orient = 'index', columns = ['event'])
                
                        ],axis = 1)
            
            # Both filters below are disabled, so every collected symbol is ranked.
            # volume filter
#             out = out[out['volume'] >= self.p.min_trail_vol]
            
#             # event based filter
#             out = out[out['event'] == True]
            
            self.log(f"OUT")
            print(f"{out}")
            
            if len(out) > 3:
                # Terciles of the factor: -1 (lowest), 0 (middle), 1 (highest).
                positions = pd.qcut(out['factor'], q = 3, labels = [-1, 0, 1]) #, duplicates = 'drop'

                factor_values = pd.DataFrame.from_dict(factor_values, orient = 'index', columns = ['factor'])
                factor_values['side'] = positions
                factor_values.dropna(inplace = True)

                # Only the outer terciles receive a non-zero weight.
                factor_values = factor_values[factor_values['side'].isin([1, -1])]

                # Demeaned factor scaled by the sum of absolute demeaned values.
                factor_total = factor_values['factor'].sub(factor_values['factor'].mean()).abs().sum()
                demeaned_factor_values = factor_values['factor'].sub(factor_values['factor'].mean()).to_dict()

                factor_weights = {}
                for asset, value in demeaned_factor_values.items():
                    factor_weights[asset] = value / factor_total

                # Ranked names outside the outer terciles get an explicit zero weight.
                for asset in positions.dropna().index:
                    if asset not in factor_weights.keys():
                        factor_weights[asset] = 0.0        

                positions = dict(positions.dropna())
                self.log(f"{positions}")
                self.log(f"{pd.DataFrame.from_dict(factor_weights, orient = 'index', columns = ['weight'])}\n")


                for symbol, position in positions.items():
                    self.symbol = symbol
                    self.current_position = self.getpositionbyname(self.symbol).size
                    self.log(f"----{self.symbol}----------------------------------------------------------------------------------")
                    self.log(f"- CURRENT POSITION - {self.current_position}")
                    self.log(f"- Recom: {position}")

                    portfolio_value = self.broker.getvalue()
                    cash = self.broker.getcash()
                    self.log(f"Current Portfolio value: {portfolio_value} Cash {cash}\n")

                    # --- currently long ---
                    if self.current_position > 0:

                        # BUY/
                        if positions[self.symbol] == 1:
                            order_percent = factor_weights[self.symbol]
                            self.log(f"REBALANCE POSITION: Order_percent - {order_percent} Portfolio value - {order_percent * portfolio_value} \n")
                            self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)
                        # SHORT/
                        elif positions[self.symbol] == -1:
                            order_percent = factor_weights[self.symbol]
                            # first close the long position
                            self.order = self.close(data = self.getdatabyname(self.symbol))
                            self.log(f"CLOSE Long Position \n")
                            # then enter the short position
                            self.log(f"SELL-short CREATE: Order_percent - {order_percent} Portfolio value - {order_percent * portfolio_value} \n")
                            self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)
                        # DO NOTHING
                        elif positions[self.symbol] == 0:

                            # close just the long position
                            self.order = self.close(data = self.getdatabyname(self.symbol))
                            self.log(f"CLOSE Long Position \n")

                    # --- currently short ---
                    elif self.current_position < 0:

                        # BUY/
                        if positions[self.symbol] == 1:
                            order_percent = factor_weights[self.symbol]

                            # first close the short position
                            self.order = self.close(data = self.getdatabyname(self.symbol))
                            self.log(f"CLOSE Short Position \n")
                            # then enter the long position
                            self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)
                            # NOTE(review): this branch enters a long, but the message says "SELL-short CREATE".
                            self.log(f"SELL-short CREATE: Order_percent - {order_percent} Portfolio value - {order_percent * portfolio_value} \n")

                        elif positions[self.symbol] == -1:
                            order_percent = factor_weights[self.symbol]
                            self.log(f"REBALANCE POSITION: Order_percent - {order_percent} Portfolio value - {order_percent * portfolio_value} \n")
                            self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)

                        elif positions[self.symbol] == 0:

                            # close just the short position
                            self.order = self.close(data = self.getdatabyname(self.symbol))
                            self.log(f"CLOSE Short Position \n")


                    # --- currently flat ---
                    elif self.current_position == 0:

                        # BUY/
                        if positions[self.symbol] == 1:
                            order_percent = factor_weights[self.symbol]
                            self.log(f"BUY-long CREATE: Order_percent - {order_percent} Portfolio value - {order_percent * portfolio_value} \n")
                            self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)

                        # SHORT/
                        elif positions[self.symbol] == -1:
                            order_percent = factor_weights[self.symbol]
                            self.log(f"SELL-short CREATE: Order_percent - {order_percent} Portfolio value - {order_percent * portfolio_value} \n")
                            self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)

                        # DO NOTHING
                        elif positions[self.symbol] == 0:
                            self.log(f"DO NOTHING \n")

                    self.symbol = None
                print(' ')


In [191]:
class testStrategy2(bt.Strategy):
    """Long/short strategy driven by a monthly re-fitted, calibrated logistic regression.

    On every bar after the first 252, ``next``:

    1. rebuilds a trailing OHLCV frame for each data feed,
    2. labels CUSUM events (``getLabels`` / ``getTarget``) and computes the
       indicator panel (``allIndicators``),
    3. re-fits the model whenever the month of the primary feed's bar date
       differs from the previous bar's month (which includes the first
       active bar),
    4. if the current date has indicator rows, scores every symbol, keeps
       the lowest and highest score deciles and rebalances them to demeaned
       weights (which sum to zero); every other feed is targeted to 0 %.
    """

    # Quantile buckets per indicator. One mapping serves both training
    # (getCleanFactor) and scoring (pd.qcut) so the two cannot drift apart.
    _FACTOR_QUANTILES = {
        'macd': 10,
        'volume_sma': 4,
        'obv': 10,
        'dochian_channel': 3,
        'williams_r': 5,
        'mfi': 2,
    }

    def log(self, txt, dt = None):
        """Print ``txt`` prefixed with ``dt`` (default: current bar date of ``self.data``)."""
        dt = dt or self.data.datetime.date(0)
        print(f"{dt.isoformat()}, {txt}")

    def __init__(self):
        """Initialise bookkeeping state and the per-symbol line references."""

        # To keep track of pending orders and buy price/commission
        self.order = None
        self.buyprice = None
        self.buycomm = None
        self.symbol = None
        self.factor = None
        self.current_position = None
        self.training_set = None
        self.model = None
        self.inds = None
        self.i = 0          # extra bars added to the trailing window (currently never incremented)
        self.s_pos = {}
        self.s_neg = {}
        self.previous_month = None
        self.init_probs = {}

        self.dailyVolatility = {}
        self.prices = {}
        self.date = {}
        self.datafiles = {}

        for symbol in self.getdatanames():
            self.dataclose = self.getdatabyname(symbol).close
            self.prices[symbol] = self.dataclose

            # NOTE(review): the datetime line of self.data is stored for every
            # symbol; _collect_history relies on a length check to discard
            # feeds whose history does not line up with it.
            self.date[symbol] = self.data.datetime

            self.datafiles[symbol] = self.getdatabyname(symbol)

            # Per-symbol event state.
            self.s_pos[symbol] = 0.0
            self.s_neg[symbol] = 0.0
            self.init_probs[symbol] = 0.0

    def notify_order(self, order):
        """Log completed / failed orders and clear the pending-order slot.

        The symbol is taken from the order's own data feed, because
        ``self.symbol`` is reset to None as soon as ``next`` has issued its
        orders and would be stale by the time notifications arrive.
        """
        symbol = getattr(order.data, '_name', self.symbol)

        if order.status in [order.Completed]:
            if order.isbuy():
                self.log(f"BUY EXECUTED ({symbol}): {order.executed.price}, {order.executed.value}, {order.executed.comm}")
            elif order.issell():
                self.log(f"SELL EXECUTED ({symbol}): {order.executed.price}, {order.executed.value}, {order.executed.comm}")
            self.bar_executed = len(self)
        elif order.status in [order.Cancelled, order.Margin, order.Rejected]:
            self.log(f"({symbol}) Order Cancelled, Margin, Rejected")

        # Write down: no pending order (Submitted / Accepted land here too).
        self.order = None

    def prenext(self):
        """Run the same logic while some feeds have not reached their minimum period."""
        self.next()

    def _collect_history(self, trail_length):
        """Return {symbol: OHLCV DataFrame} covering the trailing window.

        Feeds with no data yet, or whose window length differs from the date
        window, are left out.
        """
        size = trail_length + self.i
        trainPrices = {}

        for symbol in self.getdatanames():
            if len(self.prices[symbol]) == 0:
                continue

            date_index = [
                bt.utils.date.num2date(stamp)
                for stamp in self.date[symbol].get(size = size)]

            feed = self.getdatabyname(symbol)
            raw_open = feed.open.get(size = size)
            raw_high = feed.high.get(size = size)
            raw_low = feed.low.get(size = size)
            raw_close = feed.close.get(size = size)
            raw_volume = feed.volume.get(size = size)

            if len(raw_open) != len(date_index):
                continue

            out = pd.DataFrame(
                {
                    'open': np.array(raw_open),
                    'high': np.array(raw_high),
                    'low': np.array(raw_low),
                    'close': np.array(raw_close),
                    'volume': np.array(raw_volume),
                },
                index = date_index)
            out.index.name = 'date'

            trainPrices[symbol] = out

        return trainPrices

    def _fit_model(self, forward_returns, target):
        """Quantise the indicators in ``self.inds`` and (re)fit ``self.model``."""
        columns = list(self._FACTOR_QUANTILES)

        train = pd.concat(
            [
                getCleanFactor(
                    self.inds[[col]], forward_returns, quantiles = quants)['factor_quantile']
                for col, quants in self._FACTOR_QUANTILES.items()
            ],
            axis = 1)
        train.columns = columns

        training_set = pd.concat([train, target], axis = 1)
        training_set = training_set.dropna(
            subset = training_set.drop('label', axis = 1).columns)
        training_set.index.set_names(['date', 'asset'], inplace = True)

        # Keep only the most recent 252 dates of the panel.
        self.training_set = training_set.unstack().iloc[-252:].stack()

        train_X = self.training_set.loc[:, columns]
        train_y = self.training_set['label']

        self.model = modelFit(LogisticRegression(), train_X, train_y)

    def _score_universe(self, date):
        """Return a frame with a 'factor' score and its decile for each symbol on ``date``.

        The score is the negated calibrated probability of class 1.
        """
        rules = pd.DataFrame()
        for col, quants in self._FACTOR_QUANTILES.items():
            rules[col] = pd.qcut(
                self.inds.loc[date, col],
                q = quants,
                labels = list(range(1, quants + 1)),
                duplicates = 'drop')

        probabilities = {}
        for symbol in rules.index:
            inputt = pd.DataFrame(rules.loc[symbol]).transpose()
            predicted_probs = self.model.predict_proba(inputt)

            prob = pd.DataFrame(
                predicted_probs,
                columns = self.model.classes_,
                index = [symbol]).loc[:, 1] * -1

            # Positional access: the Series is indexed by the symbol label.
            probabilities[symbol] = prob.iloc[0]

        out = pd.DataFrame.from_dict(
            probabilities, orient = 'index', columns = ['factor'])

        out['factor_quantile'] = pd.qcut(
            out['factor'], q = 10,
            labels = list(range(1, 11)),
            duplicates = 'drop')
        return out

    def _rebalance(self, scores):
        """Target demeaned weights for the extreme deciles and 0 % for every other feed."""
        factor_values = scores[scores['factor_quantile'].isin([1, 10])]

        centred = factor_values['factor'].sub(factor_values['factor'].mean())
        factor_total = centred.abs().sum()

        ### remember to provision for current holding as well

        # NOTE(review): factor_total is zero when fewer than two distinct
        # scores are selected; the resulting weights are then NaN.
        factor_weights = {
            asset: value / factor_total
            for asset, value in centred.to_dict().items()}

        # The weights are complete before any order is sent, and each feed is
        # ordered exactly once per bar.
        for symbol in self.getdatanames():

            if symbol not in factor_weights:
                self.order = self.order_target_percent(self.getdatabyname(symbol), 0.0)

            else:
                self.symbol = symbol
                self.current_position = self.getpositionbyname(self.symbol).size
                self.log(f"----{self.symbol}-------------------------------------------------------------------")
                self.log(f"- CURRENT POSITION - {self.current_position}")
                self.log(f"- Recom: {factor_values.loc[self.symbol]}")

                portfolio_value = self.broker.getvalue()
                cash = self.broker.getcash()
                self.log(f"Current Portfolio value: {portfolio_value} Cash {cash}")

                order_percent = factor_weights[self.symbol]
                self.order = self.order_target_percent(self.getdatabyname(self.symbol), order_percent)
                self.log(f"REBALANCE - prop_portfolio_value = {order_percent * portfolio_value} \n")
            self.symbol = None

        print(' ')

    def next(self):
        """Per-bar entry point: refresh indicators, refit monthly, then rebalance."""
        trail_length = 252
        if len(self) <= trail_length:
            return

        trainPrices = self._collect_history(trail_length)

        date = self.data.datetime.date(0)

        # Refit whenever the month changes; previous_month starts as None, so
        # the first active bar always trains.
        train_model = date.month != self.previous_month
        self.previous_month = date.month

        if train_model:
            self.log(f"Training ... {self.model} \n")
        else:
            self.log(f"Just predicting ... {self.model} \n")

        cusumTrainPrices = getLabels(
            trainPrices, lookback = 14, num_days_ahead = 5, pt_sl = [0.0001, 0.0001], at_open = True)

        # The target is built from the full label frames; after that only the
        # event rows are kept.
        target = getTarget(cusumTrainPrices)
        cusumTrainPrices = {
            symbol: data[data['event'] == 1]
            for symbol, data in cusumTrainPrices.items()}

        forward_returns = (
            getForwardReturns(datas = cusumTrainPrices) if train_model else None)

        inds = allIndicators(cusumTrainPrices)
        inds = pd.concat([inds, target], axis = 1)
        inds = inds.dropna(subset = inds.drop('label', axis = 1).columns)
        inds.index.set_names(['date', 'symbol'], inplace = True)
        self.inds = inds

        if train_model:
            self._fit_model(forward_returns, target)

        # Trade only on dates that produced at least one indicator row.
        if date in self.inds.unstack().index:
            self._rebalance(self._score_universe(date))

In [192]:
# Fresh backtesting engine for this run; the stdstats override is left disabled.
cerebro2 = bt.Cerebro() #stdstats=False

# Register the strategy class with the engine (the value shown as cell output is this call's return).
cerebro2.addstrategy(testStrategy2)

0

In [193]:
# symbols = list(prices.keys())
# symbols
# symbols.remove('WHL.JO')

In [194]:
class PandasData(bt.feed.DataBase):
    """Feed declaration that only lists column-mapping parameters.

    The class derives from ``bt.feed.DataBase`` and defines nothing except
    ``params``; no loading logic is declared here.

    NOTE(review): the feed-building loop that follows this class calls
    ``bt.feeds.PandasData``, not this local class -- confirm whether this
    definition is still needed.
    NOTE(review): the ``None`` / ``-1`` values presumably follow the
    ``bt.feeds.PandasData`` convention (``None`` = not taken from a column,
    ``-1`` = autodetect the column by name) -- confirm against the
    backtrader documentation.
    """
    params = (
        ('datetime', None),
        ('open', -1),
        ('high', -1),
        ('low', -1),
        ('close', -1),
        ('volume', -1),
        ('openinterest', None),
    )

# Date window passed to every feed that is handed to the engine.
start, end = datetime(2013, 1, 1), datetime(2017, 12, 31)

for s in symbols:
    data = train_prices[s].copy()
    if len(data) <= 150:
        # 150 rows or fewer: this symbol is left out of the backtest.
        continue

    # Wrap the price frame in a backtrader pandas feed bounded by the
    # window above, then register it with the engine under its ticker.
    data = bt.feeds.PandasData(dataname=data, fromdate=start, todate=end)
    cerebro2.adddata(data, name=s)

In [195]:
# Starting capital for the simulated broker. No commission scheme is
# configured in this cell.
cerebro2.broker.setcash(1_000_000.0)

# Attach the Sharpe-ratio analyzer (registered under the name 'sharpe')
# and a drawdown observer.
cerebro2.addanalyzer(bt.analyzers.SharpeRatio, _name='sharpe')
cerebro2.addobserver(bt.observers.DrawDown)

# Report the portfolio value before the run.
print('Starting Portfolio Value: %.2f' % cerebro2.broker.getvalue())

# Execute the backtest; the returned value is kept in `strat` for the
# analyzer lookup done in a later cell.
strat = cerebro2.run(runonce=False, maxcpus=4)

# Report the portfolio value after the run.
print('Final Portfolio Value: %.2f' % cerebro2.broker.getvalue())

Starting Portfolio Value: 1000000.00
2013-12-19, Training ... None 



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Dropped 0.6% entries from factor data: 0.0% in forward returns computation and 0.6% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
Dropped 0.6% entries from factor data: 0.0% in forward returns computation and 0.6% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
Dropped 0.6% entries from factor data: 0.0% in forward returns computation and 0.6% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
Dropped 0.6% entries from factor data: 0.0% in forward returns computation and 0.6% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
Dropped 0.6% entries from factor data: 0.0% in forward returns computation and 0.6% in binning phase (set max_loss=0 to see potentially suppressed Exceptions).
max_loss is 35.0%, not exceeded: OK!
Dropped 0.6% en

TypeError: float() argument must be a string or a number, not 'pandas._libs.interval.Interval'

In [None]:
# Look up the analyzer registered under the name 'sharpe' on the first
# element of the value returned by cerebro2.run().
sharpe = strat[0].analyzers.getbyname('sharpe')
# Last expression of the cell: its result is what the notebook displays.
sharpe.get_analysis()

In [373]:
# Remove the cerebro2 name from the notebook namespace; any cell that uses
# cerebro2 after this one has to recreate it first.
del cerebro2

In [80]:
# Show the MSM.JO price rows from the label '2016-01-04' through the end
# of the frame (label-based slice on the index).
train_prices['MSM.JO'].loc['2016-01-04':]

Unnamed: 0_level_0,high,low,open,close,volume,adj_close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2016-01-04,10173.0,9760.0,10173.0,9778.0,314496.0,9056.099609
2016-01-05,10500.0,9582.0,10450.0,9864.0,316250.0,9135.748047
2016-01-06,9775.0,9559.0,9559.0,9740.0,135219.0,9020.904297
2016-01-07,9675.0,9304.0,9549.0,9580.0,195167.0,8872.716797
2016-01-08,9625.0,9344.0,9391.0,9550.0,202686.0,8844.932617
...,...,...,...,...,...,...
2017-12-25,13495.0,13495.0,13495.0,13495.0,0.0,12961.088867
2017-12-26,13495.0,13495.0,13495.0,13495.0,0.0,12961.088867
2017-12-27,13673.0,13277.0,13277.0,13549.0,225183.0,13012.953125
2017-12-28,14498.0,13056.0,13057.0,13649.0,519514.0,13108.996094
