### Environment Setup

In [None]:
import pandas as pd
import datetime as dt
import yfinance as yf
import backtrader as bt
import numpy as np
import warnings
import riskfolio as rp
import matplotlib.pyplot as plt
import os

from simple_report import simple_backtest

warnings.filterwarnings("ignore")

In [None]:
from Data_Preprocess import IAM_data_setup, download_IAM, SelectIndex, generate_interval
from Data_Preprocess import calculate_returns, calculate_date_range

### Ticker sets for different US and Hong Kong markets (equities, bonds, etc.)

In [None]:
def IndexSymbols(idx_n):
    """Return the sorted constituent tickers of a supported index.

    The constituents are scraped from the index's Wikipedia page with
    ``pd.read_html``. "HSI" codes are zero-padded to four digits and given a
    ".HK" suffix; "DJI" symbols are used as published. Any other value
    yields an empty list. The resulting list is printed before it is returned.
    """
    if idx_n == "HSI":
        tables = pd.read_html('https://en.wikipedia.org/wiki/Hang_Seng_Index')
        raw_codes = tables[6]['Ticker'].to_list()
        # Strip the exchange prefix (with its non-breaking space) and pad the code.
        tickers = sorted(
            code.replace('SEHK:\xa0', '').zfill(4) + ".HK" for code in raw_codes
        )
    elif idx_n == "DJI":
        tables = pd.read_html('https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average')
        tickers = sorted(tables[2].Symbol.tolist())
    else:
        tickers = []
    print(tickers)
    return tickers

In [None]:
def sample_markets(i):
    """Return the ticker list and classification table for an asset universe.

    Parameters
    ----------
    i : int or str
        Either an integer index (0-3) into the hard-coded ticker sets defined
        below, or one of the string keys "US-ETF", "TM-CHINA", "TM-ASIA",
        "IAM", "IAMHKD", "IAMUSD", "IAMHKDUST", "HSI" or "DJI".

    Returns
    -------
    tuple
        ``(assets, asset_classes)``. For the hard-coded sets ``assets`` is the
        sorted ticker list and ``asset_classes`` is a DataFrame sorted by
        'Assets' with the columns 'Assets', 'Class', 'Sector', 'Currency'
        ("USD") and 'Rate' (1.0). For the string keys the pair comes from
        ``IAM_data_setup`` (optionally filtered) or is built from
        ``IndexSymbols``.
    """
    # Tickers of assets; the three lists are parallel and indexed by selection.
    stocks_ = []
    sector_ = []
    class_ = []
    # 0 selection
    stocks_.append(['JCI', 'TGT', 'CMCSA', 'CPB', 'MO', 'APA', 'MMC', 'JPM',
                    'ZION', 'PSA', 'BAX', 'BMY', 'LUV', 'PCAR', 'TXT', 'TMO',
                    'DE', 'MSFT', 'HPQ', 'SEE', 'VZ', 'CNP', 'NI', 'T', 'BA',
                    ])
    sector_.append(['Consumer Discretionary', 'Consumer Discretionary',
                    'Consumer Discretionary', 'Consumer Staples',
                    'Consumer Staples', 'Energy', 'Financials',
                    'Financials', 'Financials', 'Financials',
                    'Health Care', 'Health Care', 'Industrials', 'Industrials',
                    'Industrials', 'Health Care', 'Industrials',
                    'Information Technology', 'Information Technology',
                    'Materials', 'Telecommunications Services', 'Utilities',
                    'Utilities', 'Telecommunications Services', 'Financials',
                    ])
    class_.append([
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    ])
    # 1 selection
    stocks_.append(['JCI', 'TGT', 'CMCSA', 'CPB', 'MO', 'APA', 'MMC', 'JPM',
                    'ZION', 'PSA', 'BAX', 'BMY', 'LUV', 'PCAR', 'TXT', 'TMO',
                    'DE', 'MSFT', 'HPQ', 'SEE', 'VZ', 'CNP', 'NI', 'T', 'BA',
                    'HYG', 'LQD', 'TLT'
                    ])
    sector_.append(['Consumer Discretionary', 'Consumer Discretionary',
                    'Consumer Discretionary', 'Consumer Staples',
                    'Consumer Staples', 'Energy', 'Financials',
                    'Financials', 'Financials', 'Financials',
                    'Health Care', 'Health Care', 'Industrials', 'Industrials',
                    'Industrials', 'Health Care', 'Industrials',
                    'Information Technology', 'Information Technology',
                    'Materials', 'Telecommunications Services', 'Utilities',
                    'Utilities', 'Telecommunications Services', 'Financials',
                    'Corporate', 'Corporate', 'Treasury',
                    ])
    class_.append([
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity', 'Equity',
                    'Fixed Income', 'Fixed Income', 'Fixed Income',
                    ])
    # 2 selection
    stocks_.append([
                    'AMAT', 'AMD', 'AVGO',
                    'BAC', 'BKR', 'BMY', 'BSX',
                    'C', 'CMCSA',
                    'CSCO', 'CSX', 'CVS', 'CVX',
                    'DIS', 'DVN',
                    'FCX', 'FNF', 'GEN', 'GILD',
                    'GM', 'HAL', 'HPQ', 'INTC',
                    'IPG', 'JNJ', 'KDP',
                    'KKR', 'KMI', 'KO',
                    'MDLZ', 'MO', 'MRK', 'MRO',
                    'MRVL', 'MSFT', 'MU', 'NEM',
                    ])

    sector_.append([
                    'Technology', 'Technology', 'Technology',
                    'Financial Services', 'Energy', 'Healthcare', 'Healthcare',
                    'Financial Services', 'Communication Services',
                    'Technology', 'Industrials', 'Healthcare', 'Energy',
                    'Communication Services', 'Energy',
                    'Basic Materials', 'Financial Services', 'Technology', 'Healthcare',
                    'Consumer Cyclical', 'Energy', 'Technology', 'Technology',
                    'Communication Services', 'Healthcare', 'Consumer Defensive',
                    'Financial Services', 'Energy', 'Consumer Defensive',
                    'Consumer Defensive', 'Consumer Defensive', 'Healthcare', 'Energy',
                    'Technology', 'Technology', 'Technology', 'Basic Materials',
                    ])
    class_.append([
                    'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity',
                    'Equity', 'Equity', 'Equity', 'Equity',
                    ])
    # 3 selection - ALL US ETF
    stocks_.append(['HYG', 'LQD', 'TLT',
                    'EFA', 'EWW', 'EWZ',
                    'GLD', 'SLV', 'XME',
                    'XLE', 'XOP',
                    'DBA',
                    'XBI',
                    'VNQ', 'XHB',
                    'IWM', 'QQQ', 'SPY'
                    ])
    sector_.append(['Corporate', 'Corporate', 'Treasury',
                    'International', 'International', 'International',
                    'Metal', 'Metal', 'Metal',
                    'Energy', 'Energy',
                    'Agriculture',
                    'Healthcare',
                    'Real Estate', 'Real Estate',
                    'US Market', 'US Market', 'US Market',
                    ])
    class_.append(['Fixed Income', 'Fixed Income', 'Fixed Income',
                   'Equity', 'Equity', 'Equity',
                   'Commodity', 'Commodity', 'Commodity',
                   'Commodity', 'Commodity',
                   'Commodity',
                   'Equity',
                   'Equity', 'Equity',
                   'Equity', 'Equity', 'Equity',
                   ])

    # Universes loaded from definition files by the project helper.
    if i == "US-ETF":
        return IAM_data_setup('US-ETF.txt')

    if i == "TM-CHINA":
        return IAM_data_setup('TM-China-2022.txt')

    if i == "TM-ASIA":
        return IAM_data_setup('TM-Asia-2022.txt')

    if i == "IAM":
        return IAM_data_setup()

    # Filtered views of the default IAM universe.
    if i == "IAMHKD":
        a, df = IAM_data_setup()
        df = df[df['Currency'] == 'HKD']
        return df.Assets.to_list(), df

    if i == "IAMUSD":
        a, df = IAM_data_setup()
        df = df[df['Currency'] == 'USD']
        return df.Assets.to_list(), df

    if i == "IAMHKDUST":
        a, df = IAM_data_setup()
        # The filtered frame must be assigned back: boolean indexing returns a
        # new DataFrame, so without the assignment the full universe is returned.
        df = df[(df['Currency'] == 'HKD') | (df['Sector'] == 'Treasury')]
        return df.Assets.to_list(), df

    # Index constituents scraped from Wikipedia, all tagged as generic equity.
    if (i == "HSI") or (i == "DJI"):
        s_list = IndexSymbols(i)
        c_l = ['Equity'] * len(s_list)
        df = pd.DataFrame({'Assets': s_list, 'Class': c_l})
        df['Sector'] = 'General'
        if (i == 'DJI'):
            df['Currency'] = "USD"
            df['Rate'] = 1.0
        elif (i == "HSI"):
            df['Currency'] = "HKD"
            df['Rate'] = 0.1282
        print(df)
        return s_list, df

    # Hard-coded selections: build the table first, then sort both outputs by
    # ticker so the list and the table rows stay in the same order.
    assets = stocks_[i]
    asset_classes_dict = {'Assets': assets,
                          'Class': class_[i],
                          'Sector': sector_[i],
                          }

    asset_classes = pd.DataFrame(asset_classes_dict)
    asset_classes = asset_classes.sort_values(by=['Assets'])
    asset_classes['Currency'] = "USD"
    asset_classes['Rate'] = 1.0

    assets.sort()

    return assets, asset_classes

### Parameters Setup

In [None]:
# Run mode switches: RebalanceMode picks the date-window setup, backtest_flag
# picks the benchmark evaluation path ("bt2" uses simple_backtest).
RebalanceMode, backtest_flag = True, "bt2"

In [None]:
if not RebalanceMode:
    # Full backtest over a fixed calendar window (ISO date strings).
    start, end = '2000-01-01', '2024-09-30'
    # start = '2019-01-01'
    # end = '2024-11-01'
    datetag = "BT"
    subfd = "BT-2000-2024"
else:
    # Rebalance run: trailing window of n calendar days ending on a fixed date.
    n = 1600
    # end = dt.datetime.today().date()
    end = dt.datetime(2024, 12, 7)
    start = end - dt.timedelta(days=n)
    datetag = end.strftime("RB_%Y-%m-%d")
    subfd = "Rebalance"
print(start, ',', end, "    tag=", datetag)

In [None]:
# Feature switches for the run.
plotFlag, useConstraint, useView = True, True, True

# Risk measures available:
#
#   'MV'   Standard Deviation.
#   'MAD'  Mean Absolute Deviation.
#   'MSV'  Semi Standard Deviation.
#   'FLPM' First Lower Partial Moment (Omega Ratio).
#   'SLPM' Second Lower Partial Moment (Sortino Ratio).
#   'CVaR' Conditional Value at Risk.
#   'EVaR' Entropic Value at Risk.
#   'WR'   Worst Realization (Minimax)
#   'MDD'  Maximum Drawdown of uncompounded cumulative returns (Calmar Ratio).
#   'ADD'  Average Drawdown of uncompounded cumulative returns.
#   'CDaR' Conditional Drawdown at Risk of uncompounded cumulative returns.
#   'EDaR' Entropic Drawdown at Risk of uncompounded cumulative returns.
#   'UCI'  Ulcer Index of uncompounded cumulative returns.
rms = [
    'MV', 'MAD', 'MSV', 'FLPM', 'SLPM', 'CVaR',
    'EVaR', 'WR', 'MDD', 'ADD', 'CDaR', 'UCI', 'EDaR',
]
# Shorter alternative: rms = ['MV','WR','CVaR']

# Objective functions (alternative: objectives = ['MaxRet'])
objectives = ['Sharpe', 'MinRisk', 'MaxRet']

# Rebalance intervals to run; keys "W", "M", "Q", "S" are the ones used for
# the rebalance index tables (all four: ["W","M","Q","S"]).
reb_interval = ["W"]
# Concatenated interval keys, used as a tag in the output folder name.
in_str = "".join(reb_interval)

In [None]:
# Dataset and constraint-set selection.
#
# Each entry pairs a universe key understood by sample_markets ("dset") with
# an index into the list of predefined constraint tables ("const").
params = [
    {"dset": "IAM", "const": 3},
    {"dset": "US-ETF", "const": 3},
    {"dset": "HSI", "const": 3},
    {"dset": "DJI", "const": 3},
    {"dset": "TM-CHINA", "const": 3},
    {"dset": "TM-ASIA", "const": 3},
]
pi = 5  # index of the active entry in params
assets_selection = params[pi]["dset"]
constraints_selection = params[pi]["const"]

assets, asset_classes = sample_markets(assets_selection)

display(asset_classes['Sector'].unique())
display(asset_classes['Class'].unique())
print(assets)

# Output folder name encodes universe, constraint set, intervals, backtest
# flavour and date tag.
outputp = os.path.join(subfd, f"Classic_{assets_selection}_C{constraints_selection}_{in_str}_{backtest_flag}_{datetag}")
print(outputp)
# exist_ok avoids the check-then-create race and is a no-op when the folder exists.
os.makedirs(outputp, exist_ok=True)

In [None]:
# Summary of the selected universe: ticker count, ticker list, and the shape
# and contents of the classification table.
print(f"** Number of assets:{len(assets)} ** \n")
print(assets)
print(f"\n ** Asset Classes Shape: {asset_classes.shape} **")
display(asset_classes)

### Setup constraints and Views

In [None]:
###########################################################
# Building Constraints
############################################################
def Gen_Constraints():
    """Return the list of predefined constraint tables.

    Every table is a DataFrame with the columns 'Disabled', 'Type', 'Set',
    'Position', 'Sign', 'Weight', 'Type Relative', 'Relative Set', 'Relative'
    and 'Factor'; the four relative/factor columns are always empty strings.
    The list position is the identifier used to select a table.
    """

    def _table(disabled, kinds, sets, positions, signs, weights):
        # One row per constraint; the relative/factor columns are left blank.
        blank = [''] * len(disabled)
        return pd.DataFrame({'Disabled': disabled,
                             'Type': kinds,
                             'Set': sets,
                             'Position': positions,
                             'Sign': signs,
                             'Weight': weights,
                             'Type Relative': blank,
                             'Relative Set': blank,
                             'Relative': blank,
                             'Factor': blank})

    def _single_cap(limit):
        # A lone "every asset <= limit" constraint.
        return _table([False], ['All Assets'], [''], [''], ['<='], [limit])

    return [
        # 0: per-asset cap plus class caps; the sector cap and the per-asset
        #    floor rows are present but disabled.
        _table([False, True, False, False, True],
               ['All Assets', 'All Classes', 'Classes', 'Classes', 'All Assets'],
               ['', 'Sector', 'Class', 'Class', ''],
               ['', '', 'Equity', 'Fixed Income', ''],
               ['<=', '<=', '<=', '<=', '>='],
               [0.10, 0.20, 0.6, 0.4, 0.02]),
        # 1: per-asset cap plus Equity / Fixed Income class caps.
        _table([False, False, False],
               ['All Assets', 'Classes', 'Classes'],
               ['', 'Class', 'Class'],
               ['', 'Equity', 'Fixed Income'],
               ['<=', '<=', '<='],
               [0.10, 0.6, 0.4]),
        # 2: per-asset cap plus an upper and a lower bound on every sector.
        _table([False, False, False],
               ['All Assets', 'All Classes', 'All Classes'],
               ['', 'Sector', 'Sector'],
               ['', '', ''],
               ['<=', '<=', '>='],
               [0.10, 0.20, 0.03]),
        # 3-5: per-asset cap only, at increasing limits.
        _single_cap(0.10),
        _single_cap(0.15),
        _single_cap(0.25),
    ]

# Build every predefined constraint table once, then pick the active one:
# constraints_selection is used as a position in the list.
global_constraint_list = Gen_Constraints()
max_g_const = len(global_constraint_list)
print(f"Total {max_g_const} constraints for selection")
constraints = global_constraint_list[constraints_selection]
display("Current selected constraints:")
display(constraints)

In [None]:
############################################################
# Building View for Black Litterman
############################################################
# Three relative sector views, one per row: each 'Position' sector is stated
# against the 'Relative' sector of the same row with sign '>='.
views = pd.DataFrame({
    'Disabled': [False] * 3,
    'Type': ['Classes'] * 3,
    'Set': ['Sector'] * 3,
    'Position': ['Technology', 'Energy', 'Healthcare'],
    'Sign': ['>='] * 3,
    'Weight': [0.20, 0.1, 0.09],  # Annual terms
    'Type Relative': ['Classes'] * 3,
    'Relative Set': ['Sector'] * 3,
    'Relative': ['Financial Services', 'Commodity', 'Consumer Defensive'],
})

display(views)

In [None]:
# Methods to create Constraint and Views by a subset of tickers

def get_Constraint(subSet):
    """Build the constraint matrices for a subset of tickers.

    Restricts the module-level ``asset_classes`` table to the rows whose
    'Assets' value is in ``subSet`` and passes it, together with the selected
    ``constraints`` table, to ``rp.assets_constraints``; that call's result is
    returned unchanged.
    """
    in_subset = asset_classes['Assets'].isin(subSet)
    # print(f'Constraint-> subSet={subSet}\n')
    return rp.assets_constraints(constraints, asset_classes.loc[in_subset])


def get_Views(subSet):
    """Build the view matrices for a subset of tickers.

    Restricts the module-level ``asset_classes`` table to the rows whose
    'Assets' value is in ``subSet`` and passes it, together with the
    module-level ``views`` table, to ``rp.assets_views``; that call's result
    is returned unchanged.
    """
    in_subset = asset_classes['Assets'].isin(subSet)
    # print(f'subSet={subSet}\n')
    return rp.assets_views(views, asset_classes.loc[in_subset])
    

### Download Data
Full data download from yfinance to **prices** dataframe

In [None]:
# Downloading data
print('# of assets: ', len(assets))
# IAM universes use the project loader for full backtests; every other case
# (and every rebalance run) downloads from yfinance.
if isinstance(assets_selection, str) and "IAM" in assets_selection and not RebalanceMode:
    prices = download_IAM()
else:
    # auto_adjust=False keeps raw OHLC plus a separate 'Adj Close' column; the
    # feed-building code drops 'Adj Close' by name, so it must be present
    # regardless of the installed yfinance default.
    prices = yf.download(assets, start=start, end=end, auto_adjust=False)

In [None]:
print('Data Size: ', prices.shape)
# Columns that contain no observations at all (every row is NaN).
prices.columns[prices.isna().all()]

In [None]:
# Quick look at the downloaded table: first/last index value, column summary
# and the first rows.
display(prices.index[0], prices.index[-1])
display(prices.info())
display(prices.head())

In [None]:
############################################################
# Calculate assets returns
############################################################

# Returns for the selected tickers, in the order of the assets list.
returns = calculate_returns(prices)[assets]

n_return_rows, n_return_cols = returns.shape
print(f"** Number of assets in returns DF: {n_return_cols} **")
print(f"** days of assets in returns DF: {n_return_rows} **")
print(f"** begin date: {returns.index[0]}, last date: {returns.index[-1]} **")
print("\n** Assets in Returns Set: ", returns.columns.to_list(), " \n")
display(returns)

# Keep a copy of the full return table next to the other run outputs.
full_returns_csv = os.path.join(outputp, "Full_Returns.csv")
returns.reset_index().to_csv(full_returns_csv, index=False)

In [None]:
# Test window expressed as row positions: testing starts at row 1004 and runs
# to the last row of the price table.
start_test = 1004
end_test = len(prices) - 1
# Number of trailing return rows handed to each optimisation.
test_size = 1000
print(f" Testing data from {start_test} to {end_test}")

In [None]:
#
# From the full prices DataFrame, compute the date/index range for each ticker
# in the returns table (delegated to the project helper calculate_date_range).
#
assets_dt_range = calculate_date_range(prices, returns.columns.to_list())

In [None]:
# Show the per-ticker date ranges computed above for inspection.
display(assets_dt_range)

### Building the Backtest Function with Backtrader

In [None]:
############################################################
# Defining the backtest function 
############################################################

def backtest(datas, strategy, start, end, plot=False, **kwargs):
    """Run one strategy through backtrader and return its result object.

    Parameters
    ----------
    datas : iterable of dict
        Each item provides a feed under 'data' and its label under 'name'.
    strategy : backtrader strategy class
        Added to the engine with ``**kwargs`` as its parameters.
    start, end :
        Only logged and forwarded to ``cerebro.plot``; they do not restrict
        the run itself.
    plot : bool
        When true, plot the run and show the figure.

    Returns
    -------
    The first element of ``cerebro.run(...)``.
    """
    engine = bt.Cerebro()

    print(f"backtest:  start={start} - end={end}\n")

    # Broker setup: starting cash, 0.5% commission and 0.5% slippage.
    broker = engine.broker
    broker.setcash(1000000.0)
    broker.setcommission(commission=0.005)
    broker.set_slippage_perc(0.005,
                             slip_open=True,
                             slip_limit=True,
                             slip_match=True,
                             slip_out=False)

    for feed in datas:
        engine.adddata(feed['data'], name=feed['name'])

    # Analyzers to record, registered in this order, with their options.
    analyzer_specs = (
        (bt.analyzers.TimeReturn, {'timeframe': bt.TimeFrame.Days}),
        (bt.analyzers.SharpeRatio, {'riskfreerate': 0.0}),
        (bt.analyzers.Returns, {}),
        (bt.analyzers.LogReturnsRolling, {}),
        (bt.analyzers.DrawDown, {}),
        (bt.analyzers.PositionsValue, {}),
        (bt.analyzers.TradeAnalyzer, {}),
        (bt.analyzers.PeriodStats, {}),
        (bt.analyzers.Transactions, {}),
        (bt.analyzers.VWR, {}),
    )
    for analyzer_cls, options in analyzer_specs:
        engine.addanalyzer(analyzer_cls, **options)

    engine.addstrategy(strategy, **kwargs)
    for observer_cls in (bt.observers.Value, bt.observers.DrawDown):
        engine.addobserver(observer_cls)

    # stdstats=False: only the observers added above are attached.
    results = engine.run(stdstats=False)
    if plot:
        print(f"backtest.plot: {start}-{end}")
        engine.plot(iplot=False, start=start, end=end)
        plt.show()
    return results[0]


### Building Data Feeds for Backtesting
**assets_prices** = list of `bt.feeds` data feeds (*OHLC + Volume*), one for each asset except 'SPY'    
**benchmark** = the 'SPY' *OHLC + Volume* data as a `bt.feeds` data feed

In [None]:
# Same classification table keyed by ticker, so per-asset fields such as
# 'Rate' can be looked up with .loc[ticker, column].
i_asset_classes = asset_classes.set_index('Assets')

In [None]:
############################################################
# Create objects that contain the prices of assets
############################################################
# Creating Assets bt.feeds
# 'Adj Close' is dropped once up front; the result is the same for every
# ticker, so there is no need to rebuild it on each loop iteration.
ohlcv_prices = prices.drop(columns='Adj Close')

assets_prices = []
for i in assets:
    # if i not in benchmark_symbols:

    # Select this ticker's columns. The explicit copy keeps the renaming and
    # rescaling below local to this ticker's frame.
    prices_ = ohlcv_prices.loc[:, (slice(None), i)].copy()
    prices_.columns = ['Close', 'High', 'Low', 'Open', 'Volume']
    prices_.index = pd.to_datetime(prices_.index.date)
    prices_.index.names = ['Date']
    nan_cnt = prices_.isna().sum().sum()
    # Per-asset multiplier from the classification table, applied to the
    # price columns only (Volume is left unchanged).
    R = i_asset_classes.loc[i, 'Rate']
    display(f"{i}, nan_cnt={nan_cnt} , {prices_.index[0]}, {prices_.index[-1]}, Rate: {R}")
    for c in ['Close', 'High', 'Low', 'Open']:
        prices_[c] = prices_[c]*R
    # display(prices_)
    assets_prices.append({'data': bt.feeds.PandasData(dataname=prices_, name=i, plot=False), 'name':i})

print(assets_prices)

### Buy and Hold for the BenchMark 

In [None]:
############################################################
# Building the Buy and Hold strategy
############################################################

class BuyAndHold(bt.Strategy):
    """Buy the first data feed once the test window starts, then hold.

    A bar counter is kept; from bar ``start_test`` onwards, whenever the
    position in the feed is flat, an order targeting 99% of the portfolio is
    placed.
    """

    def __init__(self):
        # Number of bars seen so far.
        self.counter = 0

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed with ``dt`` (defaults to the current bar date)."""
        stamp = dt if dt else self.datas[0].datetime.date(0)
        print('%s %s' % (stamp.isoformat(), txt))

    def next(self):
        feed = self.data
        in_test_window = self.counter >= start_test
        if in_test_window and self.getposition(feed).size == 0:
            tc = feed.close[0]
            th = feed.high[0]
            td = feed.datetime.datetime()
            print(f'BUY @ {self.counter} - close:{tc}, high:{th} - date:{td} - name:{feed._name}')
            self.order_target_percent(feed, target=0.99)
        self.counter += 1

In [None]:
############################################################
# Run the backtest for the bench mark(s)
############################################################
# IPython magic: render matplotlib figures inline in the notebook.
%matplotlib inline
# # Creating Benchmark bt.feeds  
# from datetime import datetime
# import pytz

# Container for benchmark results (not filled in this cell).
benchmark_results = {}
# Benchmark symbols to evaluate.
bm_list = ["SPY", "^HSI","QQQ","DIA","2822.HK","2801.HK", "AAXJ"]

In [None]:
def Backtest_BenchMark(symbol):
    """Download one benchmark symbol and run the BuyAndHold backtest on it.

    Prices are downloaded for the module-level ``start``/``end`` window,
    reduced to the Close/High/Low/Open/Volume columns and wrapped in a
    backtrader pandas feed. The backtest is plotted.

    Returns
    -------
    The result object returned by ``backtest``.
    """
    plt.rcParams["figure.figsize"] = (10, 6) # (w, h)
    plt.plot() # We need to do this to avoid errors in inline plot

    print(symbol)
    # auto_adjust=False requests raw OHLC plus a separate 'Adj Close' column.
    bm_prices = yf.download(symbol, start=start, end=end, auto_adjust=False)
    prices_ = bm_prices.drop(columns='Adj Close', errors='ignore')
    # Depending on the yfinance version a single-symbol download has either
    # flat columns or (field, ticker) MultiIndex columns, and the field order
    # differs as well. Flatten and select by name so that each column keeps
    # its real meaning instead of relying on position.
    if isinstance(prices_.columns, pd.MultiIndex):
        prices_.columns = prices_.columns.get_level_values(0)
    prices_ = prices_[['Close', 'High', 'Low', 'Open', 'Volume']]
    display(symbol, prices_.index[0], prices_.index[-1])
    # display(prices_)
    print("==>", start_test, end_test)
    in_data = {
        'data': bt.feeds.PandasData(dataname=prices_, name=symbol, plot=False),
        'name': symbol,
    }
    result = backtest([in_data],
                      BuyAndHold,
                      start=start_test,
                      end=end_test,
                      plot=True)
    return result

In [None]:
def RetreiveStats(bt_result, rm, obj, r_int):
    """Summarise a backtest result as one metrics row.

    Reads the maximum drawdown, the normalised return ('rnorm100') and the
    Sharpe ratio from the result's analyzers. Drawdown and return are divided
    by 100. ``rm``, ``obj`` and ``r_int`` are copied through as labels.
    """
    analyzers = bt_result.analyzers
    max_drawdown_pct = analyzers.drawdown.get_analysis()['max']['drawdown']
    cagr_pct = analyzers.returns.get_analysis()['rnorm100']
    sharpe_ratio = analyzers.sharperatio.get_analysis()['sharperatio']

    return {
        'Risk_measure': rm,
        'Objective': obj,
        'R_Interval': r_int,
        'Max DrawDown': max_drawdown_pct / 100.0,
        'CAGR': cagr_pct / 100.0,
        'Sharpe Ratio': sharpe_ratio,
    }

In [None]:
#
# Retreive Daily Return from bt
#
def RetreiveDailyReturn(bt_result, s_name):
    """Return the 'timereturn' analyzer output as a two-column DataFrame.

    The analysis mapping's keys go to the 'Date' column and its values to a
    column named ``s_name``.
    """
    per_period = bt_result.analyzers.getbyname('timereturn').get_analysis()
    return pd.DataFrame(list(per_period.items()), columns=['Date', s_name])


In [None]:
# One metrics row per benchmark symbol.
BM_metric_list = []
BM_return = {}

if backtest_flag != "bt2":
    # backtrader path: run buy-and-hold per symbol and read its analyzers.
    for sym in bm_list:
        result0 = Backtest_BenchMark(sym)
        BM_metric_list.append(RetreiveStats(result0, sym, 'N/A', 'N/A'))
        BM_ret = RetreiveDailyReturn(result0, sym)
        BM_ret['Date'] = BM_ret['Date'].dt.date
        BM_ret.round(4).to_csv(os.path.join(outputp, f"{sym}_ret.csv"), index=False)
else:
    # "bt2" path: download all benchmarks at once and evaluate each symbol's
    # return series with simple_backtest.
    bm_prices = yf.download(bm_list, start=start, end=end)
    pm_returns = calculate_returns(bm_prices)
    display("==>", bm_list, bm_prices.index[0], bm_prices.index[-1])
    # display(prices_)
    results = {}
    for sy in bm_list:
        result, BM_ret = simple_backtest(None, pm_returns[sy].to_frame(), sy, "N/A", "N/A")
        BM_metric_list.append(result)
        BM_ret['Date'] = BM_ret['Date'].dt.date
        BM_ret.round(4).to_csv(os.path.join(outputp, f"{sy}_ret.csv"), index=False)

In [None]:
# Show the test window bounds and the collected benchmark metric rows.
display(start_test, end_test, BM_metric_list)

### Rebalancing Monthly, Quarterly, Semiannually using Riskfolio-Lib

In [None]:
# Rebalance positions for every supported interval key, computed by the
# project helper SelectIndex starting from start_test.
rebalance_index = {
    freq: SelectIndex(returns, freq, start_test)
    for freq in ("W", "M", "Q", "S")
}

In [None]:
# Interval table per key, built by the project helper generate_interval; each
# call also receives a per-interval CSV path inside the output folder.
r_index = {
    freq: generate_interval(
        returns,
        rebalance_index[freq],
        os.path.join(outputp, f"rebalance_index_{freq}.csv"),
    )
    for freq in ("W", "M", "Q", "S")
}

In [None]:
# Notebook display of the "M" interval table for inspection.
r_index["M"]

In [None]:
def get_return_set(ret, s_i, e_i):
    """Return rows ``s_i`` (inclusive) to ``e_i`` (exclusive) of ``ret`` by position.

    Columns that contain any missing value inside that window are dropped.
    """
    window = ret.iloc[s_i:e_i, :]
    return window.dropna(axis=1)

# Sample window for inspection: rows 40 to 1039 of the return table, keeping
# only the columns without missing values in that range.
nY = get_return_set(returns, 40, 1040)
display(nY)

* Each optimization is based on the returns of the previous 1000 trading days up to the rebalance date (for example, the last date of each quarter), which is about 4 years of data.

In [None]:
import multiprocessing
import time

def opt_worker(d_cnt, data):
    """Optimise portfolio weights at every rebalance point for a batch of jobs.

    Parameters
    ----------
    d_cnt :
        Only used in the progress messages (printed as the job total).
    data : sequence of dict
        Each item carries a 'keys' entry that unpacks to
        ``(obj, r, rm)``: objective, rebalance-interval key into ``r_index``,
        and risk measure.

    Returns
    -------
    list of dict
        One dict per job with the original 'keys' and a 'weights' DataFrame
        holding one row per rebalance point that produced (or inherited)
        weights.

    Notes
    -----
    Reads the module-level ``returns``, ``r_index``, ``test_size``,
    ``useView`` and ``useConstraint``. Any exception raised while optimising a
    rebalance point is printed and the previous row of weights is reused.
    """
    # NOTE(review): `created` is an unstarted Process object that is only used
    # for its name/identity in the message below.
    created = multiprocessing.Process()
    current = multiprocessing.current_process()
    print(f' running: {current.name}, {current._identity}\n created: {created.name}, {created._identity}\n')

    results=[]
    for i in range(len(data)):
        obj, r, rm = data[i]['keys']
        print(f'started for {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}\n')

        result={}
        result['keys'] = data[i]['keys']
        
        start_time = time.time()
        # One column per asset; rows are appended per rebalance point.
        weights = pd.DataFrame(columns=returns.columns.to_list())
        # for j in rebalance_index[r]:
        # j (the row label of the interval table) is used as the end position
        # of the look-back window; `dt` here is the row, not the datetime module.
        for j, dt in r_index[r].iterrows():

            try:
                # Y = returns.iloc[j-test_size:j,:] # taking last 4 years (250 trading days per year)
                # Last test_size rows before position j, without columns that
                # have missing values in that window.
                Y = get_return_set(returns, j-test_size, j)
                orig_size = len(Y.columns.to_list())
                # filter holidays i.e. all ticker has no data at 'all'
                # Y = Y.dropna(axis=0, how='all')
                # then filter out ticker that cannot supply full data set
                # Y = Y.dropna(axis=1, how='any')
                aSet = Y.columns.to_list()
                col_num = len(Y.columns.to_list())
               
                # Building the portfolio object
                print(f'Create Port: {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}, Y cols={col_num}/{orig_size}\n')
                port = rp.Portfolio(returns=Y)
                
                port.alpha = 0.05
                # NOTE: `model` and `hist` are only referenced by the
                # commented-out Black Litterman section below.
                model='BL' # Could be Classic (historical), BL (Black Litterman) or FM (Factor Model)
                hist = False # Use historical scenarios for risk measures that depend on scenarios
                rf = 0 # Risk free rate
                l = 0 # Risk aversion factor, only useful when obj is 'Utility'
                
                # Add portfolio constraints
                if useView:
                    # P, Q are computed but only consumed by the commented-out
                    # Black Litterman section below.
                    P,Q = get_Views(aSet)
                if useConstraint:
                    A, B = get_Constraint(aSet)
                    port.ainequality = A
                    port.binequality = B
                
                # Calculating optimum portfolio
                
                # Select method and estimate input parameters:
                
                method_mu='hist' # Method to estimate expected returns based on historical data.
                method_cov='hist' # Method to estimate covariance matrix based on historical data.
                
                # print(f'assets_stats: {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}\n')
                port.assets_stats(method_mu=method_mu, method_cov=method_cov)
                
                # Estimate optimal portfolio:
                # print(f'optimization(Classic): {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}\n')
                out_w = port.optimization(model='Classic', rm=rm, obj=obj, rf=rf, l=l, hist=True)
                print(f"opt result for {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}--out_w: {len(out_w)}:{out_w}\n")
                w = out_w
                
                # # Estimate Black Litterman inputs:
                # print(f'blacklitterman_stats: {obj}, {r}, {rm} == {i}/{len(data)} {current.name}\n')
                # port.blacklitterman_stats(P, Q/252, rf=rf, w=w, delta=None, eq=True)
                
                # if rm == 'MV':
                #     hist = False
                # else:
                #     hist = True
                # print(f'optimization({model}): {obj}, {r}, {rm} == {i}/{len(data)} {current.name}\n')
                # w = port.optimization(model=model, rm=rm, obj=obj, rf=rf, l=l, hist=hist)
            except Exception as e:
                # Best effort: report the failure and fall back to the
                # previous weights below.
                print(f'Exception:{e} -- {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}\n')
                w = None
            
            if w is None:
                # Reuse the last stored row (transposed so that w.T below
                # turns it back into a row); empty if nothing is stored yet.
                w = weights.tail(1).T
            weights = pd.concat([weights, w.T], axis = 0)
     
        totalt = time.time()-start_time
        print(f"Finished for {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}--{totalt} sec--\n")
        # if len(weights) == len(rebalance_index[r]):
        #     weights.index = rebalance_index[r]
        # Label rows with the rebalance positions and dates only when every
        # rebalance point contributed a row.
        if len(weights) == len(r_index[r]):
            weights.index = r_index[r].index  
            weights.insert(0, 'Date', r_index[r]['Date'])
             
        result['weights'] = weights
        results.append(result) 
    print(f'Existing: {current.name}, {current._identity}\n')
    return results

In [None]:
def opt_workerv2(d_cnt, data):
    """Compute per-rebalance-date portfolio weights for a batch of tasks.

    Each task in ``data`` is a dict whose ``'keys'`` entry is an
    ``(obj, r, rm)`` tuple that is forwarded to ``port.optimization`` as the
    objective (``obj``) and risk measure (``rm``); ``r`` selects which
    ``r_index[r]`` schedule is iterated.

    The function reads the module-level names ``returns``, ``r_index``,
    ``test_size``, ``useView``, ``useConstraint``, ``get_return_set``,
    ``get_Views`` and ``get_Constraint``.

    Args:
        d_cnt: Total task count; only used in progress messages.
        data: Sequence of task dicts, each with a ``'keys'`` entry.

    Returns:
        A list with one dict per task holding:
        ``'keys'`` (the task tuple), ``'datasets'`` (one diagnostic dict per
        schedule row) and ``'weights'`` (a DataFrame indexed by the schedule
        row label, with a ``Date`` column followed by one column per asset).
    """
    created = multiprocessing.Process()
    current = multiprocessing.current_process()
    print(f' running: {current.name}, {current._identity}\n created: {created.name}, {created._identity}\n')

    results = []
    for i, task in enumerate(data):
        obj, r, rm = task['keys']
        print(f'started for {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}\n')

        datasets = []
        result = {'keys': task['keys'], 'datasets': datasets}

        weights = pd.DataFrame(columns=["index", "Date"] + returns.columns.to_list())
        # `row` (not `dt`) so the notebook's `datetime as dt` alias is not shadowed.
        for j, row in r_index[r].iterrows():
            dataset = {'data_block': (j - test_size, j, row['Date'])}

            try:
                orig_size = len(returns.columns)
                Y = get_return_set(returns, j - test_size, j)
                aSet = Y.columns.to_list()
                col_num = len(aSet)

                # Building the portfolio object
                port = rp.Portfolio(returns=Y)
                dataset['data_size'] = Y.shape
                dataset['data_asset'] = Y.columns.to_list()
                dataset['assets_ratio'] = (orig_size, col_num)

                port.alpha = 0.05
                rf = 0  # Risk free rate
                risk_aversion = 0  # Only useful when obj is 'Utility'

                # Add portfolio constraints
                if useView:
                    # NOTE(review): the views are computed (and may raise) but are
                    # not consumed below, since only the 'Classic' model is run.
                    P, Q = get_Views(aSet)
                if useConstraint:
                    A, B = get_Constraint(aSet)
                    port.ainequality = A
                    port.binequality = B

                # Estimate inputs from historical data, then optimize.
                port.assets_stats(method_mu='hist', method_cov='hist')
                w = port.optimization(model='Classic', rm=rm, obj=obj, rf=rf,
                                      l=risk_aversion, hist=True)
            except Exception as e:
                # Deliberate best-effort: a failed date falls back to the previous weights.
                print(f'Exception[{j}]:{e} -- {obj}, {r}, {rm} == {i}/{d_cnt} {current.name}\n')
                w = None

            if w is None and len(weights) > 0:
                # Carry the most recent weights forward, reshaped to assets-by-one
                # so it matches the layout returned by the optimizer.
                w = weights.tail(1).drop(columns=["index", "Date"], errors="ignore").T

            if w is not None and len(w) > 0:
                # `w` is indexed by asset (its transpose supplies the asset columns
                # below), so the asset names live on the index, not the columns.
                dataset['w_asset'] = w.index.to_list()
                wT = w.T
                wT.insert(0, "Date", [row['Date']])
                wT.insert(0, "index", [j])
                weights = pd.concat([weights, wT], axis=0)
            datasets.append(dataset)

        weights = weights.set_index("index")
        if len(weights) != len(r_index[r]):
            # Rows are missing when the first date(s) failed with nothing to carry forward.
            lw = len(weights)
            lr = len(r_index[r])
            print(f"*** [{j}] {obj}, {r}, {rm} == weights.len/r_index.len   {lw}/{lr}    {current.name}\n")

        result['weights'] = weights
        results.append(result)
    print(f'Existing: {current.name}, {current._identity}\n')
    return results

In [None]:
##
# Build one task descriptor per (objective, rebalance interval, risk measure)
# combination; the resulting list is what gets passed to run_pool_v2.
##

# Nested placeholder: models[objective][interval] -> {}
models = {obj: {r: {} for r in reb_interval} for obj in objectives}
data_chunks = []
for obj in objectives:
    for r in reb_interval:
        for rm in rms:
            print(obj, ",", r, ",", rm)
            weights = pd.DataFrame([])
            data_chunks.append(dict(keys=(obj, r, rm)))

# display(data_chunks)

In [None]:
import random

# Randomize the task order in place before the list is handed to the pool.
# NOTE(review): presumably done to spread slow and fast tasks across workers — confirm.
# No seed is set in this cell, so the order may differ between runs.
random.shuffle(data_chunks)
display(data_chunks)

In [None]:
%%time

from data_processing_v2 import run_pool_v2, set_debug

# NOTE(review): presumably turns on verbose output inside data_processing_v2 — confirm.
set_debug(True)

# Run opt_workerv2 over every task descriptor through the process pool.
# NOTE(review): no CPU count is passed here, so run_pool_v2 must choose its own — confirm its default.

opt_resultsv2 = run_pool_v2(data_chunks, opt_workerv2)

In [None]:
# opt_resultsv2

In [None]:
# Flatten the pool output (a list of per-worker batches) into `models`,
# saving each weight table to CSV along the way.
display(f"Optimized result length: {len(opt_resultsv2)}")
models = []
for batch in opt_resultsv2:
    for result in batch:
        obj, r, rm = result['keys']
        w = result['weights']
        print(obj, r, rm)
        csv_path = os.path.join(outputp, f"weights_{obj}_{r}_{rm}.csv")
        w.round(4).to_csv(csv_path)
        model = dict(keys=result['keys'], weights=w)
        models.append(model)

display(len(models))

In [None]:
#
# Build the parameter list for the backtest process pool.
# Set `target` to a keys tuple to restrict the run to a single model,
# e.g. target = ('MaxRet','S','WR'); None keeps every model.
#
target = None
parameters = []
for arg in models:
    if target is not None and arg['keys'] != target:
        continue
    param = {'keys': arg['keys']}
    display(param['keys'])
    param['weights'] = arg['weights'].round(4)
    parameters.append(param)

In [None]:
# display(parameters)

In [None]:
############################################################
# Building the Asset Allocation Class
############################################################

class AssetAllocation(bt.Strategy):
    """Rebalance to precomputed target weights on selected bars.

    A bar counter is incremented on every ``next()`` call. Whenever the
    counter value is a label in the ``weights`` index, each non-NaN weight in
    that row is sent to ``order_target_percent`` for the data feed of the
    same name.

    Keyword arguments (all optional, accepted in any order):
        data_keys: Identifier echoed in the initialisation messages.
        assets: Collection of feed names; each matching feed in
            ``self.datas`` is exposed as an attribute named after the feed.
        weights: DataFrame of target weights, one column per feed name,
            indexed by bar counter.
        dt_index: DataFrame with a ``Date`` column, looked up by bar counter
            for the rebalance message only.
    """

    def log(self, txt, dt=None):
        """Print ``txt`` prefixed by the ISO timestamp of ``dt`` (default: current bar)."""
        dt = dt or self.data.datetime[0]
        dt = bt.num2date(dt)
        print('\n{}: {}'.format(dt.isoformat(), txt))

    def __init__(self, **kwargs):
        self._n_assets = len(self.datas)
        self._weights = None
        self._assets = None
        self._dt_index = None
        self._data_key = "N/A"
        self.counter = 0
        self.orderid = None
        print(f"length of datas: {self._n_assets}")

        for key in kwargs:
            print(f"init(key: {key})")

        # Options are read by name rather than in arrival order, so the result
        # does not depend on how the caller ordered the keyword arguments
        # (previously `weights` before `assets` raised a TypeError, and
        # `data_keys` arriving last left "N/A" in every message).
        if "data_keys" in kwargs:
            self._data_key = kwargs["data_keys"]

        if "assets" in kwargs:
            self._assets = kwargs["assets"]
            l_assets = len(self._assets)
            print(f"init({self._data_key}) _assets({l_assets}): {self._assets}")
            for i, feed in enumerate(self.datas):
                _n = feed._name
                if _n in self._assets:
                    print(f"init>> assets[{i}]-{_n} in weights")
                    # next() resolves feeds through getattr(self, <name>).
                    setattr(self, _n, feed)

        if "weights" in kwargs:
            self._weights = kwargs["weights"]
            w_names = self._weights.columns.to_list()
            l_assets = len(self._assets) if self._assets is not None else 0
            # Compare like with like: number of weight columns vs number of assets.
            l_weights = len(w_names)
            print(f"init(weights, key={self._data_key}).names({l_weights}):  {w_names}")
            print(f"l_assets:{l_assets} == l_weights:{l_weights} : ", l_assets == l_weights)

        if "dt_index" in kwargs:
            self._dt_index = kwargs["dt_index"]
            print(f"set dt_index: length={len(self._dt_index)}")

    def notify_order(self, order):
        """Log completed, expired, cancelled and margin-rejected orders."""
        if order.status in [bt.Order.Submitted, bt.Order.Accepted]:
            return  # Await further notifications

        if order.status == order.Completed:
            if order.isbuy():
                self.log('BUY COMPLETE, {:.2f}'.format(order.executed.price))
            else:
                selltxt = 'SELL COMPLETE, {:.2f}'.format(order.executed.price)
                self.log(selltxt, order.executed.dt)
        elif order.status in [order.Expired, order.Canceled, order.Margin]:
            self.log('Order Status: {} '.format(order.Status[order.status]))

        # Allow new orders
        self.orderid = None

    def next(self):
        """Issue target-percent orders when the bar counter matches a weights row."""
        # Index membership is a direct lookup; no per-bar list is built.
        # Without a weights table there is nothing to rebalance to.
        if self._weights is not None and self.counter in self._weights.index:
            wght = self._weights.loc[self.counter, :].dropna()
            # The date is informational only, so a missing dt_index is tolerated.
            dt = None
            if self._dt_index is not None:
                dt = self._dt_index.loc[self.counter].Date
            print(f"next({self.counter}).Date({dt})--> wght  is:\n {wght}")
            for name, w in wght.items():
                self.order_target_percent(getattr(self, name), target=w)
        self.counter += 1

In [None]:
# Collect the 'name' entry of every price feed, preserving feed order.
assets_prices_names = [assets_prices[pos]['name'] for pos in range(len(assets_prices))]
print("length:", len(assets_prices_names))
print(assets_prices_names)

In [None]:
def bt_worker(d_cnt, data):
    """Run the backtrader backtest for every parameter set in ``data``.

    Args:
        d_cnt: Total task count; only used in progress messages.
        data: Sequence of dicts with ``'keys'`` (an ``(obj, r, rm)`` tuple)
            and ``'weights'`` (a DataFrame that includes a ``Date`` column).

    Returns:
        One dict per task with ``'keys'``, ``'metric'``, ``'Dret'``, ``'w'``
        (the weights without ``Date``) and ``'timestamp'``; ``'ptr'`` holds
        the raw backtest result and is present only when weights were not
        empty. ``'metric'`` and ``'Dret'`` are ``None`` for empty weights.
    """
    created = multiprocessing.Process()
    current = multiprocessing.current_process()
    print(f' running: {current.name}, {current._identity}\n created: {created.name}, {created._identity}\n')

    results = []
    for i, task in enumerate(data):
        task_keys = task['keys']
        obj, r, rm = task_keys
        target_weights = task['weights'].drop(columns=['Date'])
        print("weights of ", task_keys)
        display(target_weights)
        w_n = target_weights.columns.to_list()
        asset_names = assets_prices_names
        print(f"bt_worker: {obj},{r},{rm} == weight.length:{len(target_weights)},,{w_n}  {current.name}\n")

        start_time = time.time()
        print(f'bt_worker: {obj},{r},{rm} == {i}/{d_cnt} {current.name}\n')
        result = {'keys': (obj, r, rm)}

        if len(target_weights) == 0:
            # Nothing to trade on: record an empty outcome.
            result['metric'] = None
            result['Dret'] = None
        else:
            outcome = backtest(
                assets_prices,
                AssetAllocation,
                start=start_test,
                end=end_test,
                plot=plotFlag,
                assets=asset_names,
                weights=target_weights,
                dt_index=r_index[r],
                data_keys=task_keys,
            )
            result['metric'] = RetreiveStats(outcome, rm, obj, r)
            result['Dret'] = RetreiveDailyReturn(outcome, 'Return')
            result['ptr'] = outcome

        result['w'] = target_weights
        elapsed = time.time() - start_time
        result['timestamp'] = f"--{elapsed}--"

        results.append(result)
    return results

In [None]:
from simple_report import simple_backtest

In [None]:
# Spot-check one parameter set. Keys and weights are read from the SAME entry
# (index 0), so the printed keys describe the weights held in `w`, and the
# cell still works when only a single parameter set was selected.
print("param size: ", len(parameters))
if parameters:
    obj, r, rm = parameters[0]['keys']
    w = parameters[0]['weights']
    print(obj, r, rm)
    # display(w)

In [None]:
def simple_bt_worker(d_cnt, data):
    """Run ``simple_backtest`` for every parameter set in ``data``.

    Args:
        d_cnt: Total task count; only used in progress messages.
        data: Sequence of dicts with ``'keys'`` (an ``(obj, r, rm)`` tuple)
            and ``'weights'`` (a DataFrame that includes a ``Date`` column).

    Returns:
        One dict per task with ``'keys'``, ``'metric'``, ``'Dret'``, ``'w'``
        (the weights without ``Date``) and ``'timestamp'``. ``'metric'`` and
        ``'Dret'`` are ``None`` when the weights table is empty.
    """
    created = multiprocessing.Process()
    current = multiprocessing.current_process()
    print(f' running: {current.name}, {current._identity}\n created: {created.name}, {created._identity}\n')

    results = []
    for i, task in enumerate(data):
        obj, r, rm = task['keys']
        dated_weights = task['weights']
        print("weights of ", task['keys'])
        w_n = dated_weights.drop(columns=['Date']).columns.to_list()
        asset_names = assets_prices_names
        print(f"simply_bt_worker: {obj},{r},{rm} == weight.length:{len(dated_weights)},,{w_n}  {current.name}\n")

        start_time = time.time()
        print(f'simply_bt_worker: {obj},{r},{rm} == {i}/{d_cnt} {current.name}\n')
        result = {'keys': (obj, r, rm)}

        if len(dated_weights) == 0:
            # Nothing to evaluate: record an empty outcome.
            result['metric'] = None
            result['Dret'] = None
        else:
            metric, daily_returns = simple_backtest(dated_weights, returns, rm, obj, r)
            result['metric'] = metric
            result['Dret'] = daily_returns

        result['w'] = dated_weights.drop(columns=['Date'])
        elapsed = time.time() - start_time
        result['timestamp'] = f"--{elapsed}--"

        results.append(result)
    return results

In [None]:
%%time

# Run the backtests through the process pool: "bt2" selects the lightweight
# simple_backtest worker, anything else the backtrader-based worker.
bt_results = run_pool_v2(
    parameters,
    simple_bt_worker if backtest_flag == "bt2" else bt_worker,
)

In [None]:
import datetime

# Post-process the backtest output: save daily returns, plot the final
# allocation of each model and collect the metric rows.
check_flag=True

print(f'bt_results.length = {len(bt_results)}')
metric_list=[]
for batches in bt_results:
    print(f'batches.length = {len(batches)}')
    for result in batches:
        print(f'bt_results.length = {len(bt_results)}')
        aKey = result['keys']
        print(aKey)
        obj, r, rm = aKey
        print(f'obj={obj},r={r},rm={rm}')
        # Workers store Dret=None when a model had no weights; len(None) would raise.
        if result['Dret'] is not None and len(result['Dret']):
            r_fn = f"DailyRet_{obj}_{r}_{rm}.csv"
            ret_df = result['Dret']
            # Keep only the calendar date in the exported file.
            ret_df['Date'] = pd.to_datetime(ret_df['Date'])
            ret_df['Date'] = ret_df['Date'].dt.date
            ret_df.round(4).to_csv(os.path.join(outputp,r_fn), index=False)
            print(f"Daily return {r_fn} saved")
        if len(result['w']) > 0:
            # Last rebalance row, without assets that carry no weight.
            w=result['w'].iloc[-1,:].dropna()
            print(w)
            if check_flag:
                # We need matplotlib >= 3.3.0 to use this function
                ax = rp.plot_pie(w=w, title=f'{obj}-{rm}-{r}', others=0.05, nrow=25, cmap = "tab20",
                                height=6, width=10, ax=None)
                plt.show()
            ############################################################
            # Composition per Industry
            ############################################################
            w_classes = pd.concat([asset_classes.set_index('Assets'), w], axis=1)
            w_classes = w_classes.groupby(['Sector']).sum()
        if result['metric'] is not None:
            metric_list.append(result['metric'])


In [None]:
# Benchmark rows first, then the optimized portfolios ranked by CAGR and
# Sharpe ratio (both descending).
BM_metric_list_df = pd.DataFrame(BM_metric_list)
metric_df = pd.DataFrame(metric_list).sort_values(
    by=['CAGR', 'Sharpe Ratio'],
    ascending=False,
)
full_metric = pd.concat([BM_metric_list_df, metric_df])
display(full_metric.head(15))

In [None]:
# Write the combined metric table and the constraints table to the output folder.
metric_csv = os.path.join(outputp, "Port_Metric.csv")
constraints_csv = os.path.join(outputp, "Constraints.csv")
full_metric.to_csv(metric_csv, index=False)
constraints.to_csv(constraints_csv, index=False)

In [None]:
# Portfolios whose R_Interval is 'Q', ranked by Sharpe ratio and then CAGR.
q_rows = metric_df.loc[metric_df['R_Interval'] == 'Q']
display(q_rows.sort_values(by=['Sharpe Ratio', 'CAGR'], ascending=False))

In [None]:
# Portfolios whose R_Interval is 'S', ranked by Sharpe ratio and then CAGR.
s_rows = metric_df.loc[metric_df['R_Interval'] == 'S']
display(s_rows.sort_values(by=['Sharpe Ratio', 'CAGR'], ascending=False))