# Backtesting Framework. 

In [2]:
# Python module. 
import re, os, csv, time 
import numpy as np 
import pandas as pd 
import pandas_datareader.data as web 
import matplotlib.pyplot as plt 
import pyfolio as pf 
import backtrader as bt 
from backtrader.feeds import PandasData 

# Change the current directory from (./notebook) to the project root, i.e. the first
# ancestor whose path ends in "MADS-CAP". Stop with an error once the filesystem root
# is reached: there `chdir("..")` no longer moves, so the loop would otherwise spin forever
# when the notebook is started outside the project tree.
while not re.match(r".+MADS-CAP$", os.getcwd()):
	parent_dir = os.path.dirname(os.getcwd())
	if parent_dir == os.getcwd():
		raise FileNotFoundError("Project root (MADS-CAP) not found above the starting directory.")
	os.chdir("..")

print(f"Current directory: ({os.getcwd()})")

# For clearing safe warnings. Not important. 
from IPython.display import clear_output

# Custom modules. 
from source.modules.manage_files import ManageFiles 

# Custom configs. 
from source.config_py.config import (
	DIR_DATASET_CONSOLIDATED, 
	TICKER_TO_COLLECT, TICKER_TO_EXCLUDE, TICKER_DATE_COLLECT, 
	PARAM_SEED, 
)

Current directory: (/Users/lioneltay/Dropbox/Courses/michigan_mads/SIADS_697_/submission/MADS-CAP)


## Configurations (general). 

In [3]:
# Matplotlib setting. 
%matplotlib inline 

# Pandas DF config. 
pd.set_option("display.max_rows", 50, "display.max_columns", 50, "display.max_colwidth", 50)

# File management setup. 
manage_files = ManageFiles() 

# Ensure reproducibility. 
np.random.seed(PARAM_SEED) 

# List of ticker to collect data. 
ticker_to_collect = TICKER_TO_COLLECT.difference(TICKER_TO_EXCLUDE) 

# Date range. 
date_beg, date_end = TICKER_DATE_COLLECT 

# For clearing the output. Not important. 
clear_output()

## Load consolidated ticker data. 

In [3]:
# Path of the consolidated feature table inside the consolidated-dataset directory.
filepath = os.path.join(DIR_DATASET_CONSOLIDATED, "consolidated_feature.parquet")
# Read the parquet file into a DataFrame (one row per date and ticker, per the preview below).
df_feature = pd.read_parquet(filepath) 

# Preview. 
df_feature 

Unnamed: 0,date,open,high,low,close,volume,dividends,stock_splits,ticker,return_c2c_lag1,...,candle_cdleveningstar,candle_cdlhammer,candle_cdlhangingman,candle_cdlharami,candle_cdlinvertedhammer,candle_cdlmorningdojistar,candle_cdlmorningstar,candle_cdlrickshawman,candle_cdlshootingstar,candle_cdltristar
0,1998-11-30,154.66,154.84,147.12,147.85,2063440.0,0.0,0.0,C,,...,0,0,0,0,0,0,0,0,0,0
1,1998-12-01,143.44,147.49,141.42,147.12,2291020.0,0.0,0.0,C,-0.004937,...,0,0,0,0,0,0,0,0,0,0
2,1998-12-02,146.02,150.61,145.28,148.04,1852540.0,0.0,0.0,C,0.006253,...,0,0,0,0,0,0,0,0,0,0
3,1998-12-03,146.93,147.12,141.23,141.79,2681740.0,0.0,0.0,C,-0.042218,...,0,0,0,0,0,0,0,0,0,0
4,1998-12-04,144.54,146.02,140.50,145.83,2304760.0,0.0,0.0,C,0.028493,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174254,2022-02-18,149.69,151.66,148.70,149.70,11733300.0,0.0,0.0,JPM,0.004698,...,0,0,0,0,0,0,0,0,0,0
174255,2022-02-22,148.18,150.78,147.99,149.43,11333500.0,0.0,0.0,JPM,-0.001804,...,0,0,0,0,0,0,0,0,0,0
174256,2022-02-23,150.66,150.84,145.59,146.30,11799000.0,0.0,0.0,JPM,-0.020946,...,0,0,0,0,0,0,0,0,0,0
174257,2022-02-24,140.64,142.66,137.53,142.23,25655100.0,0.0,0.0,JPM,-0.027820,...,0,0,0,0,0,0,0,0,0,0


In [5]:
# Scratch check of the date arithmetic: step back one day, then apply QuarterEnd(-1).
pd.Timestamp("2020-05-30 00:00:00") - pd.tseries.offsets.DateOffset(days=1) + pd.tseries.offsets.QuarterEnd(-1)

Timestamp('2020-03-31 00:00:00')

In [11]:
# First date in the feature table, parsed to a Timestamp. Read from `df_feature` directly:
# `dft` is only created in a cell further down, so referencing it here fails when the
# notebook is run top to bottom on a fresh kernel.
pd.to_datetime(df_feature["date"].loc[0])

Timestamp('1998-11-30 00:00:00')

In [9]:
# Working copy of the feature table with the "date" column parsed to datetime.
dft = df_feature.assign(date=lambda frame: pd.to_datetime(frame["date"]))

## Backtesting. 

### Backtesting setup. 

In [None]:
def format_time(t):
    """
    Format a duration in seconds as "HH:MM:SS".

    The duration is rounded to the nearest whole second before it is split into
    fields, so a fractional value such as 59.6 carries into the minutes
    ("00:01:00") instead of rendering an invalid "00:00:60".

    Parameters
    ----------
    t : int or float
        Elapsed time in seconds.

    Returns
    -------
    str
        Zero-padded "HH:MM:SS"; the hour field widens beyond two digits if needed.
    """
    total_seconds = int(round(t))
    minutes, s = divmod(total_seconds, 60)
    h, m = divmod(minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d}"


class FixedCommisionScheme(bt.CommInfoBase):
    """
    Commission scheme that charges a fixed amount per unit of order size.
        — https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/comminfo.py#L30

    The commission for an order is `abs(size) * commission`; the execution price
    does not enter the calculation.
    """
    
    # backtrader parameter declarations (name, default).
    params = (
        ("commission", .02),  # amount charged per unit of size
        ("stocklike", True),
        ("commtype", bt.CommInfoBase.COMM_FIXED),
    )

    def _getcommission(self, size, price, pseudoexec):
        """Return the commission for `size` units; `price` and `pseudoexec` are unused."""
        return abs(size) * self.p.commission


class SignalData(PandasData):
    """
    Pandas data feed carrying OHLCV plus the model's "predicted" value.
        — https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/feeds/pandafeed.py#L107

    Every name in `cols` is declared as a line and mapped with -1, so the feed
    autodetects the matching DataFrame column by name. The timestamp is taken from
    the DataFrame index ("datetime": None), so the frame passed as `dataname` must
    be indexed by date.
    """

    ohlvc = ["open", "high", "low", "close", "volume"]
    cols = ohlvc + ["predicted"]

    # All parameters related to lines must have numeric values as indices into the tuples.
    # NOTE(review): PandasData already declares the OHLCV lines — confirm whether
    # declaring only ("predicted",) here would be sufficient.
    lines = tuple(cols)

    # Define parameters.
    # — (None)   : Col is the "index" in the Pandas Dataframe.
    # — (-1)     : No index, autodetect columns.
    # — (>= 0)   : Numeric index for the column identifier.
    # — (string) : Column name (as index) in the pandas dataframe.
    # The timestamp parameter PandasData reads is named "datetime"; a "date" entry
    # would only add an extra parameter that the feed never consults.
    params = {c: -1 for c in cols}
    params.update({"datetime": None})
    params = tuple(params.items())


class MLStrategy(bt.Strategy):
    """
    Long/short strategy driven by each data feed's "predicted" line.
        — https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/strategy.py#L107

    On every bar, the feeds whose current date equals the first feed's date are split
    by the sign of `predicted`. The `n_positions` largest positive predictions are
    targeted long and the `n_positions` most negative ones are targeted short, each
    with an equal share of the portfolio; open positions outside those two lists are
    closed. Nothing is held unless both sides have at least `min_positions` candidates.

    Params
    ------
    n_positions   : maximum number of long and of short positions.
    min_positions : minimum number of candidates required on each side.
    verbose       : when True, completed / canceled / rejected orders are logged.
    log_file      : path of the CSV file that log rows are appended to.
    """

    params = (
        ("n_positions", 10),
        ("min_positions", 5),
        ("verbose", False),
        ("log_file", "backtest.csv"),
    )

    def log(self, txt, dt=None):
        """
        Append one row to the CSV log.

        The row is the ISO date followed by `txt` split on commas. `dt` defaults to
        the current date of the first data feed.
        """

        # Feeds expose their timestamp through the "datetime" line (there is no "date" line).
        dt = dt or self.datas[0].datetime.date()
        # newline="" lets the csv writer control line endings (avoids blank rows on Windows).
        with open(self.p.log_file, mode="a", newline="") as f:
            log_writer = csv.writer(f)
            log_writer.writerow([dt.isoformat()] + txt.split(","))

    def notify_order(self, order):
        """Log completed and canceled/margin/rejected orders when `verbose` is set."""

        # Nothing to report while the order is still in flight.
        if order.status in [order.Submitted, order.Accepted]:
            return

        if not self.p.verbose:
            return

        if order.status == order.Completed:
            p = order.executed.price
            if order.isbuy():
                self.log(f"{order.data._name}, BUY executed,{p:.2f}")
            elif order.issell():
                self.log(f"{order.data._name}, SELL executed,{p:.2f}")

        # The broker could reject the order, e.g. if there is not enough cash.
        elif order.status in [order.Canceled, order.Margin, order.Rejected]:
            self.log(f"{order.data._name}, Order Canceled/Margin/Rejected")

    def prenext(self):
        """
        (bt) calls (prenext) instead of next unless all datafeeds have
        current values, so call next to avoid duplicating logic.
        """
        self.next()

    def next(self):
        """Execute the strategy. Take or close the position at each iteration."""

        today = self.datas[0].datetime.date()

        # # Only trade on Mondays.
        # if today.weekday() not in [0, 3]:
        #     return

        # Key = ticker name. Value = predicted forward return, split by sign.
        # Feeds without a bar for today (and predictions that are 0 or NaN) are skipped.
        up, down = {}, {}
        for data in self.datas:
            if data.datetime.date() != today:
                continue
            predicted = data.predicted[0]
            if predicted > 0:
                up[data._name] = predicted
            elif predicted < 0:
                down[data._name] = predicted

        # Sort the ticker names by predicted value and keep the top N: most negative
        # first for the shorts, most positive first for the longs.
        ls_short = sorted(down, key=down.get)[:self.p.n_positions]
        ls_longs = sorted(up, key=up.get, reverse=True)[:self.p.n_positions]
        n_short, n_longs = len(ls_short), len(ls_longs)

        # Only take positions if there are at least min N longs and min N shorts.
        if n_short < self.p.min_positions or n_longs < self.p.min_positions:
            ls_longs, ls_short = [], []

        # Close every open position that is not targeted today (target percentage 0).
        targeted = set(ls_longs) | set(ls_short)
        positions = [d._name for d, pos in self.getpositions().items() if pos]
        for ticker in positions:
            if ticker not in targeted:
                self.order_target_percent(data=ticker, target=0)
                self.log(f"{ticker}, CLOSING ORDER CREATED")

        # Target share of the portfolio per position: each side is divided equally
        # among N positions. The short side uses negative targets, i.e. it comes on
        # top of the long side rather than out of it.
        short_target = -1 / max(self.p.n_positions, n_short)
        longs_target = 1 / max(self.p.n_positions, n_longs)

        # Take or adjust the positions. Passing the ticker name as `data` lets
        # backtrader resolve the matching feed.
        for ticker in ls_short:
            self.order_target_percent(data=ticker, target=short_target)
            self.log(f"{ticker}, SHORT ORDER CREATED")
        for ticker in ls_longs:
            self.order_target_percent(data=ticker, target=longs_target)
            self.log(f"{ticker}, LONG ORDER CREATED")

### Execute the backtesting. 

In [None]:
"""
Cerebro documentation: 
	— https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/cerebro.py#L747
	— https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/brokers/bbroker.py
	— https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/brokers/ibbroker.py
	— https://github.com/mementum/backtrader/blob/e2674b1690f6366e08646d8cfd44af7bb71b3970/backtrader/comminfo.py
    — https://github.com/mementum/backtrader/blob/master/backtrader/analyzers/pyfolio.py
"""

# Initiate a "Cerebro" instance and other parameters.
cerebro = bt.Cerebro()
cash = 100000
comminfo = FixedCommisionScheme()

# Broker setup.
cerebro.broker.addcommissioninfo(comminfo)
cerebro.broker.setcash(cash)

# Add one data feed per ticker. The tickers are sorted so the feed order (and with it
# `datas[0]`, which the strategy uses as its calendar) is identical on every run;
# iterating the raw set would give an arbitrary order.
# NOTE(review): the strategy reads a "predicted" column — confirm df_feature contains it.
for ticker in sorted(ticker_to_collect):
    df_data = df_feature.loc[df_feature["ticker"] == ticker]
    if df_data.empty:
        # No rows for this ticker: an empty feed would contribute nothing.
        continue

    # The feed reads its timestamps from the DataFrame index, so index by parsed date.
    df_data = (
        df_data
        .assign(date=pd.to_datetime(df_data["date"]))
        .set_index("date")
        .sort_index()
    )
    bt_data = SignalData(dataname=df_data)
    cerebro.adddata(bt_data, name=ticker)

# You can add multiple analysers.
cerebro.addanalyzer(bt.analyzers.PyFolio, _name="pyfolio")

# You can add multiple strategies.
cerebro.addstrategy(MLStrategy, n_positions=25, min_positions=20, verbose=True, log_file="bt_log.csv")

# Execute the backtesting and get the return value of the strategy.
# `time` is imported as a module, so the clock has to be called as an attribute of it.
start = time.perf_counter()
results = cerebro.run()
ending_value = cerebro.broker.getvalue()
duration = time.perf_counter() - start

print(f"Final Portfolio Value: {ending_value:,.2f}")
print(f"Duration: {format_time(duration)}")

### Visualise the backtesting result. 

In [None]:
# # Plot the results. Disabled because large number of datafeeds all plot separately.
# cerebro.plot() 
# figure = cerebro.plot(style="candlebars")[0][0]
# figure.savefig(f"backtrader.png")

## Portfolio assessment. 

### Get S&P benchmark. 

In [None]:
# Daily S&P 500 returns from FRED, localised to UTC. The window follows the configured
# collection range (date_beg / date_end) instead of a fixed 2014–2018 span, so the
# benchmark is requested for the same period as the ticker data.
# NOTE(review): FRED may serve only part of this range for "SP500" — check the coverage returned.
df_benchmark = web.DataReader("SP500", "fred", date_beg, date_end).squeeze()
df_benchmark = df_benchmark.pct_change().tz_localize("UTC")

### Get the transactions. 

In [None]:
# Pull the pyfolio inputs out of the analyzer attached to the first strategy.
pyfolio_analyzer = results[0].analyzers.getbyname("pyfolio")
returns, positions, transactions, gross_lev = pyfolio_analyzer.get_pf_items()

# returns.to_hdf("backtrader.h5", "returns")
# positions.to_hdf("backtrader.h5", "positions")
# transactions.to_hdf("backtrader.h5", "transactions/")
# gross_lev.to_hdf("backtrader.h5", "gross_lev")

# returns = pd.read_hdf("backtrader.h5", "returns")
# positions = pd.read_hdf("backtrader.h5", "positions")
# transactions = pd.read_hdf("backtrader.h5", "transactions/")
# gross_lev = pd.read_hdf("backtrader.h5", "gross_lev")

# Group the transactions by their first index level, then total the positive
# (long) and the negative (short) transaction values within each group.
df_transactions = transactions.groupby(level=0)
df_longs = df_transactions["value"].apply(lambda v: v.where(v.gt(0)).sum())
df_short = df_transactions["value"].apply(lambda v: v.where(v.lt(0)).sum())

### Visualise the result. 

In [None]:
fig, axes = plt.subplots(ncols=2, figsize=(15, 5))

# Strategy and benchmark returns side by side, joined on the date index.
df_compare_benchmark = returns.to_frame("Strategy").join(df_benchmark.to_frame("Benchmark (S&P 500)"))

# Left panel: compounded (cumulative) return of both columns.
cumulative_return = (df_compare_benchmark + 1).cumprod() - 1
cumulative_return.plot(ax=axes[0], title="Cumulative Return")

# Right panel: long and short transaction totals plus the cash column.
for series, label in ((df_longs, "Longs"), (df_short, "Short")):
    series.plot(label=label, ax=axes[1], title="Positions")
positions.cash.plot(label="PF Value", ax=axes[1])
axes[1].legend()

fig.tight_layout()

In [None]:
# Full pyfolio tear sheet for the strategy; the benchmark is passed without its NaN rows.
pf.create_full_tear_sheet(returns, transactions=transactions, positions=positions, benchmark_rets=df_benchmark.dropna())