In [205]:
# Importing the pandas library for data manipulation and analysis
import pandas as pd

# Importing the matplotlib library for creating visualizations
import matplotlib.pyplot as plt

# Importing the backtrader library for backtesting trading strategies
import backtrader as bt

# Importing the numpy library for numerical computing
import numpy as np

# Importing matrix, solvers, and blas from cvxopt library for convex optimization
from cvxopt import matrix, solvers, blas

# Importing the tabulate library for creating tables
from tabulate import tabulate

# Importing the datetime module to work with dates and times
import datetime

# Importing the matplotlib library again (this line is redundant)
import matplotlib.pyplot as plt

# Importing the cvxopt library for convex optimization 
import cvxopt as opt

# Importing quantStats for quantitative financial analysis
import quantstats as qs

import math

#import yfinance as yf

#from strategies import *

In [206]:
# Setting the maximum number of columns to display in pandas to None, which means all columns will be displayed
pd.set_option('display.max_columns', None)

# Setting the maximum number of rows to display in pandas to None, which means all rows will be displayed
pd.set_option('display.max_rows', None)

# Using the magic command %matplotlib inline to display matplotlib plots inline in a Jupyter notebook
%matplotlib inline

# Setting the random seed for numpy to 42 for reproducibility
np.random.seed(42)

# Setting the 'show_progress' option for solvers in cvxopt library to False, which means no progress will be shown while solving
solvers.options['show_progress'] = False

In [207]:
# Reading a CSV file named 'prices.csv' into a pandas DataFrame
df = pd.read_csv('prices.csv')

# Converting the 'date' column to a datetime object with the specified format
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Getting the minimum and maximum dates in the 'date' column
start_date = df['date'].min()
end_date = df['date'].max()

# store starting dates for each ticker
start_dates = df.groupby('ticker')['date'].min()

# Creating a DataFrame with all combinations of tickers and dates
tickers = df['ticker'].unique()
dates = pd.date_range(start=start_date, end=end_date, freq='D')
index = pd.MultiIndex.from_product([tickers, dates], names=['ticker', 'date'])
dummy_df = pd.DataFrame(index=index).reset_index()

# Merging the dummy DataFrame with the original DataFrame on the 'ticker' and 'date' columns
merged_df = pd.merge(dummy_df, df, on=['ticker', 'date'], how='left')

# Forward filling missing values in the 'close' column with the next available value within each group of 'ticker'
merged_df['close'] = merged_df.groupby('ticker')['close'].fillna(method='bfill')

# Backward filling remaining missing values in the 'close' column with the previous available value within each group of 'ticker'
merged_df['close'] = merged_df.groupby('ticker')['close'].fillna(method='ffill')

# do the same for open, high, low
merged_df['open'] = merged_df.groupby('ticker')['open'].fillna(method='bfill')
merged_df['open'] = merged_df.groupby('ticker')['open'].fillna(method='ffill')

merged_df['high'] = merged_df.groupby('ticker')['high'].fillna(method='bfill')
merged_df['high'] = merged_df.groupby('ticker')['high'].fillna(method='ffill')

merged_df['low'] = merged_df.groupby('ticker')['low'].fillna(method='bfill')
merged_df['low'] = merged_df.groupby('ticker')['low'].fillna(method='ffill')

merged_df['open']=merged_df['open']
merged_df['high']=merged_df['high']
merged_df['low']=merged_df['low']

# Updating the original DataFrame with the merged DataFrame
df = merged_df

# Converting the 'date' column to a datetime object with the specified format again (this line is redundant)
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Setting the 'date' column as the index of the DataFrame
df = df.set_index('date')

# Printing the unique values in the 'ticker' column of the DataFrame
tickersList = df.ticker.unique()
tickersList

print(len(df))


95400


In [212]:
# Defining a class named 'MeanVarOpt' that inherits from the 'bt.Strategy' class
class MeanVarOpt(bt.Strategy):

    # Defining the strategy parameters as a tuple of tuples
    params = (
        ('cnt',10),
        ('sma', 50),  # Simple moving average period
        ('lookback', 700),  # Lookback period for calculating returns
        ('alpha',0.9),  # maximum portfolio value to be invested on a single day
        ('beta',3) # number of days you want to carry forward buying alpha % of stocks
    )
    
    # Defining the __init__ method to initialize the instance variables
    def __init__(self):
        # Initializing instance variables to store simple moving averages and closing prices over simple moving averages
        self.SMA = []
        self.curr = 0
        self.close_over_sma = []
        
        # Initializing the portfolio weights to be equal for all assets and scaled by the risk tolerance parameter
        # self.weights = np.ones(len(self.datas)) / len(self.datas)
        
        # Looping over each data feed and calculating its simple moving average using the 'bt.indicators.SMA' class
        for i, d in enumerate(self.datas):
            self.SMA.append(bt.indicators.SMA(d.close, period=self.params.sma))
        
        # Initializing an instance variable to store the selected stocks
        self.selected_stocks = []
        
        # Adding all tickers to the list of selected stocks
        for i, d in enumerate(self.datas):
            self.selected_stocks.append(d._name)
        
        self.timer=0
        self.weights=np.zeros(len(self.datas))
        self.month = None


    # Defining a method named 'calculate_portfolio_weights' to calculate the optimal portfolio weights using mean-variance optimization
    def calculate_portfolio_weights(self):
        # Checking if there is enough data to calculate returns
        if len(self.datas[0]) < self.params.lookback:
            return np.zeros(len(self.datas))
        
        # Initializing variables to store prices and returns for each data feed
        n_portfolios = len(self.datas)
        prices = np.zeros((n_portfolios, self.params.lookback))
        returns = np.zeros((n_portfolios, self.params.lookback))
        
        # Looping over each data feed and getting its closing prices and returns
        for i, d in enumerate(self.datas):
            prices[i, :] = d.close.get(size=self.params.lookback)
            returns[i, 1:] = np.diff(prices[i, :]) / prices[i, :-1]
        
        # Converting the returns array to a matrix
        returns = np.asmatrix(returns)
        
        # Defining the number of portfolios on the efficient frontier and their expected returns
        N = 100
        mus = [10**(5.0 * t/N - 1.0) for t in range(N)]
        
        # Calculating the covariance matrix of returns and converting it to a cvxopt matrix
        S = opt.matrix(np.cov(returns))
        
        # Calculating the mean returns and converting it to a cvxopt matrix
        pbar = opt.matrix(np.mean(returns, axis=1))
        
        # Creating constraint matrices for quadratic programming
        G = -opt.matrix(np.eye(n_portfolios))   # negative n x n identity matrix
        h = opt.matrix(0.0, (n_portfolios ,1))
        A = opt.matrix(1.0, (1, n_portfolios))
        b = opt.matrix(1.0)
        
        # Calculating efficient frontier weights using quadratic programming for each expected return on the frontier
        portfolios = [solvers.qp(mu*S, -pbar, G, h, A, b)['x'] for mu in mus]
        
        # Calculating risks and returns for frontier portfolios
        returns = [blas.dot(pbar, x) for x in portfolios]
        risks = [np.sqrt(blas.dot(x, S*x)) for x in portfolios]

        risks = np.array(risks)
        risks = risks * (1)
        
        # Calculating the 2nd degree polynomial of the frontier curve
        m1 = np.polyfit(returns, risks, 2)
        
        # Calculating the expected return that minimizes portfolio variance (i.e., risk)
        x1 = np.sqrt(m1[2] / m1[0])
        
        # Calculating the optimal portfolio weights using quadratic programming
        wt = solvers.qp(opt.matrix(x1 * S), -pbar, G, h, A, b)['x']
        
        # Updating the instance variable with the calculated weights and normalizing them to sum to 1
        weights = np.asarray(wt)
        weights[weights < 0] = 0

        # if stock is not selected set weight to 0
        for i, d in enumerate(self.datas):
            if(d._name not in self.selected_stocks):
                weights[i] = 0

        if(np.sum(weights) == 0):
            return weights
        
        weights /= np.sum(weights)

        return weights

    
    def buy_residual_stocks(self):
        #weights=self.weights*self.params.alpha
        weights = self.weights
        portfolio_value = self.broker.getvalue()
        for i, d in enumerate(self.datas):
            if  start_dates[d._name] < self.data.datetime.date(0) - datetime.timedelta(days=self.params.lookback):
                # Placing an order to adjust the position size to match the calculated weight
                no_of_stocks=portfolio_value*weights[i]/d.open[1]

                # round down
                no_of_stocks = int(no_of_stocks)
                self.buy(data=d, size=no_of_stocks)
        
        #self.weights-=weights
        

    # Defining the 'next' method to execute on each iteration of the backtesting loop
    def nextnexttest(self):
        # Getting the current date of the first data feed
        curr_date = self.data.datetime.date(0)
        # Checking if the current day is the rebalance day
        if self.month == self.data.datetime.date().month:
            #if(self.timer > 0):
                #self.timer -= 1
                #self.buy_residual_stocks()
            return
            
        elif self.month == None or self.month!=self.data.datetime.date().month:
            
            # Resetting the list of selected stocks
            self.selected_stocks = []

            # Looping over each data feed and checking if its current closing price is above its simple moving average
            for i, d in enumerate(self.datas):
                if d.close[0] > self.SMA[i][0]:
                    # Adding the ticker to the list of selected stocks
                    self.selected_stocks.append(d._name)

            # Calculating the optimal portfolio weights
            self.weights=self.calculate_portfolio_weights()

            for i, d in enumerate(self.datas):
                self.close(data=d)
            
            self.month = self.data.datetime.date().month

            #self.timer=self.params.beta
            self.buy_residual_stocks()
            
    def next(self):
        if self.curr % self.params.cnt ==0:
            if self.params.cnt == 1 and self.curr > 95395:
                return 
            self.selected_stocks = []

            # Looping over each data feed and checking if its current closing price is above its simple moving average
            for i, d in enumerate(self.datas):
                if d.close[0] > self.SMA[i][0]:
                    # Adding the ticker to the list of selected stocks
                    self.selected_stocks.append(d._name)

            # Calculating the optimal portfolio weights
            self.weights=self.calculate_portfolio_weights()

            for i, d in enumerate(self.datas):
                self.close(data=d)
            
            
            self.buy_residual_stocks()
        
        self.curr+=1
            

In [214]:
MeanVarOpt.cat = 20

In [215]:
# Creating an instance of the 'bt.Cerebro' class
#cb = bt.Cerebro()
cb = bt.Cerebro()
cb.addanalyzer(bt.analyzers.SharpeRatio, riskfreerate=0.0)
cb.addanalyzer(bt.analyzers.Returns)
cb.addanalyzer(bt.analyzers.DrawDown)
# Adding the 'MeanVarOpt' strategy to the Cerebro instance
cb.addstrategy(MeanVarOpt)
#cb.optstrategy(MeanVarOpt, sma=range(50, 250), lookback=range(50, 500))

# Setting the starting portfolio value
starting_portfolio_value = 10000000.0
cb.broker.setcash(starting_portfolio_value)

# Looping over each unique value in the 'ticker' column of the DataFrame
for ticker in df['ticker'].unique():
    # Creating a new DataFrame with only rows where the 'ticker' column is equal to the current ticker
    df1 = df[df['ticker'] == ticker]
    
    # Creating a data feed from the new DataFrame using the 'bt.feeds.PandasData' class
    df_feed = bt.feeds.PandasData(dataname=df1,
                                  open='open',
                                  close='close',
                                  high='high',
                                  low='low',
                                  volume=None,
                                  openinterest=None,
                                  datetime=None,
                                  fromdate=datetime.datetime(2010, 1, 1),
                                  todate=datetime.datetime(2020, 12, 31))
    
    # Adding the data feed to the Cerebro instance with a name equal to the current ticker
    cb.adddata(df_feed, name=ticker)

# add analyzers
cb.addanalyzer(bt.analyzers.PyFolio, _name='pyfolio')
#cb.broker.setcommission(commission=2.0, margin=2000.0, mult=10.0)

# Running the backtesting using the Cerebro instance
results=cb.run()


  if  start_dates[d._name] < self.data.datetime.date(0) - datetime.timedelta(days=self.params.lookback):


In [216]:
print(results[0].analyzers.drawdown.get_analysis()['max']['drawdown'])
print(results[0].analyzers.returns.get_analysis()['rnorm100'])
print(results[0].analyzers.sharperatio.get_analysis()['sharperatio'])

38.96866717088283
13.164931788525722
0.7746176481748461


In [120]:
#200 400 0.447
#250 400 0.563
#200 500 0.544

#changing rebalancing freq for 250 500 --- Sharpe, CAGR 
# 50 days ---> 0.582, 12.16
# 20 days ~ month --> 0.539, 10.21 
# 10 days ---> 0.53, 13.048
# 5 days ---> 0.576 , 13.62
# 3 days ---> 0.487 , 10.39
# 1 day ---> 


#changing rebalancing freq for 50 700 --- Sharpe, CAGR 
# 50 days ---> 0.774, 16.02
# 20 days ~ month --> 0.79 , 15.12 
# 10 days ---> 0.65 , 14.16
# 5 days --->  0.736 , 13.79 
# 3 days ---> 0.60 , 13.92
# 1 day ---> 

# Low SMA with less frequent rebalancing and Higher SMA with more frequent rebalancing 

In [211]:
# Getting the final portfolio value from the broker in the Cerebro instance
final_portfolio_value = cb.broker.getvalue()

# Printing the starting and final portfolio values
print(f'Starting Portfolio Value: {starting_portfolio_value:.2f}')
print(f'Final Portfolio Value: {final_portfolio_value:.2f}')

# Calculating and printing the absolute and percentage profit
print(f'Profit: {final_portfolio_value - starting_portfolio_value:.2f}')
print(f'% Profit: {(final_portfolio_value - starting_portfolio_value) / starting_portfolio_value * 100:.2f}%')

Starting Portfolio Value: 10000000.00
Final Portfolio Value: 65065551.23
Profit: 55065551.23
% Profit: 550.66%


In [186]:
%matplotlib inline
# Setting the default figure size for matplotlib
plt.rcParams['figure.figsize'] = [15, 100]

# Updating the default font size for matplotlib
plt.rcParams.update({'font.size': 12})

# Plotting the results of the backtesting using the 'plot' method of the Cerebro instance
cb.plot()

<IPython.core.display.Javascript object>

ValueError: Axis limits cannot be NaN or Inf

In [None]:
strat = results[-1]
portfolio_stats = strat.analyzers.getbyname('pyfolio')
returns, positions, transactions, gross_lev = portfolio_stats.get_pf_items()
returns.index = returns.index.tz_convert(None)
qs.reports.html(returns, output='stats.html', title='SMA Strat-Project 1')