# MMF1921 (Summer 2024) - Project 1
 
 The purpose of this program is to implement the following factor models
 
     a) Multi-factor OLS regression
     b) Fama-French 3-factor model
     c) LASSO
     d) Best Subset Selection
 
 and to use these factor models to estimate the asset expected returns and covariance matrix. 
 
These parameters will then be used to test the out-of-sample performance using MVO to construct optimal portfolios.
 
 Use can use this template to write your program.

     Student Name: Kaiwen Shen
     Student ID: 1009970239

In [25]:
%load_ext autoreload
%autoreload 2

In [2]:
import time
import math
from scipy.stats import gmean
import matplotlib.pyplot as plt
from functions.BSS import *
from functions.FF import *
from functions.LASSO import *
from functions.MVO import *
from functions.OLS import *
import pandas as pd

adjClose = pd.read_csv("MMF1921_AssetPrices.csv", index_col=0)
factorRet = pd.read_csv("MMF1921_FactorReturns.csv", index_col=0)

In [3]:
adjClose.index = pd.to_datetime(adjClose.index)
factorRet.index = pd.to_datetime(factorRet.index)

In [4]:
#rf and factor returns
riskFree = factorRet['RF']
factorRet = factorRet.loc[:, factorRet.columns != 'RF'];

In [5]:
#Identify the tickers and the dates
tickers = adjClose.columns
dates = factorRet.index

In [6]:
# Calculate the stocks monthly excess returns
# pct change and drop the first null observation
returns = adjClose.pct_change(1).iloc[1:, :]

returns = returns - np.diag(riskFree.values) @ np.ones_like(returns.values)
# Align the price table to the asset and factor returns tables by discarding the first observation.
adjClose = adjClose.iloc[1:, :]

In [7]:
assert adjClose.index[0] == returns.index[0]
assert adjClose.index[0] == factorRet.index[0]

# 2. Define your initial parameters

In [8]:
#Initial budget to invest ($100,000)
initialVal = 100000

#Start of in-sample calibration period
calStart = pd.to_datetime('2008-01-01', format='%Y-%m-%d')
calEnd = calStart + pd.offsets.DateOffset(years=4) - pd.offsets.DateOffset(days=1)

#Start of out-of-sample test period
testStart = pd.to_datetime('2012-01-01', format='%Y-%m-%d')
testEnd = testStart + pd.offsets.DateOffset(years=1) - pd.offsets.DateOffset(days=1)

#Number of investment periods (each investment period is 1 year long)
NoPeriods = 5

#Factor models
#Note: You must populate the functions OLS.py, FF.py, LASSO.py and BSS.py with your own code.
FMList = [OLS, FF, LASSO, BSS]
NoModels = len(FMList)

#Tags for the portfolios under the different factor models
tags = ['OLS portfolio', 'FF portfolio', 'LASSO portfolio', 'BSS portfolio']

# Collecting data for the input of factor combining model

In [9]:
train_period_ret = []
train_period_factor_ret = []
for t in range(NoPeriods):
    # Subset the returns and factor returns corresponding to the current calibration period.
    periodReturns = returns[(calStart <= returns.index) & (returns.index <= calEnd)]
    periodFactRet = factorRet[(calStart <= factorRet.index) & (factorRet.index <= calEnd)]
    
    # Update your calibration and out-of-sample test periods
    calStart = calStart + pd.offsets.DateOffset(years=1)
    calEnd = calStart + pd.offsets.DateOffset(years=4) - pd.offsets.DateOffset(days=1)
    train_period_ret.append(periodReturns)  
    train_period_factor_ret.append(periodFactRet)

In [24]:
OLS(train_period_ret[0], train_period_factor_ret[0],0,0)

TypeError: OLS() missing 2 required positional arguments: 'lambda_' and 'K'

In [13]:
train_period_ret[0]

Unnamed: 0_level_0,F,CAT,DIS,MCD,KO,PEP,WMT,C,WFC,JPM,AAPL,IBM,PFE,JNJ,XOM,MRO,ED,T,VZ,NEM
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2008-01-31,-0.015473,-0.018099,-0.077689,-0.092577,-0.040718,-0.105526,0.065436,-0.034369,0.126751,0.093596,-0.31874,-0.011258,0.025617,-0.055473,-0.07991,-0.228686,-0.110186,-0.066509,-0.104237,0.108897
2008-02-29,-0.017866,0.017576,0.084826,0.015525,-0.010453,0.02089,-0.023965,-0.159624,-0.134773,-0.143705,-0.077689,0.065824,-0.034022,-0.013543,0.010064,0.133412,-0.049913,-0.09639,-0.065941,-0.058074
2008-03-31,-0.125743,0.0807,-0.03348,0.028978,0.046209,0.041791,0.065628,-0.098284,-0.006147,0.054881,0.146116,0.009542,-0.062292,0.045266,-0.029628,-0.143912,-0.030802,0.097927,0.001879,-0.114691
2008-04-30,0.442256,0.048846,0.031661,0.066516,-0.034657,-0.052631,0.098807,0.177939,0.020537,0.116747,0.210395,0.046489,-0.040978,0.032422,0.098578,-0.002458,0.046059,0.019325,0.07105,-0.025862
2008-05-31,-0.178555,0.007482,0.034278,-0.006164,-0.029148,-0.005156,-0.001822,-0.12489,-0.065469,-0.099387,0.083282,0.074847,-0.02363,-2.1e-05,-0.043814,0.13098,0.004983,0.028941,-0.00206,0.073296
2008-06-30,-0.294347,-0.108428,-0.073129,-0.047837,-0.087874,-0.064725,-0.028371,-0.236054,-0.140256,-0.203793,-0.114601,-0.085915,-0.099324,-0.03766,-0.008798,0.00764,-0.055211,-0.157339,-0.081502,0.098054
2008-07-31,-0.003579,-0.054243,-0.028744,0.062001,-0.010734,0.045205,0.04156,0.132748,0.273026,0.19593,-0.052205,0.078227,0.067189,0.06269,-0.088871,-0.04777,0.014105,-0.075887,-0.028225,-0.082021
2008-08-31,-0.072133,0.016105,0.064598,0.042687,0.009768,0.027546,0.010511,0.014751,0.009917,-0.05397,0.065262,-0.046454,0.039125,0.033951,-0.001431,-0.085301,0.043534,0.036999,0.030427,-0.060933
2008-09-30,0.164419,-0.158859,-0.052814,-0.007141,0.021183,0.045635,0.012382,0.078542,0.238341,0.211802,-0.331058,-0.040685,-0.03656,-0.017828,-0.030871,-0.116876,0.048867,-0.128727,-0.087776,-0.14
2008-10-31,-0.579646,-0.353507,-0.156551,-0.061902,-0.167593,-0.200884,-0.068925,-0.327149,-0.093526,-0.110769,-0.054205,-0.205913,-0.040388,-0.115407,-0.046383,-0.270928,0.00758,-0.026481,-0.060875,-0.321233


In [16]:
train_period_ret[0]

(48, 20)

In [23]:
np.linalg.inv(train_period_factor_ret[0].T@train_period_factor_ret[0])@train_period_factor_ret[0].T@train_period_ret[0]

Unnamed: 0,F,CAT,DIS,MCD,KO,PEP,WMT,C,WFC,JPM,AAPL,IBM,PFE,JNJ,XOM,MRO,ED,T,VZ,NEM
0,2.453207,2.075546,1.168497,0.637974,0.840885,0.617847,0.610607,1.68992,0.756886,0.63753,1.611563,0.544077,0.648099,0.595626,0.982943,1.317044,0.411077,0.917225,0.963543,0.894223
1,0.345559,-0.041315,0.23235,-0.48593,-0.710597,-0.341574,-0.859078,-2.017873,-0.391265,-0.69779,0.278328,0.071926,-0.948474,-0.206513,-1.022254,0.144176,-0.244903,-0.290616,-0.496232,-1.069428
2,-1.203626,0.043108,0.598185,0.308976,-0.180318,0.216121,0.080789,3.898695,2.003844,0.8708,-1.202773,-0.257439,0.363318,0.189532,-0.116739,-0.058537,0.107312,-0.473687,-0.452783,-0.271194
3,4.612604,2.195475,0.329043,1.131191,0.862618,0.310051,1.549609,-1.331522,1.854509,0.855621,0.131173,0.1062,0.483729,0.272808,0.435866,-0.94306,0.263261,0.332799,0.374375,0.928234
4,0.903673,1.649337,-0.189192,-0.708705,0.320987,-0.20456,-0.287609,-3.226086,-0.477758,-0.800047,-0.877985,0.005751,0.088985,-0.398182,0.694087,-0.276166,0.281554,0.720293,1.046372,0.086264
5,-2.035803,-0.155217,0.034405,0.113649,-0.026778,0.179011,0.042484,-0.114242,-0.671359,-0.532335,-0.071084,-0.152585,-0.092179,0.07102,0.246597,0.221133,0.053875,0.066846,0.132381,-0.071515
6,-0.436452,0.388193,-0.092943,0.05682,-0.027924,0.07961,0.296003,-0.376844,-0.031166,0.354205,-0.780969,0.360011,0.357965,0.048845,-0.172872,-0.053645,-0.007534,-0.32821,-0.201262,-0.051625
7,1.23668,-0.134988,-0.313289,0.110598,0.167087,0.127938,0.727436,0.463114,0.812769,1.449203,0.43288,-0.206418,0.328199,-0.002763,-0.0292,-0.710944,-0.123572,0.258562,0.037533,-0.508265


# 3. Construct and rebalance your portfolios

Here you will estimate your input parameters (exp. returns and cov. matrix etc) from the Fama-French factor models.
You will have to re-estimate your parameters at the start of each rebalance period, and then re-optimize and rebalance your portfolios accordingly.

Ensure you re-initialize the dates above if you run this cell repeatedly. 

In [None]:
# Initiate counter for the number of observations per investment period
toDay = 0

# Preallocate the space for the per period value of the portfolios 
currentVal = {i: np.zeros(NoPeriods) for i in range(NoModels)}

# Number of assets
n = len(tickers)

# Preallocate space for the portfolio weights
x = {i: np.zeros([n, NoPeriods]) for i in range(NoModels)}

# Initialize dictionaries to hold Q, mu and the number of shares 
# for each model. These are overwritten at each rebalancing point
mu = {}
Q = {}
NoShares = {}

# Empty lists to measure the value of the portfolio over the period
portfValue = {i: [] for i in range(NoModels)}

#--------------------------------------------------------------------------
# Set the value of lambda and K for the LASSO and BSS models, respectively
#--------------------------------------------------------------------------
lambda_ = 0.5
K = 4

for t in range(NoPeriods):
    # Subset the returns and factor returns corresponding to the current calibration period.
    periodReturns = returns[(calStart <= returns.index) & (returns.index <= calEnd)]
    periodFactRet = factorRet[(calStart <= factorRet.index) & (factorRet.index <= calEnd)]

    current_price_idx = (calEnd - pd.offsets.DateOffset(days=7) <= adjClose.index) & (adjClose.index <= calEnd)
    currentPrices = adjClose[current_price_idx]

    # Subset the prices corresponding to the current out-of-sample test period.
    periodPrices_idx = (testStart <= adjClose.index) & (adjClose.index <= testEnd)
    periodPrices = adjClose[periodPrices_idx]

    assert len(currentPrices) == 1
    # Set the initial value of the portfolio or update the portfolio value
    if t == 0:
        for i in range(NoModels):
            currentVal[i][0] = initialVal  # all models start with the same amount of $
    else:
        for i in range(NoModels):
            currentVal[i][t] = (currentPrices @ NoShares[i].values.T).squeeze()

    # Update counter for the number of observations per investment period
    fromDay = toDay
    toDay = toDay + len(periodPrices)

    # Calculate 'mu' and 'Q' using the 4 factor models.
    # Note: You need to write the code for the 4 factor model functions. 
    for i in range(NoModels):
        mu[i], Q[i] = FMList[i](periodReturns, periodFactRet, lambda_, K)

    # Optimize your portfolios to get the weights 'x'
    # Note: You need to write the code for MVO with no short sales
    for i in range(NoModels):
        # Define the target return as the geometric mean of the market 
        # factor for the current calibration period
        targetRet = gmean(periodFactRet.iloc[:, 0] + 1) - 1

        x[i][:, t] = MVO(mu[i], Q[i], targetRet)

        # Calculate the optimal number of shares of each stock you should hold
    for i in range(NoModels):
        # Number of shares your portfolio holds per stock
        NoShares[i] = x[i][:, t] * currentVal[i][t] / currentPrices

        # Weekly portfolio value during the out-of-sample window
        portfValue[i].append(periodPrices @ NoShares[i].values.T)

    # Update your calibration and out-of-sample test periods
    calStart = calStart + pd.offsets.DateOffset(years=1)
    calEnd = calStart + pd.offsets.DateOffset(years=4) - pd.offsets.DateOffset(days=1)

    testStart = testStart + pd.offsets.DateOffset(years=1)
    testEnd = testStart + pd.offsets.DateOffset(years=1) - pd.offsets.DateOffset(days=1)

for i in range(NoModels):
    portfValue[i] = pd.concat(portfValue[i], axis=0)

# Overwrite into a dataframe
portfValue = pd.DataFrame([portfValue[i].values.squeeze() for i in range(NoModels)],
                          index=tags, columns=portfValue[0].index).T


# 4. Results

In [None]:
#--------------------------------------------------------------------------
# 4.1 Evaluate any measures of fit of the regression models to assess their
# in-sample quality. You may want to modify Section 3 of this program to
# calculate the quality of fit each time the models are recalibrated.
#--------------------------------------------------------------------------

#--------------------------------------------------------------------------
# 4.2 Calculate the portfolio average return, variance (or standard 
# deviation), and any other performance and/or risk metric you wish to 
# include in your report.
#--------------------------------------------------------------------------

In [None]:
#--------------------------------------------------------------------------
# 4.3 Plot the portfolio wealth evolution 
# 
# Note: The code below plots all portfolios onto a single plot. However,
# you may want to split this into multiple plots for clarity, or to
# compare a subset of the portfolios. 
#--------------------------------------------------------------------------
# Calculate the dates of the out-of-sample period

fig = plt.figure(1)
portfValue.plot(title='Portfolio wealth evolution',
                ylabel='Total wealth',
                figsize=(6, 3),
                legend=True)
plt.savefig("images/wealth.svg")

#--------------------------------------------------------------------------
# 4.4 Plot the portfolio weights period-over-period
#--------------------------------------------------------------------------
# OLS Portfolio weights

fig2 = plt.figure(2)
x[0][x[0] < 0] = 0
weights = pd.DataFrame(x[0][(x[0] > 0).any(axis=1)], index=tickers[(x[0] > 0).any(axis=1)])
weights.columns = [col + 1 for col in weights.columns]
weights.T.plot.area(title='Portfolio weights',
                    ylabel='Weights', xlabel='Rebalance Period',
                    figsize=(6, 3),
                    legend=True, stacked=True)
plt.legend(loc='center left', bbox_to_anchor=(1.0, 0.5))
plt.savefig("images/weights.svg")
#
# ###########################################################################
# # Program End