#  **COMM475--Investment Policies: The Chen, Roll, and Ross (1986) model**

Instructor: Lorenzo Garlappi © 2024

TA: Tianping Wu




In the following notebook we provide the code to estimate factor exposures ($\beta$'s) and risk premia ($\lambda$'s) in the Chen, Roll and Ross (1986) model.


We first import some packages:

In [14]:
import numpy as np
import statsmodels.api as sm #package for the regression
from datetime import datetime
import pandas as pd

import certifi
import ssl
# NOTE(review): this replaces the ssl module's default HTTPS context factory
# with one that does NOT verify server certificates, for every HTTPS request
# made through it for the rest of the session. Presumably it is here so that
# the pd.read_csv(url) downloads below work on machines without a usable CA
# bundle -- confirm. `certifi` is imported but not used in the visible code;
# a verifying alternative to consider is
# ssl.create_default_context(cafile=certifi.where()).
ssl._create_default_https_context = ssl._create_unverified_context

# import urllib.request

# # Create unverified context
# ctx = ssl.create_default_context()
# ctx.check_hostname = False
# ctx.verify_mode = ssl.CERT_NONE

We then read the data:

In [15]:
# Read the two input files (CSV) straight from the course GitHub repository.
# Earliest date is 1941-01-01
# Latest date is 2023-12-01
# NOTE(review): the dates above presumably describe the macro file only -- the
# preview of the portfolio file printed below starts in 1926-01 -- confirm.

# Returns of the 20 size-sorted portfolios
url = 'https://raw.githubusercontent.com/lorenzogarlappi/COMM475/main/Data/size_20_pivot.csv'
data_df = pd.read_csv(url)

# Macroeconomic variables
url = 'https://raw.githubusercontent.com/lorenzogarlappi/COMM475/main/Data/Macro_PMF.csv'
Macro_PMF = pd.read_csv(url)




Let's take a peek at the data:

In [16]:
# These are 20 stock portfolios sorted by size from small to large.
# Show the first rows to check that the file was read as expected.
data_df.head()

Unnamed: 0,DATE,q5,q10,q15,q20,q25,q30,q35,q40,q45,...,q55,q60,q65,q70,q75,q80,q85,q90,q95,q100
0,1926-01-01,0.070273,0.082493,0.041497,0.118465,0.014394,0.013319,0.015596,0.020442,0.027572,...,0.02722,0.013469,0.01419,0.001326,-0.006344,0.001702,0.00647,0.002636,0.00374,0.000541
1,1926-02-01,-0.105512,-0.031236,-0.080682,-0.071919,-0.081469,-0.043153,-0.051504,-0.076944,-0.055061,...,-0.054275,-0.043196,-0.048626,-0.060541,-0.057929,-0.042949,-0.063193,-0.022489,-0.008666,-0.03486
2,1926-03-01,-0.122931,-0.167902,-0.133267,-0.128605,-0.132859,-0.084897,-0.118602,-0.102891,-0.115693,...,-0.062408,-0.095772,-0.117057,-0.109062,-0.047465,-0.074271,-0.042725,-0.080428,-0.061029,-0.06389
3,1926-04-01,-0.058916,0.039744,0.032174,0.0181,-2.6e-05,0.008829,0.049754,0.043046,0.037695,...,0.041214,0.017511,0.057184,0.050187,0.05747,0.025805,0.043356,0.040189,0.034364,0.039071
4,1926-05-01,-0.097616,-0.02338,-0.032791,-0.011486,0.009077,0.038148,0.011658,0.005412,0.003077,...,-0.005173,0.01024,0.022679,0.005009,0.009269,0.011008,0.016329,0.017278,0.009732,0.012198


In [17]:
# These are the macroeconomic variables.
# Show the first rows to check that the file was read as expected.
Macro_PMF.head()

Unnamed: 0,DATE,INDPRO,BAA,AAA,L1_INDPRO,MP,UPR,UTS,UTS_Interpolated,UI,DEI,TB1M
0,1941-01-31,10.0019,4.38,2.75,,,0.0163,,,,,
1,1941-02-28,10.2976,4.42,2.78,10.0019,0.029136,0.0164,,,,,
2,1941-03-31,10.6203,4.38,2.8,10.2976,0.030856,0.0158,,,,,
3,1941-04-30,10.6471,4.33,2.82,10.6203,0.00252,0.0151,,,,,
4,1941-05-31,11.1311,4.32,2.81,10.6471,0.044455,0.0151,,,,,


We now make some preliminary adjustments to the data to prepare it for the time-series and cross-sectional analysis:

In [18]:
# Parse the 'DATE' columns (stored as 'YYYY-MM-DD' text) into datetimes
Macro_PMF['DATE'] = pd.to_datetime(Macro_PMF['DATE'], format='%Y-%m-%d')
data_df['DATE'] = pd.to_datetime(data_df['DATE'], format='%Y-%m-%d')

# The macro file is stamped at month end while the portfolio file is stamped
# at month start. Move the macro dates to the first day of their month so the
# two files share the same key. This is done on the whole column at once
# (no row-by-row lambda), which also leaves missing dates as NaT.
Macro_PMF['DATE'] = Macro_PMF['DATE'].dt.to_period('M').dt.to_timestamp()

# Left join: keep every month of portfolio returns and attach the macro
# variables where available (months without macro data get NaN).
data_df = pd.merge(data_df, Macro_PMF, on='DATE', how='left')

# Use the 'DATE' column as the index
data_df = data_df.set_index('DATE')

# Force UI and DEI to numeric; entries that cannot be parsed become NaN
for column in ('UI', 'DEI'):
    data_df[column] = pd.to_numeric(data_df[column], errors='coerce')

data_df.head()

Unnamed: 0_level_0,q5,q10,q15,q20,q25,q30,q35,q40,q45,q50,...,BAA,AAA,L1_INDPRO,MP,UPR,UTS,UTS_Interpolated,UI,DEI,TB1M
DATE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1926-01-01,0.070273,0.082493,0.041497,0.118465,0.014394,0.013319,0.015596,0.020442,0.027572,0.000793,...,,,,,,,,,,
1926-02-01,-0.105512,-0.031236,-0.080682,-0.071919,-0.081469,-0.043153,-0.051504,-0.076944,-0.055061,-0.055372,...,,,,,,,,,,
1926-03-01,-0.122931,-0.167902,-0.133267,-0.128605,-0.132859,-0.084897,-0.118602,-0.102891,-0.115693,-0.104275,...,,,,,,,,,,
1926-04-01,-0.058916,0.039744,0.032174,0.0181,-2.6e-05,0.008829,0.049754,0.043046,0.037695,0.04994,...,,,,,,,,,,
1926-05-01,-0.097616,-0.02338,-0.032791,-0.011486,0.009077,0.038148,0.011658,0.005412,0.003077,0.001915,...,,,,,,,,,,


# Step A: We choose the start month (Jan 1958) and end month (Dec 1984):

In [19]:
# Sample period for the analysis (first and last month, both included).
# You can choose a different period at your convenience, keeping in mind that
# the rolling regressions below use the 60 months *before* each month, and the
# macro file only starts in 1941-01 and ends in 2023-12.
start_month = pd.Timestamp('1958-01-01')
end_month = pd.Timestamp('1984-12-01')

# Step B: Use previous 60 months to estimate factor sensitivities, $\beta_{j p}$ :

$$
r_{p}=\alpha_{p}+I_{1} \beta_{1 p}+I_{2} \beta_{2 p}+I_{3} \beta_{3 p}+I_{4} \beta_{4 p}+I_{5} \beta_{5 p} + e_{s}
$$

We need to run 20 rolling multiple regressions (one for each of the 20 different portfolios, $p$), each with 60 observations. This gives estimates of $\beta_{j p}$ for $j=1,\dots,5$ and $p=1,\dots,20$ for each month.

We first define the function to estimate beta:

In [20]:
# Define the example function
def estimate_beta(returns, shocks):
    """Regress portfolio returns on economic shocks by OLS.

    Both inputs are first restricted to the index labels they have in
    common, then an intercept column is added to the shocks and the model
    is fitted with ``sm.OLS``.

    Parameters
    ----------
    returns : pandas Series or DataFrame
        Portfolio returns (dependent variable).
    shocks : pandas Series or DataFrame
        Economic shocks (explanatory variables).

    Returns
    -------
    The ``params`` of the fitted model: the intercept and one coefficient
    per shock.
    """
    shared_dates = returns.index.intersection(shocks.index)

    # Explanatory variables: the aligned shocks plus a constant (intercept)
    regressors = sm.add_constant(shocks.loc[shared_dates])

    fitted = sm.OLS(returns.loc[shared_dates], regressors).fit()
    return fitted.params

We define five economic shocks:

In [21]:
# Macro factors to be expressed as deviations from their sample mean
columns_to_demean = ['MP', 'DEI', 'UI', 'UPR', 'UTS']
for column in columns_to_demean:
    # subtract the mean computed over all rows of data_df
    data_df[column] -= data_df[column].mean()

# Labels of the five economic shocks used as regressors
shocks = ['MP', 'DEI', 'UI', 'UPR', 'UTS']

# Labels of the 20 size-sorted portfolios: q5, q10, ..., q100
portfolios = ['q' + str(percentile) for percentile in range(5, 105, 5)]

We then run rolling 60-month regressions to generate the beta for each portfolio in each month:

In [22]:
# Number of months of history used in each rolling regression
window_length = 60

# Beta estimates, one entry per month; filled in by the rolling loop below
monthly_beta_estimates = {}

# Months in a calendar year
num_of_month = 12


def calculate_month_diff(timestamp1, timestamp2):
    """Return how many calendar months timestamp2 lies after timestamp1.

    Only the ``year`` and ``month`` attributes are used, so the day of the
    month is ignored. The result is negative when timestamp2 is earlier.
    """
    first_month_count = timestamp1.year * num_of_month + timestamp1.month
    second_month_count = timestamp2.year * num_of_month + timestamp2.month
    return second_month_count - first_month_count

# Number of months in the whole sample (start and end month both included)
num_month = calculate_month_diff(start_month, end_month) + 1

# Column names of the estimated exposures: beta_MP, beta_DEI, beta_UI,
# beta_UPR, beta_UTS. They are the same for every month, so they are built
# once here instead of inside the loop.
betas = ['beta_' + shock for shock in shocks]

# Loop through each month from start_month to end_month
for i in range(num_month):
    # period_end is the current month; the estimation window is the
    # window_length months that immediately precede it.
    period_end = start_month + pd.DateOffset(months=i)
    period_start = period_end - pd.DateOffset(months=window_length)

    # Keep the rows with period_start <= date < period_end, so the current
    # month itself is not part of the estimation window.
    in_window = (data_df.index >= period_start) & (data_df.index < period_end)
    period_data = data_df[in_window]

    # Economic shocks and portfolio returns over the window
    shocks_period = period_data[shocks]
    portfolios_period = period_data[portfolios]

    # Estimate the betas of all portfolios on the window. The transpose
    # (.T) turns the result into one row per portfolio and one column per
    # coefficient.
    df = estimate_beta(portfolios_period, shocks_period).T
    df.columns = ['const'] + betas  # intercept first, then the five betas
    df = df.drop(columns='const')   # only the betas are needed later

    # Attach the return of each portfolio in the current month
    df_return = data_df.loc[period_end]
    df['return'] = df_return[portfolios].values
    df.index = df_return[portfolios].index

    # Store the results for the current month
    monthly_beta_estimates[period_end] = df


In [31]:
## You are free to look up the betas for any month in the sample
## for example:
year = 1970
month = 7

year_start = start_month.year    # first year of the sample
month_start = start_month.month  # first month of the sample

# the rank of the month from the start month (0 means start_month itself)
key_num = (year - year_start) * num_of_month + month - month_start

# Look the month up by its date rather than by its position in the
# dictionary: a month outside the sample now raises a KeyError, whereas a
# negative position would silently return a month counted from the end.
monthly_beta_estimates[pd.Timestamp(year=year, month=month, day=1)]

Unnamed: 0,beta_MP,beta_DEI,beta_UI,beta_UPR,beta_UTS,return
q5,0.269114,2.999871,-3.09413,0.236897,5.002535,0.01627
q10,0.901579,1.108077,-4.207763,2.331567,4.708922,0.046365
q15,0.911679,0.485452,-3.543294,2.640206,4.973444,0.062226
q20,1.154734,1.85979,-3.036203,0.790312,4.175372,0.060029
q25,0.914255,1.835422,-3.984835,0.760029,3.118303,0.075475
q30,1.1537,-1.498617,-4.540084,2.789238,4.23283,0.08551
q35,1.171698,2.179321,-1.986029,1.680209,3.373652,0.081691
q40,0.94435,0.414455,-4.507187,1.406828,3.130408,0.096015
q45,0.922474,2.185047,-1.739834,4.088665,3.331039,0.095403
q50,1.093034,-1.699899,-5.29991,0.99115,2.456156,0.082849


# Step C: For each month, do a cross-sectional regression with 20 observations:

$$
\operatorname{E}\left[r_{p}\right]=\mu+\lambda_{1} \beta_{1 p}+\lambda_{2} \beta_{2 p}+\lambda_{3} \beta_{3 p}+\lambda_{4} \beta_{4 p}+\lambda_{5} \beta_{5 p}+e_{s}
$$

Cross-sectional regressions will give us estimates of the market prices of risk for each month: $\lambda_{j}$, $j=1,\dots,5$.

We define the function to estimate the price of risk lambda:

In [23]:
def estimate_lambda(betas, returns):
    """Estimate the market prices of risk (lambdas) by a cross-sectional OLS.

    The regression has Y = ``returns`` and X = ``betas`` plus an intercept.

    Parameters
    ----------
    betas : pandas DataFrame
        Estimated factor exposures used as explanatory variables.
    returns : pandas Series
        Returns to be explained, aligned with the rows of ``betas``.

    Returns
    -------
    The ``params`` of the fitted model: the intercept and one lambda per
    column of ``betas``.
    """
    # Explanatory variables: the betas plus a constant (intercept)
    design = sm.add_constant(betas)

    return sm.OLS(returns, design).fit().params

Let's estimate the lambda for each period:

In [24]:
# Names of the beta columns stored in each monthly table, and of the
# coefficients of the cross-sectional regression (intercept first). Both are
# derived from `shocks`, so this cell does not rely on variables left behind
# by the rolling loop.
beta_columns = ['beta_' + shock for shock in shocks]
lambda_columns = ['const'] + ['lambda_' + shock for shock in shocks]

# The months for which betas were estimated, in the order they were stored
months = list(monthly_beta_estimates.keys())

# One row per month, one column per coefficient. Rows start as NaN so a month
# that was not estimated would show up as missing.
df_lambda = pd.DataFrame(index=months, columns=lambda_columns, dtype=float)

# Estimate lambda with one cross-sectional regression per month
for month in months:
    df_reg = monthly_beta_estimates[month]
    params = estimate_lambda(df_reg[beta_columns], df_reg['return'])
    # The coefficients are labelled beta_*, the columns lambda_*: store them
    # by position so the result does not depend on label alignment.
    df_lambda.loc[month] = params.to_numpy()
df_lambda

Unnamed: 0,const,lambda_MP,lambda_DEI,lambda_UI,lambda_UPR,lambda_UTS
1958-01-01,0.185306,0.070907,0.015365,-0.020633,-0.005967,-0.023793
1958-02-01,-0.002152,-0.016441,-0.008074,0.005969,-0.000205,-0.009152
1958-03-01,0.012892,-0.030345,-0.002917,-0.000041,0.001995,0.009890
1958-04-01,0.052358,0.011137,0.013226,-0.010485,0.000289,0.002428
1958-05-01,0.046654,0.016278,-0.005347,0.004473,-0.000437,-0.009060
...,...,...,...,...,...,...
1984-08-01,0.040637,-0.066017,0.001391,0.001343,0.009570,0.019644
1984-09-01,-0.044411,-0.016408,-0.009521,0.008386,0.010440,0.058928
1984-10-01,-0.006140,-0.026280,-0.009710,-0.000080,0.000780,0.037126
1984-11-01,-0.033811,-0.041735,-0.003183,0.004265,0.006764,0.014895


Let's compute the summary statistics for df_lambda 


In [25]:
# Summary statistics (count, mean, std, min, quartiles, max) of the monthly
# lambda estimates, one column per coefficient
df_lambda.describe()

Unnamed: 0,const,lambda_MP,lambda_DEI,lambda_UI,lambda_UPR,lambda_UTS
count,324.0,324.0,324.0,324.0,324.0,324.0
mean,0.005182,-0.004553,0.001668,-0.0013,0.001413,-0.000487
std,0.05399,0.026536,0.007863,0.006974,0.007309,0.021892
min,-0.158265,-0.067857,-0.029944,-0.0288,-0.025748,-0.0774
25%,-0.028786,-0.021502,-0.003059,-0.005034,-0.002424,-0.011145
50%,0.00217,-0.006394,0.000974,-0.001094,0.000704,-0.002811
75%,0.032054,0.010767,0.005557,0.002678,0.004123,0.00948
max,0.226147,0.08961,0.04221,0.035948,0.033293,0.094571


In [None]:
# Time-series average of each monthly coefficient (the intercept and the
# five lambdas)
df_lambda.mean()
