In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [2]:
data = pd.read_csv('TrainExer 3-1.csv')

In [3]:
data.head()

Unnamed: 0,Year,Index,Dividends,Riskfree,LogEqPrem,BookMarket,NTIS,DivPrice,EarnPrice,Inflation
0,1927,17.66,0.77,0.0317,0.280823,0.374689,0.073955,-3.133,-2.767,-0.021176
1,1928,24.35,0.85,0.0426,0.313824,0.259667,0.042639,-3.355,-2.87,-0.009793
2,1929,21.45,0.97,0.0303,-0.112428,0.338458,0.165576,-3.096,-2.589,0.00183
3,1930,15.34,0.98,0.0148,-0.288025,0.554745,0.131525,-2.751,-2.761,-0.060665
4,1931,8.12,0.82,0.0241,-0.563742,1.170732,-0.011032,-2.293,-2.589,-0.095711


# Section A - Regression Comparison

In [4]:
y = data['LogEqPrem']
x_all = data[['BookMarket', 'NTIS', 'DivPrice', 'EarnPrice', 'Inflation']]
x_book = data[['BookMarket']]

In [5]:
lm_all = LinearRegression()
lm_book = LinearRegression()

In [6]:
lm_all.fit(x_all, y)
lm_book.fit(x_book, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [7]:
predictions_all = lm_all.predict(x_all)
predictions_book = lm_book.predict(x_book)

In [9]:
print('All Variables:')
print(r2_score(y, predictions_all))
print('Just Book Market:')
print(r2_score(y, predictions_book))

All Variables:
0.108480428171
Just Book Market:
0.0633476320596


# Section B - Ramsey RESET Test

In [19]:
data['FittedLogEqPrem'] = np.square(predictions_book)

In [20]:
x_ramsey = data[['BookMarket', 'FittedLogEqPrem']]

In [21]:
import statsmodels
import statsmodels.api as sm

In [22]:
ramsey_model = sm.OLS(y, x_ramsey)
result = ramsey_model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              LogEqPrem   R-squared:                       0.079
Model:                            OLS   Adj. R-squared:                  0.057
Method:                 Least Squares   F-statistic:                     3.636
Date:                Thu, 04 Jan 2018   Prob (F-statistic):             0.0305
Time:                        21:19:24   Log-Likelihood:                 19.838
No. Observations:                  87   AIC:                            -35.68
Df Residuals:                      85   BIC:                            -30.74
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
BookMarket          0.0201      0.037     

# Section C - Chow Break Stat

In [68]:
residuals_p = np.subtract(y, predictions_book )
rss_p = np.sum(np.square(residuals_p))
print(rss_p)

2.991714491142858


In [58]:
data_1 = data[data['Year'] < 1980]
data_2 = data[data['Year'] > 1979]

In [59]:
x_1 = data_1[['BookMarket']]
x_2 = data_2[['BookMarket']]
y_1 = data_1['LogEqPrem']
y_2 = data_2['LogEqPrem']

In [60]:
lm_1 = LinearRegression()
lm_2 = LinearRegression()

In [61]:
lm_1.fit(x_1, y_1)
lm_2.fit(x_2, y_2)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [62]:
residuals_1 =  y_1 - lm_1.predict(x_1) 
residuals_2 = y_2 - lm_2.predict(x_2)

In [63]:
rss_1 = np.sum(np.square(residuals_1))
rss_2 = np.sum(np.square(residuals_2))

print(rss_1)
print(rss_2)

1.9814773125345166
0.8551614618905872


In [76]:
chow_nom = (rss_p - (rss_1 + rss_2)) / 2
chow_denom = (rss_1+rss_2) / (87 - (2 *2))

In [77]:
chow = chow_nom / chow_denom

In [78]:
print(chow)

2.268756354108251
