In [31]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

In [32]:
data = pd.read_csv('TrainExer 3-1.csv')

In [69]:
data

Unnamed: 0,Year,Index,Dividends,Riskfree,LogEqPrem,BookMarket,NTIS,DivPrice,EarnPrice,Inflation,FittedLogEqPrem
0,1927,17.66,0.770,0.0317,0.280823,0.374689,0.073955,-3.133,-2.767,-0.021176,0.009329
1,1928,24.35,0.850,0.0426,0.313824,0.259667,0.042639,-3.355,-2.870,-0.009793,0.013894
2,1929,21.45,0.970,0.0303,-0.112428,0.338458,0.165576,-3.096,-2.589,0.001830,0.010669
3,1930,15.34,0.980,0.0148,-0.288025,0.554745,0.131525,-2.751,-2.761,-0.060665,0.004002
4,1931,8.12,0.820,0.0241,-0.563742,1.170732,-0.011032,-2.293,-2.589,-0.095711,0.002576
5,1932,6.89,0.500,0.0004,-0.094602,1.442084,-0.003959,-2.623,-2.822,-0.103578,0.010196
6,1933,10.10,0.440,0.0029,0.422211,0.829026,0.007437,-3.134,-3.134,0.004299,0.000156
7,1934,9.50,0.450,0.0023,-0.017260,0.773741,0.022024,-3.050,-2.965,0.020104,0.000516
8,1935,13.43,0.470,0.0015,0.379098,0.559911,0.009720,-3.353,-2.872,0.029605,0.003882
9,1936,17.18,0.720,0.0012,0.286110,0.458588,0.024279,-3.172,-2.824,0.011963,0.006570


# Section A - Regression Comparison

In [34]:
y = data['LogEqPrem']
x_all = data[['BookMarket', 'NTIS', 'DivPrice', 'EarnPrice', 'Inflation']]
x_book = data[['BookMarket']]

In [35]:
lm_all = LinearRegression()
lm_book = LinearRegression()

In [36]:
lm_all.fit(x_all, y)
lm_book.fit(x_book, y)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [37]:
predictions_all = lm_all.predict(x_all)
predictions_book = lm_book.predict(x_book)

In [38]:
print('All Variables:')
print(r2_score(y, predictions_all))
print('Just Book Market:')
print(r2_score(y, predictions_book))

All Variables:
0.108480428171
Just Book Market:
0.0633476320596


# Section B - Ramsey RESET Test

In [39]:
data['FittedLogEqPrem'] = np.square(predictions_book)

In [40]:
x_ramsey = data[['BookMarket', 'FittedLogEqPrem']]

In [41]:
import statsmodels
import statsmodels.api as sm

  from pandas.core import datetools


In [42]:
ramsey_model = sm.OLS(y, x_ramsey)
result = ramsey_model.fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:              LogEqPrem   R-squared:                       0.079
Model:                            OLS   Adj. R-squared:                  0.057
Method:                 Least Squares   F-statistic:                     3.636
Date:                Fri, 05 Jan 2018   Prob (F-statistic):             0.0305
Time:                        15:39:56   Log-Likelihood:                 19.838
No. Observations:                  87   AIC:                            -35.68
Df Residuals:                      85   BIC:                            -30.74
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                      coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------
BookMarket          0.0201      0.037     

# Section C - Chow Break Stat

In [43]:
residuals_p = np.subtract(y, predictions_book )
rss_p = np.sum(np.square(residuals_p))
print(rss_p)

2.991714491142858


In [44]:
data_1 = data[data['Year'] < 1980]
data_2 = data[data['Year'] > 1979]

In [45]:
x_1 = data_1[['BookMarket']]
x_2 = data_2[['BookMarket']]
y_1 = data_1['LogEqPrem']
y_2 = data_2['LogEqPrem']

In [46]:
lm_1 = LinearRegression()
lm_2 = LinearRegression()

In [47]:
lm_1.fit(x_1, y_1)
lm_2.fit(x_2, y_2)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [48]:
residuals_1 =  y_1 - lm_1.predict(x_1) 
residuals_2 = y_2 - lm_2.predict(x_2)

In [49]:
rss_1 = np.sum(np.square(residuals_1))
rss_2 = np.sum(np.square(residuals_2))

print(rss_1)
print(rss_2)

1.9814773125345166
0.8551614618905872


In [50]:
chow_nom = (rss_p - (rss_1 + rss_2)) / 2
chow_denom = (rss_1+rss_2) / (87 - (2 *2))

In [51]:
chow = chow_nom / chow_denom

In [52]:
print(chow)

2.268756354108251


In [72]:
from chow_test import chow_test

In [73]:
test = chow_test.calculate(1980, data, 'Year', 'BookMarket', 'LogEqPrem')

TypeError: object of type 'float' has no len()

In [55]:
len(data)

87

In [62]:
def calculate(break_point, data, timeColumn, x_column, y_column):
    rss_total = find_rss(data, x_column, y_column)
    rss_1 = find_rss(data[data[timeColumn] < break_point], x_column, y_column)
    rss_2 = find_rss(data[data[timeColumn] > (break_point - 1)], x_column, y_column)
    n_1 = len(data[data[timeColumn] < break_point])
    n_2 = len(data[data[timeColumn] > (break_point - 1)])

    chow_nom = (rss_total - (rss_1 + rss_2)) / 2
    chow_denom = (rss_1+rss_2) / (n_1 + n_2 - (2 *2))
    chow = chow_nom / chow_denom
    return chow

def find_rss (data, x_column, y_column):
    x = data[[x_column]]
    y = data[y_column]

    lm = LinearRegression()
    lm.fit(x, y)
    rss = np.sum(np.square(y - lm.predict(x)))
    print(rss)
    return rss

In [64]:
test = chow_test.calculate(1980, data, 'Year', 'BookMarket', 'LogEqPrem')
print(test)

TypeError: object of type 'float' has no len()