In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
import pandas as pd
import seaborn as sns
# Seaborn "white" theme for all figures in this notebook.
sns.set(style="white",color_codes=True)
# Default figure size, given as (width, height).
plt.rcParams['figure.figsize'] = (15,9.27)
# Render math text in figures with the Computer Modern ("cm") font set.
matplotlib.rcParams['mathtext.fontset'] = "cm"

In [2]:
def reg(y,name_list,*args):
    """Fit an OLS regression of ``y`` on the given regressors and print its summary.

    An intercept is added to the design matrix; it is labelled ``const`` in
    the printed table and the regressors are labelled with ``name_list``.

    Parameters
    ----------
    y : array-like
        Dependent variable.
    name_list : sequence of str
        Display names for the regressors, one per entry of ``args`` and in
        the same order. Any sequence (list, tuple, ...) is accepted.
    *args : array-like
        One 1-D regressor per positional argument, each as long as ``y``.

    Returns
    -------
    None
        The summary table is printed, nothing is returned.

    Raises
    ------
    ValueError
        If no regressor is passed, or if ``name_list`` does not contain
        exactly one name per regressor.
    """
    names = list(name_list)
    if not args:
        raise ValueError("reg() needs at least one regressor")
    if len(names) != len(args):
        # Fail before fitting: a mismatch would otherwise only surface
        # while the summary table is being labelled.
        raise ValueError(
            "name_list has %d name(s) but %d regressor(s) were given"
            % (len(names), len(args))
        )
    import statsmodels.api as sm
    # Each regressor becomes one column of the design matrix.
    x = np.column_stack(args)
    mat_x = sm.add_constant(x)
    res = sm.OLS(y,mat_x).fit()
    print(res.summary(xname=['const']+names))

In [46]:
# Load the panel and key every row by (date, id); chaining instead of
# inplace=True keeps the cell idempotent on re-run.
mul = pd.read_excel('data/mul.xlsx').set_index(['date','id'])
# Fixed seed so the simulated noise is reproducible.
np.random.seed(123)
# z = 2*x + 3*y + standard-normal noise. Draw one shock per row instead of
# hard-coding the sample size, so the cell keeps working if the file changes.
mul['z'] = 2*mul.x + 3*mul.y + np.random.randn(len(mul))

In [47]:
# Preview the first rows only rather than dumping the whole panel.
mul.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,y,x,z
date,id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-01,1,1.22,8.39,19.354369
2012-01-01,2,4.07,6.76,26.727345
2012-01-01,3,6.95,0.75,22.632978
2012-01-01,4,6.38,2.54,22.713705
2012-02-01,1,0.35,2.48,5.4314
2012-02-01,2,4.11,4.9,23.781437
2012-02-01,3,2.95,1.16,8.743321
2012-02-01,4,7.83,1.85,26.761087
2012-03-01,1,7.06,8.32,39.085936
2012-03-01,2,0.08,6.1,11.57326


In [24]:
# Column y for id 1 on every date, selected in a single .loc call
# (the result keeps both index levels).
mul.loc[pd.IndexSlice[:, 1], 'y']

date        id
2012-01-01  1     0.1
2012-02-01  1     0.2
2012-03-01  1     0.4
Name: y, dtype: float64

In [48]:
# Time series of z, y and x for id 1 (cross-section on the second index level).
z, y, x = (mul.xs((slice(None),1))[name] for name in ('z', 'y', 'x'))

In [49]:
# OLS of z on y and x for id 1; z was built as 2*x + 3*y + noise.
reg(z,['y','x'],y,x)

                            OLS Regression Results                            
Dep. Variable:                      z   R-squared:                       0.986
Model:                            OLS   Adj. R-squared:                  0.983
Method:                 Least Squares   F-statistic:                     315.2
Date:                Sun, 30 Sep 2018   Prob (F-statistic):           4.66e-09
Time:                        10:54:12   Log-Likelihood:                -18.715
No. Observations:                  12   AIC:                             43.43
Df Residuals:                       9   BIC:                             44.89
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.0798      1.127      0.071      0.9

In [108]:
import statsmodels.api as sm

def first_reg(df,y,xx):
    """Run one OLS per label of index level 1 and collect the coefficients.

    For every label found in the second level of ``df``'s MultiIndex, the
    rows carrying that label are selected and ``y`` is regressed on ``xx``
    plus an intercept.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel indexed by a two-level MultiIndex.
    y : str
        Column used as the dependent variable.
    xx : str or list of str
        Column(s) used as regressors.

    Returns
    -------
    pandas.DataFrame
        One row per level-1 label, with columns ``const, beta1, ..., betaK``
        where ``betaK`` belongs to the K-th entry of ``xx``.
    """
    regressors = [xx] if isinstance(xx, str) else list(xx)
    col = ['const'] + ['beta'+str(i) for i in range(1,len(regressors)+1)]
    # ``index.levels`` still lists labels that were filtered out of the
    # frame; asking ``xs`` for one of those raises KeyError, so drop them.
    units = df.index.remove_unused_levels().levels[1]
    params = {}
    for unit in units:
        sub = df.xs(unit, level=1)
        indep = sm.add_constant(sub[regressors])
        params[unit] = sm.OLS(sub[y], indep).fit().params
    if not params:
        # Nothing to regress: return an empty table with the usual columns.
        return pd.DataFrame(columns=col)
    # Build the frame once from all results, then put labels on the rows.
    new = pd.DataFrame(params).transpose()
    new.columns = col
    return new

In [125]:
# First pass: one OLS of z on [y, x] per id; keep the coefficient table for the second pass.
betas = first_reg(mul,'z',['y','x'])

In [1]:
def second_reg(df,betas,y):
    """Run one OLS per label of index level 0, regressing ``y`` on the betas.

    For every label in the first level of ``df``'s MultiIndex, the ``y``
    values of that cross-section are regressed on the slope columns of
    ``betas`` plus an intercept.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel indexed by a two-level MultiIndex.
    betas : pandas.DataFrame
        Coefficient table indexed by the level-1 labels of ``df``. A
        ``const`` column, if present, is dropped and replaced by a fresh
        intercept column.
    y : str
        Column used as the dependent variable.

    Returns
    -------
    pandas.DataFrame
        One row per level-0 label, with columns
        ``const, premium1, ..., premiumK`` (one premium per beta column).
    """
    # The design matrix is the same for every cross-section: build it once.
    exog = sm.add_constant(betas.drop(columns='const', errors='ignore'))
    col = ['const'] + ['premium'+str(i) for i in range(1,exog.shape[1])]
    # Skip level-0 labels that no longer occur in the frame (xs would raise).
    periods = df.index.remove_unused_levels().levels[0]
    premiums = {}
    for period in periods:
        dep = df.xs(period, level=0)[y]
        # Match observations and betas on their shared labels; a period
        # with a missing unit would otherwise hand OLS misaligned inputs.
        common = exog.index.intersection(dep.index)
        premiums[period] = sm.OLS(dep.loc[common], exog.loc[common]).fit().params
    if not premiums:
        return pd.DataFrame(columns=col)
    new = pd.DataFrame(premiums).transpose()
    new.columns = col
    return new

In [2]:
# Second pass on the first-pass betas.
# NOTE(review): the recorded output is a NameError because this cell was run
# (execution count 2) in a kernel where `mul` had not been defined yet; run
# the data-loading and first-pass cells first.
second_reg(mul,betas,'z')

NameError: name 'mul' is not defined

In [140]:
# Second pass done by hand for a single date: the cross-section of z on
# 2012-01-01 regressed on the two first-pass slope columns.
dep = mul.xs('2012-1-1')['z']
b1, b2 = betas['beta1'], betas['beta2']
reg(dep, ['beta1', 'beta2'], b1, b2)

                            OLS Regression Results                            
Dep. Variable:                      z   R-squared:                       0.459
Model:                            OLS   Adj. R-squared:                 -0.624
Method:                 Least Squares   F-statistic:                    0.4236
Date:                Sun, 30 Sep 2018   Prob (F-statistic):              0.736
Time:                        12:31:24   Log-Likelihood:                -8.2910
No. Observations:                   4   AIC:                             22.58
Df Residuals:                       1   BIC:                             20.74
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         25.4472    101.854      0.250      0.8

In [189]:
import statsmodels.api as sm

class FamaMacbethReg:
    """Two-pass regression on a panel indexed by a two-level MultiIndex.

    ``first_reg`` runs one OLS per label of index level 1; ``second_reg``
    then runs one OLS per label of index level 0, regressing ``y`` on the
    first-pass slope coefficients.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel indexed by a two-level MultiIndex.
    y : str
        Column used as the dependent variable in both passes.
    xx : str or list of str
        Column(s) used as regressors in the first pass.
    """

    def __init__(self,df,y,xx):
        self.df = df
        self.y = y
        self.xx = xx

    def first_reg(self):
        """Return the first-pass coefficients, one row per level-1 label.

        Columns are ``const, beta1, ..., betaK`` where ``betaK`` belongs to
        the K-th entry of ``xx``.
        """
        regressors = [self.xx] if isinstance(self.xx, str) else list(self.xx)
        col = ['const'] + ['beta'+str(i) for i in range(1,len(regressors)+1)]
        # ``index.levels`` still lists labels that were filtered out of the
        # frame; asking ``xs`` for one of those raises KeyError, so drop them.
        units = self.df.index.remove_unused_levels().levels[1]
        params = {}
        for unit in units:
            sub = self.df.xs(unit, level=1)
            indep = sm.add_constant(sub[regressors])
            params[unit] = sm.OLS(sub[self.y], indep).fit().params
        if not params:
            return pd.DataFrame(columns=col)
        new = pd.DataFrame(params).transpose()
        new.columns = col
        return new

    def second_reg(self):
        """Return the second-pass coefficients, one row per level-0 label.

        Columns are ``const, premium1, ..., premiumK``, one premium per
        first-pass slope.
        """
        betas = self.first_reg()
        col = ['const'] + ['premium'+str(i) for i in range(1,betas.shape[1])]
        # Same design matrix for every cross-section: build it once.
        exog = sm.add_constant(betas.drop('const', axis=1))
        periods = self.df.index.remove_unused_levels().levels[0]
        premiums = {}
        for period in periods:
            dep = self.df.xs(period, level=0)[self.y]
            # Match observations and betas on their shared labels; a period
            # with a missing unit would otherwise hand OLS misaligned inputs.
            common = exog.index.intersection(dep.index)
            premiums[period] = sm.OLS(dep.loc[common], exog.loc[common]).fit().params
        if not premiums:
            return pd.DataFrame(columns=col)
        new = pd.DataFrame(premiums).transpose()
        new.columns = col
        return new

In [190]:
# Two-pass setup on the simulated panel: z is the dependent variable, y and x the first-pass regressors.
res = FamaMacbethReg(df=mul,y='z',xx=['y','x'])

In [219]:
# Second pass: one row per date with the intercept and one premium per first-pass slope.
res.second_reg()

Unnamed: 0,const,premium1,premium2
2012-01-01,25.447156,-9.394269,12.768732
2012-02-01,-197.760644,-3.306648,109.643143
2012-03-01,-13.141898,74.853365,-90.835553
2012-04-01,272.401225,-69.791068,-16.801975
2012-05-01,413.677159,-79.82892,-74.795251
2012-06-01,-132.500554,-17.466123,103.177548
2012-07-01,543.945506,-56.149953,-169.867097
2012-08-01,308.921723,-38.653815,-85.32534
2012-09-01,-176.486395,28.281984,56.900306
2012-10-01,181.563223,-33.739497,-26.915027


In [229]:
import statsmodels.api as sm

class FamaMacbethRegression:
    """Two-pass regression on a panel indexed by a two-level MultiIndex.

    ``first_reg`` runs one OLS per label of index level 1; ``second_reg``
    then runs one OLS per label of index level 0, regressing ``Y`` on the
    first-pass slope coefficients. Output columns are named after the
    regressor columns (``beta_<name>`` / ``premium_<name>``).

    Parameters
    ----------
    data : pandas.DataFrame
        Panel indexed by a two-level MultiIndex.
    Y : str
        Column used as the dependent variable in both passes.
    X : str or list
        Column(s) used as regressors in the first pass.
    """

    def __init__(self,data,Y,X):
        self.data = data
        self.Y = Y
        self.X = X

    def _regressors(self):
        """Return the regressor columns as a list (a bare string is one column)."""
        return [self.X] if isinstance(self.X, str) else list(self.X)

    def first_reg(self):
        """Return the first-pass coefficients, one row per level-1 label.

        Columns are ``const`` followed by ``beta_<name>`` for each regressor.
        """
        regressors = self._regressors()
        col = ['const'] + ['beta_'+ str(x) for x in regressors]
        # ``index.levels`` still lists labels that were filtered out of the
        # frame; ``xs`` raises KeyError for those, so drop them up front.
        assets = self.data.index.remove_unused_levels().levels[1]
        fitted = {}
        for asset in assets:
            sub = self.data.xs(asset, level=1)
            fitted[asset] = sm.OLS(sub[self.Y], sm.add_constant(sub[regressors])).fit().params
        if not fitted:
            return pd.DataFrame(columns=col)
        betas = pd.DataFrame(list(fitted.values()), index=list(fitted.keys()))
        betas.columns = col
        return betas

    def second_reg(self):
        """Return the second-pass coefficients, one row per level-0 label.

        Columns are ``const`` followed by ``premium_<name>`` for each
        regressor. A period that shares no label with the first-pass table
        is skipped.
        """
        betas = self.first_reg()
        col = ['const'] + ['premium_'+ str(x) for x in self._regressors()]
        # Same design matrix for every cross-section: build it once.
        exog = sm.add_constant(betas.drop('const',axis=1))
        times = self.data.index.remove_unused_levels().levels[0]
        fitted = {}
        for time in times:
            dep = self.data.xs(time, level=0)[self.Y]
            # Match observations and betas on their shared labels; a period
            # with a missing asset would otherwise hand OLS misaligned inputs.
            common = exog.index.intersection(dep.index)
            if common.empty:
                continue
            fitted[time] = sm.OLS(dep.loc[common], exog.loc[common]).fit().params
        if not fitted:
            return pd.DataFrame(columns=col)
        premiums = pd.DataFrame(list(fitted.values()), index=list(fitted.keys()))
        premiums.columns = col
        return premiums

In [230]:
# Same two-pass setup with the refactored class: z on y and x.
aaa = FamaMacbethRegression(mul,'z',['y','x'])

In [231]:
# First pass: one row of coefficients per id.
aaa.first_reg()

Unnamed: 0,const,beta_y,beta_x
1,0.079778,3.051775,1.960437
2,0.187667,2.892485,2.097472
3,-0.817442,3.193811,1.966909
4,-1.138988,3.073865,2.148427


In [232]:
# Second pass: one row per date with the intercept and the premium for each regressor.
aaa.second_reg()

Unnamed: 0,const,premium_y,premium_x
2012-01-01,25.447156,-9.394269,12.768732
2012-02-01,-197.760644,-3.306648,109.643143
2012-03-01,-13.141898,74.853365,-90.835553
2012-04-01,272.401225,-69.791068,-16.801975
2012-05-01,413.677159,-79.82892,-74.795251
2012-06-01,-132.500554,-17.466123,103.177548
2012-07-01,543.945506,-56.149953,-169.867097
2012-08-01,308.921723,-38.653815,-85.32534
2012-09-01,-176.486395,28.281984,56.900306
2012-10-01,181.563223,-33.739497,-26.915027
