In [1]:
import numpy as np
import pandas as pd


from fixedeffect.iv import iv2sls, ivgmm, ivtest
from fixedeffect.fe import fixedeffect, did, getfe
from fixedeffect.utils.panel_dgp import gen_data

### Testing IV regression with fixed effect
- generate random data and test

In [2]:
N = 100
T = 10
beta = [-3,1,2,3,4]
ate = 1
exp_date = 5
df = gen_data(N, T, beta, ate, exp_date)

In [3]:
df.head(3)

Unnamed: 0,x_0,x_1,x_2,x_3,x_4,xb,id,time,c_i,a_t,error,post,treatment,y
0,1.0,1.710837,1.414415,-0.063661,-1.579931,-4.97104,1.0,1.0,-1.532921,-0.000417,1.764052,0,1,-4.740326
1,1.0,-2.832012,-1.083427,-0.13062,1.400689,-2.78797,1.0,2.0,-1.532921,0.374155,0.400157,0,1,-3.546579
2,1.0,-0.651656,0.504815,1.303181,0.128536,1.781663,1.0,3.0,-1.532921,0.409552,0.978738,0,1,1.637032


#### GMM regression

In [4]:
exog_x = ['x_1']
endog_x = ['x_2']
iv = ['x_3','x_4']
y = ['y']

model_ivgmm = ivgmm(data_df = df,
                      dependent = y,
                      exog_x = exog_x,
                      endog_x = endog_x,
                      category = ['id','time'],
                      iv = iv)

result_gmm = model_ivgmm.fit()

In [12]:
print(dir(result_gmm)) #know the attributes of result

['Covariance_Type', '_OLSFixed__init', '__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__str__', '__subclasshook__', '__weakref__', 'bse', 'category_input', 'cluster', 'cluster_method', 'conf_int', 'data_df', 'demeaned_df', 'dependent', 'df', 'endog_x', 'exog_x', 'f_df_full', 'f_df_proj', 'f_pvalue', 'f_stat_first_stage', 'f_stat_first_stage_pval', 'full_f_pvalue', 'full_fvalue', 'full_rsquared', 'full_rsquared_adj', 'fvalue', 'iv', 'model', 'nobs', 'orignal_exog_x', 'params', 'pvalues', 'resid', 'resid_std_err', 'rsquared', 'rsquared_adj', 'summary', 'to_excel', 'treatment_input', 'tvalues', 'variance_matrix', 'x_second_stage', 'xname', 'yname']


In [9]:
print("Coefficients:\n", result_iv.params)
print("\nStandard Errors:\n", result_iv.bse)
print("\nT-Stats:\n", result_iv.tvalues)
print("\nP-Values:\n", result_iv.pvalues)
print("\nR-squared:\n", result_iv.rsquared)

Coefficients:
 const      -1.404416
x_1        -1.522474
hat_x_2    97.958438
dtype: float64

Standard Errors:
 const      0.102075
x_1        0.120269
hat_x_2    2.256885
dtype: float64

T-Stats:
 const     -13.758714
x_1       -12.658905
hat_x_2    43.404272
dtype: float64

P-Values:
 const       3.440192e-39
x_1         6.906593e-34
hat_x_2    8.156502e-222
dtype: float64

R-squared:
 -286.96428071586564


#### 2SLS regression

In [5]:
exog_x = ['x_1']
endog_x = ['x_2']
iv = ['x_3','x_4']
y = ['y']
category = ['id', 'time']


model_iv2sls = iv2sls(data_df = df,
                      dependent = y,
                      exog_x = exog_x,
                      endog_x = endog_x,
                      category = category,
                      iv = iv)

result_iv = model_iv2sls.fit()



In [8]:
print("Coefficients:\n", result_iv.params)
print("\nStandard Errors:\n", result_iv.bse)
print("\nT-Stats:\n", result_iv.tvalues)
print("\nP-Values:\n", result_iv.pvalues)
print("\nR-squared:\n", result_iv.rsquared)

Coefficients:
 const      -1.404416
x_1        -1.522474
hat_x_2    97.958438
dtype: float64

Standard Errors:
 const      0.102075
x_1        0.120269
hat_x_2    2.256885
dtype: float64

T-Stats:
 const     -13.758714
x_1       -12.658905
hat_x_2    43.404272
dtype: float64

P-Values:
 const       3.440192e-39
x_1         6.906593e-34
hat_x_2    8.156502e-222
dtype: float64

R-squared:
 -286.96428071586564


### Apply to index_ownership regression data

#### Sample Testing for df_regresison_iv
- choose one dependent var - [bas]

In [10]:
df_regression_iv = pd.read_csv("df_regression_iv.csv")

In [11]:
df_regression_iv['const'] = 1.0

#### collinrarity, generate lagged value for mktcap and price for control

In [29]:
df_regression_iv['mktcap_lag1'] = df_regression_iv.groupby('cusip')['mktcap'].shift(1)
df_regression_iv['price_ind_lag1'] = df_regression_iv.groupby('cusip')['price_ind'].shift(1)
df_regression_iv['volume_lag1'] = df_regression_iv.groupby('cusip')['volume'].shift(1)
df_regression_iv['ins_own_lag1'] = df_regression_iv.groupby('cusip')['ins_own'].shift(1)

In [32]:
df_regression_iv['log_mktcap_lag1'] = np.log(df_regression_iv['mktcap_lag1'].replace(0, np.nan))

In [38]:
df_regression_iv.head(2)

Unnamed: 0,cusip,date,bas,tno,mktcap,price_ind,volume,illiq,volatility,synch,...,auto_lag3,auto_lag4,assigned_to_R2000,const,hat_ind_own,mktcap_lag1,log_mktcap_lag1,price_ind_lag1,volume_lag1,ins_own_lag1
0,30710,2014-12-31,0.084839,332.131683,178.587706,0.039206,7.001668,1.135563,0.277043,-3.648,...,0.203,0.078,0,1.0,0.003597,0.0,0.0,0.0,0.0,0.0
1,30710,2015-03-31,0.051639,443.806176,274.286735,0.03411,9.355878,1.095236,0.317542,-1.841,...,0.025,0.305,0,1.0,0.030658,178.587706,5.18508,0.039206,7.001668,0.234046


In [40]:
df_regression_iv['date'] = pd.to_datetime(df_regression_iv['date'])  # ensure datetime format

df_regression_iv['quarter'] = df_regression_iv['date'].dt.to_period('Q')  # gives '2015Q1', '2015Q2', etc.

In [41]:
df_iv_reg = df_regression_iv.drop(columns=['mktcap', 'price_ind','volume']).copy()

In [42]:
df_iv_reg.shape

(290302, 23)

In [35]:
# Define independent variables
# market_quality_vars = ['bas', 'tno', 'illiq', 'volatility', 'synch', 'auto_lag1', 'auto_lag2', 'auto_lag3', 'auto_lag4']
Y_test = ['bas']

# Define X vars
X_endog = ['ind_own']
X_exog = ['log_mktcap_lag1', 'price_ind_lag1', 'volume_lag1', 'ins_own_lag1'] #control vars are exogenous

# Define Instruments
Z_instr = ['assigned_to_R2000']

In [22]:
category_ind = ['date']

In [43]:
model_iv2sls_ind_test = iv2sls(data_df = df_iv_reg,
                      dependent = Y_test,
                      exog_x = X_exog,
                      endog_x = X_endog,
                      category = category_ind,
                      iv = Z_instr)

result_iv_ind_test = model_iv2sls_ind_test.fit()



In [37]:
print("Coefficients:\n", result_iv_ind_test.params)
print("\nStandard Errors:\n", result_iv_ind_test.bse)
print("\nT-Stats:\n", result_iv_ind_test.tvalues)
print("\nP-Values:\n", result_iv_ind_test.pvalues)
print("\nR-squared:\n", result_iv_ind_test.rsquared)

Coefficients:
 const              0.230028
log_mktcap_lag1   -0.007226
price_ind_lag1    -0.230335
volume_lag1       -0.000023
ins_own_lag1      -0.006216
hat_ind_own       -1.241778
dtype: float64

Standard Errors:
 const              0.003528
log_mktcap_lag1    0.000549
price_ind_lag1     0.006932
volume_lag1        0.000003
ins_own_lag1       0.008497
hat_ind_own        0.119996
dtype: float64

T-Stats:
 const              65.205676
log_mktcap_lag1   -13.163312
price_ind_lag1    -33.225826
volume_lag1        -7.099704
ins_own_lag1       -0.731562
hat_ind_own       -10.348534
dtype: float64

P-Values:
 const               0.000000e+00
log_mktcap_lag1     1.464716e-39
price_ind_lag1     1.302604e-241
volume_lag1         1.253092e-12
ins_own_lag1        4.644367e-01
hat_ind_own         4.292380e-25
dtype: float64

R-squared:
 0.006528918333289724
