In [1]:
# Import libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.metrics import r2_score

In [2]:
# Configure how pandas renders output in this notebook: row and column
# limits, maximum column width, and the number of decimal places shown.
display_options = {
    'display.max_rows': 200,
    'display.max_columns': 20,
    'display.max_colwidth': 50,
    'display.precision': 4,
}
for option_name, option_value in display_options.items():
    pd.set_option(option_name, option_value)

In [3]:
# Define a path for import and export.
# The folder can be overridden without editing the notebook by setting the
# FIN427_DATA_DIR environment variable; when it is not set, the original
# location is used. Joining with '' guarantees a trailing separator, because
# later cells build file names with plain string concatenation (path + name).
path = os.path.join(
    os.environ.get('FIN427_DATA_DIR', '/Users/admin/fin427/module3/'),
    '',
)

In [4]:
# Read the 'ret06' worksheet of the Excel workbook into a DataFrame
workbook_file = path + 'Excel03 Data 20230128.xlsx'
returns1 = pd.read_excel(workbook_file, sheet_name='ret06')

In [5]:
# Show the first rows of the data, then the full list of column names
for preview in (returns1.head(), returns1.columns):
    print(preview)

   PERMNO       DATE     CUSIP                        COMNAM TICKER  PERMCO  \
0   50906 1995-01-31  00088630  A D C TELECOMMUNICATIONS INC   ADCT    2902   
1   50906 1995-02-28  00088630  A D C TELECOMMUNICATIONS INC   ADCT    2902   
2   50906 1995-03-31  00088630  A D C TELECOMMUNICATIONS INC   ADCT    2902   
3   50906 1995-04-28  00088630  A D C TELECOMMUNICATIONS INC   ADCT    2902   
4   50906 1995-05-31  00088630  A D C TELECOMMUNICATIONS INC   ADCT    2902   

   SHRCD    VOL     RET    RETX  ...  d_4801_Banks  d_4802_Finance_NEC  \
0     11  34941 -0.0200 -0.0200  ...             0                   0   
1     11  60361  0.1378  0.1378  ...             0                   0   
2     11  87371  0.0583  0.0583  ...             0                   0   
3     11  78824  0.1186  0.1186  ...             0                   0   
4     11  95578 -0.0606 -0.0606  ...             0                   0   

   d_4803_Insurance  d_4885_Real_Estate_Dev d_4890_REIT  \
0                 0  

In [6]:
# OLS regression with statsmodels.
# Dependent variable: adjusted abnormal stock returns ('abretadj').
# Regressors: LN mkt cap ('lag1lnmc') plus the industry indicator variables.
# No constant is added because there is an industry indicator variable for
# every observation. The add-constant line is left in, but it is switched off.
industry_dummies = [
    'd_1100_Non_Energy_Minerals',
    'd_1200_Producer_Manufacturing',
    'd_1300_Electronic_Technology',
    'd_1400_Consumer_Durables',
    'd_2100_Energy_Minerals',
    'd_2200_Process_Industries',
    'd_2300_Health_Technology',
    'd_2400_Consumer_Non_Durables',
    'd_3100_Industrial_Services',
    'd_3200_Commercial_Services',
    'd_3250_Distribution_Services',
    'd_3300_Technology_Services',
    'd_3350_Health_Services',
    'd_3400_Consumer_Services',
    'd_3500_Retail_Trade',
    'd_4600_Transportation',
    'd_4700_Utilities',
    'd_4801_Banks',
    'd_4802_Finance_NEC',
    'd_4803_Insurance',
    'd_4885_Real_Estate_Dev',
    'd_4890_REIT',
    'd_4900_Communications',
    'd_6000_Miscellaneous',
]
y = returns1['abretadj']
x = returns1[['lag1lnmc', *industry_dummies]]
# x = sm.add_constant(x)
model = sm.OLS(endog=y, exog=x).fit()
# To generate clustered standard errors, fit with the line below instead
# model = sm.OLS(y, x).fit(cov_type='cluster', cov_kwds={'groups': returns1['CUSIP']})

# Keep the fitted values, the summary table, and the coefficient estimates
# together with their standard errors
predictions = model.predict(x)
print_model = model.summary()
b_coef, b_err = model.params, model.bse

# Influence diagnostics: element [0] of cooks_distance holds the Cook's
# distance values (element [1], the p-values, is not used here)
influence = model.get_influence()
cooks_d = influence.cooks_distance[0]
analysis = pd.DataFrame({'predictions': predictions, 'cooks_d': cooks_d})

for report in (print_model, f'R-squared: {model.rsquared:.4f}', b_coef, b_err, analysis):
    print(report)

                            OLS Regression Results                            
Dep. Variable:               abretadj   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     45.21
Date:                Mon, 30 Jan 2023   Prob (F-statistic):          1.84e-213
Time:                        14:42:52   Log-Likelihood:             2.3087e+05
No. Observations:              309269   AIC:                        -4.617e+05
Df Residuals:                  309244   BIC:                        -4.614e+05
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
lag1lnmc      

In [7]:
# OLS regression with statsmodels.
# Dependent variable: unadjusted abnormal stock returns ('abret').
# Regressors: LN mkt cap ('lag1lnmc') plus the industry indicator variables.
# No constant is added because there is an industry indicator variable for
# every observation. The add-constant line is left in, but it is switched off.
# The optional (commented-out) lines below refer to this cell's own
# x1 / y1 / model1, so enabling them affects this regression only.
industry_dummies = [
    'd_1100_Non_Energy_Minerals',
    'd_1200_Producer_Manufacturing',
    'd_1300_Electronic_Technology',
    'd_1400_Consumer_Durables',
    'd_2100_Energy_Minerals',
    'd_2200_Process_Industries',
    'd_2300_Health_Technology',
    'd_2400_Consumer_Non_Durables',
    'd_3100_Industrial_Services',
    'd_3200_Commercial_Services',
    'd_3250_Distribution_Services',
    'd_3300_Technology_Services',
    'd_3350_Health_Services',
    'd_3400_Consumer_Services',
    'd_3500_Retail_Trade',
    'd_4600_Transportation',
    'd_4700_Utilities',
    'd_4801_Banks',
    'd_4802_Finance_NEC',
    'd_4803_Insurance',
    'd_4885_Real_Estate_Dev',
    'd_4890_REIT',
    'd_4900_Communications',
    'd_6000_Miscellaneous',
]
y1 = returns1['abret']
x1 = returns1[['lag1lnmc', *industry_dummies]]
# x1 = sm.add_constant(x1)
model1 = sm.OLS(endog=y1, exog=x1).fit()
# To generate clustered standard errors, fit with the line below instead
# model1 = sm.OLS(y1, x1).fit(cov_type='cluster', cov_kwds={'groups': returns1['CUSIP']})

# Keep the fitted values, the summary table, and the coefficient estimates
# together with their standard errors
predictions1 = model1.predict(x1)
print_model1 = model1.summary()
b_coef1, b_err1 = model1.params, model1.bse

# Influence diagnostics: element [0] of cooks_distance holds the Cook's
# distance values (element [1], the p-values, is not used here)
influence1 = model1.get_influence()
cooks_d1 = influence1.cooks_distance[0]
analysis1 = pd.DataFrame({'predictions': predictions1, 'cooks_d': cooks_d1})

for report in (print_model1, f'R-squared: {model1.rsquared:.4f}', b_coef1, b_err1, analysis1):
    print(report)

                            OLS Regression Results                            
Dep. Variable:                  abret   R-squared:                       0.004
Model:                            OLS   Adj. R-squared:                  0.004
Method:                 Least Squares   F-statistic:                     47.28
Date:                Mon, 30 Jan 2023   Prob (F-statistic):          5.10e-224
Time:                        14:42:52   Log-Likelihood:             2.0919e+05
No. Observations:              309269   AIC:                        -4.183e+05
Df Residuals:                  309244   BIC:                        -4.181e+05
Df Model:                          24                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
lag1lnmc      

In [8]:
# OLS regression with statsmodels.
# Dependent variable: adjusted abnormal stock returns ('abretadj').
# Regressors: the industry indicator variables only.
# No constant is added because there is an industry indicator variable for
# every observation. The add-constant line is left in, but it is switched off.
# The optional (commented-out) lines below refer to this cell's own
# x2 / y2 / model2, so enabling them affects this regression only.
industry_dummies = [
    'd_1100_Non_Energy_Minerals',
    'd_1200_Producer_Manufacturing',
    'd_1300_Electronic_Technology',
    'd_1400_Consumer_Durables',
    'd_2100_Energy_Minerals',
    'd_2200_Process_Industries',
    'd_2300_Health_Technology',
    'd_2400_Consumer_Non_Durables',
    'd_3100_Industrial_Services',
    'd_3200_Commercial_Services',
    'd_3250_Distribution_Services',
    'd_3300_Technology_Services',
    'd_3350_Health_Services',
    'd_3400_Consumer_Services',
    'd_3500_Retail_Trade',
    'd_4600_Transportation',
    'd_4700_Utilities',
    'd_4801_Banks',
    'd_4802_Finance_NEC',
    'd_4803_Insurance',
    'd_4885_Real_Estate_Dev',
    'd_4890_REIT',
    'd_4900_Communications',
    'd_6000_Miscellaneous',
]
y2 = returns1['abretadj']
x2 = returns1[industry_dummies]
# x2 = sm.add_constant(x2)
model2 = sm.OLS(endog=y2, exog=x2).fit()
# To generate clustered standard errors, fit with the line below instead
# model2 = sm.OLS(y2, x2).fit(cov_type='cluster', cov_kwds={'groups': returns1['CUSIP']})

# Keep the fitted values, the summary table, and the coefficient estimates
# together with their standard errors
predictions2 = model2.predict(x2)
print_model2 = model2.summary()
b_coef2, b_err2 = model2.params, model2.bse

# Influence diagnostics: element [0] of cooks_distance holds the Cook's
# distance values (element [1], the p-values, is not used here)
influence2 = model2.get_influence()
cooks_d2 = influence2.cooks_distance[0]
analysis2 = pd.DataFrame({'predictions': predictions2, 'cooks_d': cooks_d2})

for report in (print_model2, f'R-squared: {model2.rsquared:.4f}', b_coef2, b_err2, analysis2):
    print(report)

                            OLS Regression Results                            
Dep. Variable:               abretadj   R-squared:                       0.001
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     8.197
Date:                Mon, 30 Jan 2023   Prob (F-statistic):           5.96e-28
Time:                        14:42:53   Log-Likelihood:             2.3042e+05
No. Observations:              309269   AIC:                        -4.608e+05
Df Residuals:                  309245   BIC:                        -4.605e+05
Df Model:                          23                                         
Covariance Type:            nonrobust                                         
                                    coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------------------
d_1100_Non_Ene