In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import wrds
from datetime import datetime
import time
import matplotlib.pyplot as plt

In [None]:
# --- Connect to WRDS ---
# Instantiates the wrds client and keeps the handle in `conn` for later cells.
conn = wrds.Connection()

### 1. Load Data for Regression

In [2]:
# Load the regression dataset from the CSV in the working directory.
df_regression = pd.read_csv(filepath_or_buffer="df_main_reg.csv")

In [3]:
# Replace the four auto_lag columns with their absolute values (sign is discarded).
lag_cols = [f'auto_lag{k}' for k in range(1, 5)]
df_regression[lag_cols] = df_regression[lag_cols].abs()

In [5]:
# Persist the frame (without the index column).
# NOTE(review): this is the same file the notebook loads from, so the write
# overwrites the input data with the abs()-transformed lag columns.
file_path_1 = 'df_main_reg.csv'
df_regression.to_csv(path_or_buf=file_path_1, index=False)

In [4]:
# Preview the first three rows
df_regression.head(n=3)

Unnamed: 0,cusip,date,bas,tno,mktcap,price_ind,volume,illiq,volatility,synch,ind_own,act_own,ins_own,auto_lag1,auto_lag2,auto_lag3,auto_lag4
0,30710,2014-12-31,0.084839,332.131683,178.587706,0.039206,7.001668,1.135563,0.277043,-3.648,0.012341,0.084358,0.234046,0.041,0.169,0.203,0.078
1,30710,2015-03-31,0.051639,443.806176,274.286735,0.03411,9.355878,1.095236,0.317542,-1.841,0.012512,0.131629,0.329042,0.223,0.12,0.025,0.305
2,30710,2015-06-30,0.04619,581.424544,465.762113,0.02719,12.664008,0.313499,0.249738,-2.326,0.017502,0.114851,0.349641,0.106,0.062,0.202,0.156


### 2. Simple OLS Regression

#### -1) Dependent Var (Y) - Market Quality

In [6]:
# Dependent (market-quality) variables: each one is used as a separate response
market_quality_vars = [
    'bas', 'tno', 'illiq', 'volatility', 'synch',
    *(f'auto_lag{k}' for k in range(1, 5)),
]

In [7]:
# Response matrix: one column per dependent variable
Y = df_regression.loc[:, market_quality_vars]

#### -2) Independent Var (X) - Index and Control Vars

In [8]:
# Index variable, kept as a one-element list so it can be concatenated with the controls
index_var = [
    'ind_own',
]
# Control variables added alongside the index variable
control_vars = [
    'mktcap',
    'price_ind',
    'volume',
]

In [9]:
# Design matrix: index variable + controls, with an intercept column added
X = sm.add_constant(df_regression[index_var + control_vars])

#### -3) OLS Regression

In [20]:
# Two-column subset of the responses (bas and tno only)
Y_1 = df_regression.loc[:, ['bas', 'tno']]

In [10]:
# Fit one OLS regression per dependent variable on the shared design matrix X.
# `sm` (statsmodels.api) is already imported in the notebook's import cell, so it
# is not re-imported here.
#
# results maps each dependent-variable name to its fitted statsmodels results object.
# NOTE(review): the fits use statsmodels' default (nonrobust) covariance; the data
# appear to be repeated cusip/date observations -- consider whether clustered
# standard errors are needed (confirm with the author).
results = {}
for col in Y.columns:  # Y holds one column per response variable
    # missing='drop' discards rows with NaN in the response or the regressors
    model = sm.OLS(Y[col], X, missing='drop').fit()
    results[col] = model
    print(f"Results for {col}:")
    print(model.summary())

Results for bas:
                            OLS Regression Results                            
Dep. Variable:                    bas   R-squared:                       0.008
Model:                            OLS   Adj. R-squared:                  0.008
Method:                 Least Squares   F-statistic:                     515.0
Date:                Wed, 11 Jun 2025   Prob (F-statistic):               0.00
Time:                        03:14:17   Log-Likelihood:            -1.4533e+05
No. Observations:              257312   AIC:                         2.907e+05
Df Residuals:                  257307   BIC:                         2.907e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          0.1301      0.001   

In [11]:
# Gather one coefficient table per fitted model, tagged with its dependent variable
summary_rows = []

for dep_var, model in results.items():
    coef_df = (
        model.summary2().tables[1]  # per-regressor coef, std err, t, p
        .reset_index()
        .rename(columns={'index': 'variable'})
        .assign(dependent_var=dep_var, r_squared=model.rsquared)
    )
    summary_rows.append(coef_df)

# Stack the per-model tables into a single long DataFrame
summary_table = pd.concat(summary_rows, ignore_index=True)

# Keep only the columns of interest, in a readable order
summary_table = summary_table[
    ['dependent_var', 'variable', 'Coef.', 'Std.Err.', 't', 'P>|t|', 'r_squared']
]

print(summary_table)

   dependent_var   variable         Coef.      Std.Err.           t  \
0            bas      const  1.301436e-01  1.436349e-03   90.607199   
1            bas    ind_own -5.426557e-01  1.574864e-02  -34.457310   
2            bas     mktcap  4.718136e-07  4.665944e-08   10.111858   
3            bas  price_ind -8.939195e-02  3.654392e-03  -24.461510   
4            bas     volume -6.858059e-05  3.450217e-06  -19.877179   
5            tno      const  4.715155e+02  4.012331e+00  117.516590   
6            tno    ind_own  5.608990e+02  4.399261e+01   12.749845   
7            tno     mktcap -2.468761e-03  1.303396e-04  -18.940991   
8            tno  price_ind  2.142447e+02  1.020826e+01   20.987373   
9            tno     volume  1.189778e+00  9.637918e-03  123.447613   
10         illiq      const -1.681233e+01  2.685367e+00   -6.260720   
11         illiq    ind_own -5.885988e+01  2.944331e+01   -1.999092   
12         illiq     mktcap  3.849141e-04  8.723348e-05    4.412458   
13    