In [1]:
import pandas as pd
from linearmodels.panel import PanelOLS
import statsmodels.api as sm

# Load the dataset. The header line parsed by read_csv is discarded below in
# favour of the labels stored in the first data row.
data = pd.read_csv('data.csv')

# Promote the first data row to column labels, then drop that row.
# Work on an explicit copy so the edits below never write back into `data`.
new_header = data.iloc[0].copy()
# Use .iloc for the two overrides: the Series is indexed by the original
# (string) column labels, so a bare integer key such as new_header[0] relies on
# the deprecated positional fallback and is treated as a *label* by newer
# pandas, which would append entries instead of renaming the first two columns.
new_header.iloc[0] = 'Date'  # First column holds the time identifier
new_header.iloc[1] = 'Entity_ID'  # Second column holds the entity identifier
data.columns = new_header  # Set the header row as the dataframe header
data = data.drop(0)  # Remove the row that has just been promoted to header

# Renumber the remaining rows from 0 after the drop
data.reset_index(drop=True, inplace=True)

# Coerce every column to a numeric dtype; cells that cannot be parsed become
# NaN because of errors='coerce'. The two identifier columns are handled one at
# a time, all remaining columns in a single column-wise pass.
for id_column in ('Entity_ID', 'Date'):
    data[id_column] = pd.to_numeric(data[id_column], errors='coerce')

numeric_columns = data.columns.drop(['Date', 'Entity_ID'])  # every column except the two identifiers
data[numeric_columns] = data[numeric_columns].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

# Build the (Entity_ID, Date) MultiIndex only once both levels are numeric
data = data.set_index(['Entity_ID', 'Date'])

# # Prepare the model data
# X = data[['Log Board_Size', 'PCT_IND_DIR', 'PCT_Women_On_Board', 'CEO_DUALITY', 'Log_Audit_Comm', 'BOARD_AVERAGE_TENURE', 'EQY_INST_PCT_SH_OUT']]
# y = data['TOT_DEBT_TO_TOT_EQY']  # Example dependent variable

# # Define and fit the model
# mod = PanelOLS(y, X, entity_effects=True)
# res = mod.fit(cov_type='clustered', cluster_entity=True)

# # Display the results
# print(res.summary)

from linearmodels.panel import PanelOLS

# Outcome series for the regression
y = data['TOT_DEBT_TO_TOT_EQY']

# Explanatory columns, with a constant column added by statsmodels
X = sm.add_constant(
    data[
        [
            'Log Board_Size',
            'PCT_IND_DIR',
            'PCT_Women_On_Board',
            'CEO_DUALITY',
            'Log_Audit_Comm',
            'BOARD_AVERAGE_TENURE',
            'EQY_INST_PCT_SH_OUT',
        ]
    ]
)

# Panel regression with entity effects, fitted with the 'robust' covariance estimator
model = PanelOLS(y, X, entity_effects=True)
results = model.fit(cov_type='robust')

# Show the estimation summary
print(results)

                           PanelOLS Estimation Summary                           
Dep. Variable:     TOT_DEBT_TO_TOT_EQY   R-squared:                        0.0136
Estimator:                    PanelOLS   R-squared (Between):              0.0038
No. Observations:                  800   R-squared (Within):               0.0136
Date:                 Tue, Mar 26 2024   R-squared (Overall):              0.0065
Time:                         16:30:15   Log-likelihood                   -820.24
Cov. Estimator:                 Robust                                           
                                         F-statistic:                      1.3671
Entities:                          100   P-value                           0.2163
Avg Obs:                        8.0000   Distribution:                   F(7,693)
Min Obs:                        8.0000                                           
Max Obs:                        8.0000   F-statistic (robust):             1.8194
                