# Fixed effects regression models

## Set-up

In [1]:
import pandas as pd
import numpy as np
import pickle
import statsmodels.formula.api as smf
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats
from sklearn.preprocessing import minmax_scale
import statsmodels.stats.api as sms
from statsmodels.compat import lzip
from statsmodels.stats.stattools import durbin_watson
import itertools
from linearmodels import PooledOLS
import statsmodels.api as sm
from statsmodels.stats.diagnostic import het_white, het_breuschpagan
import numpy.linalg as la
from scipy import stats
import numpy as np
from linearmodels import PanelOLS
from linearmodels import RandomEffects
import statsmodels.api as sm
from linearmodels.panel import compare
import csv
from io import StringIO

In [2]:
# Load the final (pivoted) company dataframe from a local pickle file.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source.
data = pd.read_pickle('../Data/Variable dataframes/company_details_pivoted.pkl')

## Time fixed effects with clustered errors

### Data set-up

In [3]:
# Index the panel by (firm ticker, numeric year), as required by the panel
# estimators. Years that cannot be parsed as numbers become NaN.
data['Year_num'] = pd.to_numeric(data['Year'], errors='coerce')
mi_data_firm = data.set_index(["Exchange-Ticker", "Year_num"])

# Coerce these columns to a numeric dtype (unparseable values become NaN)
for numeric_col in ('Controversies_all_bool', 'Sensitive',
                    'sustainability_comp_incentives', 'global_compact'):
    mi_data_firm[numeric_col] = pd.to_numeric(mi_data_firm[numeric_col],
                                              errors='coerce')

# Control variables shared by the "with controls" and "lagged" specifications
control_vars = [
    'log_total_revenue',
    'trimmed_ROA',
    'trimmed_liabilities_to_assets',
    'trimmed_book_val_per_share',
]

# Regressor sets: main, main plus controls, and lagged regressors plus controls
exog_vars_main = [
    'Sensitive',
    'ESG_controversies_numeric',
    'board_gender_diversity',
    'sustainability_comp_incentives',
]
exog_vars_w_control = exog_vars_main + control_vars
exog_vars_lagged = [
    'Sensitive',
    'ESG_controversies_numeric_lag1',
    'board_gender_diversity_lag1',
    'sustainability_comp_incentives_lag1',
] + control_vars

# Dependent variables from the original predictions
endog_options = [
    'Market_to_process_log',
    'Env_market_to_process_log',
    'Soc_market_to_process_log',
]

# Dependent variables from the alternate predictions (same names + '_alt')
endog_options_alt = [f'{endog_name}_alt' for endog_name in endog_options]

# One-hot encode industry and region. drop_first=True drops the first category
# of each variable, which then acts as the reference level. The generated
# columns are renamed to short identifiers.
dummy_short_names = {
    'main_industry_Consumer Durables and Apparel (Primary)': 'ind_dur_app',
    'main_industry_Consumer Services (Primary)': 'ind_serv',
    'main_industry_Food and Staples Retailing (Primary)': 'ind_food_ret',
    'main_industry_Food, Beverage and Tobacco (Primary)': 'ind_food_bev',
    'main_industry_Household and Personal Products (Primary)': 'ind_house_pers',
    'main_industry_Retailing (Primary)': 'ind_ret',
    'geo_region_Europe': 'europe',
    'geo_region_Latin America and Caribbean': 'lat_am',
    'geo_region_United States and Canada': 'us_can',
}
mi_data_firm = pd.get_dummies(
    mi_data_firm,
    columns=['main_industry', 'geo_region'],
    drop_first=True,
).rename(columns=dummy_short_names)

# Dummy column groups used when assembling the regressors
ind_dummies = ['ind_dur_app', 'ind_serv', 'ind_food_ret', 'ind_food_bev',
               'ind_house_pers', 'ind_ret']
geo_dummies = ['europe', 'lat_am', 'us_can']
ind_geo_dummy_cols = ind_dummies + geo_dummies

### Function set-up

In [4]:
def _time_fe_design(exog_cols, endog_cols):
    """Build the estimation sample and regressor matrix for one specification.

    Rows of the module-level ``mi_data_firm`` with a missing value in any of
    ``exog_cols`` or ``endog_cols`` are dropped, so every outcome in
    ``endog_cols`` is estimated on the same rows.

    Returns a ``(sample, exog)`` tuple, where ``exog`` is ``sample[exog_cols]``
    with a constant column added.
    """
    sample = mi_data_firm[exog_cols + endog_cols].dropna()
    return sample, sm.add_constant(sample[exog_cols])


def _fit_time_fe(endog, exog):
    """Fit a PanelOLS with time effects and entity/time clustered errors."""
    model = PanelOLS(endog, exog, time_effects=True, entity_effects=False)
    return model.fit(cov_type='clustered',
                     cluster_entity=True,
                     cluster_time=True)


def compare_models(endog_cols, filename, model_names):
    """Fit three specifications per outcome and export a comparison table.

    For every outcome in ``endog_cols`` three time-fixed-effects models are
    estimated: ``main`` (main regressors + industry dummies), ``main with
    controls`` (adds the control variables and region dummies) and ``lagged``
    (lagged regressors + controls + industry and region dummies).

    Parameters
    ----------
    endog_cols : iterable of str
        Dependent-variable columns of ``mi_data_firm``. All of them are used
        when dropping incomplete rows, not only the one being modelled.
    filename : str
        Prefix of the output file ``../Data/{filename}_model_res.xlsx``.
    model_names : iterable of str
        Display name for each outcome, paired positionally with
        ``endog_cols``; used as the suffix of each model's label.

    Returns
    -------
    None
        The comparison table is written to disk as a side effect.
    """
    endog_cols = list(endog_cols)

    # The estimation sample of each specification does not depend on which
    # outcome is being modelled, so build it once instead of once per outcome.
    specifications = (
        ('main', exog_vars_main + ind_dummies),
        ('main with controls', exog_vars_w_control + ind_geo_dummy_cols),
        ('lagged', exog_vars_lagged + ind_geo_dummy_cols),
    )
    designs = [(label,) + _time_fe_design(exog_cols, endog_cols)
               for label, exog_cols in specifications]

    # Insertion order (outcome by outcome, specification by specification)
    # determines the column order of the comparison table.
    model_dict = {}
    for endog_col, endog_name in zip(endog_cols, model_names):
        for label, sample, exog in designs:
            model_dict[f'{label} {endog_name}'] = _fit_time_fe(
                sample[endog_col], exog)

    comparison = compare(model_dict,
                         stars=True, precision='std_errors')

    # Round-trip the summary through CSV to get a DataFrame; the first and
    # last lines of the CSV text are skipped (presumably title and trailer).
    comparison_df = pd.read_csv(StringIO(comparison.summary.as_csv()),
                                skiprows=1, skipfooter=1, engine='python')
    comparison_df.to_excel(f'../Data/{filename}_model_res.xlsx')

### Models using original predictions 

In [5]:
# Fit every specification for the original-prediction outcomes; the comparison
# table is written to '../Data/fixed_effects_models_model_res.xlsx'.
compare_models(endog_cols=endog_options, 
               filename='fixed_effects_models',
               model_names=endog_options)

### Models using alternate predictions 

In [6]:
# Fit every specification for the alternate-prediction outcomes; the comparison
# table is written to '../Data/fixed_effects_models_alternate_model_res.xlsx'.
compare_models(endog_cols=endog_options_alt, 
               filename='fixed_effects_models_alternate',
               model_names=endog_options_alt)