# Experiment - Loughran & McDonald Difference-in-difference

In [1]:
# Notebook identifier; also used below as the name of this notebook's folder under 2_pipeline/
NAME = '09-04_experiment_lm_did'
# Name of the project root folder; the working directory is reset to the path ending in this name
PROJECT = 'conference-calls-sentiment'
# Python version recorded for reference (not referenced elsewhere in the visible code)
PYTHON_VERSION = '3.7.0'

### Imports  

In [2]:
import os
import re
import numpy as np
import pandas as pd

# Regressions
from linearmodels import PanelOLS
import statsmodels.api as sm

# Hide annoying warnings
import warnings
warnings.filterwarnings('ignore')

### Settings

In [3]:
# Move to the project root: strip everything that follows the PROJECT folder
# name from the current working directory. If PROJECT does not occur in the
# path, the substitution is a no-op and the working directory is unchanged.
# The raw string avoids the invalid "\w" escape-sequence warning, and
# re.escape keeps any regex metacharacters in PROJECT literal.
workdir = re.sub(r"(?<={})[\w\W]*".format(re.escape(PROJECT)), "", os.getcwd())
os.chdir(workdir)

# Per-notebook pipeline folder with its standard subfolders. exist_ok=True
# makes this idempotent and also recreates a subfolder that is missing from
# an already existing pipeline folder.
pipeline = os.path.join('2_pipeline', NAME)
for folder in ['out', 'store', 'tmp']:
    os.makedirs(os.path.join(pipeline, folder), exist_ok=True)

--- 
# Main code

# Preprocess Transcripts

In [None]:
# Load the control-variable dataset written by the 07-01 pipeline step and preview it
controls = pd.read_feather(
    os.path.join(
        '2_pipeline',
        '07-01_dataset_control_variables',
        'out',
        'control_variables.feather',
    )
)
controls.head()

In [None]:
# Build the difference-in-difference dataset: tone rows flagged by treatment
# group and pre/post period, joined with the control variables.
LM_PUBLICATION = '2011-01-06'  # Date of Loughran & McDonald (2011) Publication
# Tone measures from the 04-01 pipeline step (presumably one row per event and speaker role -- TODO confirm)
tone_by_role = pd.read_feather(os.path.join('2_pipeline', '04-01_tone_lm', 'out', 'lm_tone_by_role.feather'))

tone_did = (tone_by_role
            # Keyword order matters: treated_post reads the post/treated columns created just before it.
            .assign(year=lambda x: x['event_date'].dt.year,
                    quarter=lambda x: x['event_date'].dt.to_period('Q'),
                    # treated = 1 for rows whose speaker_role is 'Management', 0 otherwise
                    treated=lambda x: np.where(x['speaker_role'] == 'Management', 1, 0),
                    # post = 1 for events on or after the publication date
                    post=lambda x: (x['event_date'] >= LM_PUBLICATION).astype('int'),
                    # DiD interaction term
                    treated_post=lambda x: x['post'] * x['treated'])
            # pandas' default (inner) join: rows without matching controls for the same gvkey/quarter are dropped
            .merge(controls, on=['gvkey', 'quarter'])
            .assign(quarter=lambda x: x['quarter'].astype('str'))  # Stata cannot handle the pandas.Period dtype
            # Drops every row that has a missing value in any column
            .dropna()
            # (gvkey, year) MultiIndex; presumably the entity/time index PanelOLS relies on in ols_model
            .set_index(['gvkey', 'year']))

tone_did

In [32]:
# Sanity check: number of rows in the treated (1) and control (0) groups
tone_did['treated'].value_counts()

0    22293
1    22289
Name: treated, dtype: int64

In [33]:
# Save the difference-in-difference dataset as a Stata .dta file in this notebook's 'out' folder
tone_did.to_stata(os.path.join(pipeline, 'out', 'tone_did.dta'))

## Difference-in-difference

In [34]:
# Column names of the control variables added to the regressions below.
# NOTE(review): this rebinds `controls`, which earlier held the control-variable
# DataFrame used in the merge that builds tone_did. Re-running that cell after
# this one requires reloading the DataFrame first.
controls = ['surprise', 'size', 'roa', 'leverage', 'loss', 'capex']

In [35]:
def ols_model(df, dependent, exog, ffe, tfe):
    """Fit a PanelOLS regression with standard errors clustered by entity.

    Parameters
    ----------
    df : DataFrame holding the dependent and explanatory columns; it is
        passed to PanelOLS as is, so it must carry the index PanelOLS needs.
    dependent : name of the dependent-variable column.
    exog : list of explanatory column names; a constant is added to them.
    ffe : forwarded to PanelOLS as ``entity_effects``.
    tfe : forwarded to PanelOLS as ``time_effects``.

    Returns
    -------
    The fitted results object returned by ``PanelOLS.fit``.
    """
    regressors = sm.add_constant(df[exog])
    panel = PanelOLS(
        df[dependent],
        regressors,
        entity_effects=ffe,
        time_effects=tfe,
    )
    return panel.fit(cov_type='clustered', cluster_entity=True)

### Negative Words

In [37]:
# Negative tone: DiD terms and word count only; no control variables, no entity or time effects
ols_model(
    df=tone_did,
    dependent='lm_negative',
    exog=['treated', 'post', 'treated_post', 'num_words'],
    ffe=False,
    tfe=False,
)

0,1,2,3
Dep. Variable:,lm_negative,R-squared:,0.5858
Estimator:,PanelOLS,R-squared (Between):,0.6215
No. Observations:,44582,R-squared (Within):,0.5690
Date:,"Thu, May 13 2021",R-squared (Overall):,0.5858
Time:,20:08:42,Log-likelihood,-1.602e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,1.576e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(4,44577)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,7.3750,0.2470,29.857,0.0000,6.8909,7.8592
treated,-7.5995,0.3093,-24.573,0.0000,-8.2057,-6.9934
post,-2.8507,0.1856,-15.363,0.0000,-3.2144,-2.4870
treated_post,-0.6473,0.2933,-2.2067,0.0273,-1.2222,-0.0723
num_words,0.0090,0.0002,54.303,0.0000,0.0087,0.0093


In [11]:
# Negative tone: DiD terms, word count and the control variables; no entity or time effects
ols_model(
    df=tone_did,
    dependent='lm_negative',
    exog=['treated', 'post', 'treated_post', 'num_words'] + controls,
    ffe=False,
    tfe=False,
)

0,1,2,3
Dep. Variable:,lm_negative,R-squared:,0.5904
Estimator:,PanelOLS,R-squared (Between):,0.6374
No. Observations:,44582,R-squared (Within):,0.5713
Date:,"Thu, May 13 2021",R-squared (Overall):,0.5904
Time:,20:02:05,Log-likelihood,-1.599e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,6423.4
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(10,44571)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,9.6578,1.5922,6.0656,0.0000,6.5370,12.779
treated,-7.5696,0.3069,-24.668,0.0000,-8.1711,-6.9682
post,-2.7532,0.1996,-13.794,0.0000,-3.1445,-2.3620
treated_post,-0.6286,0.2913,-2.1575,0.0310,-1.1996,-0.0575
num_words,0.0090,0.0002,55.593,0.0000,0.0087,0.0093
surprise,-0.0056,0.0061,-0.9286,0.3531,-0.0175,0.0063
size,-0.1733,0.1668,-1.0387,0.2989,-0.5003,0.1537
roa,-0.1735,0.0489,-3.5462,0.0004,-0.2694,-0.0776
leverage,-0.1000,0.9225,-0.1084,0.9137,-1.9082,1.7082


In [12]:
# Negative tone: DiD terms and word count with time effects only
ols_model(
    df=tone_did,
    dependent='lm_negative',
    exog=['treated', 'post', 'treated_post', 'num_words'],
    ffe=False,
    tfe=True,
)

0,1,2,3
Dep. Variable:,lm_negative,R-squared:,0.5853
Estimator:,PanelOLS,R-squared (Between):,0.4737
No. Observations:,44582,R-squared (Within):,0.5174
Date:,"Thu, May 13 2021",R-squared (Overall):,0.5258
Time:,20:02:05,Log-likelihood,-1.596e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,1.572e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(4,44561)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,3.0807,0.3207,9.6055,0.0000,2.4521,3.7093
treated,-7.4642,0.3083,-24.207,0.0000,-8.0686,-6.8599
post,4.0859,0.3205,12.751,0.0000,3.4578,4.7140
treated_post,-0.6079,0.2929,-2.0755,0.0379,-1.1820,-0.0338
num_words,0.0089,0.0002,53.886,0.0000,0.0086,0.0093


In [13]:
# Negative tone: DiD terms and word count with both entity and time effects
ols_model(
    df=tone_did,
    dependent='lm_negative',
    exog=['treated', 'post', 'treated_post', 'num_words'],
    ffe=True,
    tfe=True,
)

0,1,2,3
Dep. Variable:,lm_negative,R-squared:,0.5703
Estimator:,PanelOLS,R-squared (Between):,0.4664
No. Observations:,44582,R-squared (Within):,0.5147
Date:,"Thu, May 13 2021",R-squared (Overall):,0.5226
Time:,20:02:06,Log-likelihood,-1.55e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,1.459e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(4,43961)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,3.1872,0.2921,10.912,0.0000,2.6147,3.7596
treated,-7.1971,0.2842,-25.322,0.0000,-7.7542,-6.6400
post,4.2309,0.2189,19.329,0.0000,3.8019,4.6599
treated_post,-0.5240,0.2838,-1.8465,0.0648,-1.0802,0.0322
num_words,0.0088,0.0001,65.898,0.0000,0.0085,0.0090


In [14]:
# Positive tone: DiD terms and word count only; no control variables, no entity or time effects
ols_model(
    df=tone_did,
    dependent='lm_positive',
    exog=['treated', 'post', 'treated_post', 'num_words'],
    ffe=False,
    tfe=False,
)

0,1,2,3
Dep. Variable:,lm_positive,R-squared:,0.7881
Estimator:,PanelOLS,R-squared (Between):,0.6291
No. Observations:,44582,R-squared (Within):,0.8077
Date:,"Thu, May 13 2021",R-squared (Overall):,0.7881
Time:,20:02:06,Log-likelihood,-1.759e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,4.145e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(4,44577)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-7.2629,0.3960,-18.342,0.0000,-8.0390,-6.4867
treated,6.6313,0.4971,13.340,0.0000,5.6569,7.6056
post,1.8784,0.1705,11.016,0.0000,1.5441,2.2126
treated_post,4.1106,0.4834,8.5040,0.0000,3.1632,5.0580
num_words,0.0144,0.0003,55.446,0.0000,0.0139,0.0149


In [15]:
# Positive tone: DiD terms, word count and the control variables; no entity or time effects
ols_model(
    df=tone_did,
    dependent='lm_positive',
    exog=['treated', 'post', 'treated_post', 'num_words'] + controls,
    ffe=False,
    tfe=False,
)

0,1,2,3
Dep. Variable:,lm_positive,R-squared:,0.7903
Estimator:,PanelOLS,R-squared (Between):,0.6423
No. Observations:,44582,R-squared (Within):,0.8085
Date:,"Thu, May 13 2021",R-squared (Overall):,0.7903
Time:,20:02:07,Log-likelihood,-1.757e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,1.679e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(10,44571)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-14.713,2.3146,-6.3566,0.0000,-19.249,-10.176
treated,6.7453,0.4885,13.808,0.0000,5.7878,7.7028
post,1.3322,0.2152,6.1900,0.0000,0.9104,1.7540
treated_post,4.1431,0.4839,8.5622,0.0000,3.1947,5.0915
num_words,0.0143,0.0003,56.279,0.0000,0.0138,0.0148
surprise,0.0125,0.0025,5.0114,0.0000,0.0076,0.0174
size,0.7932,0.2365,3.3537,0.0008,0.3296,1.2568
roa,0.2540,0.0801,3.1699,0.0015,0.0969,0.4110
leverage,0.3515,1.2616,0.2786,0.7806,-2.1214,2.8243


In [16]:
# Positive tone: DiD terms and word count with time effects only
ols_model(
    df=tone_did,
    dependent='lm_positive',
    exog=['treated', 'post', 'treated_post', 'num_words'],
    ffe=False,
    tfe=True,
)

0,1,2,3
Dep. Variable:,lm_positive,R-squared:,0.7882
Estimator:,PanelOLS,R-squared (Between):,0.2195
No. Observations:,44582,R-squared (Within):,0.7169
Date:,"Thu, May 13 2021",R-squared (Overall):,0.6836
Time:,20:02:07,Log-likelihood,-1.756e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,4.147e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(4,44561)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,4.2059,0.4875,8.6271,0.0000,3.2504,5.1615
treated,6.5281,0.4985,13.096,0.0000,5.5511,7.5051
post,-16.350,0.4556,-35.887,0.0000,-17.243,-15.457
treated_post,4.0635,0.4825,8.4213,0.0000,3.1178,5.0093
num_words,0.0145,0.0003,55.732,0.0000,0.0140,0.0150


In [17]:
# Positive tone: DiD terms and word count with both entity and time effects
ols_model(
    df=tone_did,
    dependent='lm_positive',
    exog=['treated', 'post', 'treated_post', 'num_words'],
    ffe=True,
    tfe=True,
)

0,1,2,3
Dep. Variable:,lm_positive,R-squared:,0.8084
Estimator:,PanelOLS,R-squared (Between):,0.6017
No. Observations:,44582,R-squared (Within):,0.8054
Date:,"Thu, May 13 2021",R-squared (Overall):,0.7845
Time:,20:02:08,Log-likelihood,-1.701e+05
Cov. Estimator:,Clustered,,
,,F-statistic:,4.638e+04
Entities:,601,P-value,0.0000
Avg Obs:,74.180,Distribution:,"F(4,43961)"
Min Obs:,1.0000,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-4.9708,0.4666,-10.654,0.0000,-5.8853,-4.0563
treated,6.7758,0.4466,15.173,0.0000,5.9005,7.6511
post,-1.5236,0.3252,-4.6845,0.0000,-2.1610,-0.8861
treated_post,4.1630,0.4775,8.7189,0.0000,3.2272,5.0989
num_words,0.0143,0.0002,66.215,0.0000,0.0139,0.0147
