# Experiment - Analyst Output

In [1]:
# Notebook-level constants.
NAME = "09-06-experiment_analysts"      # label for this notebook (not referenced in the visible cells)
PROJECT = "conference-calls-sentiment"  # project folder name; used below to locate the project root
PYTHON_VERSION = "3.7.0"                # recorded interpreter version (informational only here)

### Imports  

In [2]:
import os
import re
import numpy as np
import pandas as pd

# Regressions
from linearmodels import PanelOLS
import statsmodels.api as sm

# Hide annoying warnings
import warnings
warnings.filterwarnings('ignore')

### Settings

In [3]:
# Switch to the project root so the relative paths used below resolve.
# The substitution keeps the current working directory up to and including the
# first occurrence of PROJECT and strips everything after it. If PROJECT does
# not occur in the path, nothing is stripped and the directory is unchanged.
# The pattern is a raw string so `\w`/`\W` reach the regex engine instead of
# being parsed as (invalid) string escapes, and PROJECT is escaped so that it
# is always matched literally.
workdir = re.sub(r"(?<={})[\w\W]*".format(re.escape(PROJECT)), "", os.getcwd())
os.chdir(workdir)

--- 
# Main code

In [None]:
# Load the analyst dataset from the pipeline output folder and give it the
# two-level ('gvkey', 'year') index that the panel regressions below rely on.
dataset_path = os.path.join('2_pipeline', '07-02_dataset_construction', 'out', 'analysts_dataset.feather')
analysts_dataset = pd.read_feather(dataset_path).set_index(['gvkey', 'year'])
analysts_dataset.head()

In [5]:
def ols_model(df, tone, dependent):
    """Fit a panel regression of `dependent` on one tone measure plus controls.

    Parameters
    ----------
    df : pandas.DataFrame
        Panel data; must contain `dependent`, `tone` and the control columns.
        Presumably indexed by (entity, time), as PanelOLS expects.
    tone : str
        Name of the tone column to include as a regressor.
    dependent : str
        Name of the dependent-variable column.

    Returns
    -------
    The fitted PanelOLS results, estimated with entity and time effects and
    standard errors clustered by entity.
    """
    outcome = df[dependent]
    controls = ['num_words', 'surprise', 'size', 'roa', 'leverage', 'loss', 'capex']
    regressors = sm.add_constant(df[[tone] + controls])
    panel = PanelOLS(outcome, regressors, entity_effects=True, time_effects=True)
    return panel.fit(cov_type='clustered', cluster_entity=True)

## Regressions

In [6]:
def get_stars(pvalue):
    """Return the significance marker for a p-value.

    '***' below 0.01, '**' below 0.05, '*' below 0.1, and an empty string
    otherwise (all comparisons are strict).
    """
    # Thresholds are checked from strictest to loosest; the first hit wins.
    for threshold, stars in ((0.01, '***'), (0.05, '**'), (0.1, '*')):
        if pvalue < threshold:
            return stars
    return ''

In [7]:
# Print one coefficient table per dependent variable and tone measure:
# variable name, coefficient with significance stars, and t-statistic.
for dv in ['rating_change', 'price_change', 'eps_change']:
    print(dv.upper())
    print('=' * 50)
    for tone in ['lm_tone_norm', 'finbert_tone_norm']:
        res = ols_model(analysts_dataset, tone, dv)
        # Variable names come from the public index of `params` rather than
        # the private `_var_names` attribute of the results object.
        for name, coefficient, tstat, pvalue in zip(res.params.index, res.params, res.tstats, res.pvalues):
            print(f"{name:18s} {coefficient:10.3f}{get_stars(pvalue):3s}\t({tstat:.2f})")
        print('-' * 50)
    print('')

RATING_CHANGE
const                   1.429** 	(2.18)
lm_tone_norm            0.032   	(1.64)
num_words              -0.001** 	(-2.40)
surprise                0.005***	(3.00)
size                   -0.133** 	(-2.03)
roa                    -0.008   	(-1.01)
leverage               -0.101   	(-0.29)
loss                   -0.134   	(-1.44)
capex                  -0.007   	(-0.61)
--------------------------------------------------
const                   1.491** 	(2.30)
finbert_tone_norm       0.099***	(5.43)
num_words              -0.001** 	(-2.13)
surprise                0.006***	(3.25)
size                   -0.140** 	(-2.15)
roa                    -0.008   	(-0.96)
leverage               -0.110   	(-0.32)
loss                   -0.129   	(-1.41)
capex                  -0.005   	(-0.45)
--------------------------------------------------

PRICE_CHANGE
const                 -24.746** 	(-2.49)
lm_tone_norm            0.530***	(6.32)
num_words              -0.005***	(-2.64)
surprise        

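## Bootstrapping

Each specification is re-estimated on `N_ITERS` random subsamples of `N_OBS` complete observations (drawn without replacement, so this is strictly subsampling rather than a classical bootstrap); the tone coefficient and its p-value are stored for every draw.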

In [8]:
def create_sample(df, dependent, n_obs, random_state=None):
    """Draw a random sample of complete observations for one regression.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; left unmodified.
    dependent : str
        Name of the dependent-variable column to keep alongside the tone
        measures and controls.
    n_obs : int
        Number of rows to draw (without replacement, the pandas default).
    random_state : optional
        Forwarded to `DataFrame.sample`; pass a value for a reproducible draw.
        The default (None) keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        `n_obs` rows restricted to the regression columns, with no missing
        values.

    Raises
    ------
    ValueError
        Raised by pandas if fewer than `n_obs` complete rows are available.
    """
    # Use the `dependent` argument here; the previous version read the global
    # `dv`, which only worked when the caller's loop variable had that name.
    columns = [dependent, 'lm_tone_norm', 'finbert_tone_norm', 'num_words',
               'surprise', 'size', 'roa', 'leverage', 'loss', 'capex']
    sample = (df
              .filter(columns)
              .dropna()
              .sample(n_obs, random_state=random_state))
    return sample

In [15]:
N_ITERS = 500   # number of random draws per (dependent variable, tone) pair
N_OBS = 2500    # observations per draw
SEED = 42       # fixed seed so the draws are reproducible across runs

# DataFrame.sample falls back to NumPy's global generator when no random_state
# is passed, so seeding it here makes the whole loop deterministic.
np.random.seed(SEED)

columns = ['dv', 'tone', 'coefficient', 'pvalue']
# Collect one record per draw and build the frame once at the end; growing a
# DataFrame row by row is slow and leaves the numeric columns as object dtype.
records = []
for dv in ['rating_change', 'price_change', 'eps_change']:
    for tone in ['lm_tone_norm', 'finbert_tone_norm']:
        for _ in range(N_ITERS):
            # Create random sample
            sample = create_sample(analysts_dataset, dv, N_OBS)
            res = ols_model(sample, tone, dv)

            # Keep the coefficient and p-value of the tone variable only
            records.append((dv, tone, res.params[tone], res.pvalues[tone]))

results = pd.DataFrame(records, columns=columns)

In [16]:
# Summarise the draws per (dependent variable, tone measure):
#   coefficient    - mean estimated tone coefficient
#   positive_sign  - share of draws with a positive coefficient
#   pct_10/5/1     - share of draws that are positive AND significant at the
#                    10% / 5% / 1% level
summary_columns = ['coefficient', 'positive_sign', 'pct_10', 'pct_5', 'pct_1']
table = (results.assign(positive_sign=lambda x: (x['coefficient'] > 0),
                        pct_10=lambda x: x['positive_sign'] & (x['pvalue'] < 0.10),
                        pct_5=lambda x: x['positive_sign'] & (x['pvalue'] < 0.05),
                        pct_1=lambda x: x['positive_sign'] & (x['pvalue'] < 0.01))
                .groupby(['dv', 'tone'])
                # The 'mean' alias replaces the np.mean callable, which recent
                # pandas versions deprecate inside agg; the result is the same.
                .agg({column: 'mean' for column in summary_columns})
                .sort_index(ascending=False)
                .round(3))
table

Unnamed: 0_level_0,Unnamed: 1_level_0,coefficient,positive_sign,pct_10,pct_5,pct_1
dv,tone,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
rating_change,lm_tone_norm,0.032,0.94,0.21,0.098,0.008
rating_change,finbert_tone_norm,0.096,1.0,0.994,0.964,0.804
price_change,lm_tone_norm,0.503,0.934,0.464,0.346,0.136
price_change,finbert_tone_norm,0.84,0.984,0.826,0.732,0.508
eps_change,lm_tone_norm,0.002,0.586,0.048,0.024,0.004
eps_change,finbert_tone_norm,0.008,0.744,0.144,0.074,0.01


In [18]:
# Emit the summary table as LaTeX source (plain text output).
latex_source = table.to_latex()
print(latex_source)

\begin{tabular}{llrrrrr}
\toprule
           &                   &  coefficient &  positive\_sign &  pct\_10 &  pct\_5 &  pct\_1 \\
dv & tone &              &                &         &        &        \\
\midrule
rating\_change & lm\_tone\_norm &        0.032 &          0.940 &   0.210 &  0.098 &  0.008 \\
           & finbert\_tone\_norm &        0.096 &          1.000 &   0.994 &  0.964 &  0.804 \\
price\_change & lm\_tone\_norm &        0.503 &          0.934 &   0.464 &  0.346 &  0.136 \\
           & finbert\_tone\_norm &        0.840 &          0.984 &   0.826 &  0.732 &  0.508 \\
eps\_change & lm\_tone\_norm &        0.002 &          0.586 &   0.048 &  0.024 &  0.004 \\
           & finbert\_tone\_norm &        0.008 &          0.744 &   0.144 &  0.074 &  0.010 \\
\bottomrule
\end{tabular}

