# Tone - Combine

In [1]:
# Notebook-level identifiers.
# PROJECT is the project folder name looked up in the working directory;
# NAME is the sub-folder of '2_pipeline' that receives this notebook's output.
PROJECT = "conference-calls-sentiment"
NAME = "04-03_tone_measures"
PYTHON_VERSION = "3.7.0"

### Imports

In [2]:
import os
import re
import numpy as np
import pandas as pd

### Settings

In [3]:
# Cut the current working directory off right after the project folder name
# and make that location the working directory, so the relative
# '2_pipeline/...' paths used in this notebook resolve.
# The pattern is a raw string because it contains regex escapes (\w, \W), and
# the project name is escaped so it is always matched literally.
workdir = re.sub(r"(?<={})[\w\W]*".format(re.escape(PROJECT)), "", os.getcwd())
os.chdir(workdir)

# Output location of this notebook. exist_ok=True keeps the cell re-runnable
# and also recreates a sub-folder that is missing even though the pipeline
# folder itself already exists (makedirs creates the parent as needed).
pipeline = os.path.join('2_pipeline', NAME)
for folder in ['out', 'store', 'tmp']:
    os.makedirs(os.path.join(pipeline, folder), exist_ok=True)

---
# Main code

## Tone by Firm

In [4]:
# Load the firm-level LM tone table written by the 04-01_tone_lm step
# and preview its first rows.
lm_firm_path = os.path.join('2_pipeline', '04-01_tone_lm', 'out', 'lm_tone_by_firm.feather')
lm_tone_by_firm = pd.read_feather(lm_firm_path)
lm_tone_by_firm.head()

Unnamed: 0,gvkey,ticker,event_date,num_words,num_sentences,num_speakers,lm_positive,lm_negative,lm_tone,lm_tone_norm
0,1013,ADCT,2004-02-18,3882,217,14,47,38,0.105882,-0.187908
1,1013,ADCT,2004-05-19,3720,195,9,40,29,0.15942,0.03232
2,1013,ADCT,2004-08-25,3424,145,10,28,40,-0.176471,-1.349363
3,1013,ADCT,2004-12-14,3583,168,13,40,41,-0.012346,-0.674237
4,1013,ADCT,2005-02-28,4503,232,13,53,42,0.115789,-0.147155


In [5]:
# Load the firm-level FinBERT tone table written by the 04-02_tone_finbert
# step and preview its first rows.
finbert_firm_path = os.path.join('2_pipeline', '04-02_tone_finbert', 'out', 'finbert_tone_by_firm.feather')
finbert_tone_by_firm = pd.read_feather(finbert_firm_path)
finbert_tone_by_firm.head()

Unnamed: 0,gvkey,ticker,event_date,negative,neutral,positive,finbert_tone,finbert_tone_norm
0,1013,ADCT,2004-02-18,22,163,32,0.185185,-0.43352
1,1013,ADCT,2004-05-19,11,151,33,0.5,0.723564
2,1013,ADCT,2004-08-25,22,105,18,-0.1,-1.481702
3,1013,ADCT,2004-12-14,11,128,29,0.45,0.539792
4,1013,ADCT,2005-02-28,27,165,40,0.19403,-0.401012


In [6]:
# Combine the LM and FinBERT firm-level measures into a single frame.
# - The join keys are spelled out: without `on=`, pandas joins on every
#   column the two inputs share, so a new shared column upstream would
#   silently change the join.
# - how='outer' keeps rows present in only one of the two inputs.
# - validate='1:1' raises if the keys are not unique in either input.
# year / quarter are derived from event_date via the .dt accessor, and the
# final filter keeps only the listed columns, in that order.
tone_by_firm = (pd.merge(lm_tone_by_firm, finbert_tone_by_firm,
                         on=['gvkey', 'ticker', 'event_date'],
                         how='outer', validate='1:1')
                .assign(year=lambda x: x['event_date'].dt.year,
                        quarter=lambda x: x['event_date'].dt.to_period('Q'))
                .filter(items=['gvkey', 'ticker', 'event_date', 'num_words', 'num_sentences',
                               'lm_tone', 'lm_tone_norm', 'finbert_tone', 'finbert_tone_norm',
                               'quarter', 'year']))
tone_by_firm

Unnamed: 0,gvkey,ticker,event_date,num_words,num_sentences,lm_tone,lm_tone_norm,finbert_tone,finbert_tone_norm,quarter,year
0,1013,ADCT,2004-02-18,3882,217,0.105882,-0.187908,0.185185,-0.433520,2004Q1,2004
1,1013,ADCT,2004-05-19,3720,195,0.159420,0.032320,0.500000,0.723564,2004Q2,2004
2,1013,ADCT,2004-08-25,3424,145,-0.176471,-1.349363,-0.100000,-1.481702,2004Q3,2004
3,1013,ADCT,2004-12-14,3583,168,-0.012346,-0.674237,0.450000,0.539792,2004Q4,2004
4,1013,ADCT,2005-02-28,4503,232,0.115789,-0.147155,0.194030,-0.401012,2005Q1,2005
...,...,...,...,...,...,...,...,...,...,...,...
26232,316056,ALLE,2019-10-24,2329,134,0.629630,1.966520,0.593750,1.068137,2019Q4,2019
26233,316056,ALLE,2020-02-18,2243,127,0.178571,0.111098,0.432836,0.476706,2020Q1,2020
26234,316056,ALLE,2020-04-23,5007,280,0.204969,0.219684,0.344828,0.153236,2020Q2,2020
26235,316056,ALLE,2020-07-23,4036,217,0.333333,0.747709,0.621053,1.168486,2020Q3,2020


In [7]:
# Write the combined firm-level table to this notebook's 'out' folder.
firm_out_path = os.path.join(pipeline, 'out', 'tone_by_firm.feather')
tone_by_firm.to_feather(firm_out_path)

## Tone by Role

In [8]:
# Load the role-level LM tone table written by the 04-01_tone_lm step
# and preview its first rows.
lm_role_path = os.path.join('2_pipeline', '04-01_tone_lm', 'out', 'lm_tone_by_role.feather')
lm_tone_by_role = pd.read_feather(lm_role_path)
lm_tone_by_role.head()

Unnamed: 0,gvkey,ticker,event_date,speaker_role,num_words,num_sentences,num_speakers,lm_positive,lm_negative,lm_tone,lm_tone_norm
0,1013,ADCT,2004-02-18,Analyst,1511,87,12,13,21,-0.235294,-0.877963
1,1013,ADCT,2004-02-18,Management,2371,130,2,34,17,0.333333,0.774359
2,1013,ADCT,2004-05-19,Analyst,1306,70,7,7,14,-0.333333,-1.162846
3,1013,ADCT,2004-05-19,Management,2414,125,2,33,15,0.375,0.895435
4,1013,ADCT,2004-08-25,Analyst,1248,60,8,6,24,-0.6,-1.937729


In [9]:
# Load the role-level FinBERT tone table written by the 04-02_tone_finbert
# step and preview its first rows.
finbert_role_path = os.path.join('2_pipeline', '04-02_tone_finbert', 'out', 'finbert_tone_by_role.feather')
finbert_tone_by_role = pd.read_feather(finbert_role_path)
finbert_tone_by_role.head()

Unnamed: 0,gvkey,ticker,event_date,speaker_role,negative,neutral,positive,finbert_tone,finbert_tone_norm
0,1013,ADCT,2004-02-18,Analyst,7,72,8,0.066667,-0.202309
1,1013,ADCT,2004-02-18,Management,15,91,24,0.230769,0.180165
2,1013,ADCT,2004-05-19,Analyst,4,63,3,-0.142857,-0.690647
3,1013,ADCT,2004-05-19,Management,7,88,30,0.621622,1.091126
4,1013,ADCT,2004-08-25,Analyst,11,47,2,-0.692308,-1.971253


In [10]:
# Combine the LM and FinBERT role-level measures into a single frame.
# - The join keys are spelled out: without `on=`, pandas joins on every
#   column the two inputs share, so a new shared column upstream would
#   silently change the join.
# - how='outer' keeps rows present in only one of the two inputs.
# - validate='1:1' raises if the keys are not unique in either input.
# year / quarter are derived from event_date via the .dt accessor, and the
# final filter keeps only the listed columns, in that order.
tone_by_role = (pd.merge(lm_tone_by_role, finbert_tone_by_role,
                         on=['gvkey', 'ticker', 'event_date', 'speaker_role'],
                         how='outer', validate='1:1')
                   .assign(year=lambda x: x['event_date'].dt.year,
                           quarter=lambda x: x['event_date'].dt.to_period('Q'))
                   .filter(items=['gvkey', 'ticker', 'event_date', 'speaker_role', 'num_words',
                                  'num_sentences', 'lm_tone', 'lm_tone_norm', 'finbert_tone',
                                  'finbert_tone_norm', 'quarter', 'year']))
tone_by_role

Unnamed: 0,gvkey,ticker,event_date,speaker_role,num_words,num_sentences,lm_tone,lm_tone_norm,finbert_tone,finbert_tone_norm,quarter,year
0,1013,ADCT,2004-02-18,Analyst,1511,87,-0.235294,-0.877963,0.066667,-0.202309,2004Q1,2004
1,1013,ADCT,2004-02-18,Management,2371,130,0.333333,0.774359,0.230769,0.180165,2004Q1,2004
2,1013,ADCT,2004-05-19,Analyst,1306,70,-0.333333,-1.162846,-0.142857,-0.690647,2004Q2,2004
3,1013,ADCT,2004-05-19,Management,2414,125,0.375000,0.895435,0.621622,1.091126,2004Q2,2004
4,1013,ADCT,2004-08-25,Analyst,1248,60,-0.600000,-1.937729,-0.692308,-1.971253,2004Q3,2004
...,...,...,...,...,...,...,...,...,...,...,...,...
52139,316056,ALLE,2020-04-23,Management,3934,229,0.267176,0.582118,0.372549,0.510612,2020Q2,2020
52140,316056,ALLE,2020-07-23,Analyst,811,41,-0.047619,-0.332615,0.333333,0.419212,2020Q3,2020
52141,316056,ALLE,2020-07-23,Management,3225,176,0.403509,0.978276,0.651163,1.159978,2020Q3,2020
52142,316056,ALLE,2020-10-22,Analyst,1130,67,0.263158,0.570443,0.090909,-0.145807,2020Q4,2020


In [11]:
# Write the combined role-level table to this notebook's 'out' folder.
role_out_path = os.path.join(pipeline, 'out', 'tone_by_role.feather')
tone_by_role.to_feather(role_out_path)

## Tone by Speaker

In [12]:
# Load the speaker-level LM tone table written by the 04-01_tone_lm step
# and preview its first rows.
lm_speaker_path = os.path.join('2_pipeline', '04-01_tone_lm', 'out', 'lm_tone_by_speaker.feather')
lm_tone_by_speaker = pd.read_feather(lm_speaker_path)
lm_tone_by_speaker.head()

Unnamed: 0,gvkey,ticker,event_date,speaker_role,speaker_name,speaker_firm,num_words,num_sentences,lm_positive,lm_negative,lm_tone,lm_tone_norm
0,1013,ADCT,2004-02-18,Analyst,BUCK E,JANCO PARTNERS,117,6,1,0,1.0,1.696411
1,1013,ADCT,2004-02-18,Analyst,CHURCH R,WACHOVIA SECURITIES,111,10,2,1,0.333333,0.649002
2,1013,ADCT,2004-02-18,Analyst,COLEMAN S,MORGAN STANLEY,78,8,0,0,0.0,0.125298
3,1013,ADCT,2004-02-18,Analyst,COOPERSCHMIDT M,LEHMAN BROTHERS,135,11,1,2,-0.333333,-0.398407
4,1013,ADCT,2004-02-18,Analyst,GOULD M,CSFB,159,9,3,2,0.2,0.43952


In [13]:
# Load the speaker-level FinBERT tone table written by the 04-02_tone_finbert
# step and preview its first rows.
finbert_speaker_path = os.path.join('2_pipeline', '04-02_tone_finbert', 'out', 'finbert_tone_by_speaker.feather')
finbert_tone_by_speaker = pd.read_feather(finbert_speaker_path)
finbert_tone_by_speaker.head()

Unnamed: 0,gvkey,ticker,event_date,speaker_role,speaker_name,speaker_firm,negative,neutral,positive,finbert_tone,finbert_tone_norm
0,1013,ADCT,2004-02-18,Analyst,BUCK E,JANCO PARTNERS,0,6,0,0.0,-0.024677
1,1013,ADCT,2004-02-18,Analyst,CHURCH R,WACHOVIA SECURITIES,1,8,1,0.0,-0.024677
2,1013,ADCT,2004-02-18,Analyst,COLEMAN S,MORGAN STANLEY,0,7,1,1.0,1.458168
3,1013,ADCT,2004-02-18,Analyst,COOPERSCHMIDT M,LEHMAN BROTHERS,1,9,1,0.0,-0.024677
4,1013,ADCT,2004-02-18,Analyst,GOULD M,CSFB,0,7,2,1.0,1.458168


In [14]:
# Combine the LM and FinBERT speaker-level measures into a single frame.
# - The join keys are spelled out: without `on=`, pandas joins on every
#   column the two inputs share, so a new shared column upstream would
#   silently change the join.
# - how='outer' keeps rows present in only one of the two inputs.
# - validate='1:1' raises if the keys are not unique in either input.
# year / quarter are derived from event_date via the .dt accessor, and the
# final filter keeps only the listed columns, in that order.
tone_by_speaker = (pd.merge(lm_tone_by_speaker, finbert_tone_by_speaker,
                            on=['gvkey', 'ticker', 'event_date', 'speaker_role',
                                'speaker_name', 'speaker_firm'],
                            how='outer', validate='1:1')
                   .assign(year=lambda x: x['event_date'].dt.year,
                           quarter=lambda x: x['event_date'].dt.to_period('Q'))
                   .filter(items=['gvkey', 'ticker', 'event_date', 'speaker_role', 'speaker_name',
                                  'speaker_firm', 'num_words', 'num_sentences', 'lm_tone', 'lm_tone_norm',
                                  'finbert_tone', 'finbert_tone_norm', 'quarter', 'year']))
tone_by_speaker

Unnamed: 0,gvkey,ticker,event_date,speaker_role,speaker_name,speaker_firm,num_words,num_sentences,lm_tone,lm_tone_norm,finbert_tone,finbert_tone_norm,quarter,year
0,1013,ADCT,2004-02-18,Analyst,BUCK E,JANCO PARTNERS,117,6,1.000000,1.696411,0.000000,-0.024677,2004Q1,2004
1,1013,ADCT,2004-02-18,Analyst,CHURCH R,WACHOVIA SECURITIES,111,10,0.333333,0.649002,0.000000,-0.024677,2004Q1,2004
2,1013,ADCT,2004-02-18,Analyst,COLEMAN S,MORGAN STANLEY,78,8,0.000000,0.125298,1.000000,1.458168,2004Q1,2004
3,1013,ADCT,2004-02-18,Analyst,COOPERSCHMIDT M,LEHMAN BROTHERS,135,11,-0.333333,-0.398407,0.000000,-0.024677,2004Q1,2004
4,1013,ADCT,2004-02-18,Analyst,GOULD M,CSFB,159,9,0.200000,0.439520,1.000000,1.458168,2004Q1,2004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
308182,316056,ALLE,2020-10-22,Analyst,SNYDER C,UBS INVESTMENT BANK,114,8,-1.000000,-1.445816,-1.000000,-1.507521,2020Q4,2020
308183,316056,ALLE,2020-10-22,Analyst,WEST C,LONGBOW RESEARCH LLC,83,6,1.000000,1.696411,-0.333333,-0.518958,2020Q4,2020
308184,316056,ALLE,2020-10-22,Analyst,WOJS T,ROBERT W. BAIRD & CO. INCORPORATED,89,6,-1.000000,-1.445816,1.000000,1.458168,2020Q4,2020
308185,316056,ALLE,2020-10-22,Management,PETRATIS D,ALLEGION PLC,2047,115,0.500000,0.910855,0.522388,0.749943,2020Q4,2020


In [15]:
# Write the combined speaker-level table to this notebook's 'out' folder.
speaker_out_path = os.path.join(pipeline, 'out', 'tone_by_speaker.feather')
tone_by_speaker.to_feather(speaker_out_path)