# Hassan, et al. (2019)

> Firm-level risk and sentiment derived from quarterly earnings conference calls

This module downloads and processes data developed by:

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, 2019, “Firm-Level Political Risk: Measurement and Effects,”  Quarterly Journal of Economics, 134 (4), pp.2135-2202. <https://doi.org/10.1093/qje/qjz021>.

The dataset also contains data developed by the papers below, but we will not use it in this module:

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, "The Global Impact of Brexit Uncertainty," 2020, NBER Working Paper 26609

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Markus Schwedeler, Ahmed Tahoun, "Firm-level Exposure to Epidemic Diseases: Covid-19, SARS, and H1N1," 2020, mimeo London Business School. Available at SSRN: <https://ssrn.com/abstract=3566530>

See the authors' dedicated website for more information on this dataset: <https://www.firmlevelrisk.com/>

In [None]:
#| default_exp papers.hassan_etal_2019

In [None]:
#| export
from __future__ import annotations
import pandas as pd

import pandasmore as pdm
from finsets import wrds

In [None]:
#| export
def source_url():
    """Return the download URL of every data vintage as a `pd.Series` indexed by vintage label.

    Vintages are listed in ascending chronological order, so the last entry is the most recent one downloaded."""

    vintages = {
        '07_08_2023': "https://www.dropbox.com/s/m7o9oycj49rpl9d/firmquarter_2022q1.dta?raw=1",
    }
    return pd.Series(vintages)

List the URLs of all data vintages:

In [None]:
source_url()

07_08_2023    https://www.dropbox.com/s/m7o9oycj49rpl9d/firm...
dtype: object

Link to the latest vintage I've downloaded:

In [None]:
source_url().iloc[-1]  # positional access; integer keys on a string-labelled Series are deprecated in pandas

'https://www.dropbox.com/s/m7o9oycj49rpl9d/firmquarter_2022q1.dta?raw=1'

In [None]:
#| export
def variables():
    """Return the names of the key variables in the dataset, as a list.

    `company_name`,`hqcountrycode`,`isin`,`cusip`,`ticker` are also available but are omitted here to speed things up and save memory."""

    id_cols = ['gvkey', 'date', 'date_earningscall']
    risk_cols = ['PRisk', 'NPRisk', 'Risk']
    sentiment_cols = ['PSentiment', 'NPSentiment', 'Sentiment']
    # The `PRiskT_*` family of columns
    topic_cols = [
        'PRiskT_economic',
        'PRiskT_environment',
        'PRiskT_trade',
        'PRiskT_institutions',
        'PRiskT_health',
        'PRiskT_security',
        'PRiskT_tax',
        'PRiskT_technology',
    ]
    return [*id_cols, *risk_cols, *sentiment_cols, *topic_cols]

In [None]:
variables()

['gvkey',
 'date',
 'date_earningscall',
 'PRisk',
 'NPRisk',
 'Risk',
 'PSentiment',
 'NPSentiment',
 'Sentiment',
 'PRiskT_economic',
 'PRiskT_environment',
 'PRiskT_trade',
 'PRiskT_institutions',
 'PRiskT_health',
 'PRiskT_security',
 'PRiskT_tax',
 'PRiskT_technology']

In [None]:
#| export
def download(url: str=source_url().iloc[-1], # URL to the Stata (.dta) version of the dataset
            vars: list=variables(), # Which variables to download
            ) -> pd.DataFrame:
    """Download raw data from `url`, reading only the columns listed in `vars`."""

    # NOTE: both defaults are evaluated once, when this function is defined.
    # The default URL is taken with `.iloc[-1]` rather than `[-1]` because `source_url()` is
    # indexed by string labels, and pandas has deprecated treating integer keys as positions
    # in `Series.__getitem__`.
    return pd.read_stata(url, columns=vars)

In [None]:
#| eval: false
raw = download()

In [None]:
#| eval: false
raw

Unnamed: 0,gvkey,date,date_earningscall,PRisk,NPRisk,Risk,PSentiment,NPSentiment,Sentiment,PRiskT_economic,PRiskT_environment,PRiskT_trade,PRiskT_institutions,PRiskT_health,PRiskT_security,PRiskT_tax,PRiskT_technology
0,001004,2002-01-01,20-Mar-2002,359.550717,2928.601353,168.982351,997.864152,5550.580665,469.395419,9001.562500,6331.430176,8942.732422,3561.493408,6755.946777,4922.558105,4933.755371,9790.708984
1,001004,2002-04-01,27-Jun-2002,0.000000,0.000000,0.000000,1594.732147,-5656.607357,544.824170,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2,001004,2002-07-01,26-Sep-2002,0.000000,0.000000,0.000000,49.334494,-17818.418427,318.471338,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,001004,2003-07-01,17-Sep-2003,0.000000,0.000000,0.000000,2581.944092,81710.482710,1314.828342,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
4,001004,2003-10-01,18-Dec-2003,0.000000,298.871602,97.087379,2008.371500,81338.761583,1747.572816,2291.365969,2100.714942,963.631448,4163.550979,2500.242534,2388.684727,3250.849740,373.608619
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
354513,350693,2022-01-01,24-Mar-2022,362.077900,1571.748061,298.429999,827.052429,6040.433458,921.240431,11010.865959,12893.395890,8393.576628,7328.531710,9858.445806,19089.667607,14110.064871,10008.572481
354514,350952,2022-01-01,11-Feb-2022,100.109747,1126.731364,127.094165,-102.683598,10284.420640,103.986135,3208.910816,2725.445139,1281.570096,2255.610742,2541.676687,4076.329379,2006.074227,1512.204495
354515,351151,2021-10-01,27-Nov-2021,21.283859,351.120600,30.532740,1555.210132,5448.890864,953.670362,1842.126927,676.892778,589.398597,449.762148,1239.617338,1787.638938,1326.951725,1148.852056
354516,351491,2022-01-01,08-Feb-2022,221.078310,4575.797051,102.684465,728.881736,14539.567816,337.391815,2534.970901,1857.109647,440.773330,1775.376868,1583.013443,1746.418904,3334.864948,2178.284059


In [None]:
#| eval: false
raw.info(memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
Int64Index: 354518 entries, 0 to 354517
Data columns (total 17 columns):
 #   Column               Non-Null Count   Dtype         
---  ------               --------------   -----         
 0   gvkey                354518 non-null  object        
 1   date                 354518 non-null  datetime64[ns]
 2   date_earningscall    354518 non-null  object        
 3   PRisk                353437 non-null  float64       
 4   NPRisk               353437 non-null  float64       
 5   Risk                 353437 non-null  float64       
 6   PSentiment           353437 non-null  float64       
 7   NPSentiment          353437 non-null  float64       
 8   Sentiment            353437 non-null  float64       
 9   PRiskT_economic      353437 non-null  float64       
 10  PRiskT_environment   353437 non-null  float64       
 11  PRiskT_trade         353437 non-null  float64       
 12  PRiskT_institutions  353437 non-null  float64       
 13  PRiskT_health 

In [None]:
#| export
def clean(df: pd.DataFrame|None=None, # If None, will download using `download`
          gvkey_permno_link: bool|pd.DataFrame=True, # Whether to download permno or not. If DataFrame, must contain `permno`, `gvkey`, and `Qdate`
          how: str='inner' # How to merge permno into `df` if `gvkey_permno_link` is not False
          ) -> pd.DataFrame:
    """Converts `gvkey` to string and applies `pandasmore.setup_panel`. Adds `permno` if `gvkey_permno_link` is not False."""

    if df is None: df = download()
    else: df = df.copy()  # work on a copy so the caller's DataFrame is left untouched
    df['gvkey'] = df['gvkey'].astype('string')
    # NOTE(review): presumably this yields a (`gvkey`, `Qdate`) index, since `Qdate` is used
    # as a merge key after `reset_index()` below -- confirm against `pandasmore.setup_panel`.
    df = pdm.setup_panel(df, panel_ids='gvkey', time_var='date', freq='Q',
                        panel_ids_toint=False,
                        drop_index_duplicates=True, duplicates_which_keep='last')

    # A DataFrame has no unambiguous truth value (`not some_dataframe` raises ValueError),
    # so truthiness is only tested when the caller did not supply their own link table.
    link_supplied = isinstance(gvkey_permno_link, pd.DataFrame)
    if not link_supplied and not gvkey_permno_link: return df

    if gvkey_permno_link is True: gvkey_permno_link = wrds.linking.gvkey_permno_q()
    df = df.reset_index().merge(gvkey_permno_link, how=how, on=['gvkey','Qdate'])
    return pdm.setup_panel(df, panel_ids='permno', dates_processed=True, freq='Q',
                            drop_index_duplicates=True, duplicates_which_keep='last')

In [None]:
#| eval: false
df = clean(raw)

Loading library list...
Done


In [None]:
#| eval: false
df

Unnamed: 0_level_0,Unnamed: 1_level_0,date,dtdate,gvkey,date_earningscall,PRisk,NPRisk,Risk,PSentiment,NPSentiment,Sentiment,PRiskT_economic,PRiskT_environment,PRiskT_trade,PRiskT_institutions,PRiskT_health,PRiskT_security,PRiskT_tax,PRiskT_technology
permno,Qdate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
10001,2014Q2,2014-04-01,2014-04-01,012994,16-May-2014,94.384437,418.035478,85.836910,3737.937830,10709.556574,1688.125894,1472.467739,1427.332970,153.831442,834.154642,1036.247671,1329.046064,619.587873,953.820437
10001,2014Q3,2014-07-01,2014-07-01,012994,15-Aug-2014,52.685563,283.103019,22.547914,271.234092,-23341.508700,45.095829,461.149048,550.654417,37.415681,267.443077,263.191824,187.083329,26.115169,0.000000
10001,2014Q4,2014-10-01,2014-10-01,012994,14-Nov-2014,84.720984,505.826238,72.516316,1985.533735,-39785.791449,870.195794,741.550408,885.479455,60.166260,430.061656,423.225431,300.839218,41.994480,0.000000
10001,2015Q1,2015-01-01,2015-01-01,012994,13-Mar-2015,160.354858,679.804350,59.265113,1882.599045,14314.726235,1086.527064,13185.479350,2472.812723,522.973040,1988.293020,2451.243318,1962.271834,3618.889031,514.463599
10001,2015Q2,2015-04-01,2015-04-01,012994,12-May-2015,102.572640,326.618382,87.796313,94.565626,14089.660067,746.268657,897.803348,1072.059850,72.843962,520.680442,512.403748,364.229396,50.843185,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93436,2021Q1,2021-01-01,2021-01-01,184996,27-Jan-2021,70.162735,2423.955088,32.485111,1048.529357,8259.943838,1136.978885,1386.315319,1225.807277,174.758487,431.137670,817.046323,665.545915,224.107003,566.914117
93436,2021Q2,2021-04-01,2021-04-01,184996,26-Apr-2021,114.776578,1048.825388,79.051383,275.735391,27480.085621,564.652739,2340.898855,5368.546832,2355.194831,1962.508770,2233.169016,2623.382115,2770.032754,1947.769698
93436,2021Q3,2021-07-01,2021-07-01,184996,26-Jul-2021,81.427234,444.381153,94.573827,2018.630101,14960.662326,461.047405,3585.390369,3239.354051,1326.355721,1787.795332,2409.251732,3233.956798,4120.243627,2622.481604
93436,2021Q4,2021-10-01,2021-10-01,184996,20-Oct-2021,144.372651,1305.048152,152.936378,1277.480720,3896.593455,723.898858,6081.882833,4283.588251,2538.469967,2494.153322,6012.918395,5502.449838,6379.861869,3413.794078


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()