# Hassan, et al. (2019)

> Firm-level risk and sentiment derived from quarterly earnings conference calls

This module downloads and processes data developed by:

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, 2019, “Firm-Level Political Risk: Measurement and Effects,” Quarterly Journal of Economics, 134 (4), pp.2135-2202. <https://doi.org/10.1093/qje/qjz021>.

The dataset also contains data developed by the papers below:

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, "The Global Impact of Brexit Uncertainty," 2020, NBER Working Paper 26609

- Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Markus Schwedeler, Ahmed Tahoun, "Firm-level Exposure to Epidemic Diseases: Covid-19, SARS, and H1N1," 2020, mimeo London Business School. Available at SSRN: <https://ssrn.com/abstract=3566530>

See the authors' dedicated website for more information on this dataset: <https://www.firmlevelrisk.com/>

In [None]:
#| default_exp papers.hassan_etal_2019

In [None]:
#| export
from __future__ import annotations
import pandas as pd

import pandasmore as pdm
from finsets import wrds
from finsets.fetch_tools import get_text_file_from_url

In [None]:
#| exports
# Module-level metadata describing the dataset.

# Authors credited as the data provider
PROVIDER = 'Tarek A. Hassan, Stephan Hollander, Laurence van Lent, Ahmed Tahoun, 2019'
# Default download location of the raw firm-quarter file (default `url` of `list_all_vars` and `get_raw_data`)
URL = 'https://www.dropbox.com/s/96xo9f1twlu3525/firmquarter_2022q1.csv?raw=1'
# Authors' website for the dataset
HOST_WEBSITE = 'https://www.firmlevelrisk.com/'
# Frequency code of the panel ('Q'); also used below to build the name of the clean time variable
FREQ = 'Q'
# NOTE(review): presumably the first and last calendar years covered by the file at `URL` -- confirm when the file is updated
MIN_YEAR = 2002
MAX_YEAR = 2022
# Entity identifier column in the raw file and in the processed (permno-linked) dataset
ENTITY_ID_IN_RAW_DSET = 'gvkey'
ENTITY_ID_IN_CLEAN_DSET = 'permno'
# Time variable column in the raw file and in the processed dataset (evaluates to 'Qdate')
TIME_VAR_IN_RAW_DSET = 'date'
TIME_VAR_IN_CLEAN_DSET = f'{FREQ}date'

In [None]:
#| export
def list_all_vars(url: str=URL, # Location of the delimited text file
                  delimiter: str='\t'): # Field separator used in the file
    """Return a DataFrame with a single `name` column listing the columns of the file at `url`."""

    # Only one row is requested because just the column labels are used
    header_only = get_text_file_from_url(url, nrows=1, delimiter=delimiter)
    var_names = list(header_only.columns)

    return pd.DataFrame(var_names, columns=['name'])

In [None]:
#| eval: false
# List the variables available at the default URL and preview the first few
alv = list_all_vars()
alv.head()

Unnamed: 0,name
0,gvkey
1,date
2,PRisk
3,NPRisk
4,Risk


In [None]:
#| eval: false
# Print the complete list of variable names
print(list(alv.name))

['gvkey', 'date', 'PRisk', 'NPRisk', 'Risk', 'PSentiment', 'NPSentiment', 'Sentiment', 'PRiskT_economic', 'PRiskT_environment', 'PRiskT_trade', 'PRiskT_institutions', 'PRiskT_health', 'PRiskT_security', 'PRiskT_tax', 'PRiskT_technology', 'company_name', 'hqcountrycode', 'isin', 'cusip', 'ticker', 'date_earningscall', 'Covid_Exposure', 'Covid_Neg_Sentiment', 'Covid_Pos_Sentiment', 'Covid_Net_Sentiment', 'Covid_Risk', 'SARS_Exposure', 'H1N1_Exposure', 'Zika_Exposure', 'Ebola_Exposure', 'Brexit_Exposure', 'Brexit_Neg_Sentiment', 'Brexit_Pos_Sentiment', 'Brexit_Net_Sentiment', 'Brexit_Risk']


In [None]:
#| export
def get_raw_data(url: str=URL, # Location of the delimited text file
            nrows: int=None, # How many rows to download. If None, all rows are downloaded
            delimiter: str='\t' # Field separator used in the file
            ) -> pd.DataFrame:
    """Fetch the raw, unprocessed dataset from `url`, optionally limited to the first `nrows` rows."""

    raw_df = get_text_file_from_url(url, nrows=nrows, delimiter=delimiter)
    return raw_df

In [None]:
#| eval: false
# Request only a few rows (nrows=3) as a quick sample of the raw data
raw = get_raw_data(nrows=3)

In [None]:
#| eval: false
# Display the raw sample
raw

Unnamed: 0,gvkey,date,PRisk,NPRisk,Risk,PSentiment,NPSentiment,Sentiment,PRiskT_economic,PRiskT_environment,...,Covid_Risk,SARS_Exposure,H1N1_Exposure,Zika_Exposure,Ebola_Exposure,Brexit_Exposure,Brexit_Neg_Sentiment,Brexit_Pos_Sentiment,Brexit_Net_Sentiment,Brexit_Risk
0,1004,2002q1,359.55072,2928.6014,168.98235,997.86415,5550.5807,469.39542,9001.563,6331.43,...,0,0,0,0,0,,,,,
1,1004,2002q2,0.0,0.0,0.0,1594.7321,-5656.6074,544.82417,0.0,0.0,...,0,0,0,0,0,,,,,


In [None]:
#| export
def process_raw_data(
        df: pd.DataFrame=None, # Raw data
        gvkey_permno_link: bool|pd.DataFrame=True, # Whether to download permno or not. If DataFrame, must contain `permno`, `gvkey`, and `Qdate`
) -> pd.DataFrame:
    """Converts `gvkey` to string and applies `pandasmore.setup_panel`. Adds `permno` if `gvkey_permno_link` is not False.

    If `gvkey_permno_link` is falsy, the panel built on `gvkey` is returned.
    Otherwise the data is inner-merged with the link table on `gvkey` and `Qdate`
    (observations without a matching `permno` are dropped) and the panel is rebuilt on `permno`.
    Neither `df` nor a user-supplied link table is modified.

    Raises `ValueError` if `df` is None.
    """

    if df is None:
        raise ValueError("`df` must be a DataFrame containing the raw data; got None")

    df = df.copy()

    df['gvkey'] = df['gvkey'].astype('string').str.zfill(6).astype('category') #prepend 0's up to len 6
    df['date'] = df['date'].astype('string')

    # Format date variable so it can be converted into datetime (as the last day of the quarter)
    # Raw values look like '2002q1': characters 0-3 are the year, character 5 is the quarter number
    year = df['date'].str.slice(0, 4)
    quarter = df['date'].str.slice(5, 6).astype('int')

    last_month = (quarter * 3).astype('string').str.zfill(2)
    last_day = last_month.map({'03': '31', '06': '30', '09': '30', '12': '31'})

    df['date'] = year + '-' + last_month + '-' + last_day

    df = pdm.setup_panel(df, panel_ids='gvkey', 
                        time_var='date', freq='Q',
                        panel_ids_toint=False,
                        drop_index_duplicates=True, duplicates_which_keep='last')

    # `not <DataFrame>` raises ValueError (ambiguous truth value), so truthiness
    # is only evaluated when the argument is not a DataFrame
    link_supplied = isinstance(gvkey_permno_link, pd.DataFrame)
    if not link_supplied and not gvkey_permno_link: return df

    # Work on a copy of a user-supplied table so the caller's DataFrame is left untouched
    if link_supplied: link = gvkey_permno_link.copy()
    else: link = wrds.linking.gvkey_permno_q()

    # Same formatting as the `gvkey` in `df` (zero-padded to length 6) so the merge keys are comparable
    link['gvkey'] = link['gvkey'].astype('string').str.zfill(6).astype('category')
    df = df.reset_index().merge(link, how='inner', on=['gvkey','Qdate'])
    df['permno'] = df['permno'].astype('Int64').astype('category')

    return pdm.setup_panel(df, panel_ids='permno', dates_processed=True, freq='Q',
                            panel_ids_toint=False,
                            drop_index_duplicates=True, duplicates_which_keep='last')

In [None]:
#| eval: false
# Process the raw sample without adding `permno` (gvkey_permno_link=False returns before any link table is fetched)
df = process_raw_data(raw, gvkey_permno_link=False)

In [None]:
#| eval: false
# Display the processed sample
df

Unnamed: 0_level_0,Unnamed: 1_level_0,date,dtdate,PRisk,NPRisk,Risk,PSentiment,NPSentiment,Sentiment,PRiskT_economic,PRiskT_environment,...,Covid_Risk,SARS_Exposure,H1N1_Exposure,Zika_Exposure,Ebola_Exposure,Brexit_Exposure,Brexit_Neg_Sentiment,Brexit_Pos_Sentiment,Brexit_Net_Sentiment,Brexit_Risk
gvkey,Qdate,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
1004,2002Q1,2002-03-31,2002-03-31,359.55072,2928.6014,168.98235,997.86415,5550.5807,469.39542,9001.563,6331.43,...,0,0,0,0,0,,,,,
1004,2002Q2,2002-06-30,2002-06-30,0.0,0.0,0.0,1594.7321,-5656.6074,544.82417,0.0,0.0,...,0,0,0,0,0,,,,,


In [None]:
#| hide
import nbdev; nbdev.nbdev_export()