# Analyst Recommendations
*[Source](https://wrds-web.wharton.upenn.edu/wrds//ds/ibes/recddet/index.cfm)*

In [6]:
# Notebook identifier; also used as this notebook's folder name under 2_pipeline/.
NAME = '06-01_analysts_recommendations'
# Project folder name; used to locate the project root from the current working directory.
PROJECT = 'conference-calls-sentiment'
# NOTE(review): not referenced in the visible code — presumably records the interpreter version used; confirm.
PYTHON_VERSION = '3.7.0'

### Imports  

In [7]:
import os
import re
import numpy as np
import pandas as pd

### Settings

In [8]:
# Move to the project root: drop everything that follows the first occurrence
# of the project folder name in the current working directory.
# The pattern is a raw string (so `\w`/`\W` are not treated as string escapes)
# and PROJECT is escaped so it is always matched literally.
workdir = re.sub(r"(?<={})[\w\W]*".format(re.escape(PROJECT)), "", os.getcwd())
os.chdir(workdir)

# Per-notebook pipeline folder with its 'out', 'store' and 'tmp' subfolders.
# `exist_ok=True` makes this idempotent and also recreates a subfolder that
# is missing from an already existing pipeline folder.
pipeline = os.path.join('2_pipeline', NAME)
for folder in ['out', 'store', 'tmp']:
    os.makedirs(os.path.join(pipeline, folder), exist_ok=True)

---
# Main Code 

In [None]:
# Raw I/B/E/S column name -> column name used in the rest of this notebook.
cols = {
    'OFTIC': 'ticker',
    'TICKER': 'ibes_ticker',
    'CNAME': 'coname',
    'ESTIMID': 'brokerage',
    'ANALYST': 'analyst',
    'IRECCD': 'rating',
    'EMASKCD': 'brokerage_id',
    'AMASKCD': 'analyst_id',
    'ANNDATS': 'date',
    'ANNTIMS': 'time',
}

# Read only the columns named by the keys of `cols`.
recommendations_raw = pd.read_csv(
    os.path.join('0_data', 'ibes', 'ibes_recommendations_2000-2020.csv.gz'),
    usecols=list(cols),
    encoding='latin-1',
)

recommendations_raw.head()

## Data manipulation
*Ratings (`rating`):*

*1. Strong Buy;*
*2. Buy;*
*3. Hold;*
*4. Underperform;*
*5. Sell*

In [10]:
def add_previous_valuation(df):
    """Add previous-rating columns and the derived rating change to ``df``.

    Rows are grouped by ``ticker``, ``brokerage`` and ``analyst``; within each
    group the rating and date of the preceding row (in the frame's current row
    order) are attached, so ``df`` should already be sorted chronologically.

    Columns added (``df`` is modified in place and also returned):

    * ``prev_rating``, ``prev_date`` -- rating/date of the preceding row in
      the group; missing for the first row of each group.
    * ``days_since_prev`` -- ``date - prev_date``.
    * ``rating_change`` -- ``-(rating - prev_rating)``; positive means the
      rating improved, because a lower rating code is better.
    * ``rating_sentiment`` -- 1 if ``rating_change > 0``, 0 if it equals 0,
      otherwise -1. Rows with a missing ``rating_change`` therefore also get
      -1, so callers should drop rows without a previous rating.
    """
    per_analyst = df.groupby(['ticker', 'brokerage', 'analyst'])
    df[['prev_rating', 'prev_date']] = per_analyst[['rating', 'date']].shift(1)
    df['days_since_prev'] = df['date'] - df['prev_date']

    worsening = df['rating'] - df['prev_rating']
    df['rating_change'] = -worsening  # Lower rating is better

    change = df['rating_change']
    df['rating_sentiment'] = np.where(
        change > 0, 1,
        np.where(change == 0, 0, -1),
    )
    return df

In [None]:
# Clean the raw I/B/E/S recommendations and attach each analyst's previous rating.
recommendations = (recommendations_raw
                   .dropna()  # Returns a new frame, so the raw data is left untouched
                   .rename(columns=cols)
                   .assign(
                       date=lambda x: pd.to_datetime(x['date'], format=r'%Y%m%d'),
                       analyst=lambda x: x['analyst'].apply(lambda a: ' '.join(a.split()))  # Remove extra whitespaces between analyst names
                       )
                   # Keep one recommendation per ticker/analyst/day: the earliest by time
                   .sort_values(['ticker', 'analyst_id', 'date', 'time'])
                   .drop_duplicates(['ticker', 'analyst_id', 'date'])
                   .pipe(add_previous_valuation)
                   # Drops the rows that have no previous rating to compare against
                   .dropna()
                   .reset_index(drop=True)
                   # 'brokerage_id' was previously misspelled here; `filter` ignores
                   # unknown labels, so the column was silently dropped.
                   .filter(['ticker', 'coname', 'brokerage', 'brokerage_id',
                            'analyst', 'analyst_id', 'date', 'days_since_prev',
                            'prev_rating', 'rating', 'rating_change', 'rating_sentiment']))

recommendations.head()

In [12]:
# Frequency of each rating change; positive values are upgrades because lower rating codes are better.
recommendations['rating_change'].value_counts()

-1.0    28157
 1.0    25661
-0.0    23552
-2.0    13455
 2.0    12618
-3.0      351
 3.0      291
-4.0      204
 4.0      189
Name: rating_change, dtype: int64

In [7]:
# Frequency of each sentiment label: 1 = rating improved, -1 = rating worsened, 0 = unchanged.
recommendations['rating_sentiment'].value_counts()

-1    42167
 1    38759
 0    23552
Name: rating_sentiment, dtype: int64

## Save `analyst_id`-`analyst` mapping
*The I/B/E/S EPS forecast files do not include analyst names. I use the analyst-ID-to-name mapping from the recommendations file to attach an analyst name to each analyst ID in the EPS forecasts.*

In [13]:
# One row per (analyst_id, ticker) pair, keeping the first analyst name seen.
analyst_mapping = (
    recommendations
    .filter(items=['analyst', 'analyst_id', 'ticker'])
    .drop_duplicates(subset=['analyst_id', 'ticker'])
    .reset_index(drop=True)
)

# Feather requires a default index, hence the reset above.
analyst_mapping.to_feather(os.path.join(pipeline, 'store', 'analyst_mapping.feather'))

## Match analysts' ratings with conference call transcripts

In [None]:
# Conference-call speakers: keep the identifying columns, analysts only, one row per distinct combination.
cc = (
    pd.read_feather(os.path.join('2_pipeline', '02-02_conference_calls_preprocess', 'out', 'cc_transcripts.feather'))
    .filter(items=['gvkey', 'ticker', 'event_date', 'speaker_name', 'speaker_firm', 'speaker_role'])
    .query("speaker_role == 'Analyst'")
    .drop_duplicates()
    .reset_index(drop=True)
)
cc

In [15]:
# Merge to transcripts
# Pair each recommendation with the calls where the same analyst spoke for the
# same ticker, then keep recommendations dated 0 to 20 days after the call.
recommendations_cc = (
    recommendations
    .merge(cc, left_on=['ticker', 'analyst'], right_on=['ticker', 'speaker_name'])
    .assign(days_between=lambda x: x['date'] - x['event_date'])
    .query('0 <= days_between.dt.days <= 20')
    # When several calls qualify, keep the one closest to the recommendation date
    .sort_values(by=['gvkey', 'analyst', 'brokerage', 'date', 'days_between'])
    .drop_duplicates(subset=['gvkey', 'analyst', 'brokerage', 'date'], keep='first')
)

len(recommendations_cc)

6568

## Recommendations sentiment by analysts

In [None]:
# Keep only the call identifiers, the analyst and the rating change/sentiment, with a fresh default index.
recommendations_sentiment_analyst = (
    recommendations_cc
    .filter(items=['gvkey', 'event_date', 'analyst', 'rating_change', 'rating_sentiment'])
    .reset_index(drop=True)
)
recommendations_sentiment_analyst

In [17]:
# Write the per-analyst recommendation sentiment to this notebook's 'out' folder.
recommendations_sentiment_analyst.to_feather(os.path.join(pipeline, 'out', 'recommendations_analyst.feather'))