# Analyst EPS Forecast
*[Source](https://wrds-web.wharton.upenn.edu/wrds//ds/ibes/det/index.cfm)*


In [1]:
# Notebook identifier; used below as the name of this notebook's folder
# under '2_pipeline'.
NAME = '06-03_analysts_eps_forecast'
# Project folder name; used below to locate the project root inside the
# current working directory.
PROJECT = 'conference-calls-sentiment'
# Recorded for reference only; not read anywhere in the visible code.
PYTHON_VERSION = '3.7.0'

### Imports  

In [2]:
import os
import re
import numpy as np
import pandas as pd

### Settings

In [3]:
# Switch to the project root: keep the current path up to and including the
# first occurrence of PROJECT and strip everything after it. If PROJECT does
# not occur in the path, the working directory is left unchanged.
# The pattern is a raw string (so "\w" is not an invalid string escape) and
# PROJECT is escaped so it is always matched literally.
workdir = re.sub(r"(?<={})[\w\W]*".format(re.escape(PROJECT)), "", os.getcwd())
os.chdir(workdir)

# Folders this notebook writes to. exist_ok=True makes the cell safe to
# re-run and also creates any sub-folder that is missing when the pipeline
# folder itself already exists (makedirs creates the parent as needed).
pipeline = os.path.join('2_pipeline', NAME)
for folder in ['out', 'store', 'tmp']:
    os.makedirs(os.path.join(pipeline, folder), exist_ok=True)

---
# Main Code 

In [None]:
# Raw column name in the CSV -> column name used in the rest of the notebook.
# The same mapping selects which columns are read (usecols) and is later
# passed to rename().
cols = {
    'OFTIC': 'ticker',
    'TICKER': 'ibes_ticker',
    'CNAME': 'coname',
    'ESTIMATOR': 'brokerage_id',
    'VALUE': 'eps_forecast',
    'ANALYS': 'analyst_id',
    'ANNDATS': 'date',
    'ANNTIMS': 'time',
}

ibes_csv = os.path.join('0_data', 'ibes', 'ibes_eps-forecast_2000-2020.csv.gz')
eps_forecast_raw = pd.read_csv(ibes_csv, usecols=cols, encoding='latin-1')

eps_forecast_raw.head()

## Data manipulation

In [5]:
def add_previous_valuation(df):
    """Add the previous forecast of the same analyst for the same ticker.

    Rows are lagged by one position within each ``(ticker, analyst_id)``
    group, so the caller decides what "previous" means through the row
    order of *df*.

    The columns ``prev_forecast``, ``prev_date``, ``days_since_prev``,
    ``eps_change`` and ``eps_sentiment`` are added to *df* in place, and the
    same object is returned so the function can be used with ``.pipe``.

    ``eps_sentiment`` is 1 for a positive change, 0 for no change and -1
    otherwise. Rows without a previous forecast have a NaN change and
    therefore also get -1.
    """
    lagged = df.groupby(['ticker', 'analyst_id'])[['eps_forecast', 'date']].shift(1)
    df['prev_forecast'] = lagged['eps_forecast']
    df['prev_date'] = lagged['date']
    df['days_since_prev'] = df['date'] - df['prev_date']

    change = df['eps_forecast'] - df['prev_forecast']
    df['eps_change'] = change
    # NaN compares False against both conditions and falls through to -1.
    df['eps_sentiment'] = np.where(change == 0, 0, np.where(change > 0, 1, -1))
    return df

In [None]:
# Build the forecast-revision table: one row per ticker/analyst/date with the
# change relative to the same analyst's previous forecast for that ticker.
eps_forecast = (eps_forecast_raw
                .copy()
                .rename(columns=cols)
                .assign(date=lambda x: pd.to_datetime(x['date'], format=r'%Y%m%d'))
                # Sorting first makes the lag inside add_previous_valuation
                # chronological and fixes which same-day row is kept below.
                .sort_values(['ticker', 'analyst_id', 'date', 'time'])
                # Keep the first row per ticker/analyst/date in sorted order.
                .drop_duplicates(['ticker', 'analyst_id', 'date'])
                .pipe(add_previous_valuation)
                # Drops rows with a missing value in ANY column; this includes
                # rows without a previous forecast (prev_forecast is NaN).
                .dropna()
                .reset_index(drop=True)
                # 'brokerage_id' was misspelled as 'brokarage_id'; filter()
                # silently ignores unknown labels, so the column was lost.
                .filter(['ticker', 'coname', 'brokerage_id', 'analyst_id', 'date',
                         'days_since_prev', 'prev_forecast', 'eps_forecast',
                         'eps_change', 'eps_sentiment']))

eps_forecast.head()

## Map `analyst_id` to name
*Add analyst name*

In [None]:
# Analyst mapping stored by the '03-01_ibes_process_recommendations' step.
mapping_path = os.path.join('2_pipeline', '03-01_ibes_process_recommendations',
                            'store', 'analyst_mapping.feather')
analyst_mapping = pd.read_feather(mapping_path)

# Inner join (pandas default): forecasts without a mapping row are dropped.
eps_forecast_names = pd.merge(eps_forecast, analyst_mapping, on=['analyst_id', 'ticker'])
eps_forecast_names

## Match analysts' forecasts with conference call transcripts

In [None]:
# Transcripts written by the '02-02_conference_calls_preprocess' step.
cc_path = os.path.join('2_pipeline', '02-02_conference_calls_preprocess',
                       'out', 'cc_transcripts.feather')
cc_columns = ['gvkey', 'ticker', 'event_date',
              'speaker_role', 'speaker_name', 'speaker_firm']

cc = pd.read_feather(cc_path)
# Keep only the speaker columns, restrict to analysts, and collapse repeated
# rows so each distinct combination of the kept columns appears once.
cc = cc.filter(cc_columns)
cc = cc.query("speaker_role == 'Analyst'")
cc = cc.drop_duplicates()
cc = cc.reset_index(drop=True)
cc

In [9]:
# Merge to transcripts: pair each forecast with the calls of the same ticker
# in which the forecasting analyst appears as a speaker.
dedup_keys = ['gvkey', 'analyst', 'speaker_firm', 'date']

matched = eps_forecast_names.merge(cc,
                                   left_on=['ticker', 'analyst'],
                                   right_on=['ticker', 'speaker_name'])
matched = matched.assign(days_between=matched['date'] - matched['event_date'])

# Keep forecasts dated 0 to 20 days after the call. When one forecast matches
# several calls, the sort puts the smallest gap first and that row is kept.
eps_forecast_cc = (matched
                   .query('0 <= days_between.dt.days <= 20')
                   .sort_values(dedup_keys + ['days_between'])
                   .drop_duplicates(dedup_keys, keep='first'))

len(eps_forecast_cc)

130738

## EPS forecast sentiment by analysts

In [None]:
# Columns kept in the analyst-level output; the index is reset before
# selecting them.
sentiment_columns = ['gvkey', 'event_date', 'analyst', 'eps_change', 'eps_sentiment']
eps_forecast_sentiment_analyst = (eps_forecast_cc.copy()
                                  .reset_index(drop=True)
                                  .filter(sentiment_columns))
eps_forecast_sentiment_analyst

In [11]:
# Write the analyst-level result to this notebook's 'out' folder.
out_path = os.path.join(pipeline, 'out', 'eps_forecast_analyst.feather')
eps_forecast_sentiment_analyst.to_feather(out_path)