In [1]:
'''
Yahoo! Earnings Calendar scraper
'''
import datetime
import json
import logging
import requests
import pdb

# Entry pages scraped by YahooEarningsCalendar.
BASE_URL = 'https://finance.yahoo.com/calendar/earnings'
BASE_STOCK_URL = 'https://finance.yahoo.com/quote'

# Logging config
# The handler is attached to the root logger, which outlives this cell. It is
# tagged with a name so that running the cell again reuses the tagged handler
# instead of stacking another one (every extra handler prints each record
# one more time).
LOG_HANDLER_NAME = 'yahoo_earnings_calendar'
logger = logging.getLogger()
formatter = logging.Formatter(
    '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
handler = next(
    (h for h in logger.handlers
     if getattr(h, 'name', None) == LOG_HANDLER_NAME),
    None)
if handler is None:
    handler = logging.StreamHandler()
    handler.name = LOG_HANDLER_NAME
    logger.addHandler(handler)
handler.setFormatter(formatter)
# Only errors are shown by default; lower the level to see the debug messages.
logger.setLevel(logging.ERROR)


class YahooEarningsCalendar(object):
    """
    This is the class for fetching earnings data from Yahoo! Finance

    Every public method downloads a Yahoo! Finance page, extracts the JSON
    value assigned to ``root.App.main`` in that page and reads the requested
    entries out of it.
    """

    # Seconds to wait for Yahoo! before giving up; without a timeout
    # requests.get can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 30

    # Prefix of the page line that carries the embedded JSON data.
    _DATA_MARKER = 'root.App.main = '

    def _get_data_dict(self, url):
        """Downloads url and returns the JSON data embedded in the page.
        Args:
            url: Address of the page to fetch.
        Returns:
            The decoded JSON value assigned to root.App.main in the page.
        Raises:
            IndexError: When no line of the page starts with 'root.App.main = '.
            ValueError: When the page is not valid UTF-8 or the embedded text
                is not valid JSON.
        """
        page = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        page_content = page.content.decode(encoding='utf-8', errors='strict')
        data_row = next(
            (row for row in page_content.split('\n')
             if row.startswith(self._DATA_MARKER)),
            None)
        if data_row is None:
            # Same exception type as indexing an empty match list, but with a
            # message that says what was actually missing.
            raise IndexError(
                "No line starting with 'root.App.main = ' in {0}".format(url))
        page_data_string = data_row.split(self._DATA_MARKER, 1)[1].rstrip()
        # The assignment normally ends with ';'. Drop it only when present so
        # a trailing '\r' or a missing terminator cannot corrupt the JSON.
        if page_data_string.endswith(';'):
            page_data_string = page_data_string[:-1]
        return json.loads(page_data_string)

    def get_next_earnings_date(self, symbol):
        """Gets the next earnings date of symbol
        Args:
            symbol: A ticker symbol
        Returns:
            Unix timestamp of the next earnings date
        Raises:
            Exception: When symbol is invalid or earnings date is not available
        """
        url = '{0}/{1}'.format(BASE_STOCK_URL, symbol)
        try:
            page_data_dict = self._get_data_dict(url)
            stores = page_data_dict['context']['dispatcher']['stores']
            earnings = stores['QuoteSummaryStore']['calendarEvents']['earnings']
            return earnings['earningsDate'][0]['raw']
        except Exception as exc:
            # 'except Exception' lets KeyboardInterrupt/SystemExit through,
            # and chaining keeps the underlying cause in the traceback.
            raise Exception(
                'Invalid Symbol or Unavailable Earnings Date') from exc

    def earnings_on(self, date):
        """Gets earnings calendar data from Yahoo! on a specific date.
        Args:
            date: A datetime.date instance representing the date of earnings data to be fetched.
        Returns:
            An array of earnings calendar data on date given. E.g.,
            [
                {
                    "ticker": "AMS.S",
                    "companyshortname": "Ams AG",
                    "startdatetime": "2017-04-23T20:00:00.000-04:00",
                    "startdatetimetype": "TAS",
                    "epsestimate": null,
                    "epsactual": null,
                    "epssurprisepct": null,
                    "gmtOffsetMilliSeconds": 72000000
                },
                ...
            ]
        Raises:
            TypeError: When date is not a datetime.date object.
        """
        if not isinstance(date, datetime.date):
            raise TypeError(
                'Date should be a datetime.date object')
        date_str = date.strftime('%Y-%m-%d')
        logger.debug('Fetching earnings data for %s', date_str)
        dated_url = '{0}?day={1}'.format(BASE_URL, date_str)
        page_data_dict = self._get_data_dict(dated_url)
        stores = page_data_dict['context']['dispatcher']['stores']
        return stores['ScreenerResultsStore']['results']['rows']

    def earnings_between(self, from_date, to_date):
        """Gets earnings calendar data from Yahoo! in a date range.
        Args:
            from_date: A datetime.date instance representing the from-date (inclusive).
            to_date: A datetime.date instance representing the to-date (inclusive).
        Returns:
            An array of earnings calendar data of date range. E.g.,
            [
                {
                    "ticker": "AMS.S",
                    "companyshortname": "Ams AG",
                    "startdatetime": "2017-04-23T20:00:00.000-04:00",
                    "startdatetimetype": "TAS",
                    "epsestimate": null,
                    "epsactual": null,
                    "epssurprisepct": null,
                    "gmtOffsetMilliSeconds": 72000000
                },
                ...
            ]
        Raises:
            ValueError: When from_date is after to_date.
            TypeError: When either from_date or to_date is not a datetime.date object.
        """
        # Types are checked before the comparison; otherwise non-date
        # arguments (e.g. two strings) could be reported as a ValueError or
        # fail inside '>' with an unrelated message.
        if not (isinstance(from_date, datetime.date) and
                isinstance(to_date, datetime.date)):
            raise TypeError(
                'From-date and to-date should be datetime.date objects')
        if from_date > to_date:
            raise ValueError(
                'From-date should not be after to-date')
        earnings_data = []
        current_date = from_date
        delta = datetime.timedelta(days=1)
        # One page request per calendar day, both ends included.
        while current_date <= to_date:
            earnings_data += self.earnings_on(current_date)
            current_date += delta
        return earnings_data


if __name__ == '__main__':
    # Sample inputs for trying the scraper by hand: the two ends of a date
    # range (both datetime.datetime values) and a scraper instance.
    date_from, date_to = (
        datetime.datetime.strptime(stamp, '%b %d %Y %I:%M%p')
        for stamp in ('May 5 2018  10:00AM', 'May 8 2018  1:00PM'))
    yec = YahooEarningsCalendar()

    # Example calls, left disabled:
    #     logging.info(yec.earnings_on(date_from))
    #     logging.info(yec.earnings_between(date_from, date_to))
    #     # Returns the next earnings date of BOX in Unix timestamp
    #     logging.info(yec.get_next_earnings_date('box'))


In [None]:
import pdb; 
def earnings_on(date):
    """Gets earnings calendar data from Yahoo! on a specific date.

    Module-level wrapper around YahooEarningsCalendar.earnings_on for callers
    that do not hold a scraper instance.
    Args:
        date: A datetime.date instance representing the date of earnings data to be fetched.
    Returns:
        An array of earnings calendar data on date given. E.g.,
        [
            {
                "ticker": "AMS.S",
                "companyshortname": "Ams AG",
                "startdatetime": "2017-04-23T20:00:00.000-04:00",
                "startdatetimetype": "TAS",
                "epsestimate": null,
                "epsactual": null,
                "epssurprisepct": null,
                "gmtOffsetMilliSeconds": 72000000
            },
            ...
        ]
    Raises:
        TypeError: When date is not a datetime.date object.
    """
    # Checked here as well so that a bad argument fails before a scraper
    # object is created.
    if not isinstance(date, datetime.date):
        raise TypeError(
            'Date should be a datetime.date object')
    # A plain function has no 'self'; the fetching logic lives on the class,
    # so delegate to a fresh instance instead of duplicating it.
    return YahooEarningsCalendar().earnings_on(date)

In [8]:

# Date range (datetime.datetime values) and scraper instance for the cells
# that follow.
date_from, date_to = (
    datetime.datetime.strptime(stamp, '%b %d %Y %I:%M%p')
    for stamp in ('Sep 5 2018  10:00AM', 'Sep 8 2018  1:00PM'))
yec = YahooEarningsCalendar()
# yec.earnings_on(date_from)

In [None]:
# Echo the scraper instance so its repr is shown as the cell output.
yec
# Disabled range query:
# yec.earnings_between(date_from, date_to)

In [4]:
# Look up the next earnings date for ticker 'RH'; the method's docstring
# describes the result as a Unix timestamp.
yec.get_next_earnings_date('RH')

1536019200

In [9]:
# NOTE(review): newdate is not used by the call below — confirm whether a
# later cell still needs it.
newdate = '2018-05-09'
# Fetch the earnings rows for date_from, which is set in an earlier cell.
yec.earnings_on(date=date_from)

[{'ticker': 'AVAV',
  'companyshortname': 'AeroVironment Inc',
  'startdatetime': '2018-09-05T16:00:00.000-04:00',
  'startdatetimetype': 'AMC',
  'epsestimate': 0.29,
  'epsactual': None,
  'epssurprisepct': None,
  'gmtOffsetMilliSeconds': -14400000},
 {'ticker': 'CBK',
  'companyshortname': 'Christopher & Banks Corp',
  'startdatetime': '2018-09-05T08:30:00.000-04:00',
  'startdatetimetype': 'BMO',
  'epsestimate': -0.15,
  'epsactual': None,
  'epssurprisepct': None,
  'gmtOffsetMilliSeconds': -14400000},
 {'ticker': 'OLLI',
  'companyshortname': "Ollie's Bargain Outlet Holdings Inc",
  'startdatetime': '2018-09-05T12:30:00.000-04:00',
  'startdatetimetype': 'TAS',
  'epsestimate': 0.36,
  'epsactual': None,
  'epssurprisepct': None,
  'gmtOffsetMilliSeconds': -14400000},
 {'ticker': 'VRA',
  'companyshortname': 'Vera Bradley Inc',
  'startdatetime': '2018-09-05T04:00:00.000-04:00',
  'startdatetimetype': 'TAS',
  'epsestimate': 0.16,
  'epsactual': None,
  'epssurprisepct': None,
