# Earnings Date Scraper
The code below was used to scrape the Nasdaq website's earnings calendar for the earnings report dates of companies from January 2012 to the present.
The data was saved to an Excel file, 'SymbolAndReportDates', with an 'Equities' column containing the ticker symbols of the reporting companies and a 'ReportDate' column containing the date of each report, as provided by Zacks.

In [None]:
import requests, bs4, re
import pandas as pd
from datetime import date, timedelta

# Walk the calendar one day at a time, starting at the first date of interest
# and stopping before today, collecting one (ticker, date) pair per company
# listed on that day's Nasdaq earnings-calendar page.
myDate = date(2012, 1, 3)
today = date.today()
plusOne = timedelta(days=1)
# Matches date strings shaped like "2012-Jan-03"; not referenced in the code
# visible here. Raw string so the backslashes reach the regex engine untouched.
datePattern = re.compile(r'\d{4}\-\D{3}\-\d{2}')
url_format = 'http://www.nasdaq.com/earnings/earnings-calendar.aspx?date=%s'
dateList = []
tickerList = []

# Ticker symbols appear as upper-case letters wrapped in parentheses, e.g.
# "(AAPL)". Compiled once here because the pattern is the same for every day.
tickPattern = re.compile(r'\([A-Z]+\)')

# Request Nasdaq.com Earnings Page for examination
while today > myDate:
    try:
        # Saturday (5) and Sunday (6) are never processed, so skip them
        # before spending a network request on the page.
        if myDate.weekday() in (5, 6):
            continue

        try:
            # NOTE: "%b" yields the locale's abbreviated month name.
            res = requests.get(url_format % myDate.strftime("%Y-%b-%d"))
        except requests.RequestException:
            # Best effort: a failed request only costs this one day.
            continue

        # HTTP error statuses are not swallowed; they abort the scrape.
        res.raise_for_status()
        NasSoup = bs4.BeautifulSoup(res.text, "html5lib")

        # Save companies reporting earnings to a list: elements that have an
        # id attribute and link to an earnings report. find_all returns an
        # empty result when nothing matches, so no error handling is needed.
        companies = NasSoup.find_all(href=re.compile("/earnings/report"), id=True)

        # Extract tickers
        for ticker in companies:
            matches = tickPattern.findall(str(ticker))
            if not matches:
                # No parenthesised symbol in this element; nothing to record.
                continue
            # The last parenthesised group is taken as the ticker symbol.
            tickerList.append(matches[-1].strip('()'))
            dateList.append(myDate)
    finally:
        # Single place where the date advances. `finally` also runs when the
        # body exits via `continue`, so the skip paths above must not
        # increment the date themselves or a day would be lost.
        myDate += plusOne

# tickerList and dateList are appended to in lockstep, so they line up row by row.
data = {'Equities': tickerList, 'ReportDate': dateList}
df = pd.DataFrame(data)