In [1]:
""" 
estimize_API_daily

This file helps build and collect EA dates dataset 
moving forward in time, to complement the backtest files. 

Note that all tickers here may be backfilled. 

This code seeks to translate the following API query: 
curl -H "X-Estimize-Key: API_KEY_HERE" 
'api.estimize.com/estimates?start_date=2018-04-06&end_date=2018-04-07'

"""

# Package imports
import requests
from dateutil.relativedelta import relativedelta
import pandas as pd
from datetime import date, timedelta
import datetime
import time
from optparse import OptionParser
import json
import sqlite3
import numpy as np
from datetime import datetime
import time
from optparse import OptionParser
import os
import io

# Setup for API
url = "http://api.estimize.com"
# SECURITY (FIXME): an API key is a credential and should not live in the
# notebook. Set the ESTIMIZE_API_KEY environment variable to supply it; the
# literal below is kept only as a fallback so existing runs keep working and
# should be rotated and removed.
headers = {"X-Estimize-Key" : os.environ.get("ESTIMIZE_API_KEY", "ca2bed82074413cd06d2711f"),
           "content-type" : "application/json"}

# Define the start date and the span (a timedelta, i.e. a number of days)
# between the start date and today
start_date = date(2022, 1, 1)      # start this year! 
end_date = date.today() 
delta = end_date - start_date      # returns timedelta
list_ticker_dfs = []

In [2]:
"""
Query tickers first

"""
def get_tickers(ticker, start_date, end_date):
    """
    Overview: queries the /companies/<ticker> endpoint and returns the decoded JSON body
    Required params:
        -ticker: The ticker/symbol appended to the /companies/ path
        -start_date: Sent as the "start_date" query parameter
        -end_date: Sent as the "end_date" query parameter
    Optional params: None
    Response:
        -The decoded JSON of the response when the HTTP status is 200
        -None for any other HTTP status
    Notes: uses the module-level `url` and `headers`
    """
    endpoint = url + ("/companies/%s" % ticker)
    query = {"start_date" : start_date, "end_date" : end_date}
    response = requests.get(endpoint, params = query, headers=headers)
    # Only a 200 response carries a usable payload
    if response.status_code == 200:
        return response.json()
    return None

# Loop over the date range, make sure to input one day at a time.
# The list is rebuilt here so that re-running this cell does not append the
# same daily frames a second time.
list_ticker_dfs = []
for i in range(delta.days + 1):
    day = start_date + timedelta(days=i)
    endday = day + timedelta(days = 1)
    companies = get_tickers("", str(day), str(endday))
    if companies is None:
        # get_tickers returns None for any non-200 response; skip that day
        print("No company data returned for ", day, sep="")
        continue
    temp1 = pd.DataFrame(companies)
    temp1['asofdate'] = day
    list_ticker_dfs.append(temp1)
    
# Get all unique tickers afterwards.
# Each daily frame carries its own 0..n-1 index, so the index is rebuilt after
# concatenating and again after de-duplicating; otherwise it has repeated and
# missing labels and cannot be used for lookups by row number.
unique_tickers = pd.concat(list_ticker_dfs, ignore_index=True)
unique_tickers = unique_tickers.drop_duplicates(['ticker']).reset_index(drop=True)
unique_tickers

Unnamed: 0,name,ticker,cusip,asofdate
0,Xoom Corporation,XOOM,98419Q101,2022-01-01
1,Forrester Research Inc.,FORR,346563109,2022-01-01
2,Cotiviti Holdings Inc.,COTV,22164K101,2022-01-01
3,eBay Inc.,EBAY,278642103,2022-01-01
4,Ormat Technologies Inc.,ORA,686688102,2022-01-01
...,...,...,...,...
3318,"SK Telecom Co., Ltd.",SKM,78440P306,2022-01-01
3319,MGM Resorts International,MGM,552953101,2022-01-01
3320,Twilio Inc,TWLO,90138F102,2022-01-01
3321,"Uber Technologies, Inc.",UBER,90353T100,2022-01-01


In [3]:
"""
Look through exchange tickers to get event id's 

"""

def get_release_id(ticker):
    """
    Overview: queries /companies/<ticker>/releases and returns the releases as a DataFrame
    Required params:
        -ticker: The ticker/symbol of the company whose releases are requested
    Optional params: None
    Response:
        -A DataFrame built from the JSON body when the HTTP status is 200
        -None for any other HTTP status
    Notes: uses the module-level `url` and `headers`; the frames displayed in
    this notebook carry columns such as fiscal_year, fiscal_quarter, eps,
    revenue, the wallstreet_* and consensus_* estimates, release_date and id
    """
    endpoint = url + ("/companies/%s/releases" % ticker)
    response = requests.get(endpoint, headers=headers)
    # Only a 200 response carries a usable payload
    if response.status_code == 200:
        return pd.DataFrame(response.json())
    return None

# Save information (fiscal year-quarter, final estimates, id and release date)
list_release_ids = []

# Get release id's for unique tickers from above.
# Iterate over the ticker values themselves: looking them up with
# unique_tickers.ticker[i] is a label lookup, and the index of unique_tickers
# is not guaranteed to be a contiguous 0..n-1 range after concat/drop_duplicates.
for tic in unique_tickers['ticker']:
    # get_release_id returns None on a non-200 response; pd.DataFrame(None)
    # turns that into an empty frame so the concat below still works
    temp2 = pd.DataFrame(get_release_id(tic))
    temp2['ticker'] = tic
    list_release_ids.append(temp2)
    
# Join together in data frame 
tics_event_ids = pd.concat(list_release_ids)
tics_event_ids

Unnamed: 0,fiscal_year,fiscal_quarter,eps,revenue,consensus_eps_estimate,consensus_revenue_estimate,wallstreet_revenue_estimate,wallstreet_eps_estimate,consensus_weighted_revenue_estimate,consensus_weighted_eps_estimate,release_date,id,ticker
0,2013.0,2.0,0.14,33.49,,,,-0.07,,,2013-07-24T12:00:00-04:00,125752,XOOM
1,2013.0,3.0,0.06,32.28,,,,-0.01,,,2013-10-22T12:00:00-04:00,125753,XOOM
2,2013.0,4.0,0.06,32.12,,,,-0.01,,,2014-02-04T11:00:00-05:00,125754,XOOM
3,2014.0,1.0,0.06,35.94,,,,-0.04,,,2014-04-22T12:00:00-04:00,125755,XOOM
4,2014.0,2.0,0.09,39.84,,,,0.05,,,2014-07-22T12:00:00-04:00,125756,XOOM
...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,2023.0,4.0,,,,,2045.83281,1.490096,,,2024-02-07T06:00:00-05:00,210118,YUM
98,2022.0,2.0,1.05,1636.00,1.097273,1637.8,1643.390199,1.092942,1638.284288,1.102351,2022-08-03T06:00:00-04:00,191944,YUM
99,2024.0,2.0,,,,,1868.714727,1.438375,,,2024-07-31T06:00:00-04:00,216586,YUM
100,2022.0,1.0,1.05,1547.00,1.15,1623.857143,1591.551156,1.069789,1629.390063,1.143953,2022-05-04T06:00:00-04:00,188841,YUM


In [None]:
# Translate the statement: curl -H "X-Estimize-Key: <API key>"
url = "http://api.estimize.com"
# SECURITY (FIXME): an API key is a credential and should not live in the
# notebook. Set the ESTIMIZE_API_KEY environment variable to supply it; the
# literal below is kept only as a fallback so existing runs keep working and
# should be rotated and removed.
headers = {"X-Estimize-Key" : os.environ.get("ESTIMIZE_API_KEY", "ca2bed82074413cd06d2711f"), 
           "content-type" : "application/json", "accept-encoding": "gzip, deflate",}

df = pd.DataFrame()
# Accumulates the ticker / release id pairs whose response had to be repaired
reinvestigate_ws = pd.DataFrame()

# API Request for Wall Street Estimates 
def get_ws_est_eps_hist(ticker, release_id, max_trials=3, retry_wait=60, timeout=10):
    """
    Overview: requests /releases/<release_id>/consensus?type=wallstreet and returns
    the ['wallstreet']['eps']['revisions'] part of the response as a DataFrame
    Required params:
        -ticker: The ticker/symbol; only used to label the diagnostics this function writes
        -release_id: The release id placed in the request path
    Optional params:
        -max_trials: Number of retries after a failed first request (default 3)
        -retry_wait: Seconds to sleep before each retry (default 60)
        -timeout: Timeout in seconds handed to requests.get (default 10)
    Response:
        -The revisions DataFrame when the response parses as expected
        -The repaired DataFrame when the body only parses after the salvage
         step below; the pair is then also appended to the module-level
         `reinvestigate_ws` frame
        -An empty DataFrame when every request fails or the body cannot be
         parsed at all; in the latter case the raw text is written to
         <ticker>_and_<release_id>_ws.txt in the current working directory
    Notes: uses the module-level `url` and `headers`
    """
    # Without this declaration the assignment further down makes the name
    # local and reading it raises UnboundLocalError.
    global reinvestigate_ws

    function_url = url + "/releases/%s/consensus?type=wallstreet" % release_id

    # Call API or return an empty df if the retries fail. The status is
    # re-checked after EVERY request so that a successful retry is used.
    req = None
    trials = 0
    while True:
        try:
            req = requests.get(function_url, headers=headers, timeout=timeout)
            req.encoding = 'utf-8'
            if req.status_code == 200:
                break
            print("Hit an issue: HTTP status ", req.status_code, sep="")
        except requests.exceptions.RequestException as err:
            print("Hit an issue: ", err, sep="")
        if trials >= max_trials:
            print("The API request was retried ", trials, " times without success.", sep="")
            return pd.DataFrame()
        trials = trials + 1
        print("Rerun attempt ", trials, sep="")
        time.sleep(retry_wait) # wait before attempting another API call

    # create a dataframe from json object
    try:
        return pd.DataFrame(req.json()['wallstreet']['eps']['revisions'])
    except Exception as err:
        print("There was an issue with the output format: ", repr(err), sep="")

    # Salvage attempt: swap the last character of the body for "]" and parse
    # the result as JSON.
    try:
        response = req.text.strip()
        df = pd.DataFrame(json.loads(response[:-1] + "]"))
    except Exception as err:
        print("The output format issue was not resolved: ", repr(err), sep="")
        fname = str(ticker) + "_and_" + str(release_id) + "_ws" + ".txt" 
        # The response was decoded as utf-8, so write it back out as utf-8
        with open(fname, "w", encoding="utf-8") as f:
            f.write(req.text)
        return pd.DataFrame()

    print(df)      # these data frames need to be re-investigated 
    # Save the pair to the reinvestigate dataframe (one row per repaired response)
    flagged = pd.DataFrame({'ticker': [str(ticker)], 'release_id': [release_id]})
    reinvestigate_ws = pd.concat([reinvestigate_ws, flagged], axis=0, ignore_index=True)
    return df

# output dir name -- CHANGE IF NEEDED 
#output_dir = "/kellogg/proj/mrl898/estimize/output"
output_dir = "C://Users/clj585/Downloads/estimizetemp"

# rerun filename
rerun_file = "rerun_ws.csv"

# ticker-id pairs that produced no output, and when each was attempted
rerunList = []
initial_run = []

# initial count
count = 0

# Change to the output directory once, before any request is sent, so that
# every file written during the run (including the first) lands there and a
# missing directory is reported before the first API call.
os.chdir(output_dir)

# Loop through these ticker-event ID pairings 
for index, row in tics_event_ids.iterrows():
    
    # print the request count
    count += 1
    print(count)
    
    # print row's ticker and event ID
    ticker_id_str = str(row['ticker']) + "_and_" + str(row['id'])
    print(ticker_id_str)
    
    # send the API request
    output = get_ws_est_eps_hist(row['ticker'], row['id'])

    # if the output dataframe is empty save the pair to rerun it later
    if output.empty:

        print("No output file for the following: ", ticker_id_str, sep="")

        run_time = datetime.now()
        rerunList.append(ticker_id_str) # save the missing ticker-id pair
        initial_run.append(run_time) # save the current run time

        # Append ONLY the new record. Writing the whole accumulated list on
        # every failure would repeat all earlier rows in the file each time.
        df_rerun = pd.DataFrame({'missing ticker-id pair': [ticker_id_str],
                                 'initial run time': [run_time]})
        file_exists = os.path.isfile(rerun_file) # see if the rerun file exists
        df_rerun.to_csv(rerun_file, mode='a', header=not file_exists, index=False)

    else:
        # save output dataframe -- CAN CHANGE TO PARQUET IF LIKE 
        output_name = str(ticker_id_str) + "_ws" + ".csv"
        output['ticker'] = row['ticker']
        output['release_id'] = row['id']
        output.to_csv(output_name, index=False)

    # sleep time 5 seconds
    time.sleep(5)
    
# Reinvestigate these and deal with these separately... 
reinvestigate_ws.to_csv(output_dir + "/reinvestigate_ws.csv")

In [None]:
# Accumulates the ticker / release id pairs whose response had to be repaired
reinvestigate_cons = pd.DataFrame()

# API Request for consensus! 
def get_cons_est_eps_hist(ticker, release_id, max_trials=3, retry_wait=60, timeout=10):
    """
    Overview: requests /releases/<release_id>/consensus?type=estimize and returns
    the EPS revisions of the Estimize consensus as a DataFrame
    Required params:
        -ticker: The ticker/symbol; only used to label the diagnostics this function writes
        -release_id: The release id placed in the request path
    Optional params:
        -max_trials: Number of retries after a failed first request (default 3)
        -retry_wait: Seconds to sleep before each retry (default 60)
        -timeout: Timeout in seconds handed to requests.get (default 10)
    Response:
        -The revisions DataFrame when the response parses as expected
        -The repaired DataFrame when the body only parses after the salvage
         step below; the pair is then also appended to the module-level
         `reinvestigate_cons` frame
        -An empty DataFrame when every request fails or the body cannot be
         parsed at all; in the latter case the raw text is written to
         <ticker>_and_<release_id>_cons.txt in the current working directory
    Notes: uses the module-level `url` and `headers`
    """
    # Without this declaration the assignment further down makes the name
    # local and reading it raises UnboundLocalError.
    global reinvestigate_cons

    function_url = url + "/releases/%s/consensus?type=estimize" % release_id

    # Call API or return an empty df if the retries fail. The status is
    # re-checked after EVERY request so that a successful retry is used.
    req = None
    trials = 0
    while True:
        try:
            req = requests.get(function_url, headers=headers, timeout=timeout)
            req.encoding = 'utf-8'
            if req.status_code == 200:
                break
            print("Hit an issue: HTTP status ", req.status_code, sep="")
        except requests.exceptions.RequestException as err:
            print("Hit an issue: ", err, sep="")
        if trials >= max_trials:
            print("The API request was retried ", trials, " times without success.", sep="")
            return pd.DataFrame()
        trials = trials + 1
        print("Rerun attempt ", trials, sep="")
        time.sleep(retry_wait) # wait before attempting another API call

    # create a dataframe from json object
    try:
        payload = req.json()
        # NOTE(review): the request asks for type=estimize, so the 'estimize'
        # section is read when it is present (mirroring the wallstreet request,
        # which reads 'wallstreet'). This function previously read
        # 'estimize_weighted'; that key is kept as a fallback -- confirm the
        # response layout against the API documentation.
        consensus_key = 'estimize' if 'estimize' in payload else 'estimize_weighted'
        return pd.DataFrame(payload[consensus_key]['eps']['revisions'])
    except Exception as err:
        print("There was an issue with the output format: ", repr(err), sep="")

    # Salvage attempt: swap the last character of the body for "]" and parse
    # the result as JSON.
    try:
        response = req.text.strip()
        df = pd.DataFrame(json.loads(response[:-1] + "]"))
    except Exception as err:
        print("The output format issue was not resolved: ", repr(err), sep="")
        fname = str(ticker) + "_and_" + str(release_id) + "_cons" + ".txt" 
        # The response was decoded as utf-8, so write it back out as utf-8
        with open(fname, "w", encoding="utf-8") as f:
            f.write(req.text)
        return pd.DataFrame()

    print(df)   # these data frames need to be re-investigated 
    # Save the pair to the reinvestigate dataframe (one row per repaired response)
    flagged = pd.DataFrame({'ticker': [str(ticker)], 'release_id': [release_id]})
    reinvestigate_cons = pd.concat([reinvestigate_cons, flagged], axis=0, ignore_index=True)
    return df

# output dir name
#output_dir = "/kellogg/proj/mrl898/estimize/output"
output_dir = "C://Users/clj585/Downloads/estimizetemp"

# rerun filename
rerun_file = "rerun_cons.csv"

# ticker-id pairs that produced no output, and when each was attempted
rerunList = []
initial_run = []

# initial count
count = 0

# Change to the output directory once, before any request is sent, so that
# every file written during the run (including the first) lands there and a
# missing directory is reported before the first API call.
os.chdir(output_dir)

# Loop through these ticker-event ID pairings 
for index, row in tics_event_ids.iterrows():
    
    # print the request count
    count += 1
    print(count)
    
    # print row's ticker and event ID
    ticker_id_str = str(row['ticker']) + "_and_" + str(row['id'])
    print(ticker_id_str)
    
    # send the API request (the function defined in this cell is
    # get_cons_est_eps_hist; the shorter name used before does not exist)
    output = get_cons_est_eps_hist(row['ticker'], row['id'])
    
    # if the output dataframe is empty save the pair to rerun it later
    if output.empty:

        print("No output file for this date range: ", ticker_id_str, sep="")

        run_time = datetime.now()
        rerunList.append(ticker_id_str) # save the missing ticker-id pair
        initial_run.append(run_time) # save the current run time

        # Append ONLY the new record. Writing the whole accumulated list on
        # every failure would repeat all earlier rows in the file each time.
        df_rerun = pd.DataFrame({'missing ticker-id pair': [ticker_id_str],
                                 'initial run time': [run_time]})
        file_exists = os.path.isfile(rerun_file) # see if the rerun file exists
        df_rerun.to_csv(rerun_file, mode='a', header=not file_exists, index=False)

    else:
        # save output dataframe
        output_name = str(ticker_id_str) + "_cons" + ".csv"
        output['ticker'] = row['ticker']
        output['release_id'] = row['id']
        output.to_csv(output_name, index=False)

    # sleep time 5 seconds
    time.sleep(5)
    
# Save reinvestigate dataframe and examine those JSONS
reinvestigate_cons.to_csv(output_dir + "/reinvestigate_cons.csv")

In [None]:
# Accumulates the ticker / release id pairs whose response had to be repaired
reinvestigate_wgt = pd.DataFrame()

# API Request Method for weighted! 
def get_wgt_est_eps_hist(ticker, release_id, max_trials=3, retry_wait=60, timeout=10):
    """
    Overview: requests /releases/<release_id>/consensus?type=estimize_weighted and
    returns the EPS revisions of the weighted Estimize consensus as a DataFrame
    Required params:
        -ticker: The ticker/symbol; only used to label the diagnostics this function writes
        -release_id: The release id placed in the request path
    Optional params:
        -max_trials: Number of retries after a failed first request (default 3)
        -retry_wait: Seconds to sleep before each retry (default 60)
        -timeout: Timeout in seconds handed to requests.get (default 10)
    Response:
        -The revisions DataFrame when the response parses as expected
        -The repaired DataFrame when the body only parses after the salvage
         step below; the pair is then also appended to the module-level
         `reinvestigate_wgt` frame
        -An empty DataFrame when every request fails or the body cannot be
         parsed at all; in the latter case the raw text is written to
         <ticker>_and_<release_id>_wgt.txt in the current working directory
    Notes: uses the module-level `url` and `headers`
    """
    # Without this declaration the assignment further down makes the name
    # local and reading it raises UnboundLocalError.
    global reinvestigate_wgt

    function_url = url + "/releases/%s/consensus?type=estimize_weighted" % release_id

    # Call API or return an empty df if the retries fail. The status is
    # re-checked after EVERY request so that a successful retry is used.
    req = None
    trials = 0
    while True:
        try:
            req = requests.get(function_url, headers=headers, timeout=timeout)
            req.encoding = 'utf-8'
            if req.status_code == 200:
                break
            print("Hit an issue: HTTP status ", req.status_code, sep="")
        except requests.exceptions.RequestException as err:
            print("Hit an issue: ", err, sep="")
        if trials >= max_trials:
            print("The API request was retried ", trials, " times without success.", sep="")
            return pd.DataFrame()
        trials = trials + 1
        print("Rerun attempt ", trials, sep="")
        time.sleep(retry_wait) # wait before attempting another API call

    # create a dataframe from json object
    try:
        payload = req.json()
        # NOTE(review): the request asks for type=estimize_weighted, so the
        # 'estimize_weighted' section is read when it is present (mirroring the
        # wallstreet request, which reads 'wallstreet'). This function
        # previously read 'estimize'; that key is kept as a fallback -- confirm
        # the response layout against the API documentation.
        consensus_key = 'estimize_weighted' if 'estimize_weighted' in payload else 'estimize'
        return pd.DataFrame(payload[consensus_key]['eps']['revisions'])
    except Exception as err:
        print("There was an issue with the output format: ", repr(err), sep="")

    # Salvage attempt: swap the last character of the body for "]" and parse
    # the result as JSON.
    try:
        response = req.text.strip()
        df = pd.DataFrame(json.loads(response[:-1] + "]"))
    except Exception as err:
        print("The output format issue was not resolved: ", repr(err), sep="")
        fname = str(ticker) + "_and_" + str(release_id) + "_wgt" + ".txt" 
        # The response was decoded as utf-8, so write it back out as utf-8
        with open(fname, "w", encoding="utf-8") as f:
            f.write(req.text)
        return pd.DataFrame()

    print(df)  # these data frames need to be re-investigated 
    # Save the pair to the reinvestigate dataframe (one row per repaired response)
    flagged = pd.DataFrame({'ticker': [str(ticker)], 'release_id': [release_id]})
    reinvestigate_wgt = pd.concat([reinvestigate_wgt, flagged], axis=0, ignore_index=True)
    return df

# output dir name
#output_dir = "/kellogg/proj/mrl898/estimize/output"
output_dir = "C://Users/clj585/Downloads/estimizetemp"

# rerun filename
rerun_file = "rerun_wgt.csv"

# ticker-id pairs that produced no output, and when each was attempted
rerunList = []
initial_run = []

# initial count
count = 0

# Change to the output directory once, before any request is sent, so that
# every file written during the run (including the first) lands there and a
# missing directory is reported before the first API call.
os.chdir(output_dir)

# Loop through these ticker-event ID pairings 
for index, row in tics_event_ids.iterrows():
    
    # print the request count
    count += 1
    print(count)
    
    # print row's ticker and event ID
    ticker_id_str = str(row['ticker']) + "_and_" + str(row['id'])
    print(ticker_id_str)
  
    # send the API request (the function defined in this cell is
    # get_wgt_est_eps_hist; the shorter name used before does not exist)
    output = get_wgt_est_eps_hist(row['ticker'], row['id'])

    # if the output dataframe is empty save the pair to rerun it later
    if output.empty:

        print("No output file for this date range: ", ticker_id_str, sep="")

        run_time = datetime.now()
        rerunList.append(ticker_id_str) # save the missing ticker-id pair
        initial_run.append(run_time) # save the current run time

        # Append ONLY the new record. Writing the whole accumulated list on
        # every failure would repeat all earlier rows in the file each time.
        df_rerun = pd.DataFrame({'missing ticker-id pair': [ticker_id_str],
                                 'initial run time': [run_time]})
        file_exists = os.path.isfile(rerun_file) # see if the rerun file exists
        df_rerun.to_csv(rerun_file, mode='a', header=not file_exists, index=False)

    else:
        # save output dataframe
        output_name = str(ticker_id_str) + "_wgt" + ".csv"
        output['ticker'] = row['ticker']
        output['release_id'] = row['id']
        output.to_csv(output_name, index=False)

    # sleep time 5 seconds
    time.sleep(5)
    
# Reinvestigate these JSONS 
reinvestigate_wgt.to_csv(output_dir + "/reinvestigate_wgt.csv")