## Import packages

In [16]:
import requests
import pandas as pd
import numpy as np

# python mongo libraries
import pymongo

# Reading properties
from jproperties import Properties

from datetime import datetime

## Constants

In [17]:
# Alpha Vantage endpoint and the API function this notebook queries
BASE_URL = 'https://www.alphavantage.co/query?'
FUNCTION = 'EARNINGS'

# Earnings fields that must be converted from string to float
FIELDS_TO_FLOAT = [
    'reportedEPS',
    'estimatedEPS',
    'surprise',
    'surprisePercentage',
]

## Load properties

In [18]:
# Load runtime settings from the property file
configs = Properties()

with open('config/insert_earnings.properties', 'rb') as config_file:
    configs.load(config_file)


def _required_property(key):
    """Return the value stored under `key` in the loaded properties.

    Raises:
        KeyError: if the property file does not define `key`. Without this
            check a missing key only shows up as an AttributeError on None.
    """
    prop = configs.get(key)
    if prop is None:
        raise KeyError(
            f"Missing required property '{key}' in config/insert_earnings.properties"
        )
    return prop.data


# Tolerate spaces and empty entries in the comma-separated list (e.g. "INTC, AMD,")
# so that no blank or padded symbol is sent to the API.
TICKERS = [
    symbol.strip()
    for symbol in _required_property('TICKERS').split(',')
    if symbol.strip()
]
API_KEY = _required_property('ALPHAV_API_KEY')
MONGO_URI = _required_property('MONGO_URI')
DB = _required_property('DB')
ANNUAL_COLLECTION = _required_property('ANNUAL_COLLECTION')
QUARTERLY_COLLECTION = _required_property('QUARTERLY_COLLECTION')

## Get earnings from Alpha Vantage

In [19]:
# Seconds to wait on a single request before giving up instead of hanging the kernel
REQUEST_TIMEOUT = 30

# Payload sections that the DataFrame-building step reads for every ticker
EXPECTED_SECTIONS = ('annualEarnings', 'quarterlyEarnings')

# Holds the raw earnings payload for each symbol
earnings = {}

# Get earnings for each stock ticker
for ticker in TICKERS:
    # Passing the query as `params` lets requests URL-encode every value
    response = requests.get(
        BASE_URL,
        params={'function': FUNCTION, 'symbol': ticker, 'apikey': API_KEY},
        timeout=REQUEST_TIMEOUT,
    )
    # Surface HTTP-level failures here rather than as a JSON decoding error
    response.raise_for_status()
    payload = response.json()

    # Fail at the source when the payload lacks the sections used downstream;
    # otherwise the problem only appears later as a bare KeyError.
    missing = [section for section in EXPECTED_SECTIONS if section not in payload]
    if missing:
        raise ValueError(
            f'Earnings response for {ticker} is missing {missing}; got: {str(payload)[:300]}'
        )

    earnings[ticker] = payload

## Utility method to create a DF
#### __Note:__ it uses the global variable `earnings`

In [20]:
def create_ticker_df(ticker, report_type, earnings_data=None):
    """Build a typed, date-sorted earnings DataFrame for one ticker.

    Parameters
    ----------
    ticker : str
        Symbol whose raw earnings payload is looked up.
    report_type : str
        'Q' for quarterly or 'A' for annual earnings (case-insensitive).
    earnings_data : dict, optional
        Mapping of ticker -> raw earnings payload. When omitted, the
        notebook-level global ``earnings`` is used, so existing two-argument
        calls behave as before.

    Returns
    -------
    pandas.DataFrame
        One row per report, with a ``ticker`` column added,
        ``fiscalDateEnding`` converted to datetime, and rows sorted by
        ``fiscalDateEnding`` (oldest first). Quarterly frames also get
        ``reportedDate`` converted to datetime and every field listed in
        ``FIELDS_TO_FLOAT`` converted to numeric; annual frames only get
        ``reportedEPS`` converted.

    Raises
    ------
    ValueError
        If ``report_type`` is neither 'Q' nor 'A'.
    KeyError
        If ``ticker`` is not in the earnings mapping, or its payload lacks the
        requested earnings section.
    """
    sections = {'Q': 'quarterlyEarnings', 'A': 'annualEarnings'}
    kind = report_type.upper()
    if kind not in sections:
        raise ValueError('Unknown report type, valid types are Q or A')
    section = sections[kind]

    source = earnings if earnings_data is None else earnings_data
    payload = source[ticker]
    if section not in payload:
        raise KeyError(
            f"'{section}' is missing from the earnings payload for {ticker}; "
            f"payload keys: {sorted(payload)}"
        )

    df_ticker = pd.DataFrame(payload[section])

    if kind == 'Q':
        # The reported date is only present for quarterly earnings
        df_ticker['reportedDate'] = pd.to_datetime(df_ticker['reportedDate'])
        numeric_fields = FIELDS_TO_FLOAT
    else:
        # Only reportedEPS is present for annual earnings
        numeric_fields = ['reportedEPS']

    for field in numeric_fields:
        # Non-numeric values are converted to NaN
        df_ticker[field] = pd.to_numeric(df_ticker[field], errors='coerce')

    # Add a column for the ticker
    df_ticker['ticker'] = ticker

    # Dates are strings in the raw payload
    df_ticker['fiscalDateEnding'] = pd.to_datetime(df_ticker['fiscalDateEnding'])

    # Sort by date - we want the oldest date first
    return df_ticker.sort_values('fiscalDateEnding')

## Create DataFrame containing Annual and Quarterly earnings

In [21]:
# Build one frame per ticker first, then concatenate once. Growing a DataFrame
# with concat inside a loop re-copies all accumulated rows on every iteration,
# and seeding it with an empty frame triggers a pandas deprecation warning.
annual_frames = [create_ticker_df(ticker, 'A') for ticker in earnings]
quarterly_frames = [create_ticker_df(ticker, 'Q') for ticker in earnings]

# pd.concat raises on an empty list, so fall back to an empty frame when no
# tickers were loaded.
df_annual = pd.concat(annual_frames) if annual_frames else pd.DataFrame()
df_quarterly = pd.concat(quarterly_frames) if quarterly_frames else pd.DataFrame()

## Set the index to fiscal date + ticker

In [22]:
# Index both frames by (fiscal date ending, ticker). drop=False keeps the two
# fields as ordinary columns as well, because we need them in the collection.
index_levels = ['fiscalDateEnding', 'ticker']
df_annual = df_annual.set_index(index_levels, drop=False)
df_quarterly = df_quarterly.set_index(index_levels, drop=False)
df_quarterly.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,fiscalDateEnding,reportedDate,reportedEPS,estimatedEPS,surprise,surprisePercentage,ticker
fiscalDateEnding,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1996-03-31,INTC,1996-03-31,1996-04-15,0.13,0.12,0.01,8.3333,INTC
1996-06-30,INTC,1996-06-30,1996-07-16,0.15,0.14,0.01,7.1429,INTC
1996-09-30,INTC,1996-09-30,1996-10-14,0.19,0.16,0.03,18.75,INTC
1996-12-31,INTC,1996-12-31,1997-01-14,0.27,0.23,0.04,17.3913,INTC
1997-03-31,INTC,1997-03-31,1997-04-14,0.28,0.26,0.02,7.6923,INTC


## Check Annual earnings

In [23]:
# Preview the first five annual rows
df_annual.head(n=5)

Unnamed: 0_level_0,Unnamed: 1_level_0,fiscalDateEnding,reportedEPS,ticker
fiscalDateEnding,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1996-12-31,INTC,1996-12-31,0.74,INTC
1997-12-31,INTC,1997-12-31,0.98,INTC
1998-12-31,INTC,1998-12-31,0.89,INTC
1999-12-31,INTC,1999-12-31,1.19,INTC
2000-12-31,INTC,2000-12-31,1.65,INTC


# Sanity check #1
### with another ticker

In [24]:
# Preview the quarterly rows whose ticker column is AMD
df_quarterly.loc[df_quarterly['ticker'].eq('AMD')].head()

Unnamed: 0_level_0,Unnamed: 1_level_0,fiscalDateEnding,reportedDate,reportedEPS,estimatedEPS,surprise,surprisePercentage,ticker
fiscalDateEnding,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1996-03-31,AMD,1996-03-31,1996-04-09,0.09,0.15,-0.06,-40.0,AMD
1996-06-30,AMD,1996-06-30,1996-07-10,-0.16,-0.11,-0.05,-45.4545,AMD
1996-09-30,AMD,1996-09-30,1996-10-07,-0.12,-0.18,0.06,33.3333,AMD
1996-12-31,AMD,1996-12-31,1997-01-13,-0.07,-0.1,0.03,30.0,AMD
1997-03-31,AMD,1997-03-31,1997-04-07,0.05,-0.01,0.06,600.0,AMD


## Insert Annual and Quarterly Earnings to MongoDB

In [25]:
# Initialize mongo client
client = pymongo.MongoClient(MONGO_URI)

# NOTE(review): every run appends documents; re-running this cell inserts the
# same earnings again unless the collections enforce a unique index - confirm.
with client:
    # The database
    db = client[DB]

    # Pair each target collection with the frame whose rows it receives
    for collection_name, frame in (
        (ANNUAL_COLLECTION, df_annual),
        (QUARTERLY_COLLECTION, df_quarterly),
    ):
        records = frame.to_dict('records')
        # insert_many rejects an empty document list, so skip frames with no rows
        if records:
            db[collection_name].insert_many(records)