# Analyze Fundamental Stock Data

In [1]:
# Libraries
import pandas as pd
from eod import EodHistoricalData
from functools import reduce
from datetime import datetime, timedelta

# Importing and assigning the api key
with open("../eodHistoricalData-API.txt", "r") as f:
    api_key = f.read()
    
# EOD Historical Data client
client = EodHistoricalData(api_key)

# Formatting Fundamental Data

In [2]:
def getFundamentals(ticker):
    """
    Returns the fundamental data from the financial data API.  Combines the quarterly balance 
    sheet, cash flow, income statement, and earnings for a specific stock ticker.
    """
    
    # Getting data
    fund_data = client.get_fundamental_equity(ticker)
    
    # Financials
    bal = pd.DataFrame(fund_data['Financials']['Balance_Sheet']['quarterly']).T
    
    cf = pd.DataFrame(fund_data['Financials']['Cash_Flow']['quarterly']).T
    
    inc = pd.DataFrame(fund_data['Financials']['Income_Statement']['quarterly']).T
    
    # Earnings
    earn = pd.DataFrame(fund_data['Earnings']['History']).T
    
    # Merging them together
    df = reduce(
        lambda left,right: pd.merge(
            left,
            right,
            left_index=True, 
            right_index=True, 
            how='outer',
            suffixes=('', '_drop')
        ), 
        [bal, cf, inc, earn]
    )
    
    # Dropping redundant date and duplicate columns
    dup_cols = [i for i in df.columns if "date" in i or "Date" in i or "_drop" in i]
    
    df = df.drop(dup_cols, axis=1)
    
    return df

In [9]:
def getPrices(df, ticker):
    """
    Gets the stock price at the time for each date in the financial statements for
    the given ticker and dataframe of financial information.
    """
    # Getting stock price at the time
    prices = client.get_prices_eod(ticker, period='d')
    
    prices = pd.DataFrame(prices).set_index('date')[['adjusted_close', 'close', 'volume']]

    # Converting to date time
    prices.index = pd.to_datetime(prices.index)

    # Filling in missing price data
    prices = prices.reindex(
        pd.date_range(prices.index[0], prices.index[-1]),
        method='ffill'
    )
    
    # Converting back to string for merging later
    prices.index = prices.index.strftime("%Y-%m-%d")
        
    price_dates = [i for i in prices.index if i in df.index]
    
    prices = prices.loc[price_dates]

    # Joining together
    df = df.join(prices, how='outer')
    
    return df

In [10]:
def formatFundamentals(ticker, dropna=False):
    """
    Formats the given ticker's fundamental and price data. Cleans up the data by dropping
    any empty/nan values if requested.
    """
    
    # Getting fundamental data
    fund_data = getFundamentals(ticker)
    
    # Getting accompanying price data
    df = getPrices(fund_data, ticker)
    
    # Dropping if all items are na in respective row
    df = df.dropna(how='all')
    
    if dropna:
        # Dropping mostly nan columns and rows if requested
        df = df.dropna(
            axis=0,
            thresh=round(df.shape[0]*.3) # If 50% of the values in the row are Nans, drop the whole row
        ).dropna(
            axis=1,
            thresh=round(df.shape[1]*.3) # If 50% of the values in the columns are Nans, drop the whole column
        )
    
    return df

In [19]:
df = formatFundamentals("TSLA", dropna=True)

In [20]:
df

Unnamed: 0,currency_symbol,totalAssets,intangibleAssets,otherCurrentAssets,totalLiab,totalStockholderEquity,otherCurrentLiab,commonStock,retainedEarnings,otherLiab,...,netIncomeFromContinuingOps,netIncomeApplicableToCommonShares,currency,epsActual,epsEstimate,epsDifference,surprisePercent,adjusted_close,close,volume
2008-09-30,,51699000.0,,,251413000.0,,,,-204914000.0,,...,,,,,,,,,,
2008-12-31,USD,51699000.0,,2180000.0,251413000.0,,71607000.0,7000.0,-204914000.0,6884000.0,...,0.0,0.0,,,,,,,,
2009-03-31,,,,,,,,,,,...,0.0,0.0,,,,,,,,
2009-06-30,USD,155916000.0,,,386561000.0,,,,-236412000.0,,...,0.0,0.0,,,,,,,,
2009-09-30,USD,155916000.0,,,386561000.0,,,,-236412000.0,,...,0.0,0.0,,,,,,,,
2009-12-31,USD,130424000.0,,4222000.0,383947000.0,65702000.0,41661000.0,7000.0,-260654000.0,6433000.0,...,0.0,0.0,,,,,,,,
2010-03-31,USD,145320000.0,,4537000.0,101141000.0,39928000.0,40143000.0,8000.0,-290173000.0,15648000.0,...,0.0,0.0,,,,,,,,
2010-06-30,USD,147974000.0,,6745000.0,457749000.0,9450000.0,41123000.0,8000.0,-328689000.0,23832000.0,...,0.0,0.0,USD,-0.0812,,,,4.766,23.83,85971954.0
2010-09-30,USD,361621000.0,,8870000.0,141698000.0,219923000.0,39727000.0,93000.0,-363624000.0,15247000.0,...,0.0,0.0,USD,-0.37,-0.43,0.06,13.9535,4.081,20.405,10983125.0
2010-12-31,USD,386082000.0,14508000.0,10839000.0,179034000.0,207048000.0,53661000.0,95000.0,-414982000.0,21145000.0,...,0.0,0.0,USD,-0.47,-0.5,0.03,6.0,5.326,26.63,7089235.0


# Getting Fundamentals from Multiple Companies

In [23]:
def getMultipleFunds(tickers, api_token):
    """
    Gets fundamental data from multiple stock tickers given as a list. Returns
    a large dataframe containing the concatenated information for all the given
    tickers.
    """
    
    # Verifying if the list of tickers is compatible
    available = client.get_exchange_symbols("US")

    available = set(i['Code'] for i in available)
    
    tickers = [i for i in tickers if i in available]
    
    if len(tickers)==0:
        return "No valid tickers found."
    
    # Iterating through the tickers
    dfs = {}
    
    for ticker in tickers:
        
        dfs[ticker] = formatFundamentals(ticker)
        
    
    return pd.concat(dfs, axis=0)
    

In [24]:
df = getMultipleFunds(["asdfase"], api_key)

In [25]:
df#.loc['TSLA']

'No valid tickers found.'