<a href="https://colab.research.google.com/github/mojojojoe/my-code-examples/blob/main/EDA%20DCF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# EDA of FMP data used for DCF

In [1]:
# building a functino to extract data from financialmodelingprep.com
import requests
import pandas as pd
import numpy as np
import json
import datetime

def get_statement(company_ticker, statement_name, api_key, frequency='annual', df = False):
    """
    Get a financial statement to use for fundamental calculations

    inputs:
    company_ticker (str) -- e.g. 'AAPL' for Apple inc.
    statement_name (str) -- one of: 'income-statement','balance-sheet-statement','cash-flow-statement','enterprise-value'
    period (str) -- 'annual' or 'quarter'
    forecast_period (int) -- Number of years you wish to forecast
    api_key (str) -- api key to access financialmodelingprep account

    returns:
    Pandas DataFrame object
    """
    if statement_name in ['income-statement','balance-sheet-statement','cash-flow-statement', 'enterprise-values', 'ratios']:

        statement = requests.get(f'https://financialmodelingprep.com/api/v3/{statement_name}/{company_ticker}?period={frequency}&apikey={api_key}').json()

        if df:
            statement = pd.DataFrame.from_dict(statement)
            statement = statement.iloc[:, 5:-2]  #these columns are not useful or repetitive


        else:
            pass

    return statement

In [2]:
# print statement eda to have a quick look at the data
def df_summary(df, name="DataFrame"):
    print(f"--- {name}.info() ---")
    print(df.info())
    print()

    print(f"--- {name}.describe() ---")
    print(df.describe(include='all'))
    print()

    print(f"--- {name}.head() ---")
    print(df.head())
    print()

In [3]:
#some columns only have zero as a value so we will drop them:
def drop_zero_columns(df):
    numeric_columns = df.select_dtypes(include=['number']).columns

    for column in numeric_columns:
        if df[column].sum() == 0:
            print(f'{column} only has zero values')
            df.drop(column, axis=1, inplace=True)


In [4]:
#data extraction from financial modelling prep

api_key= 'your_api_key'
ticker= 'AAPL'
company_ticker = ticker
base_url='https://financialmodelingprep.com/api/v3/'

In [5]:
# income statement
# some of the columns have only 0 values so they are dropped
income_statement = pd.DataFrame(get_statement(company_ticker, 'income-statement', api_key, frequency='annual', df=False))
if not income_statement.empty:
    income_statement = income_statement.iloc[:, 5:-2]

drop_zero_columns(income_statement)

df_summary(income_statement, name="Income Statement")

ValueError: If using all scalar values, you must pass an index

In [None]:
# balance sheet yearly
balance_sheet = get_statement(company_ticker, 'balance-sheet-statement', api_key, frequency='annual', df=True)
drop_zero_columns(balance_sheet)
df_summary(balance_sheet, name="Balance Sheet")

In [None]:
#cashflow statement
cashflow_statement= get_statement(company_ticker, 'cash-flow-statement', api_key, frequency = 'annual', df=True)
drop_zero_columns(cashflow_statement)
df_summary(cashflow_statement, name= 'cash flow statement')


In [None]:
# financial ratios
financial_ratios= get_statement(company_ticker, 'ratios', api_key, frequency= 'annual', df= True)
drop_zero_columns(financial_ratios)
df_summary(financial_ratios, name= 'financial ratios')

In [None]:
financial_ratios.columns