In [1]:
import pandas as pd
import requests
import json
import os
import multiprocessing as mp

In [2]:
def getKeyMetrics(stock):
    """Fetch quarterly key metrics for one ticker.

    Calls the financialmodelingprep.com ``company-key-metrics`` endpoint with
    ``period=quarter``.

    Args:
        stock: Ticker symbol; concatenated into the request URL as-is.

    Returns:
        The value stored under the ``"metrics"`` key of the decoded JSON
        response, or ``None`` when the request fails or times out, the body
        is not valid JSON, or the key is absent.
    """
    url = "https://financialmodelingprep.com/api/v3/company-key-metrics/" + stock + "?period=quarter"
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, timeout=30)
        data = json.loads(response.text)
        return data["metrics"]
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network failure or timeout.
        # ValueError: body is not JSON (json.JSONDecodeError subclasses it).
        # KeyError / TypeError: JSON decoded but is not a mapping with "metrics".
        return None

In [3]:
def getFinancialGrowth(stock):
    """Fetch quarterly financial-statement growth figures for one ticker.

    Calls the financialmodelingprep.com ``financial-statement-growth``
    endpoint with ``period=quarter``.

    Args:
        stock: Ticker symbol; concatenated into the request URL as-is.

    Returns:
        The value stored under the ``"growth"`` key of the decoded JSON
        response, or ``None`` when the request fails or times out, the body
        is not valid JSON, or the key is absent.
    """
    url = "https://financialmodelingprep.com/api/v3/financial-statement-growth/" + stock + "?period=quarter"
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, timeout=30)
        data = json.loads(response.text)
        return data["growth"]
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network failure or timeout.
        # ValueError: body is not JSON (json.JSONDecodeError subclasses it).
        # KeyError / TypeError: JSON decoded but is not a mapping with "growth".
        return None

In [4]:
def getDiscountedCashFlow(stock):
    """Fetch the quarterly historical discounted-cash-flow series for one ticker.

    Calls the financialmodelingprep.com
    ``company/historical-discounted-cash-flow`` endpoint with ``period=quarter``.

    Args:
        stock: Ticker symbol; concatenated into the request URL as-is.

    Returns:
        The value stored under the ``"historicalDCF"`` key of the decoded JSON
        response, or ``None`` when the request fails or times out, the body
        is not valid JSON, or the key is absent.
    """
    url = "https://financialmodelingprep.com/api/v3/company/historical-discounted-cash-flow/" + stock + "?period=quarter"
    try:
        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, timeout=30)
        data = json.loads(response.text)
        return data["historicalDCF"]
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        # RequestException: network failure or timeout.
        # ValueError: body is not JSON (json.JSONDecodeError subclasses it).
        # KeyError / TypeError: JSON decoded but is not a mapping with "historicalDCF".
        return None

In [5]:
def process(stock):
    """Build and save the combined per-company table for one ticker.

    Reads ``<stock>.csv`` from ``api_data/income_statements``,
    ``api_data/balance_sheets`` and ``api_data/cash_flow_statements``, stacks
    the three frames row-wise, transposes the result and promotes its first
    row to the header. A leading ``Company`` column holding ``stock`` is
    added, then whatever the three API helpers return is outer-merged on
    ``Date``. The final frame is written to ``api_data/processed/<stock>.csv``.

    Args:
        stock: Ticker symbol; used for file names, the ``Company`` column and
            the API calls.

    Returns:
        None. If the statement files cannot be read or combined,
        ``"Error: <stock>"`` is printed and nothing is written.
    """
    try:
        # Reading lives inside the try so a missing/empty/corrupt file takes the
        # same "report and skip" path as a failed concat.
        income = pd.read_csv("api_data/income_statements/" + stock + ".csv")
        balance = pd.read_csv("api_data/balance_sheets/" + stock + ".csv")
        cash_flow = pd.read_csv("api_data/cash_flow_statements/" + stock + ".csv")
        df = pd.concat([income, balance, cash_flow], axis=0, ignore_index=True, sort=False).T.reset_index()
    except Exception:
        # `Exception` rather than a bare except: Ctrl-C / SystemExit still propagate.
        print("Error: "+ stock)
        return

    # After the transpose the first row carries the column labels.
    new_header = df.iloc[0]
    # .copy() so the column assignment below operates on an independent frame
    # instead of a slice (avoids SettingWithCopyWarning).
    df = df[1:].copy()
    df.columns = new_header

    cols = list(df.columns.values)
    df["Company"] = stock
    cols.insert(0, "Company")  # make "Company" the first column
    df = df[cols]

    for fetch in (getKeyMetrics, getFinancialGrowth, getDiscountedCashFlow):
        payload = fetch(stock)
        if payload is None:
            continue
        extra = pd.DataFrame(payload).rename(columns={'date': 'Date'})
        if "Date" not in extra.columns:
            # e.g. the API returned an empty list: nothing to join on.
            continue
        df = df.merge(extra, how="outer", on="Date")

    output_file = "api_data/processed/" + stock + ".csv"
    df.to_csv(output_file)

In [6]:
def process_chunk(stock_chunks):
    """Run ``process`` on every ticker in a chunk, then print ``Done chunk``.

    A failure on one ticker is reported and skipped so that the remaining
    tickers in the chunk are still processed.

    Args:
        stock_chunks: Iterable of ticker symbols.

    Returns:
        None.
    """
    for stock in stock_chunks:
        try:
            process(stock)
        except Exception as exc:
            # Without this, the first raising ticker would abort the rest of the chunk.
            print("Error: " + str(stock) + " (" + repr(exc) + ")")
    print("Done chunk")

In [7]:
# A ticker can only be processed when all three statement files exist for it.
files1 = set(os.listdir("api_data/income_statements"))
files2 = set(os.listdir("api_data/balance_sheets"))
files3 = set(os.listdir("api_data/cash_flow_statements"))

# Sorted so that the ticker order (and therefore the chunking) is reproducible.
files = sorted(files1 & files2 & files3)

# Keep only CSV files and strip the extension; other directory entries
# (hidden files, checkpoint folders, ...) are not tickers.
stocks = [os.path.splitext(name)[0] for name in files if name.endswith(".csv")]

In [8]:
def split_list(alist, wanted_parts=1):
    """Cut ``alist`` into ``wanted_parts`` consecutive slices of near-equal size.

    Slice boundaries are ``i * len(alist) // wanted_parts``, so sizes differ
    by at most one and some slices are empty when ``wanted_parts`` exceeds
    ``len(alist)``.

    Args:
        alist: Any sliceable sequence.
        wanted_parts: Number of slices to produce.

    Returns:
        A list with ``wanted_parts`` slices of ``alist`` in original order.
    """
    total = len(alist)
    pieces = []
    for part in range(wanted_parts):
        start = part * total // wanted_parts
        stop = (part + 1) * total // wanted_parts
        pieces.append(alist[start:stop])
    return pieces

In [9]:
# Split the tickers into 100 contiguous slices; each slice is later handed to
# process_chunk as one unit of work.
stocks_chunked = split_list(stocks, 100)

In [10]:
# Fan the chunks out over 32 worker processes. The context manager shuts the
# workers down when the block exits instead of leaving them alive in the kernel.
with mp.Pool(processes=32) as pool:
    results = [pool.apply_async(process_chunk, args=(stock_chunk,)) for stock_chunk in stocks_chunked]
    # Collect inside the `with` block: leaving it terminates the pool, so every
    # result must be retrieved first. `.get()` also re-raises worker exceptions.
    output = [p.get() for p in results]

Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk
Done chunk