In [None]:
import os
from sys import path
import pandas as pd
# Add the local 'python-files' directory to the module search path. This has to
# run before the import below (presumably nasdaq_api_call lives in that
# directory -- confirm), so do not move these lines apart or reorder them.
path.append('python-files')
from nasdaq_api_call import wiki_api_call, zacks_fc_api_call
# Never truncate the column list when a DataFrame is printed or displayed.
pd.set_option('display.max_columns', None)


In [None]:
# Load the ticker list that drives every API call further down.
# The path is relative to the notebook's working directory.
source_data_path = os.path.join('data', 'post_Y2K_tickers_for_webscraping.csv')
source_data = pd.read_csv(filepath_or_buffer=source_data_path)

# Format date columns as strings for the API helper functions
def format_date_col_string(col, df=None):
    """Truncate every value of a column to its first 10 characters, in place.

    Each value is converted with ``str()`` and cut to 10 characters. Ten is the
    length of a ``YYYY-MM-DD`` date; the values are assumed to start with one
    (this is not validated here).

    Parameters
    ----------
    col : str
        Name of the column to rewrite.
    df : pandas.DataFrame, optional
        Frame to modify. When omitted, the module-level ``source_data`` frame
        is used, so existing one-argument calls behave exactly as before.

    Returns
    -------
    None
        The chosen frame is modified in place.

    Notes
    -----
    Missing values are not special-cased: they go through ``str()`` like any
    other value (a float NaN becomes the string ``'nan'``).
    """
    # Resolve the target lazily so the global is only touched when no frame
    # was passed explicitly.
    frame = source_data if df is None else df
    frame[col] = [str(value)[:10] for value in frame[col]]

# Normalise both date columns of source_data, then show the column dtypes.
for date_col in ('start_date', 'end_date'):
    format_date_col_string(date_col)

print(source_data.dtypes)

In [None]:
# One (ticker, start_date, end_date) tuple per row of source_data.
# This list is reused by the ZACKS/FC download cell further down.
zipper = list(zip(source_data['ticker'], source_data['start_date'], source_data['end_date']))

all_data = []

# Pull WIKI historical daily prices: one call per row of `zipper`.
# Responses with no rows are skipped.
for ticker, start, end in zipper:
    prices = wiki_api_call(ticker, start, end)
    if len(prices) > 0:
        all_data.append(prices)

# pd.concat raises an opaque "No objects to concatenate" on an empty list, so
# fail early with a message that names the actual cause (same exception type).
if not all_data:
    raise ValueError(
        'wiki_api_call returned no rows for any of the {} requested tickers'.format(len(zipper))
    )

wiki_df = pd.concat(all_data, ignore_index = True)
print(wiki_df.head(2))

# Persist the combined WIKI prices as brotli-compressed parquet, partitioned by
# ticker. `out_folder` is reused when the ZACKS/FC output is written.
out_folder = 'data'
wiki_file_name = 'NASDAQ-WIKI-PRICES.parquet'
wiki_out = os.path.join(out_folder, wiki_file_name)
wiki_df.to_parquet(
    wiki_out,
    engine='pyarrow',
    compression='brotli',
    partition_cols=['ticker'],
)

In [None]:
# Columns requested from the ZACKS/FC API call.
# The variable name is misspelled ("colums") but is referenced under this exact
# name by the download loop, so it is kept as-is.
# The groups below exist for readability only; order and contents are unchanged.
zacks_colums = (
    ['m_ticker']  # primary key
    + [
        'ticker',
        'comp_name',
        'currency_code',
        'per_end_date',
        'per_fisc_year',
        'per_cal_year',
        'filing_date',
    ]
    + ['zacks_sector_code', 'zacks_x_ind_code']
    + ['bus_city', 'bus_state_name', 'bus_post_code', 'country_name']
    + ['tot_revnu', 'gross_profit', 'tot_oper_exp']  # all three in millions
    + ['basic_net_eps']
    + ['comm_shares_out', 'comm_stock_div_paid']
    + ['incr_decr_cash']
)

print(zacks_colums)

In [None]:
# Pull ZACKS/FC data: one call per row of `zipper`, restricted to the columns
# listed in `zacks_colums`. Responses with no rows are skipped.
zacks_data = []
for ticker, start, end in zipper:
    fundamentals = zacks_fc_api_call(ticker, start, end, columns = zacks_colums)
    if len(fundamentals) > 0:
        zacks_data.append(fundamentals)

# pd.concat raises an opaque "No objects to concatenate" on an empty list, so
# fail early with a message that names the actual cause (same exception type).
if not zacks_data:
    raise ValueError(
        'zacks_fc_api_call returned no rows for any of the {} requested tickers'.format(len(zipper))
    )

zacks_df = pd.concat(zacks_data, ignore_index = True)
print(zacks_df.head(2))

# Persist the combined ZACKS/FC rows as brotli-compressed parquet, partitioned
# first by m_ticker and then by ticker. Reuses `out_folder` from the WIKI
# output cell.
zacks_file_name = 'NASDAQ-ZACKS-FC.parquet'
zacks_out = os.path.join(out_folder, zacks_file_name)

zacks_df.to_parquet(
    zacks_out,
    engine='pyarrow',
    compression='brotli',
    partition_cols=['m_ticker', 'ticker'],
)