In [3]:
from google.colab import drive
drive.mount('/content/drive')

import datetime as dt
import json
import os
import re
import sys
from pathlib import Path

import requests
sys.path.append('/content/drive/MyDrive/Colab Notebooks/April')

from pipelines import loaders, downloaders, url_generator
print("Imported Libraries") 

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Imported Libraries


In [None]:
# Inputs for the historical download: the S&P 500 constituent list and the chart range.
# NOTE: currently relies on the updater module; consider a separate module for this kind
# of helper with an explicit filepath, or point the list at the tickers whose historicals are missing.

# Range forwarded to the url generator together with the constituent list.
date_length = 'max'

iex_sp500_constituents_filepath = '/content/drive/MyDrive/Colab Notebooks/April/data/iexS&P500constituents.json'
# Read the whole JSON document and decode it in one step.
iex_sp500_constituents = json.loads(Path(iex_sp500_constituents_filepath).read_text())

In [None]:
#Generate Batch Urls#

###Partition SP500 List to Prepare for Batch Calling and Multi Threading###
def get_historical_batch_urls(tickers, date_length, IEX_TOKEN = None):
  '''Returns list of partitioned batch urls for async api requests.

  Args:
    tickers: sliceable sequence of ticker symbols (strings); split into groups
      by ``partition`` and each group is comma-joined into one url.
    date_length: value placed in the url's ``range=`` query parameter (e.g. 'max').
    IEX_TOKEN: API token placed in the url's ``token=`` query parameter. When
      omitted, a module-level ``IEX_TOKEN`` is used if one exists, otherwise the
      ``IEX_TOKEN`` environment variable.

  Returns:
    list of url strings, one per ticker partition (empty list for no tickers).

  Raises:
    ValueError: if no token was passed and none can be found.
  '''
  if IEX_TOKEN is None:
    # Look the token up at call time. A def-time default that names a global
    # raises NameError while defining the function if that global does not exist.
    IEX_TOKEN = globals().get('IEX_TOKEN') or os.environ.get('IEX_TOKEN')
    if not IEX_TOKEN:
      raise ValueError(
          'No IEX token available: pass IEX_TOKEN, define a global IEX_TOKEN, '
          'or set the IEX_TOKEN environment variable')
  historical_batch_urls = []
  for ticker_partition in partition(tickers):
    symbols = ",".join(ticker_partition)
    batch_url = f"https://cloud.iexapis.com/stable/stock/market/batch?symbols={symbols}&types=chart&range={date_length}&token={IEX_TOKEN}"
    historical_batch_urls.append(batch_url)
  return historical_batch_urls
  
def partition(tickers, partition_size = 50):
  '''Split ``tickers`` into consecutive slices of at most ``partition_size`` items.

  The final slice holds whatever remains and may be shorter; an empty input
  yields an empty list.
  '''
  chunk_starts = range(0, len(tickers), partition_size)
  return [tickers[start:start + partition_size] for start in chunk_starts]

historical_batch_urls = url_generator.get_historical_batch_urls(iex_sp500_constituents, date_length)
# Mask any token= query value before printing: cell output is saved with the notebook,
# and the urls presumably carry the IEX API token (verify against url_generator).
print([re.sub(r'(token=)[^&]*', r'\1<redacted>', url) for url in historical_batch_urls]) #Check if a reduced batch is giving you what you want
print('We have {} batch urls that need to be downloaded'.format(len(historical_batch_urls)))

In [None]:
#Segregate Batch URLS for easier consumption
#Finished [:2], [2:5], [5:8], [8:11]
#Current [11:]
sliced_historical_batch_urls = historical_batch_urls[11:]
# Mask any token= query value before printing: cell output is saved with the notebook,
# and the urls presumably carry the IEX API token (verify against url_generator).
print([re.sub(r'(token=)[^&]*', r'\1<redacted>', url) for url in sliced_historical_batch_urls])

In [None]:
#For IEX Historicals to HDF5#
def get_iex_historicals(hist_batch_urls, timeout = 60):
  '''Download each batch url and collect per-ticker daily history rows.

  Args:
    hist_batch_urls: iterable of batch urls, requested one at a time in order.
    timeout: seconds handed to ``requests.get`` so a stalled connection fails
      instead of hanging the notebook; pass ``None`` to wait indefinitely.

  Returns:
    dict mapping each ticker key of the decoded responses to a list of rows
    ``[timestamp, fopen, fhigh, flow, fclose, fvolume]``, one row per entry of
    that ticker's 'chart' list. Empty dict when no urls are given.

  Raises:
    SystemExit: wrapping the ``requests`` exception when a request fails,
      returns an error status, or cannot be decoded; the failing url and the
      status code (if a response was received) are printed first.
  '''
  historicals = dict()
  for hist_batch_url in hist_batch_urls:
    # Reset per url so the except block never sees a stale value from an
    # earlier batch, and can tell "no response at all" from "bad response".
    hist_response = None
    try:
      hist_response = requests.get(hist_batch_url, timeout=timeout)
      hist_response.raise_for_status()
      # Kept under a separate name: hist_response must stay the Response object.
      batch_payload = hist_response.json()
    except requests.exceptions.RequestException as e:
      # NOTE(review): the printed url may include the API token query parameter.
      print('Stopped at batch url: {}'.format(hist_batch_url))
      status_code = hist_response.status_code if hist_response is not None else 'no response'
      print('Status Code: {}'.format(status_code))
      raise SystemExit(e)
    for ticker, ticker_payload in batch_payload.items():
      ticker_hist = list()
      for day in ticker_payload['chart']:
        # strptime gives a naive datetime, which .timestamp() interprets in the
        # machine's local timezone.
        current_timestamp = dt.datetime.strptime(day['date'], "%Y-%m-%d").timestamp()
        ticker_hist.append([current_timestamp,
                            day['fopen'], #The 'f' in front of the OHLC names marks the adjusted prices (per the original author's note)
                            day['fhigh'],
                            day['flow'],
                            day['fclose'],
                            day['fvolume']])
      historicals[ticker] = ticker_hist
      print('Finished downloading {}'.format(ticker))
  return historicals

# TODO: the request-failure path (the try/except inside get_iex_historicals) still needs to be tested.
# Download only the current slice of batch urls; the result maps ticker -> list of daily rows.
historicals = get_iex_historicals(sliced_historical_batch_urls)

In [None]:
#Save historicals to HDF5
def save_historicals_to_hdf5(historicals, filepath):
  '''Write every ticker's history to its own HDF5 file inside ``filepath``.

  Args:
    historicals: dict mapping ticker -> data to store for that ticker.
    filepath: directory prefix; each file is created at ``<filepath>/<ticker>.hdf5``
      and opened in 'w' mode.

  Each file gets a 'historicals' group holding one gzip-compressed dataset
  named '15Y'. Returns None.

  NOTE(review): relies on ``h5py`` being available as a module-level name; no
  ``import h5py`` is visible in this notebook's import cell - confirm.
  '''
  for symbol, rows in historicals.items():
    target_path = f'{filepath}/{symbol}.hdf5'
    with h5py.File(target_path, 'w') as hdf5_out:
      hdf5_out.create_group('historicals').create_dataset(
          name='15Y', data=rows, compression='gzip')
    print('Saved {} as HDF5'.format(symbol))

ml_filepath = '/content/drive/MyDrive/Colab Notebooks/April/data/ml'
# Pass the directory defined on the line above; no notebook-level `filepath` variable exists.
downloaders.save_historicals_to_hdf5(historicals, ml_filepath)

In [None]:
#Double Check That All Has Been Downloaded
# Report every constituent that has no <ticker>.hdf5 file in the ml data folder.
ml_filepath = '/content/drive/MyDrive/Colab Notebooks/April/data/ml'

for ticker in iex_sp500_constituents:
  hdf5_ticker_filepath = f'{ml_filepath}/{ticker}.hdf5'
  hdf5_file = Path(hdf5_ticker_filepath)
  # Only absent files are reported; tickers that are present produce no output.
  if not hdf5_file.is_file():
    print("{} is missing".format(ticker))