In [1]:
import requests
import json
import pandas as pd
import datetime

from utils import formatURL
from config import getConfig

In [2]:
# TODO
# Where is the data for DIISX?
# Why so little data for MSCI and SPX?

In [3]:
def getStartDt(maxDates, symbol):
    """Return the (start, end) dates of the next one-year window to request for a symbol.

    maxDates maps a fund symbol to the latest date already on file. When the
    symbol has an entry, the window starts the day after it; otherwise the
    window starts on 2012-01-01. The end date is always 365 days after the start.
    """
    lastSeen = maxDates.get(symbol)
    if not lastSeen:
        # No history for this symbol yet: fall back to the fixed initial date
        windowStart = pd.to_datetime('2012-01-01').date()
    else:
        windowStart = lastSeen + datetime.timedelta(days=1)
    windowEnd = windowStart + datetime.timedelta(days=365)
    return windowStart, windowEnd

In [4]:
def createDF(resp):
    """Turn the "get" response for a fund into a DataFrame sorted by date.

    Reads the records under resp['data'] and returns the columns
    'date' (first 10 characters of each record's 'date' string),
    'price' (the record's 'close') and 'dividend'.
    """
    records = resp['data']
    columns = {
        'date': [rec['date'][:10] for rec in records],
        'price': [rec['close'] for rec in records],
        'dividend': [rec['dividend'] for rec in records],
    }
    frame = pd.DataFrame(columns)
    return frame.sort_values('date')

In [5]:
def getData(symbol, fromDt, toDt, timeout=30):
    """Request prices for one symbol over a date range and return the decoded JSON.

    symbol  -- ticker to request, sent as the 'symbols' query parameter
    fromDt  -- first date of the range; str() is applied, so a date or a string works
    toDt    -- last date of the range; str() is applied likewise
    timeout -- seconds to wait for the server before requests raises a Timeout;
               without one a stalled connection would block the kernel indefinitely

    The response body is returned as-is (no status check), so an error payload
    from the API can still be inspected by the caller.
    """
    # Let requests build and URL-encode the query string; it is appended to
    # whatever query baseURL already carries.
    query = {
        'symbols': symbol,
        'date_from': str(fromDt),
        'date_to': str(toDt),
    }
    return requests.get(baseURL, params=query, timeout=timeout).json()

In [6]:
# Project settings; the data-file cells below look up the data directory in cfg
cfg = getConfig()
# Base request URL that getData() extends with per-request query parameters
# (presumably the end-of-day price endpoint -- confirm in utils.formatURL)
baseURL = formatURL('eod')

In [13]:
# Load the price history already on file
fundDataPath = cfg['ETrade']['dataLoc'] + 'fundData.csv'
df = pd.read_csv(fundDataPath)

# Convert the 'date' column from CSV text to datetime.date objects
df['date'] = df['date'].map(lambda rawDt: pd.to_datetime(rawDt).date())

In [14]:
# Map every fund already on file to the most recent date we hold for it
currentSymbols = set(df.fund)
maxDates = {
    fund: df.loc[df['fund'] == fund, 'date'].max()
    for fund in currentSymbols
}

In [16]:
#maxDates

In [17]:
# Show the distinct fund symbols currently present in df
set(df.fund)

{'ARKK', 'CASH', 'CCJ', 'COMB', 'DIISX', 'JEPI', 'NUSI', 'QYLD', 'RYLD', 'SPY'}

In [8]:
# Enter the symbol for which you want the historical prices.
# The symbol may or may not already be in the fundData file.
# Alternative symbols are kept commented out; leave exactly one assignment active.
#symbol = 'MSCIWORLD.INDX'
symbol = 'ARKK'
#symbol = 'SP500.INDX'

In [9]:
# Ad-hoc probe: request a fixed 2023 window for the chosen symbol to inspect the raw response
apiResp = getData(symbol, '2023-01-01', '2023-12-31')

In [10]:
# Display the raw response currently held in apiResp
apiResp

{'pagination': {'limit': 1000, 'offset': 0, 'count': 250, 'total': 250},
 'data': [{'open': 54.0,
   'high': 54.21,
   'low': 52.24,
   'close': 52.37,
   'volume': 18769128.0,
   'adj_high': 54.21,
   'adj_low': 52.24,
   'adj_close': 52.37,
   'adj_open': 54.0,
   'adj_volume': 18769128.0,
   'split_factor': 1.0,
   'dividend': 0.0,
   'symbol': 'ARKK',
   'exchange': 'ARCX',
   'date': '2023-12-29T00:00:00+0000'},
  {'open': 54.0,
   'high': 54.39,
   'low': 53.79,
   'close': 54.14,
   'volume': 11816400.0,
   'adj_high': 54.3899,
   'adj_low': 53.79,
   'adj_close': 54.14,
   'adj_open': 54.0,
   'adj_volume': 11828164.0,
   'split_factor': 1.0,
   'dividend': 0.0,
   'symbol': 'ARKK',
   'exchange': 'ARCX',
   'date': '2023-12-28T00:00:00+0000'},
  {'open': 54.3,
   'high': 54.52,
   'low': 53.8,
   'close': 54.26,
   'volume': 13786300.0,
   'adj_high': 54.52,
   'adj_low': 53.8,
   'adj_close': 54.26,
   'adj_open': 54.3,
   'adj_volume': 13727519.0,
   'split_factor': 1.0,
   

In [19]:
# Fetch the missing history for `symbol` in consecutive windows (the first
# from getStartDt, each later one ending 365 days after the previous end)
# and append the result to df.
# The first window starts the day after the latest date already on file
# (or at the initial default date when the symbol is new).
fromDt, toDt = getStartDt(maxDates, symbol)

dfList = []
while True:
    apiResp = getData(symbol, fromDt, toDt)
    tmp = createDF(apiResp)
    tmp['fund'] = symbol
    dfList.append(tmp)
    # Stop once the window just requested reaches today
    if toDt >= datetime.date.today():
        break
    fromDt = toDt + datetime.timedelta(days=1)
    toDt = toDt + datetime.timedelta(days=365)

# Only discard rows that lack a date or a price. A missing dividend is not a
# reason to drop a row: the clean-up cell further down fills NaN dividends
# with 0, which a blanket dropna() here would make impossible.
upd = pd.concat(dfList).drop_duplicates().dropna(subset=['date', 'price'])
upd = upd.sort_values(['fund', 'date']).reset_index(drop=True)
# createDF returns dates as text; convert to datetime.date to match df['date']
upd['date'] = upd['date'].apply(lambda x: pd.to_datetime(x).date())

# ignore_index avoids repeated index labels in the combined frame
df = pd.concat([df, upd], ignore_index=True)

In [21]:
# Inspect the most recent API response held in apiResp
apiResp

{'pagination': {'limit': 1000, 'offset': 0, 'count': 0, 'total': 0},
 'data': []}

In [23]:
# Inspect the end date of the most recently requested window
toDt

datetime.date(2024, 2, 1)

In [71]:
# Check that all funds were updated: show the latest date on record for each fund
df.groupby('fund')['date'].max()

fund
990100.INDX       2022-07-15
ARKK              2022-08-11
CCJ               2022-08-11
COMB              2022-08-11
MSCIALL.INDX      2022-07-15
MSCIEAFE.INDX     2022-07-15
MSCIWORLD.INDX    2022-07-15
QYLD              2022-08-11
RYLD              2022-08-11
SP500TR.INDX      2022-08-12
Name: date, dtype: object

In [58]:
# Fill NaN dividend with 0 -- only the dividend column, so a missing price,
# date or fund is never silently turned into 0.
# This runs before de-duplication so that a row with a NaN dividend and the
# same row with a 0 dividend collapse into one.
df = df.fillna({'dividend': 0})
# Just in case I ran the same symbol twice
df = df.drop_duplicates()

In [59]:
# Write the merged history back to the location the load cell reads from
# (cfg['ETrade']['dataLoc']); the previous cfg['dataLoc'] key did not match it.
df.to_csv(cfg['ETrade']['dataLoc']+'fundData.csv', index=False)