In [1]:
import os
import shutil
import datetime
import pandas as pd

In [2]:
urls = { 
    "ARKK": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv',
    "ARKQ": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_AUTONOMOUS_TECHNOLOGY_&_ROBOTICS_ETF_ARKQ_HOLDINGS.csv',
    "ARKW": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_NEXT_GENERATION_INTERNET_ETF_ARKW_HOLDINGS.csv',
    "ARKG": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_FINTECH_INNOVATION_ETF_ARKF_HOLDINGS.csv',
    "ARKF": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_FINTECH_INNOVATION_ETF_ARKF_HOLDINGS.csv',
    "ARKP": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/THE_3D_PRINTING_ETF_PRNT_HOLDINGS.csv',
    "ARKI": 'https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_ISRAEL_INNOVATIVE_TECHNOLOGY_ETF_IZRL_HOLDINGS.csv'
}



In [3]:
def download_today_files(urls):

    for fund,url in urls.items():

        print("PROCESSING {}".format(fund))

        # get today YMD 
        today_str = datetime.datetime.now().strftime("%Y_%m_%d")

        # setup Fund dir
        funddir = os.path.join(os.getcwd(),fund)
        if not os.path.exists(funddir):
            os.mkdir(funddir)

        # setup archive dir
        archive_dir = os.path.join(funddir,"archive")
        if not os.path.exists(archive_dir):
            os.mkdir(archive_dir)

        # Setup outfn
        fund_date_str = fund+"_"+today_str+".csv"
        otufn = os.path.join(funddir,fund_date_str)

        dl_cmd = 'curl -o {} {}'.format(otufn,url)
        os.system(dl_cmd)
        print(dl_cmd)
        
    return "Fetched today's ARK data"

def diff_yday_today(fund_name):
    
    '''
    Joe's function
    '''
    
    # Setup dirs, get files
    fund_dir = os.path.join(os.getcwd(), fund_name)
    today_file = [os.path.join(fund_dir,x) for x in os.listdir(fund_dir) if fund_name in x]    
    
    archive_dir = os.path.join(fund_dir, "archive")
    archive_dir_list = [os.path.join(archive_dir,x) for x in os.listdir(archive_dir) if x.endswith(".csv")]
    archive_dir_list_sorted = sorted(archive_dir_list, reverse = True)

    # read files as dfs        
    ydf = pd.read_csv(archive_dir_list_sorted[0])
    tdf = pd.read_csv(today_file[0])
    
    # Drop footers 
    ydf.drop(ydf.tail(3).index,inplace=True)
    tdf.drop(tdf.tail(3).index,inplace=True)
    
    # Difference each ticker yesterday to today 
    outdict = {}
    
    for ticker in ydf['ticker'].unique()[:]:
        today_nshares = tdf[tdf['ticker'] == ticker]['shares'].values
        yday_nshares = ydf[ydf['ticker'] == ticker]['shares'].values
        sharediff = today_nshares - yday_nshares
        outdict[ticker] = sharediff

    outdf = pd.DataFrame.from_dict(outdict, orient='index')
    outdf.columns = ['share_diff']
    
    return outdf

def clean_up(fund_name = "ARKK"):
    fund_dir = os.path.join(os.getcwd(), fund_name)
    today_file = [os.path.join(fund_dir,x) for x in os.listdir(fund_dir) if fund_name in x if x.endswith(".csv")][0]
    archive_dir = os.path.join(fund_dir, "archive")
    archived_fn = os.path.join(archive_dir,os.path.split(today_file)[1])
    
    if not os.path.exists(archived_fn):
        print("asdf")
        shutil.move(today_file, archived_fn)
    
    print("PROCESSED {}".format(archived_fn))
    return archived_fn

In [4]:
def main():
    download_today_files(urls)
    funds = list(urls.keys())
    
    t = diff_yday_today("ARKK")
    clean_up("ARKK")
    
    print(t)

main()

PROCESSING ARKK
curl -o /Users/aakash/Desktop/junkyard/ARKK/ARKK_2021_02_02.csv https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_INNOVATION_ETF_ARKK_HOLDINGS.csv
PROCESSING ARKQ
curl -o /Users/aakash/Desktop/junkyard/ARKQ/ARKQ_2021_02_02.csv https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_AUTONOMOUS_TECHNOLOGY_&_ROBOTICS_ETF_ARKQ_HOLDINGS.csv
PROCESSING ARKW
curl -o /Users/aakash/Desktop/junkyard/ARKW/ARKW_2021_02_02.csv https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_NEXT_GENERATION_INTERNET_ETF_ARKW_HOLDINGS.csv
PROCESSING ARKG
curl -o /Users/aakash/Desktop/junkyard/ARKG/ARKG_2021_02_02.csv https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_FINTECH_INNOVATION_ETF_ARKF_HOLDINGS.csv
PROCESSING ARKF
curl -o /Users/aakash/Desktop/junkyard/ARKF/ARKF_2021_02_02.csv https://ark-funds.com/wp-content/fundsiteliterature/csv/ARK_FINTECH_INNOVATION_ETF_ARKF_HOLDINGS.csv
PROCESSING ARKP
curl -o /Users/aakash/Desktop/junkyard/ARKP/ARKP_2021_02_02.csv htt