In [20]:
# process ark-trades csvs

import concurrent.futures
import pandas as pd
from pathlib import Path

In [41]:
# read files parallel
files = list(Path("./trades/").glob("*.csv"))

def parallel_read(f):
    try:
        df = pd.read_csv(f, header=3,
                         usecols=["FUND", "Date", "Direction", "Ticker", "CUSIP", "Name", "Shares", "% of ETF"]
                        )
        df.columns = df.columns.map(lambda x: x.lower())
        return df
    except Exception as err:
        print(err)

df = [] # list of dfs
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    df.extend(executor.map(parallel_read, files))
df = pd.concat(df).reset_index(drop=True)
df

Unnamed: 0,fund,date,direction,ticker,cusip,name,shares,% of etf
0,ARKF,2021-02-09,Buy,PYPL,70450Y103,PAYPAL HOLDINGS INC,20600.0,0.1671
1,ARKF,2021-02-09,Sell,LSPD,53227R106,LIGHTSPEED POS INC,44100.0,0.0923
2,ARKG,2021-02-09,Buy,NVS,66987V109,NOVARTIS AG,426773.0,0.3095
3,ARKG,2021-02-09,Buy,RHHBY,771195104,ROCHE HOLDING AG,200400.0,0.0697
4,ARKG,2021-02-09,Buy,REGN,75886F107,REGENERON PHARMACEUTICALS INC,28723.0,0.1131
...,...,...,...,...,...,...,...,...
197,ARKW,2021-02-08,Buy,U,91332U101,UNITY SOFTWARE INC,183300.0,0.4878
198,ARKW,2021-02-08,Sell,PINS,72352L106,PINTEREST INC,329200.0,0.3256
199,ARKW,2021-02-08,Sell,TCEHY,88032Q109,TENCENT HOLDINGS LTD,196000.0,0.2318
200,ARKW,2021-02-08,Sell,API,00851L103,AGORA INC,157993.0,0.1952


In [42]:
df["date"] = pd.to_datetime(df["date"])
df.sort_values(["date", "ticker"], inplace=True)
df = df.dropna().reset_index(drop=True)
df

Unnamed: 0,fund,date,direction,ticker,cusip,name,shares,% of etf
0,ARKW,2021-02-01,Sell,API,00851L103,AGORA INC,54100.0,0.0577
1,ARKQ,2021-02-01,Sell,AVAV,8073108,AEROVIRONMENT INC,6132.0,0.0241
2,ARKK,2021-02-01,Buy,BEAM,07373V105,BEAM THERAPEUTICS INC,58218.0,0.0241
3,ARKW,2021-02-01,Buy,BEKE,482497104,KE HOLDINGS INC,110900.0,0.0986
4,ARKQ,2021-02-01,Sell,BYDDY,05606L100,BYD CO LTD,31503.0,0.0671
...,...,...,...,...,...,...,...,...
196,ARKG,2021-02-11,Sell,PACB,69404D108,PACIFIC BIOSCIENCES OF CALIFORNIA INC,667121.0,0.2699
197,ARKF,2021-02-11,Buy,PYPL,70450Y103,PAYPAL HOLDINGS INC,17056.0,0.1228
198,ARKG,2021-02-11,Buy,RHHBY,771195104,ROCHE HOLDING AG,244902.0,0.0844
199,ARKW,2021-02-11,Buy,SHOP,82509L107,SHOPIFY INC,9370.0,0.1549


In [43]:
df.to_csv("./local/ark_trades_feb.csv", index=False)