In [1]:
%run ./utils.py

import concurrent.futures
from pathlib import Path

import numpy as np
import pandas as pd
import pandas_datareader.data as web
from tqdm import tqdm
# NOTE(review): main() below hard-codes its thread-pool sizes (2 and 4) and
# never reads this value -- confirm whether anything else depends on it.
max_workers = 3


def _attach_open_prices(df_):
    """Return a copy of one day's trades with each ticker's ``Open`` price joined on.

    Args:
        df_: trades DataFrame with at least ``date`` and ``ticker`` columns;
            every row must carry the same ``date``. The input is not modified.

    Returns:
        DataFrame indexed by ``"<ticker>_<date>"`` (index name ``key1``) with
        the original columns plus ``Open``. Rows whose key has no matching
        price row are dropped by the inner join, so the result may be empty.

    Raises:
        ValueError: if ``date`` does not hold exactly one distinct value.
    """
    df = df_.copy()
    if df["date"].nunique() != 1:
        raise ValueError("number of unique dates should be 1")
    trade_date = df["date"].iloc[0]

    # One price request per distinct ticker, taken on the trade date itself.
    open_configs = [
        {"ticker": ticker, "date": trade_date, "on": "Open"}
        for ticker in df["ticker"].unique()
    ]
    # The fetched frame must expose ``ticker``, ``Date`` and ``Open`` columns,
    # since the join below reads all three.
    open_prices = UtilsFinancial.parallel_fetch_yahoo_daily(open_configs)

    # join key: ticker + date
    open_prices["key1"] = open_prices["ticker"].astype(str) + "_" + open_prices["Date"].astype(str)
    df["key1"] = df["ticker"].astype(str) + "_" + df["date"].astype(str)
    df = df.set_index("key1").join(open_prices.set_index("key1")[["Open"]], how="inner")
    df.drop_duplicates(inplace=True)
    return df


def _net_trade_volume(priced_df):
    """Net the trades per ticker/date/company/fund and value them at the open price.

    Args:
        priced_df: trades with ``fund``, ``date``, ``direction``, ``ticker``,
            ``company``, ``shares`` and ``Open`` columns.

    Returns:
        DataFrame with columns ``date, fund, ticker, company, signal, shares,
        Open, volume, abs_volume``. ``shares`` is the signed net share count,
        ``volume`` is ``shares * Open`` and ``signal`` is ``"Buy"`` for a
        positive net and ``"Sell"`` otherwise (a net of exactly zero is
        therefore labelled ``"Sell"``).
    """
    net = priced_df[["fund", "date", "direction", "ticker", "company", "shares", "Open"]].copy()
    # Sign the share counts column-wise: "Buy" rows stay positive, every other
    # direction is negated. Unlike a row-wise apply this also works on an
    # empty frame.
    net["shares"] = net["shares"].where(net["direction"] == "Buy", -net["shares"])
    net = net.groupby(["ticker", "date", "company", "fund"], as_index=False).agg(
        {"shares": "sum", "Open": "mean"}
    )
    # new cols
    net["signal"] = net["shares"].gt(0).map({True: "Buy", False: "Sell"})
    net["volume"] = net["shares"] * net["Open"]
    net["abs_volume"] = net["volume"].abs()
    # reorder
    return net[["date", "fund", "ticker", "company", "signal", "shares", "Open", "volume", "abs_volume"]]


def _save_processed(processed_df):
    """Write one day's netted trades to ``./trades/processed/``, largest volume first.

    Args:
        processed_df: DataFrame with ``date`` and ``abs_volume`` columns whose
            rows all share one ``date``.

    Raises:
        ValueError: if ``date`` does not hold exactly one distinct value.
    """
    n_dates = processed_df["date"].nunique()
    if n_dates != 1:
        # Report the real count: an empty frame has 0 dates, not "more than 1".
        raise ValueError(f"expected exactly 1 trade date, found {n_dates}")
    trade_date = processed_df["date"].iloc[0]
    processed_df.sort_values("abs_volume", ascending=False).to_csv(
        f"./trades/processed/{trade_date}_ARK_TRADES.csv", index=False
    )


def main():
    """Price, net and save every raw trade file that has no processed counterpart.

    A raw file counts as processed when a CSV with the same file name exists in
    ``./trades/processed/``.
    NOTE(review): outputs are written as ``<date>_ARK_TRADES.csv``, so raw files
    presumably follow the same naming -- confirm, otherwise they are redone on
    every run.

    Returns:
        ``"Nothing to Process"`` when there is no unprocessed raw file (or none
        of them could be priced), otherwise ``"OK"``.
    """
    # read unprocessed dfs
    processed_trades = {path.name for path in Path("./trades/processed/").glob("*.csv")}
    raw_trades = {path.name for path in Path("./trades/raw/").glob("*.csv")}
    unprocessed_trades = raw_trades - processed_trades
    if not unprocessed_trades:
        return "Nothing to Process"
    # Sorted so the processing order (and the previews printed below) do not
    # depend on set iteration order.
    dfs = UtilsIO.parallel_read_df(
        files=[f"./trades/raw/{name}" for name in sorted(unprocessed_trades)],
        returns="list",
    )
    print(dfs[0].head(3))

    # open prices of traded stocks
    priced_trades = []
    for df_ in tqdm(dfs):
        priced = _attach_open_prices(df_)
        if priced.empty:
            # Nothing could be priced for this date; skip it so the raw file
            # stays unprocessed and is picked up again on a later run.
            print(f"No open prices found for trades dated {df_['date'].iloc[0]}; skipping")
            continue
        priced_trades.append(priced)
    if not priced_trades:
        return "Nothing to Process"
    print(priced_trades[0].head(3))

    # net & volume of trades (assume on open price)
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        nets = list(tqdm(executor.map(_net_trade_volume, priced_trades), total=len(priced_trades)))
    print(nets[-1].head(3))

    # save processed locally
    Path("./trades/processed/").mkdir(parents=True, exist_ok=True)
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        # list() drains the lazy map so any exception raised while saving surfaces here.
        list(tqdm(executor.map(_save_processed, nets), total=len(nets)))

    return "OK"
# Run the pipeline; as the cell's last expression, the returned status string is displayed.
main()

100%|██████████| 2/2 [00:00<00:00, 7876.63it/s]
  0%|          | 0/2 [00:00<?, ?it/s]

   fund        date direction ticker      cusip         company  shares  \
0  ARKQ  2021-03-05      Sell   AAPL   37833100       APPLE INC   27787   
1  ARKG  2021-03-05       Buy   ACCD  4.37E+104    ACCOLADE INC  206960   
2  ARKQ  2021-03-05      Sell   AMZN   23135106  AMAZON.COM INC    1303   

   % of etf  
0    0.1004  
1    0.0824  
2    0.1169  
No data fetched for symbol KSPILI using YahooDailyReader
No data fetched for symbol LSPDCN using YahooDailyReader


 50%|█████     | 1/2 [00:12<00:12, 12.15s/it]

No data fetched for symbol KSPILI using YahooDailyReader
No data fetched for symbol LSPDCN using YahooDailyReader


100%|██████████| 2/2 [00:21<00:00, 10.87s/it]
100%|██████████| 2/2 [00:00<00:00, 57.46it/s]
100%|██████████| 2/2 [00:00<00:00, 135.20it/s]

                 fund        date direction ticker      cusip         company  \
key1                                                                            
AAPL_2021-03-05  ARKQ  2021-03-05      Sell   AAPL   37833100       APPLE INC   
ACCD_2021-03-05  ARKG  2021-03-05       Buy   ACCD  4.37E+104    ACCOLADE INC   
AMZN_2021-03-05  ARKQ  2021-03-05      Sell   AMZN   23135106  AMAZON.COM INC   

                 shares  % of etf         Open  
key1                                            
AAPL_2021-03-05   27787    0.1004   120.980003  
ACCD_2021-03-05  206960    0.0824    39.310001  
AMZN_2021-03-05    1303    0.1169  3005.000000  
         date  fund ticker         company signal  shares         Open  \
0  2021-03-08  ARKG   ACCD    ACCOLADE INC    Buy   75140    40.110001   
1  2021-03-08  ARKQ   AMZN  AMAZON.COM INC   Sell    -297  3015.000000   
2  2021-03-08  ARKQ   AONE             ONE    Buy   13118    11.430000   

         volume    abs_volume  
0  3.013865e+06  3.0




'OK'