In [1]:
%run ./utils.py

import concurrent.futures
from pathlib import Path

import numpy as np
import pandas as pd
import pandas_datareader.data as web
from tqdm import tqdm
# NOTE(review): main() below passes literal pool sizes (2 and 4) to its
# executors and never reads this name -- confirm whether it is still needed.
max_workers = 3


def _get_trade_openings(df_):
    """Attach the trade-date open price to every trade in a single-day frame.

    Args:
        df_: Raw trades for one trading day; must contain ``date`` and
            ``ticker`` columns. The input frame is not modified.

    Returns:
        A copy of ``df_`` indexed by ``"<ticker>_<date>"`` with an added
        ``Open`` column. Trades whose ticker has no fetched price are
        dropped by the inner join, and exact duplicate rows are removed.

    Raises:
        ValueError: If the frame does not hold exactly one distinct date.
    """
    df = df_.copy()
    if df["date"].nunique() != 1:
        raise ValueError("number of unique dates should be 1")
    trade_date = df.iloc[0]["date"]

    # One price request per distinct ticker, all on the trade date.
    open_configs = [
        {"ticker": ticker, "date": trade_date, "on": "Open"}
        for ticker in df["ticker"].unique()
    ]
    # The join below reads the "ticker", "Date" and "Open" columns of the
    # returned frame.
    # NOTE(review): behaviour when *no* symbol can be fetched depends on
    # the helper in utils.py -- confirm it still returns those columns.
    open_prices = UtilsFinancial.parallel_fetch_yahoo_daily(open_configs)

    # Join key: ticker + date, built identically on both sides.
    open_prices["key1"] = (
        open_prices["ticker"].astype(str) + "_" + open_prices["Date"].astype(str)
    )
    df["key1"] = df["ticker"].astype(str) + "_" + df["date"].astype(str)
    priced = df.set_index("key1").join(
        open_prices.set_index("key1")[["Open"]], how="inner"
    )
    return priced.drop_duplicates()


def _estimate_volume(priced_df):
    """Net the trades per (ticker, date, company, fund) and value them at open.

    Shares are signed by ``direction`` ("Buy" stays positive, anything else
    becomes negative), summed per group, and multiplied by the mean open
    price of the group.

    Args:
        priced_df: Trades with ``fund``, ``date``, ``direction``, ``ticker``,
            ``company``, ``shares`` and ``Open`` columns.

    Returns:
        A new frame with columns ``date, fund, ticker, company, signal,
        shares, Open, volume, abs_volume``. An empty input yields an empty
        frame with the same columns.
    """
    net = priced_df[
        ["fund", "date", "direction", "ticker", "company", "shares", "Open"]
    ].copy()

    # Vectorized sign flip. The previous row-wise DataFrame.apply returned a
    # DataFrame (not a Series) for an empty frame, which broke the column
    # assignment whenever the price join left no rows.
    net["shares"] = net["shares"].where(net["direction"] == "Buy", -net["shares"])

    net = net.groupby(["ticker", "date", "company", "fund"], as_index=False).agg(
        {"shares": "sum", "Open": "mean"}
    )

    # A net position of exactly zero is labelled "Sell", as before.
    net["signal"] = np.where(net["shares"] > 0, "Buy", "Sell")
    net["volume"] = net["shares"] * net["Open"]
    net["abs_volume"] = net["volume"].abs()

    return net[
        [
            "date",
            "fund",
            "ticker",
            "company",
            "signal",
            "shares",
            "Open",
            "volume",
            "abs_volume",
        ]
    ]


def _save_processed_df(processed_df):
    """Write one day's netted trades to ``./trades/processed/``.

    Rows are written in descending ``abs_volume`` order to
    ``<date>_ARK_TRADES.csv``. An empty frame is skipped (with a message)
    instead of failing with a misleading "more than 1 value" error.

    Raises:
        ValueError: If a non-empty frame does not hold exactly one date.
    """
    if processed_df.empty:
        # Nothing was priced for this day, so there is no date to name a
        # file after.
        print("Skipping empty frame: no priced trades to save")
        return
    if processed_df["date"].nunique() != 1:
        raise ValueError("more than 1 value for date")
    trade_date = processed_df["date"].iloc[0]
    processed_df.sort_values("abs_volume", ascending=False).to_csv(
        f"./trades/processed/{trade_date}_ARK_TRADES.csv", index=False
    )


def main():
    """Process every raw trade file that has no processed counterpart yet.

    A raw file counts as processed when a CSV with the same file name exists
    in ``./trades/processed/``. Each unprocessed file is read, priced at the
    trade date's open, netted per ticker and fund, and written back out.

    Returns:
        ``"Nothing to Process"`` when no unprocessed file is found,
        otherwise ``"OK"``.
    """
    # read unprocessed dfs
    processed_trades = {
        path.name for path in Path("./trades/processed/").glob("*.csv")
    }
    raw_trades = {path.name for path in Path("./trades/raw/").glob("*.csv")}
    # Sorted so the set's arbitrary iteration order no longer decides the
    # order in which files are submitted.
    unprocessed_trades = sorted(raw_trades - processed_trades)
    if not unprocessed_trades:
        return "Nothing to Process"
    dfs = UtilsIO.parallel_read_df(
        files=[f"./trades/raw/{name}" for name in unprocessed_trades],
        returns="list",
    )
    print(dfs[0].head(3))

    # open prices of traded stocks
    priced_trades = [_get_trade_openings(df_) for df_ in tqdm(dfs)]
    print(priced_trades[0].head(3))

    # net & volume of trades (assume on open price)
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        nets = list(
            tqdm(
                executor.map(_estimate_volume, priced_trades),
                total=len(priced_trades),
            )
        )
    print(nets[-1].head(3))

    # save processed locally
    # Create the target directory once, up front; to_csv does not create
    # missing parent directories.
    Path("./trades/processed/").mkdir(parents=True, exist_ok=True)
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        # list(...) drains the iterator so worker exceptions surface here.
        list(tqdm(executor.map(_save_processed_df, nets), total=len(nets)))

    return "OK"
# Run the pipeline; main() returns a status string
# ("Nothing to Process" or "OK").
main()

100%|██████████| 12/12 [00:00<00:00, 34497.36it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

   fund        date direction ticker      cusip  \
0  ARKG  2021-02-01       Buy   CDXS  192005106   
1  ARKG  2021-02-01       Buy   RPTX  760273102   
2  ARKG  2021-02-01      Sell   PACB  69404D108   

                                 company  shares  % of etf  
0                            CODEXIS INC   62497    0.0134  
1                REPARE THERAPEUTICS INC   10283    0.0034  
2  PACIFIC BIOSCIENCES OF CALIFORNIA INC  210508    0.0645  


 25%|██▌       | 3/12 [00:19<00:57,  6.41s/it]

No data fetched for symbol 1833 using YahooDailyReader


 92%|█████████▏| 11/12 [01:43<00:13, 13.32s/it]

No data fetched for symbol 1833 using YahooDailyReader
No data fetched for symbol ADYEN using YahooDailyReader
No data fetched for symbol 3690 using YahooDailyReader


100%|██████████| 12/12 [01:56<00:00,  9.75s/it]
100%|██████████| 12/12 [00:00<00:00, 66.95it/s]
  0%|          | 0/12 [00:00<?, ?it/s]

                 fund        date direction ticker      cusip  \
key1                                                            
API_2021-02-01   ARKW  2021-02-01      Sell    API  00851L103   
AVAV_2021-02-01  ARKQ  2021-02-01      Sell   AVAV  008073108   
BEAM_2021-02-01  ARKK  2021-02-01       Buy   BEAM  07373V105   

                               company  shares  % of etf        Open  
key1                                                                  
API_2021-02-01               AGORA INC   54100    0.0577   72.080002  
AVAV_2021-02-01      AEROVIRONMENT INC    6132    0.0241  120.040001  
BEAM_2021-02-01  BEAM THERAPEUTICS INC   58218    0.0241   99.709999  
         date  fund ticker                     company signal  shares  \
0  2021-02-10  ARKW    API                   AGORA INC   Sell  -17639   
1  2021-02-10  ARKG   CDNA                  CAREDX INC   Sell  -76084   
2  2021-02-10  ARKQ   EXPC  EXPERIENCE INVESTMENT CORP    Buy   36147   

         Open        volum

100%|██████████| 12/12 [00:00<00:00, 298.51it/s]


'OK'