# Proof of concept and test ground for methods

The goal of this project is to create an ETL pipeline pulling ARKK's trades from the web. Using this data, to the best of my ability, create mock inverse trades using minute data (if available, closing price if not) to evaluate performance against the fund

In [6]:
from bs4 import BeautifulSoup as BS
import pandas as pd
import requests

In [7]:


df = pd.DataFrame(columns=["Fund", "Date", "Action", "Ticker", "Company", "Shares", "Percent"])

url_temp = "https://arkinvestdailytrades.com/"
url = "https://arkinvestdailytrades.com/?page=1"

condition = True
while condition:
    tbl = []
    data = requests.get(url).text
    soup = BS(data, 'html.parser')

    table = soup.find('table', class_="tables")

    for row in table.tbody.find_all('tr'):
        columns = row.find_all('td')

        if (columns != []):
            i = 0   
            r = []
            while i < 7:
                r.append(columns[i].text)
                i += 1

            tbl.append(r)

    #concatenate to df
    df = pd.concat([df, pd.DataFrame(tbl, columns=df.columns)], ignore_index=True)
            
    #next url
    pg = soup.find('ul', 'pagination')
    active_pg = pg.find('li', 'page-link disabled')
    try:
        next_url = active_pg.findNextSibling('li').a.get('href')
    except AttributeError:
        condition = False

    #asssign new url

    url = url_temp + next_url

In [8]:
df.head()

Unnamed: 0,Fund,Date,Action,Ticker,Company,Shares,Percent
0,ARKF,31 Aug 2023,\n Sell\n ...,SHOP,SHOPIFY INC,13704,0.1013
1,ARKF,31 Aug 2023,\n Buy\n ...,PLTR,PALANTIR TECHNOLOGIES INC,58326,0.0973
2,ARKG,31 Aug 2023,\n Buy\n ...,ACCD,ACCOLADE INC,43619,0.028
3,ARKG,31 Aug 2023,\n Buy\n ...,NTLA,INTELLIA THERAPEUTICS INC,6699,0.012
4,ARKG,31 Aug 2023,\n Sell\n ...,CELL,PHENOMEX INC,3037,0.0001


In [9]:
df.tail()

Unnamed: 0,Fund,Date,Action,Ticker,Company,Shares,Percent
14625,ARKW,9 Sep 2020,\n Buy\n ...,TWLO,TWILIO INC,15139,0.1529
14626,ARKW,9 Sep 2020,\n Sell\n ...,VMW,VMWARE INC,32028,0.1481
14627,ARKW,9 Sep 2020,\n Sell\n ...,ROKU,ROKU INC,36533,0.2588
14628,ARKW,9 Sep 2020,\n Sell\n ...,AYX,ALTERYX INC,18041,0.0892
14629,ARKW,9 Sep 2020,\n Sell\n ...,AAPL,APPLE INC,34120,0.1766


In [10]:
df.shape

(14630, 7)

In [11]:
import matplotlib.pyplot as plt

%matplotlib inline

In [47]:
#mock backtest function to "buy" and "sell" same number of shares at daily close. Adds equity to df
from datetime import datetime
import yfinance as yf

#convert dates to yf format in df
df['Date'] = df["Date"].apply(lambda x : datetime.strptime(x, '%d %b %Y').strftime('%Y-%m-%d'))

ValueError: time data '2023-08-31' does not match format '%d %b %Y'

In [None]:
df.head()

Unnamed: 0,Fund,Date,Action,Ticker,Company,Shares,Percent
0,ARKF,2023-08-31,\n Sell\n ...,SHOP,SHOPIFY INC,13704,0.1013
1,ARKF,2023-08-31,\n Buy\n ...,PLTR,PALANTIR TECHNOLOGIES INC,58326,0.0973
2,ARKG,2023-08-31,\n Buy\n ...,ACCD,ACCOLADE INC,43619,0.028
3,ARKG,2023-08-31,\n Buy\n ...,NTLA,INTELLIA THERAPEUTICS INC,6699,0.012
4,ARKG,2023-08-31,\n Sell\n ...,CELL,PHENOMEX INC,3037,0.0001


In [None]:
#focus just on ARKK
arkk = df[df['Fund']=='ARKK']
arkk.reset_index(drop=True, inplace=True)
arkk.head()

Unnamed: 0,Fund,Date,Action,Ticker,Company,Shares,Percent
0,ARKK,2023-08-31,\n Sell\n ...,SHOP,SHOPIFY INC,126207,0.1054
1,ARKK,2023-08-31,\n Buy\n ...,PLTR,PALANTIR TECHNOLOGIES INC,525292,0.099
2,ARKK,2023-08-30,\n Buy\n ...,NTLA,INTELLIA THERAPEUTICS INC,7441,0.0036
3,ARKK,2023-08-28,\n Buy\n ...,ACHR,ARCHER AVIATION INC,506873,0.0409
4,ARKK,2023-08-28,\n Sell\n ...,EXAS,EXACT SCIENCES CORP,29684,0.0325


In [None]:
tickers = set(arkk['Ticker'].unique())

#fix .US type tickers
problems = [x for x in tickers if "." in x]
for problem in problems:
    new = problem.split(".")[0]
    tickers.remove(problem)
    tickers.add(new)

tickers

{'ACHR',
 'BEAM',
 'BEKE',
 'BIDU',
 'BLI',
 'CERS',
 'CGEN',
 'COIN',
 'CRSP',
 'DDD',
 'DKNG',
 'DNA',
 'DOCU',
 'DOYU',
 'EDIT',
 'EXAS',
 'FATE',
 'FB',
 'HOOD',
 'HUYA',
 'ICE',
 'ILMN',
 'IOVA',
 'IRDM',
 'MCRB',
 'META',
 'MTLS',
 'NSTG',
 'NTDOY',
 'NTLA',
 'NVDA',
 'NVS',
 'NVTA',
 'ONVO',
 'PACB',
 'PATH',
 'PCAR',
 'PD',
 'PINS',
 'PLTR',
 'PRLB',
 'PSTG',
 'PYPL',
 'RBLX',
 'REGN',
 'ROKU',
 'SE',
 'SGFY',
 'SHOP',
 'SKLZ',
 'SNAP',
 'SNPS',
 'SPLK',
 'SPOT',
 'SQ',
 'SRNG',
 'SSYS',
 'SYRS',
 'TCEHY',
 'TDOC',
 'TER',
 'TREE',
 'TRMB',
 'TSLA',
 'TSM',
 'TSP',
 'TWLO',
 'TWOU',
 'TWST',
 'TWTR',
 'TXG',
 'U',
 'VCYT',
 'VERV',
 'WORK',
 'XLNX',
 'XONE',
 'Z',
 'ZM',
 'ZS'}

In [54]:
arkk['Date'] = pd.to_datetime(arkk['Date'])
arkk.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  arkk['Date'] = pd.to_datetime(arkk['Date'])


Unnamed: 0,Fund,Date,Action,Ticker,Company,Shares,Percent
0,ARKK,2023-08-31,\n Sell\n ...,SHOP,SHOPIFY INC,126207,0.1054
1,ARKK,2023-08-31,\n Buy\n ...,PLTR,PALANTIR TECHNOLOGIES INC,525292,0.099
2,ARKK,2023-08-30,\n Buy\n ...,NTLA,INTELLIA THERAPEUTICS INC,7441,0.0036
3,ARKK,2023-08-28,\n Buy\n ...,ACHR,ARCHER AVIATION INC,506873,0.0409
4,ARKK,2023-08-28,\n Sell\n ...,EXAS,EXACT SCIENCES CORP,29684,0.0325


In [69]:
start = arkk['Date'].min()
end = arkk['Date'].max()

dates = pd.date_range(start, end)

equity = pd.Series(index=dates)
equity.head()

  equity = pd.Series(index=dates)


2020-09-09   NaN
2020-09-10   NaN
2020-09-11   NaN
2020-09-12   NaN
2020-09-13   NaN
Freq: D, dtype: float64

In [None]:
#backtest with equity 

def backtest(equity : pd.Series, starting_cash=1_000_000) -> pd.Series:
    holdings = {} #{"TSLA": int shares}