In [36]:
#Import modules

import pandas as pd
import requests
import numpy as np
import json
from dotenv import load_dotenv
import sys
import os
from alpaca_trade_api.rest import TimeFrame, URL
import alpaca_trade_api as tradeapi
from MCForecastTools import MCSimulation
import datetime as dt
import hvplot.pandas

In [37]:
#import environment variables
load_dotenv()
alpaca_api_key = os.getenv('ALPACA_API_KEY')
alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')
type(alpaca_api_key)

str

In [38]:
#Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version='v2')

**Below we are going to define functions we will be using repeatedly within the project -- will probably need to be moved to a separate .py file in order to "modularize" our app.  We will then be able to import the relevant functions from the separate file.

In [39]:
def get_historical_dataframe (ticker, start_date, end_date, timeframe):
    ticker_df = alpaca.get_barset(ticker, timeframe, end = end_date, start = start_date, limit = 1000).df
    ticker_df.reindex(columns = ticker_df.columns)
    return ticker_df
def filter_close_prices(dataframe):
    df_close = pd.DataFrame()
    df_close['close'] = dataframe['close']
    return df_close
def calc_daily_returns(df_close_prices):
    daily_returns = df_close_prices.pct_change().dropna()
    return daily_returns

In [40]:
#Establish ARK API variables -- base url for api calls, request type i.e. profile, trades, etc., etf_symbol for desired etf and additional arguments as parameters
 
holdings_symbol = 'ARKK'
holdings_url = 'https://arkfunds.io/api/v2/etf/holdings'  

#Initial API call to establish current positions for ARKK
# need to code for an error response if API call is unsuccessfsul i.e. if response.status_code == 200:
response = requests.get(holdings_url, params = {'symbol' : 'ARKK'}).json()
print(json.dumps(response, indent=4, sort_keys=True))

{
    "date_from": "2021-11-09",
    "date_to": "2021-11-09",
    "holdings": [
        {
            "company": "TESLA INC",
            "cusip": "88160R101",
            "date": "2021-11-09",
            "fund": "ARKK",
            "market_value": 2307938161.68,
            "share_price": 1162.94,
            "shares": 1984572,
            "ticker": "TSLA",
            "weight": 10.81,
            "weight_rank": 1
        },
        {
            "company": "TELADOC HEALTH INC",
            "cusip": "87918A105",
            "date": "2021-11-09",
            "fund": "ARKK",
            "market_value": 1456944663.04,
            "share_price": 146.24,
            "shares": 9962696,
            "ticker": "TDOC",
            "weight": 6.82,
            "weight_rank": 2
        },
        {
            "company": "COINBASE GLOBAL INC -CLASS A",
            "cusip": "19260Q107",
            "date": "2021-11-09",
            "fund": "ARKK",
            "market_value": 1348419273.6,
        

**Something for us to consider -- would it be better to utilize dataframes or databases to manipulate and analyze our data?

In [41]:
# We want to create a dataframe with the relevant 'holdings' data from the json object returned above
holdings_df = pd.DataFrame(response['holdings'])
display(holdings_df)

#Check for null values in our DataFrame
display(holdings_df.isnull().sum())


Unnamed: 0,fund,date,ticker,company,cusip,shares,market_value,share_price,weight,weight_rank
0,ARKK,2021-11-09,TSLA,TESLA INC,88160R101,1984572,2307938000.0,1162.94,10.81,1
1,ARKK,2021-11-09,TDOC,TELADOC HEALTH INC,87918A105,9962696,1456945000.0,146.24,6.82,2
2,ARKK,2021-11-09,COIN,COINBASE GLOBAL INC -CLASS A,19260Q107,3809955,1348419000.0,353.92,6.32,3
3,ARKK,2021-11-09,U,UNITY SOFTWARE INC,91332U101,7139859,1164511000.0,163.1,5.45,4
4,ARKK,2021-11-09,ROKU,ROKU INC,77543R102,3819547,1058664000.0,277.17,4.96,5
5,ARKK,2021-11-09,ZM,ZOOM VIDEO COMMUNICATIONS-A,98980L101,3678904,956147100.0,259.9,4.48,6
6,ARKK,2021-11-09,SPOT,SPOTIFY TECHNOLOGY SA,L8681T102,3015571,865167300.0,286.9,4.05,7
7,ARKK,2021-11-09,SHOP,SHOPIFY INC - CLASS A,82509L107,504065,773119800.0,1533.77,3.62,8
8,ARKK,2021-11-09,SQ,SQUARE INC - A,852234103,3129312,740927200.0,236.77,3.47,9
9,ARKK,2021-11-09,PATH,UIPATH INC - CLASS A,90364P105,12956050,732016800.0,56.5,3.43,10


fund            0
date            0
ticker          1
company         0
cusip           0
shares          0
market_value    0
share_price     0
weight          0
weight_rank     0
dtype: int64

**To be done for project -- we need to find a solution for null values in our holdings dataframe as it could change and we do not necessarily want to have to dig in and figure out which value is null and what belongs there... possibly create an if/then statement for null values and how to handle them i.e. alert the user of the null value and provide options for  how to handle it.

In [42]:
# We can see we have one 'None' value in our tickers column.  When we research the name of the company, we find the ticker to be 'DGCXX', so we can fill that value with the appropriate ticker.
holdings_df.loc[[38],['ticker']] = 'DGCXX'

In [43]:
# For our purposes we want to focus on the 'ticker','weight', and 'company' columns of the dataframe.  This will allow us to perform historical research on the stocks as well as perform projections using a Monte Carlo simulation.  
filtered_df = holdings_df[['ticker', 'weight', 'company']]
display(filtered_df.head())

# Note that for our Monte Carlo simulations, we will need to divide the weights column by 100 since the sum of weights for the simulation needs to be 1, and the dataframe is configured for the sum to be 100.

filtered_df.hvplot.bar(x='ticker', y = 'weight', hover_color = 'red', hover_cols = ['ticker', 'weight', 'company'],rot=90, title = 'Stock tickers and their corresponding weights in the portfolio')

Unnamed: 0,ticker,weight,company
0,TSLA,10.81,TESLA INC
1,TDOC,6.82,TELADOC HEALTH INC
2,COIN,6.32,COINBASE GLOBAL INC -CLASS A
3,U,5.45,UNITY SOFTWARE INC
4,ROKU,4.96,ROKU INC


In [44]:
#Use data from ARKK API call to get historical quotes from Alpaca
tickers = filtered_df['ticker'].astype(str).tolist()
timeframe = '1D'
today = pd.Timestamp.now(tz="America/New_York")
three_years_ago = pd.Timestamp(today - pd.Timedelta(days=1095)).isoformat()
end_date = today
start_date = three_years_ago
# Here we are retrieving the historical data for the stocks in the ARKK portfolio.  
# We then filter the results to leave us with closing price and ticker columns with a datetime index 
# so we can run our analyses.
portfolio_df = get_historical_dataframe(tickers, start_date, end_date, timeframe)
display(portfolio_df.head())

Unnamed: 0_level_0,BEAM,BEAM,BEAM,BEAM,BEAM,BLI,BLI,BLI,BLI,BLI,...,Z,Z,Z,Z,Z,ZM,ZM,ZM,ZM,ZM
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-11-12 05:00:00+00:00,,,,,,,,,,,...,30.15,31.74,29.88,31.01,3838022,,,,,
2018-11-13 05:00:00+00:00,,,,,,,,,,,...,31.39,31.89,30.44,30.46,2608365,,,,,
2018-11-14 05:00:00+00:00,,,,,,,,,,,...,30.81,31.031,28.36,28.41,3992309,,,,,
2018-11-15 05:00:00+00:00,,,,,,,,,,,...,28.49,28.64,27.0,28.05,4609929,,,,,
2018-11-16 05:00:00+00:00,,,,,,,,,,,...,28.05,29.275,27.66,28.91,2457804,,,,,


**TBD for project -- how will we handle timeframes for our historical analyses i.e. do we want a hard coded time period or allow for user input?  Also how will this affect stocks that have no data for certain periods as well as those who have a more extensive price history.

In [45]:
#We will create a dataframe with the daily return data calculated from our retrieved historical data 

#Create empty dataframe
daily_returns_df = pd.DataFrame()
#Iterate through tickers, filter to isolate 'close' data, then run our calc_daily_returns function
for ticker in tickers:
    daily_returns = calc_daily_returns(portfolio_df[ticker]['close'])
    daily_returns_df = pd.concat([daily_returns_df, daily_returns], axis=1)
daily_returns_df.columns = tickers
display(daily_returns_df)


Unnamed: 0,TSLA,TDOC,COIN,U,ROKU,ZM,SPOT,SHOP,SQ,PATH,...,IOVA,TRMB,BLI,DGCXX,Z,CERS,None,CGEN,PRLB,NSTG
2018-11-13 05:00:00+00:00,0.022528,-0.033851,,,0.023149,,0.002361,0.023753,0.027959,,...,-0.022388,0.000696,,,-0.017736,0.009506,,0.016026,-0.001491,0.021611
2018-11-14 05:00:00+00:00,0.016361,-0.018971,,,0.017205,,0.017321,0.028823,-0.027302,,...,-0.040349,-0.013218,,,-0.067301,-0.050847,,0.025237,0.018081,-0.005769
2018-11-15 05:00:00+00:00,0.012378,0.050000,,,0.031742,,0.022851,0.051198,0.054131,,...,0.082955,0.008178,,,-0.012672,0.049603,,0.046154,0.026640,0.087041
2018-11-16 05:00:00+00:00,0.016877,-0.019910,,,0.000449,,-0.019347,0.006130,-0.041299,,...,0.006296,-0.006434,,,0.030660,-0.000945,,-0.020588,0.006031,0.003559
2018-11-19 05:00:00+00:00,-0.002286,-0.104452,,,-0.042424,,-0.056358,-0.114297,-0.107835,,...,-0.071950,-0.015484,,,-0.061570,-0.035005,,-0.066066,-0.045670,-0.024823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-03 04:00:00+00:00,0.035703,0.046321,0.024360,0.025513,0.001533,0.018111,-0.001006,0.012726,0.014457,0.032675,...,0.007299,-0.003845,0.022204,,-0.248809,0.164425,,0.017991,0.028354,0.044195
2021-11-04 04:00:00+00:00,0.013218,-0.027742,0.000087,0.028064,-0.077026,-0.011520,-0.014802,0.030542,-0.022050,0.057947,...,-0.011675,-0.017482,-0.018443,,0.030605,0.019255,,0.014728,0.028055,0.007493
2021-11-05 04:00:00+00:00,-0.006741,-0.025000,-0.021601,0.001978,-0.036684,-0.061922,-0.015297,-0.008040,-0.038901,-0.024344,...,0.017719,-0.001964,0.017954,,-0.020898,-0.003778,,-0.027576,-0.032779,-0.002010
2021-11-08 05:00:00+00:00,-0.048067,0.000000,0.049408,0.074098,-0.006024,-0.018655,-0.007404,0.003605,-0.002990,0.007129,...,0.001401,0.032183,0.006563,,0.024220,-0.018331,,-0.001493,0.025782,0.019335


**One thing to consider for our daily returns calculations.. it's possible we can just set up the Monte Carlo simulation and then pull the returned daily returns to use in our risk/return analyses

In [20]:
# Calculating standard deviations of the daily returns and plotting the results.
daily_returns_std = daily_returns_df.std().sort_values()
daily_returns_std.hvplot.bar(colorbar=True, xlabel='Stock Tickers', ylabel = 'Standard Deviation (x 100 to get %)', title='Stocks within the ARKK portfolio and the standard deviation of their daily returns', rot=90, color='orange')

In [22]:
#Calculating cumulative returns of the stocks within the portfolio and plotting the results
daily_returns_cumprod = (1 + daily_returns_df).cumprod()
portfolio_cum_plot = daily_returns_cumprod.hvplot(kind = 'line', rot=90, title = 'Cumulative returns for the individual stocks in ARKK', ylabel = 'Returns', xlabel = 'Date')
display(portfolio_cum_plot)