In [3]:
Import modules

import pandas as pd  #
import requests  #
import numpy as np
import json  #
from dotenv import load_dotenv  #
import sys
import os  #
from alpaca_trade_api.rest import TimeFrame, URL
import alpaca_trade_api as tradeapi
import quandl
from MCForecastTools import MCSimulation
import pytz
import datetime as dt
import hvplot.pandas
import panel as pn




%matplotlib inline

In [4]:
#import environment variables
load_dotenv()
alpaca_api_key = os.getenv('ALPACA_API_KEY')
alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')
type(alpaca_api_key)

str

In [5]:
#Create the Alpaca API object
alpaca = tradeapi.REST(
    alpaca_api_key,
    alpaca_secret_key,
    api_version='v2')

In [6]:
#Establish ARK API variables -- base url for api calls, request type i.e. profile, trades, etc., etf_symbol for desired etf and additional arguments as parameters
 
holdings_symbol = 'ARKK'
holdings_url = 'https://arkfunds.io/api/v2/etf/holdings'  

#Initial API call to establish current positions for ARKK
# need to code for an error response if API call is unsuccessfsul i.e. if response.status_code == 200:
response = requests.get(holdings_url, params = {'symbol' : 'ARKK'}).json()
print(json.dumps(response, indent=4, sort_keys=True))



{
    "date_from": "2021-11-09",
    "date_to": "2021-11-09",
    "holdings": [
        {
            "company": "TESLA INC",
            "cusip": "88160R101",
            "date": "2021-11-09",
            "fund": "ARKK",
            "market_value": 2307938161.68,
            "share_price": 1162.94,
            "shares": 1984572,
            "ticker": "TSLA",
            "weight": 10.81,
            "weight_rank": 1
        },
        {
            "company": "TELADOC HEALTH INC",
            "cusip": "87918A105",
            "date": "2021-11-09",
            "fund": "ARKK",
            "market_value": 1456944663.04,
            "share_price": 146.24,
            "shares": 9962696,
            "ticker": "TDOC",
            "weight": 6.82,
            "weight_rank": 2
        },
        {
            "company": "COINBASE GLOBAL INC -CLASS A",
            "cusip": "19260Q107",
            "date": "2021-11-09",
            "fund": "ARKK",
            "market_value": 1348419273.6,
        

In [7]:
# We want to create a dataframe with the relevant 'holdings' data from the json object returned above
holdings_df = pd.DataFrame(response['holdings'])
display(holdings_df.tail())

#Check for null values in our DataFrame
display(holdings_df.isnull().sum())

## We can see we have one 'None' value in our tickers column.  When we research the name of the company, we find the ticker to be 'DGCXX', so we can fill that value with the appropriate ticker.
holdings_df.loc[[41],['ticker']] = 'DGCXX'



Unnamed: 0,fund,date,ticker,company,cusip,shares,market_value,share_price,weight,weight_rank
40,ARKK,2021-11-09,CERS,CERUS CORP,157085101,9993578,77650101.06,7.77,0.36,41
41,ARKK,2021-11-09,,DREYFUS GOVT CASH MAN INS,X9USDDGCM,48225078,48225078.28,1.0,0.23,42
42,ARKK,2021-11-09,CGEN,COMPUGEN LTD,M25722105,5257652,35173691.88,6.69,0.16,43
43,ARKK,2021-11-09,PRLB,PROTO LABS INC,743713109,57756,3650179.2,63.2,0.02,44
44,ARKK,2021-11-09,NSTG,NANOSTRING TECHNOLOGIES INC,63009R109,1050,53025.0,50.5,0.0,45


fund            0
date            0
ticker          1
company         0
cusip           0
shares          0
market_value    0
share_price     0
weight          0
weight_rank     0
dtype: int64

In [8]:
# For our purposes we want to focus on the 'ticker','weight', and 'company' columns of the dataframe.  This will allow us to perform historical research on the stocks as well as perform projections using a Monte Carlo simulation.  
filtered_df = holdings_df[['ticker', 'weight', 'company']]
filtered_df.sort_values(by = 'weight', inplace=True )
display(filtered_df.head())

# Note that for our Monte Carlo simulations, we will need to divide the weights column by 100 since the sum of weights for the simulation needs to be 1, and the dataframe is configured for the sum to be 100.

filtered_df.hvplot.bar(x='ticker', y = 'weight', hover_color = 'red', hover_cols = ['ticker', 'weight', 'company'],rot=90, title = 'Stock tickers and their corresponding weights in the portfolio')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,ticker,weight,company
44,NSTG,0.0,NANOSTRING TECHNOLOGIES INC
43,PRLB,0.02,PROTO LABS INC
42,CGEN,0.16,COMPUGEN LTD
41,DGCXX,0.23,DREYFUS GOVT CASH MAN INS
40,CERS,0.36,CERUS CORP


AttributeError: 'DataFrame' object has no attribute 'hvplot'

In [9]:
## Todo -- we want to be able to perform various risk and performance analyses both using historical data and projections.  
# Since we have the tickers and weights from the portfolio, we can plug these in and run several calculations.
# Ultimately we would like to allow users to change up portfolio weights, tickers, etc. to see what kind of effect that would have on our risk and return metrics.
# We could even set up framework for stock trades to be made via alpaca whenever the ARKK fund executes trades (may be a lag)
# We would like to be able to compare this fund's performance vs. other funds, indices, etc.  
# We want to be able to solicit user input on the amount they would like to invest and use that figure in our calculations.

In [22]:
# Below we have created functions to perform various tasks that we use repetitively to cut down on time (move to a separate module/s?)


def get_historical_dataframe (ticker, start_date, end_date, timeframe):
    ticker_df = alpaca.get_barset(ticker, timeframe, end = end_date, start = start_date, limit = 1000).df
    ticker_df.reindex(columns = ticker_df.columns)
    return ticker_df
def filter_close_prices(dataframe):
    df_close = pd.DataFrame()
    df_close['close'] = dataframe['close']
    return df_close
def calc_daily_returns(df_close_prices):
    daily_returns = df_close_prices.pct_change().dropna()
    return daily_returns


#Use data from ARKK API call to get historical quotes from Alpaca
tickers = filtered_df['ticker'].astype(str).tolist()
timeframe = '1D'
today = pd.Timestamp.now(tz="America/New_York")
three_years_ago = pd.Timestamp(today - pd.Timedelta(days=1095)).isoformat()
end_date = today
start_date = three_years_ago
# Here we are retrieving the historical data for the stocks in the ARKK portfolio.  
# We then filter the results to leave us with closing price and ticker columns with a datetime index 
# so we can run our analyses.
portfolio_df = get_historical_dataframe(tickers, start_date, end_date, timeframe)
portfolio_df.head()
daily_returns_df = pd.DataFrame()

# Calculating daily returns for our portfolio stocks.
for ticker in tickers:
    daily_returns = calc_daily_returns(portfolio_df[ticker]['close'])
    daily_returns_df = pd.concat([daily_returns_df, daily_returns], axis=1)
daily_returns_df.columns = tickers
display(daily_returns_df)





Unnamed: 0,NSTG,PRLB,CGEN,DGCXX,CERS,MTLS,BLI,TRMB,SSYS,IOVA,...,PATH,SQ,SHOP,SPOT,ZM,ROKU,U,COIN,TDOC,TSLA
2018-11-08 05:00:00+00:00,-0.011170,-0.010632,-0.041252,,-0.000847,-0.006775,,0.002387,0.009117,0.052459,...,,-0.090496,0.008973,-0.060808,,-0.222562,,,-0.042924,0.009376
2018-11-09 05:00:00+00:00,0.063123,-0.012247,-0.017804,,-0.028814,-0.038881,,-0.025933,-0.025051,-0.067497,...,,-0.026185,-0.057105,0.007737,,-0.039991,,,-0.043319,-0.002163
2018-11-12 05:00:00+00:00,-0.045625,-0.034077,-0.057402,,-0.082024,-0.022711,,-0.024450,-0.030329,0.044543,...,,-0.046720,-0.038650,-0.048819,,-0.055998,,,-0.031040,-0.055288
2018-11-13 05:00:00+00:00,0.021611,-0.001491,0.016026,,0.009506,-0.002179,,0.000696,-0.009557,-0.022388,...,,0.027959,0.023753,0.002361,,0.023149,,,-0.033851,0.022528
2018-11-14 05:00:00+00:00,-0.005769,0.018081,0.025237,,-0.050847,-0.008006,,-0.013218,-0.028728,-0.040349,...,,-0.027302,0.028823,0.017321,,0.017205,,,-0.018971,0.016361
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-01 04:00:00+00:00,-0.002481,0.073733,0.024615,,0.033384,0.085062,0.052903,-0.000916,0.075364,0.020156,...,0.050348,0.002161,0.040493,0.038736,0.014746,0.041049,-0.017737,0.035056,0.012431,0.085534
2021-11-02 04:00:00+00:00,-0.019693,-0.060884,0.001502,,-0.017621,-0.014914,-0.024918,0.013641,-0.016784,-0.005645,...,-0.002653,-0.023720,-0.033431,-0.008741,0.006459,-0.013668,-0.032551,0.015802,-0.026604,-0.031113
2021-11-03 04:00:00+00:00,0.044195,0.028354,0.017991,,0.164425,0.008540,0.022204,-0.003845,-0.038634,0.007299,...,0.032675,0.014457,0.012726,-0.001006,0.018111,0.001533,0.025513,0.024360,0.046321,0.035703
2021-11-04 04:00:00+00:00,0.007493,0.028055,0.014728,,0.019255,0.038876,-0.018443,-0.017482,0.127259,-0.011675,...,0.057947,-0.022050,0.030542,-0.014802,-0.011520,-0.077026,0.028064,0.000087,-0.027742,0.013218


In [23]:
# Calculating standard deviations of the daily returns and cumulative product using daily return data.

daily_returns_std = daily_returns_df.std().sort_values()
daily_returns_cumprod = (1 + daily_returns_df).cumprod()
daily_returns_std.hvplot.bar(colorbar=True, xlabel='Stock Tickers', ylabel = 'Standard Deviation (x 100 to get %)', title='Stocks within the ARKK portfolio and the standard deviation of their daily returns', rot=90, color='orange')

In [24]:
# Adding SPY and ARKK data for reference and comparison purposes ('SPY' = S&P 500, ARKK = The ark etf portfolio as a unit)
spy_arkk_df = get_historical_dataframe(['SPY', 'ARKK'], start_date, end_date, timeframe)
spy_arkk_daily_returns = pd.concat([spy_arkk_df['SPY']['close'], spy_arkk_df['ARKK']['close']], axis = 1).pct_change().dropna()
spy_arkk_daily_returns.columns = ['SPY', 'ARKK']
display(spy_arkk_daily_returns)
spy_arkk_std = spy_arkk_daily_returns.std().sort_values().dropna()
display(spy_arkk_std)
spy_arkk_cumprod = (1 + spy_arkk_daily_returns).cumprod().dropna()
display(spy_arkk_cumprod)




Unnamed: 0_level_0,SPY,ARKK
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-08 00:00:00-05:00,-0.001673,-0.016206
2018-11-09 00:00:00-05:00,-0.009307,-0.018782
2018-11-12 00:00:00-05:00,-0.019220,-0.036633
2018-11-13 00:00:00-05:00,-0.001578,0.007719
2018-11-14 00:00:00-05:00,-0.007094,-0.011374
...,...,...
2021-11-01 00:00:00-04:00,0.001873,0.032338
2021-11-02 00:00:00-04:00,0.003891,-0.016621
2021-11-03 00:00:00-04:00,0.006192,0.012514
2021-11-04 00:00:00-04:00,0.004583,-0.001605


SPY     0.014344
ARKK    0.024743
dtype: float64

Unnamed: 0_level_0,SPY,ARKK
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-11-08 00:00:00-05:00,0.998327,0.983794
2018-11-09 00:00:00-05:00,0.989036,0.965317
2018-11-12 00:00:00-05:00,0.970026,0.929954
2018-11-13 00:00:00-05:00,0.968495,0.937133
2018-11-14 00:00:00-05:00,0.961625,0.926474
...,...,...
2021-11-01 00:00:00-04:00,1.637820,2.722210
2021-11-02 00:00:00-04:00,1.644192,2.676963
2021-11-03 00:00:00-04:00,1.654373,2.710463
2021-11-04 00:00:00-04:00,1.661956,2.706113


In [25]:
# Plotting a bar chart of portfolio daily returns standard deviations
portfolio_std_plot = daily_returns_std.hvplot(kind = 'bar', rot=90, label=False, legend=False)
arkk_std_plot = spy_arkk_std.hvplot(kind = 'bar', color = 'red', label=False, legend=False)
combined_plot = portfolio_std_plot * arkk_std_plot
combined_plot.opts(xlabel = 'Stock Ticker', ylabel = 'Standard Deviation', show_legend=False, title = 'Historical Standard Deviation of Daily Returns for stocks in the ARKK Portfolio')
combined_plot


In [31]:
# Plotting cumulative historical returns for our portfolio
portfolio_cum_plot = daily_returns_cumprod.hvplot(kind = 'line', rot=90, title = 'Cumulative returns for the individual stocks in ARKK', ylabel = 'Returns', xlabel = 'Date')
arkk_cum_plot = spy_arkk_cumprod.hvplot(kind = 'line', ylim = [0, 4], title = 'Cumulative Returns for the ARKK portfolio vs. the S&P 500')
combined_plot = portfolio_cum_plot * arkk_cum_plot
#combined_plot.opts(xlabel = 'Date', ylabel = 'Cumulative Returns', show_legend=True, title = 'Historical Cumulative Returns for stocks in the ARKK Portfolio', ylim = (0, 30))
display(portfolio_cum_plot)
display(arkk_cum_plot)
#display(combined_plot)


In [21]:
'''def configure_monte_carlo(dataframe, weights, num_simulations, num_trading_days):
    simulation_input_df = MCSimulation(
    portfolio_data = dataframe,
    weights = weights,
    num_simulation = num_simulations,
    num_trading_days = num_trading_days
    )
    return simulation_input_df

def run_monte_carlo(simulation_input_df):
    simulation_returns_df = simulation_input_df.calc_cumulative_return()
    return simulation_returns_df

def plot_simulation_outcomes(simulation_input_df):
    simulation_plot = simulation_input_df.plot_simulation()
    return simulation_plot

def plot_distribution(simulation_input_df):
    sim_dist_plot = simulation_input_df.plot_distribution()
    return sim_dist_plot

def get_monte_summary(simulation_input_df):
    summary_stats = simulation_input_df.summarize_cumulative_return()
    return summary_stats


arkk_sim_input = configure_monte_carlo(portfolio_df, weights_list, 500, 252*3)
daily_returns = pd.DataFrame(arkk_sim_input.portfolio_data)
daily_returns_df = pd.DataFrame()
for ticker in tickers:
    ticker_daily = pd.Series(daily_returns[ticker]['daily_return'], name = ticker)
    daily_returns_df = pd.concat([daily_returns_df, ticker_daily], axis = 1)

daily_returns_df.dropna(inplace=True)
returns_std = pd.DataFrame((daily_returns_df.std().sort_values()))
returns_std = (returns_std[returns_std < 0.05]).dropna()


SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-21-cf7286fb1726>, line 36)

In [None]:
#plot the distribution of the simulation results
arkk_plot_distribution = plot_distribution(arkk_sim_input)


In [None]:
arkk_summary_stats = get_monte_summary(arkk_sim_input)
print(arkk_summary_stats)