In [43]:
#Imports
import pandas as pd
import alpaca_trade_api as tradeapi
from pathlib import Path
import os
import json
from dotenv import load_dotenv

In [2]:
# Load the variables defined in the local .env file into the process environment,
# so the os.getenv lookups for the Alpaca keys later in the notebook can find them.
# The value echoed under this cell is load_dotenv's return value.
load_dotenv()

True

In [3]:
# Pull the Alpaca credentials out of the environment.
# Each lookup yields None when the variable is not set.
alpaca_api_key = os.environ.get("ALPACA_API_KEY")
alpaca_secret_key = os.environ.get("ALPACA_SECRET_KEY")

# REST client used for every Alpaca request in this notebook (API version "v2")
alpaca = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version="v2")

In [4]:
# Tickers analysed in this notebook: the top 10 stocks and the top 10
# cryptocurrencies, restricted to assets that have existed for at least 4 years.
stock_tickers = [
    "MSFT", "AAPL", "TSLA", "AMZN", "NVDA",
    "GOOG", "FB", "ADBE", "NFLX", "PYPL",
]
crypto_tickers = [
    "BTC", "ETH", "BNB", "ADA", "XRP",
    "DOGE", "LTC", "LINK", "BCH", "XLM",
]

In [46]:
# Bar size requested from Alpaca: one bar per day
timeframe = "1D"

# Fixed analysis window, 2018-02-01 through 2021-04-23, expressed as
# ISO-8601 strings localized to New York time
start_date = pd.Timestamp("2018-02-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2021-04-23", tz="America/New_York").isoformat()

# Row limit sent with the request (1000; presumably the API maximum -- confirm)
limit_rows = 1000

# Fetch the daily bars for every stock ticker over the window above and keep
# the DataFrame view of the response
stocks_df = alpaca.get_barset(
    stock_tickers, timeframe, start=start_date, end=end_date, limit=limit_rows
).df

# Preview the first rows
stocks_df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,ADBE,ADBE,ADBE,ADBE,ADBE,...,PYPL,PYPL,PYPL,PYPL,PYPL,TSLA,TSLA,TSLA,TSLA,TSLA
Unnamed: 0_level_1,open,high,low,close,volume,open,high,low,close,volume,...,open,high,low,close,volume,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2018-02-01 00:00:00-05:00,167.165,168.62,166.76,167.7,29765526,199.12,201.75,198.0845,199.38,1532011,...,79.96,80.95,76.7,78.4,37352330,351.0,359.66,348.63,349.25,3065835
2018-02-02 00:00:00-05:00,166.47,166.8,160.1,160.41,67919941,197.33,199.4,195.44,195.63,1410466,...,78.01,79.62,76.28,76.55,19063799,348.44,351.95,340.51,343.75,2730271
2018-02-05 00:00:00-05:00,159.1,163.88,156.0,156.49,59470254,194.06,198.46,188.0,190.27,2332614,...,75.99,78.98,74.68,74.7,15685427,337.97,344.47,333.0,333.15,3124163
2018-02-06 00:00:00-05:00,154.91,163.72,154.0,163.06,54780528,186.59,194.82,182.1,194.52,3130162,...,72.51,76.515,72.25,75.68,16194538,325.21,336.22,323.5,333.97,3703402
2018-02-07 00:00:00-05:00,163.085,163.4,159.0685,159.52,41035232,193.87,196.24,192.02,192.36,1714797,...,75.35,77.58,74.94,75.59,9497980,338.83,346.0,335.66,344.77,4422730


In [49]:
def _read_change_pct(ticker):
    """Return one coin's daily change as a fraction, indexed by date.

    Reads ``Crypto_Historical_Data/<ticker>.csv``, takes its ``Change %``
    column and converts it from percent to a fraction (divides by 100).

    Parameters
    ----------
    ticker : str
        Symbol used as the CSV file name.

    Returns
    -------
    pandas.Series
        Float series indexed by the file's ``Date`` column.

    Raises
    ------
    FileNotFoundError
        If the CSV for ``ticker`` does not exist.
    KeyError
        If the file has no ``Change %`` column.
    ValueError
        If a value still cannot be converted to float after cleaning.
    """
    history = pd.read_csv(
        Path(f"Crypto_Historical_Data/{ticker}.csv"),
        index_col="Date",
        parse_dates=True,
    )
    change = history["Change %"]
    if not pd.api.types.is_numeric_dtype(change):
        # Text values such as "1,234.5%": drop the percent sign and any
        # thousands separators so the float conversion below succeeds.
        change = (
            change.str.replace("%", "", regex=False)
            .str.replace(",", "", regex=False)
        )
    # Already-numeric columns need no cleaning; just rescale percent -> fraction.
    return change.astype("float").divide(100)


# Build one "<ticker>_%change" column per coin. The first coin assigned fixes
# the row index; every later coin is aligned to those dates.
crypto_daily_df = pd.DataFrame()
for crypto in crypto_tickers:
    crypto_daily_df[f"{crypto}_%change"] = _read_change_pct(crypto)

# Keep only the dates on which every coin has a value
# (use fillna(0) instead if gaps should be treated as "no change")
crypto_daily_df = crypto_daily_df.dropna()
crypto_daily_df.head()

Unnamed: 0_level_0,BTC_%change,ETH_%change,BNB_%change,ADA_%change,XRP_%change,DOGE_%change,LTC_%change,LINK_%change,BCH_%change,XLM_%change
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2021-04-25,-0.0225,0.0454,0.0137,-0.0111,-0.0155,-0.07,0.0047,0.0236,-0.0185,-0.0211
2021-04-24,-0.0206,-0.0639,-0.0552,-0.0473,-0.1026,0.0856,-0.0671,-0.1119,-0.08,-0.056
2021-04-23,-0.0113,-0.0141,0.041,0.0092,0.0098,-0.0463,-0.0458,-0.0029,-0.0177,0.0008
2021-04-22,-0.0388,0.0171,-0.069,-0.0483,-0.102,-0.1467,-0.0189,-0.0288,-0.061,-0.084
2021-04-21,-0.0471,0.0123,-0.0752,-0.0496,-0.069,-0.038,-0.0119,-0.0697,-0.033,-0.0708


In [7]:
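## Read in crypto data -- earlier per-ticker version, left commented out; the loop over crypto_tickers in the cell above reads these same CSV files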
# btc_df = pd.read_csv(Path("Crypto_Historical_Data/BTC.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# eth_df = pd.read_csv(Path("Crypto_Historical_Data/ETH.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# bnb_df = pd.read_csv(Path("Crypto_Historical_Data/BNB.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# ada_df = pd.read_csv(Path("Crypto_Historical_Data/ADA.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# xrp_df = pd.read_csv(Path("Crypto_Historical_Data/XRP.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# doge_df = pd.read_csv(Path("Crypto_Historical_Data/DOGE.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# ltc_df = pd.read_csv(Path("Crypto_Historical_Data/LTC.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# link_df = pd.read_csv(Path("Crypto_Historical_Data/LINK.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# bch_df = pd.read_csv(Path("Crypto_Historical_Data/BCH.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)
# xlm_df = pd.read_csv(Path("Crypto_Historical_Data/XLM.csv"), index_col="Date", parse_dates=True, infer_datetime_format=True)