# One Portfolio to Rule Them All!

This notebook collects financial asset data (stocks and cryptocurrency). It must be run in an environment that is compatible with the Alpaca API.

1. Collect data from APIs
2. Save to CSV
3. Collect all data in a SQL database

In [1]:
# Import all necessary libraries
import os
from pathlib import Path
from dotenv import load_dotenv
from ft_logger import *
from PolygonIO import PolygonIO
import pandas as pd
import alpaca_trade_api as tradeapi
import datetime
from dateutil.relativedelta import relativedelta

## Collect Data From APIs

In [2]:
# start logging
start_logging()

# Load environment variables (API credentials) from a local .env file, if present
load_dotenv()

# Set Alpaca API key and secret
alpaca_api_key=os.getenv('APCA_API_KEY_ID')
alpaca_secret_key=os.getenv('APCA_API_SECRET_KEY')
alpaca_endpoint=os.getenv('APCA_API_BASE_URL')

# Create the Alpaca API object
# NOTE(review): REST() is called without arguments, so the three values read above
# are not passed to it; presumably the client picks up the APCA_* environment
# variables on its own -- confirm against the alpaca_trade_api documentation.
alpaca = tradeapi.REST()

# Format current and previous date as ISO format
date = datetime.date.today()
date_fmt = date.strftime('%Y-%m-%d')
today = pd.Timestamp(date_fmt, tz='America/New_York').isoformat()

# Set start date of five years back from today.
# Sample results may vary from the solution based on the time frame chosen
five_yrs_ago = date - relativedelta(years=5)
five_yrs_ago = five_yrs_ago.strftime('%Y-%m-%d')
start_date = pd.Timestamp(five_yrs_ago, tz='America/New_York').isoformat()

# Set the tickers
tickers = ['AAPL', 'MSFT', 'PFE', 'DIS', 'DJIA']

# Set timeframe to "1Day" for Alpaca API
timeframe = "1Day"

# Upper bound on how many calendar days the end date may be stepped back before
# giving up, so a persistently empty response cannot loop forever.
MAX_END_DATE_RETRIES = 10

# Get daily bars for every ticker between start_date and today.
# The current day may be a day when the markets are closed (weekend, holiday, etc.)
# So, if the retrieved portfolio is empty, let's try the previous day.
# Bars are requested without any split adjustment on purpose: the split factors
# fetched below are applied to the close prices in the next cell, so asking the
# API for split-adjusted bars here would adjust the data twice.
tickers_df = alpaca.get_bars(tickers, timeframe, start=start_date, end=today).df.dropna()

end_date_retries = 0
while tickers_df.empty:
    if end_date_retries >= MAX_END_DATE_RETRIES:
        raise RuntimeError(
            f"No bar data returned for {tickers} after stepping the end date back "
            f"{MAX_END_DATE_RETRIES} days (last end date tried: {today})"
        )
    end_date_retries += 1
    date -= relativedelta(days=1)
    log.warn(f"End date {today} is invalid, trying prior day {date}")
    date_fmt = date.strftime('%Y-%m-%d')
    today = pd.Timestamp(date_fmt, tz='America/New_York').isoformat()
    # Same request as the first attempt (raw bars, NaN rows dropped) so both
    # code paths hand identical-looking data to the following cells.
    tickers_df = alpaca.get_bars(tickers, timeframe, start=start_date, end=today).df.dropna()

# change timestamp to date only
# `.date` on the index returns the datetime.date of every timestamp directly;
# mapping `lambda x: x.date` per element would return the bound method instead.
tickers_df.index = tickers_df.index.date

# Find any stock splits that may have occurred for each stock within the period
# The Polygon key is read from the POLYGON_API_KEY environment variable (e.g. set in
# the .env file loaded above); the original placeholder is kept as the fallback.
polygon_api_key = os.getenv('POLYGON_API_KEY', 'INSERT_POLYGONIO_API_KEY_HERE')
pg = PolygonIO(polygon_api_key, tickers, five_yrs_ago, date_fmt)
splits = pg.get_splits()
tickers_df.head()

Unnamed: 0,open,high,low,close,volume,trade_count,vwap,symbol
2018-04-18,177.81,178.82,176.88,177.84,20757396,136339,177.949113,AAPL
2018-04-19,174.96,175.39,172.66,172.8,34822915,230448,173.639715,AAPL
2018-04-20,170.595,171.2184,165.43,165.72,65552591,402201,167.067748,AAPL
2018-04-23,166.8348,166.92,164.09,165.24,36542216,238213,165.586598,AAPL
2018-04-24,165.67,166.33,161.2201,162.94,33695538,258898,163.622161,AAPL


In [3]:
# Apply stock splits to the data
# `splits` is walked as a collection of batches, each containing split records
# that carry 'time' (YYYY-MM-DD), 'ticker' and 'split_factor' entries.
# Only the 'close' column is rescaled; the other price columns are left untouched.
for split_batch in splits:
    for split_event in split_batch:
        # Rows of the affected ticker dated on or after the split's date
        effective_date = datetime.datetime.strptime(split_event['time'], '%Y-%m-%d').date()
        affected_rows = (tickers_df.index >= effective_date) & (tickers_df['symbol'] == split_event['ticker'])
        # Scale those closing prices by the split factor and write them back
        adjusted_close_df = tickers_df.loc[affected_rows]['close'].mul(split_event['split_factor'])
        tickers_df.loc[affected_rows, ['close']] = adjusted_close_df
log.info('Stock closing prices adjusted for splits.')

In [4]:
# Review and clean data
# Break the long-format frame into one frame per ticker, dropping the 'symbol'
# column since the ticker becomes the outer column level below.
tickers_df_list = []
for ticker in tickers:
    ticker_rows = tickers_df.loc[tickers_df['symbol'] == ticker]
    tickers_df_list.append(ticker_rows.drop(columns='symbol'))

# Place the per-ticker frames side by side under a (ticker, field) column index.
# join='inner' keeps only dates present for every ticker; dropna() then removes
# any remaining rows with missing values.
stocks_wide_df = pd.concat(tickers_df_list, axis='columns', join='inner', keys=tickers)
stocks_df = stocks_wide_df.dropna()

display(stocks_df.head())
display(stocks_df.tail())
log.info('Stocks data cleaned.')

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,MSFT,MSFT,MSFT,...,DIS,DIS,DIS,DJIA,DJIA,DJIA,DJIA,DJIA,DJIA,DJIA
Unnamed: 0_level_1,open,high,low,close,volume,trade_count,vwap,open,high,low,...,volume,trade_count,vwap,open,high,low,close,volume,trade_count,vwap
2022-02-24,152.58,162.85,152.0,650.04,141258414,1383835,158.018049,272.51,295.16,271.52,...,12449572,190450,145.726019,23.78,24.02,23.65,24.02,22738,52,23.76993
2022-02-25,163.78,165.12,160.8738,659.4,87680902,768624,163.788661,295.185,297.62,291.655,...,9677173,125585,149.054607,24.41,24.5814,24.19,24.5814,26826,172,24.27501
2022-02-28,163.06,165.42,162.43,660.48,90547539,743310,164.274417,294.31,299.14,293.0,...,10443190,118442,148.079387,24.67,24.67,24.44,24.5981,4076,113,24.53506
2022-03-01,164.695,166.6,161.97,652.8,79455454,701957,164.167482,296.4,299.97,292.15,...,8494450,129582,145.762479,24.78,24.78,24.22,24.2721,8225,107,24.338157
2022-03-02,164.39,167.36,162.95,666.24,76135254,631927,165.810466,295.36,301.44,293.698,...,7516010,110441,147.446804,24.65,24.71,24.4198,24.66,14268,123,24.555496


Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,AAPL,MSFT,MSFT,MSFT,...,DIS,DIS,DIS,DJIA,DJIA,DJIA,DJIA,DJIA,DJIA,DJIA
Unnamed: 0_level_1,open,high,low,close,volume,trade_count,vwap,open,high,low,...,volume,trade_count,vwap,open,high,low,close,volume,trade_count,vwap
2023-04-12,161.22,162.06,159.78,640.4,50164302,515535,160.883893,284.79,287.01,281.96,...,9298105,118951,99.327012,21.87,21.91,21.82,21.91,25188,278,21.858299
2023-04-13,161.63,165.8,161.42,662.24,68436844,595635,164.353317,283.59,289.9,283.17,...,8744879,95928,100.426837,21.81,21.93,21.75,21.84,59568,458,21.837493
2023-04-14,164.59,166.32,163.83,660.84,48280881,476387,165.025855,287.0,288.48,283.69,...,7105675,94736,99.621022,21.84,21.87,21.795,21.8,37136,328,21.851132
2023-04-17,165.09,165.39,164.03,660.92,40669651,474137,164.807277,289.93,291.6,286.16,...,6542098,88864,99.937498,21.95,21.99,21.81,21.85,25439,353,21.881546
2023-04-18,166.1,167.41,165.65,664.12,23425922,309880,166.344854,291.57,291.755,287.01,...,3745914,51439,100.299306,22.0,22.0,21.865,21.93,21224,162,21.899874


In [5]:
# Persist the cleaned stock data as a CSV file in the current working directory
output_file = Path('./stocks_data.csv')
stocks_df.to_csv(path_or_buf=output_file)
log.info(f"Output written to {output_file}")