# Portfolio Workflow

In [1]:
from platform import python_version
import time
from datetime import datetime, timedelta
import os
import pandas as pd
import pandas_datareader as pdr

import numpy as np
import math
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pickle

%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (20, 8)

# Set the import path for the tools directiory
import sys
# insert at position 1 in the path, as 0 is the path of this file.
sys.path.insert(1, 'tools')
import importlib
import ameritrade_functions as amc
importlib.reload(amc)
import trading_factors as alpha_factors
importlib.reload(alpha_factors)
import utils
importlib.reload(utils)
import nonoverlapping_estimator as ai_estimator
importlib.reload(ai_estimator)

print(f'Python version: {python_version()}')
print(f'Pandas version: {pd.__version__}')
print(f'Pandas Data Reader version: {pdr.__version__}')

Sci-Kit version: 0.24.1
Sci-Kit version: 0.24.1
Python version: 3.8.8
Pandas version: 1.4.0
Pandas Data Reader version: 0.10.0


In [2]:
from pathlib import Path

# Ensure the local data directory exists before any file is written to it.
Path('./data').mkdir(exist_ok=True, parents=True)

# The account to work with, identified by its masked account number.
masked_account_number = '#---9216'

# CSV locations used by this notebook; the per-account portfolio file is
# keyed by the last four characters of the masked account number.
account_portfolios_file_name = 'data/portfolio_data.csv'
portfolio_file_name = f'data/portfolio_{masked_account_number[-4:]}.csv'
price_histories_file_name = 'data/price_histories_yahoo.csv'

# Stage 1: Generate Stock Universe

- Gather stocks from specific criteria (SP500 top 50...)
- Use stock sentiment to select stocks
- Gather price histories

## Stock Universe

Here we set up the universe. This needs some work. The long-term goal is to use a pipeline process to help select stocks that are in the top 500 or something similar.

For now we will use stocks from the portfolio, but also stocks of interest (high news items) and a list of well-known stocks (this list has also been augmented with some stocks that made Ameritrade's top 10 movers for a couple of days). This Ameritrade function has not been coded yet, but should be added down the line to automate pulling these tickers.

# Price History data

Once you have a set of investments you want to work with, you will need to pull some historical data for them.

We will obtain 5 years of price histories. In the end this will provide us with 2 years of factor data since some of the factors are based on 1 year returns.

In [3]:
# Start from the S&P 500 listing returned by the utils helper, then let the
# sentiment screen reduce that ticker set to the working universe.
snp_500_stocks = utils.get_snp500()
snp_500_tickers = set(snp_500_stocks.index.to_list())
stock_universe = utils.reduce_universe_by_sentiment(snp_500_tickers)

Number of stocks in universe: 504


Tickers:   0%|          | 0/504 [00:00<?, ?Finvis Postings/s]

News Tables:   0%|          | 0/502 [00:00<?, ?News Table Items/s]

Mean Sentiment: 2.1021627673684624 with a standared deviation of: 1.6540200683117419 providing a cutoff of: 0.44814269905672055
New number of stocks in universe: 424


In [4]:
from dateutil.relativedelta import relativedelta

# Download window: the last `number_of_years` years, ending yesterday.
# "Today" is captured once so that start and end share the same anchor.
number_of_years = 5
today = datetime.today()
start = today - relativedelta(years=number_of_years)
end = today - relativedelta(days=1)

# Fetch adjusted daily prices for every ticker in the universe from Yahoo.
yahoo = pdr.yahoo.daily.YahooDailyReader(
    symbols=stock_universe,
    start=start,
    end=end,
    adjust_price=True,
    interval='d',
    get_actions=False,
    adjust_dividends=True,
)
try:
    price_histories = yahoo.read()
finally:
    # Always release the reader, even when the download fails part-way.
    yahoo.close()

# Persist the download so later stages can work from the CSV.
price_histories.to_csv(price_histories_file_name, index=True)

In [5]:
# Read the saved CSV back in as a check: the first two rows form the
# two-level column header and the first column becomes the index.
test_read = pd.read_csv(
    price_histories_file_name,
    header=[0, 1],
    index_col=[0],
    low_memory=False,
)

In [6]:
# Show the re-loaded frame to eyeball the round trip; the rendered output is
# abbreviated (note the "..." row and column in the display).
test_read


Attributes,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,Adj_Ratio,...,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume,Volume
Symbols,AAL,AAP,ABBV,ABC,ABT,ACN,ADBE,ADI,ADM,ADP,...,WST,WTW,WY,XEL,XOM,XRAY,XYL,YUM,ZBRA,ZTS
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2017-04-20,0.968483,0.965229,0.795611,0.917661,0.919901,0.928828,1.0,0.906076,0.860876,0.899866,...,339300.0,488800.0,4268800.0,2919900.0,11649100.0,1307500.0,1191300.0,2448100.0,271500.0,2232800.0
2017-04-21,0.968482,0.965229,0.795611,0.917661,0.919901,0.928828,1.0,0.906076,0.860876,0.899866,...,306700.0,798100.0,4338800.0,1937700.0,9816700.0,1381800.0,1499700.0,2595700.0,214800.0,2124700.0
2017-04-24,0.968482,0.965230,0.795611,0.917661,0.919901,0.928828,1.0,0.906076,0.860876,0.899866,...,232100.0,859900.0,5779400.0,2264800.0,8729700.0,1140300.0,1796500.0,2525500.0,304500.0,2770600.0
2017-04-25,0.968482,0.965230,0.795611,0.917661,0.919901,0.928828,1.0,0.906076,0.860876,0.899866,...,438300.0,505900.0,7561600.0,2027500.0,10733800.0,1097000.0,1366400.0,3411300.0,358000.0,4346100.0
2017-04-26,0.968483,0.965229,0.795611,0.917661,0.919901,0.928828,1.0,0.906076,0.860876,0.899866,...,626300.0,819500.0,8544700.0,2135600.0,9445900.0,883800.0,1197400.0,1684300.0,245600.0,2665800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-04-11,1.000000,1.000000,0.991573,1.000000,0.996030,0.997031,1.0,1.000000,1.000000,1.000000,...,311000.0,535000.0,3780100.0,2752300.0,21800100.0,1728600.0,1076700.0,2199600.0,249100.0,2252200.0
2022-04-12,1.000000,1.000000,0.991573,1.000000,0.996030,0.997031,1.0,1.000000,1.000000,1.000000,...,288000.0,725100.0,3150600.0,2872300.0,22444200.0,1266300.0,1172900.0,2396300.0,276100.0,1746500.0
2022-04-13,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,...,509000.0,742100.0,3740600.0,2274800.0,19306800.0,1213400.0,1061500.0,1710900.0,466200.0,1517800.0
2022-04-14,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.0,1.000000,1.000000,1.000000,...,258000.0,392900.0,3334200.0,2007100.0,24544700.0,887800.0,3518600.0,1830700.0,268400.0,1819400.0
