# Stock Analysis Using LSTM - static load module
- load selected features and save in pickle files / CSVs

# Feature series frequency and beginning dates:
- '2 YR','30 YR' - daily; 1990-01-02
- 'Sector_Close' - daily; depends on sector
- 'VIX' - daily; 2001-05-15
- 'URTH' - daily; 2012-01-12
- 'personal_sav_rate' - monthly; 1959-01-01
- 'oil_price' - daily; 1986-01-02
- 'schiller_ratio' - monthly; 1871-02-01
- 'consumer_sentiment' - every 4 months, then monthly; 1952-11-30
- 'public_debt' - quarterly; 1966-01-01
- 'Bullish', 'Bearish' - weekly; 1987-06-26
- 'gdp' - quarterly; 1947-01-01
- 'cpi' - monthly; 1947-01-01
- 'us_dollar_index' - yearly; 1995-01-01

# Import required libraries and set up notebook


In [12]:
! python --version

Python 3.7.4


In [13]:
! pip install quandl
#! pip install pip --upgrade
#! pip install pandas-datareader
print("past pandas_datareader")
#! pip install yahoo_fin
import pandas as pd
import numpy as np
import os
import yaml
from datetime import date
import requests
from sklearn.metrics import classification_report

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")
%matplotlib inline

# For reading stock data from yahoo
#import pandas_datareader as pdr
from pandas_datareader.data import DataReader
# import yahoo_fin.stock_info as si

# For time stamps
from datetime import datetime

# for LSTM
from keras.models import Sequential
from keras.layers import Dense, LSTM, Input
from keras.models import Model
# from tensorflow.keras.layers import Input, Dropout, Dense, BatchNormalization, Activation, concatenate, GRU, Embedding, Flatten, BatchNormalization
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import load_model
# access datasets from quandl.com - need to pip install Quandl to use
import quandl
on_colab = False
verboseout = True
results_columns = ['ticker','feature_list','total deviation','avg deviation','max deviation','min deviation','proportion good days']    
config_file = 'model_training_config.yml'
feature_df_dict = {}

past pandas_datareader


You should consider upgrading via the 'c:\users\ryanm\appdata\local\programs\python\python37\python.exe -m pip install --upgrade pip' command.


In [14]:
import tensorflow as tf
tf.__version__

'1.15.0'

In [15]:
start_timestamp = datetime.now()
if on_colab:
    from google.colab import drive
    drive.mount('/content/drive')

In [16]:
if on_colab:
    %cd /content/drive/MyDrive/karma_jan_2021/stock_investigation/notebooks

# Load config parameters

In [17]:
# load config file
current_path = os.getcwd()
print("current directory is: "+current_path)

path_to_yaml = os.path.join(current_path, config_file)
print("path_to_yaml "+path_to_yaml)
try:
    with open (path_to_yaml, 'r') as c_file:
        config = yaml.safe_load(c_file)
except Exception as e:
    print('Error reading the config file')

current directory is: C:\personal\karma_stocks_2021\stock_investigation\notebooks
path_to_yaml C:\personal\karma_stocks_2021\stock_investigation\notebooks\model_training_config.yml


In [18]:
# set parameters
# parameters that could change if the CSV file is used to drive multiple runs are set in the dictionary parms
parms = {}
parms['repeatable_run'] = config['general']['repeatable_run']
# fix seeds to get identical results on mulitiple runs
if parms['repeatable_run']:
    from numpy.random import seed
    seed(4)
    # dumb switch to make up for dumb regression going from TF 1 to TF 2
    if tf.__version__ == '1.15.0':
        tf.set_random_seed(7)
    else:
        tf.random.set_seed(7)


#master_date_mode', 'master_start', 'master_end',
#       'repeatable_run', 'years_window', 'training_proportion',
#       'close_threshold', 'look_back', 'look_ahead', 'look_ahead_scale',
#       'tech_list'
list_parms = ['tech_list']
plot_all = config['general']['verboseout']
use_saved_model = config['general']['presaved']
ust_cols = config['general']['ust_cols']
report_round = config['general']['report_round']
# list of ticket symbols
# e.g. tech_list = ['AAPL', 'GOOG', 'MSFT', 'AMZN']
parms['tech_list'] = config['tech_list']
# dictionary to seed stock handles with company names
saved_model_modifier = config['files']['saved_model_modifier']
# get the column lists
parms['close_threshold'] = config['general']['close_threshold']
quandl_token = config['general']['quandl_token']
rapidai_key = config['general']['rapidai_key']
# number of days previous used to predict subsequent day
parms['look_back'] = config['general']['look_back']
# gap in day betwen the last day previous and the subsequent day being predicted
parms['look_ahead'] = config['general']['look_ahead']
parms['look_ahead_scale'] = config['general']['look_ahead_scale']
parms['years_window'] = config['general']['years_window']
# target column
lstm_target = config['general']['lstm_target']
# list from target column
lstm_target_list = [lstm_target]
# additional features beyond the target column
feature_list = config['general']['feature_list']
# complete list of features LSTM trained on
lstm_feature_list = lstm_target_list+feature_list
# switch to determine normalization method
min_max_norm = config['general']['min_max_norm']
eod_token = config['general']['eod_token']
sector_ticker = config['files']['sector_ticker']
multi_parm_file = config['files']['multi_parm_file'] # CSV file containing parms for multiple runs
debug_on = config['general']['debug_on']
max_years_back = config['general']['max_years_back']
parms['master_date_mode'] = config['general']['master_date_mode']
parms['master_start'] = config['general']['master_start']
parms['master_end'] = config['general']['master_end']
parms['training_proportion'] = config['general']['training_proportion']
static_feature_list = config['static_load']['static_feature_list']
static_load_end = config['static_load']['static_load_end']

multi_parm_run = config['general']['multi_parm_run']
# ensure value of training_proportion is valid
if (parms['training_proportion'] < 0.25) or (parms['training_proportion'] > 0.9):
    print("setting default training proportion")
    parms['training_proportion'] = 0.8

In [19]:
config

{'general': {'verboseout': False,
  'includetext': True,
  'on_colab': False,
  'debug_on': False,
  'presaved': False,
  'savemodel': False,
  'picklemodel': True,
  'save_model_plot': False,
  'tensorboard_callback': False,
  'hctextmax': 7000,
  'maxwords': 6000,
  'textmax': 50,
  'pickled_data_file': '20142018_0930.pkl',
  'pickled_dataframe': 'AB_NYC_2019_output_aug19_2020.pkl',
  'modifier': 'oct05_2020',
  'targetthresh': 6.0,
  'targetcontinuous': False,
  'target_col': 'price',
  'emptythresh': 6000,
  'zero_weight': 1.0,
  'one_weight': 45.878,
  'one_weight_offset': 0,
  'patience_threshold': 3,
  'min_max_norm': True,
  'master_date_mode': True,
  'master_start': '2020-08-01',
  'master_end': '2021-08-10',
  'repeatable_run': False,
  'years_window': 2,
  'max_years_back': 20,
  'training_proportion': 0.8,
  'close_threshold': 0.01,
  'report_round': 4,
  'ust_cols': ['2 YR'],
  'quandl_token': 'uCghYBw8CtpUvWct_W8c',
  'finnhumb_key': 'c16hj0f48v6ppg7erf7g',
  'rapidai_ke

# Ingest Sector Ticker file
- ingest CSV file that contains correlation between stock ticker symbols and ETF tickers as a proxy for sector tickers

In [20]:
def get_data_path():
    '''get the path for data files'''
    rawpath = os.getcwd()
    # data is in a directory called "data" that is a sibling to the directory containing the notebook
    path = os.path.abspath(os.path.join(rawpath, '..', 'data'))
    return(path)

In [21]:
sector_file = os.path.join(get_data_path(),sector_ticker)
print("sector_file: ",sector_file)
df_sector = pd.read_csv(sector_file,encoding = "ISO-8859-1")
df_sector.head()

sector_file:  C:\personal\karma_stocks_2021\stock_investigation\data\sector_ticker.csv


Unnamed: 0,Symbol,Name,Sector,Sector Ticker,Sector ETF Ticker,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10
0,A,Agilent Technologies Inc,Health Care,^SP500-35,IYH,,- 'A',,,,
1,AAL,American Airlines Group,Industrials,^SP500-20,IYJ,,- 'AAL',,,,
2,AAP,Advance Auto Parts,Consumer Discretionary,^SP500-25,XLY,,- 'AAP',,,,
3,AAPL,Apple Inc.,Information Technology,^SP500-45,IYW,,- 'AAPL',,,,
4,ABBV,AbbVie Inc.,Health Care,^SP500-35,IYH,,- 'ABBV',,,,


# Define start and end of analysis period

In [22]:
# DataReader: https://riptutorial.com/pandas/topic/1912/pandas-datareader
# https://pandas-datareader.readthedocs.io/en/latest/remote_data.html
# 
# Set up End and Start times for data grab
# check to see if start and end dates are hard-coded with master dates
def set_start_end():
    if parms['master_date_mode']: # start and end hardcoded by parameters
        start = parms['master_start']
        end = parms['master_end']
    else: # end is current date; start is current date minus years_window
        end = datetime.now()
        start = datetime(end.year - parms['years_window'], end.month, end.day)
    # output a test dataset
    tester = DataReader('IBM', 'yahoo', start, end)
    print(tester.shape)
    return(start,end)

In [23]:
def get_path():
    rawpath = os.getcwd()
    # data is in a directory called "static_load" that is a sibling to the directory containing the notebook
    path = os.path.abspath(os.path.join(rawpath, '..', 'static_load'))
    return(path)

# Experiment with EOD API for dividend
- EOD historical data doesn't directly support Python access
- https://eodhistoricaldata.com/financial-apis/python-example/




In [24]:
import requests

import pandas as pd

from io import StringIO

# URL for dividend data
# https://eodhistoricaldata.com/api/div/AAPL.US?api_token=OeAFFmMliFG5orCUuwAKQ8l4WWFQ67YX&from=2000-01-01

def get_div_data(symbol='AAPL.US', api_token='OeAFFmMliFG5orCUuwAKQ8l4WWFQ67YX', session=None):
    if session is None:
        session = requests.Session()
    url = 'https://eodhistoricaldata.com/api/div/%s' % symbol
    params = {'api_token': api_token}
    r = session.get(url, params=params)
    if r.status_code == requests.codes.ok:
        df = pd.read_csv(StringIO(r.text), skipfooter=1, parse_dates=[0], index_col=0, engine='python')
        return(True, df)
    else:
        print("status code",str(r.status_code))
        print("reason code",str(r.reason))
        return(False,"null")
        #raise Exception(r.status_code, r.reason, url)

# Load US Treasury stats

In [25]:
# load US Treasury yield dataframe
ust_df = quandl.get("USTREASURY/YIELD", authtoken=quandl_token)

In [26]:
ust_df

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1990-01-02,,,7.83,7.89,7.81,7.87,7.90,7.87,7.98,7.94,,8.00
1990-01-03,,,7.89,7.94,7.85,7.94,7.96,7.92,8.04,7.99,,8.04
1990-01-04,,,7.84,7.90,7.82,7.92,7.93,7.91,8.02,7.98,,8.04
1990-01-05,,,7.79,7.85,7.79,7.90,7.94,7.92,8.03,7.99,,8.06
1990-01-08,,,7.79,7.88,7.81,7.90,7.95,7.92,8.05,8.02,,8.09
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-08-09,0.04,0.05,0.06,0.06,0.08,0.23,0.43,0.79,1.09,1.33,1.87,1.96
2021-08-10,0.05,0.05,0.05,0.05,0.08,0.24,0.47,0.82,1.12,1.36,1.90,1.99
2021-08-11,0.05,0.05,0.05,0.06,0.08,0.23,0.45,0.81,1.11,1.35,1.90,1.99
2021-08-12,0.05,0.06,0.06,0.06,0.09,0.23,0.46,0.83,1.13,1.36,1.92,2.03


In [27]:
#ust_df['30 YR'].mean()

In [28]:
#ust_df['2 YR'].mean()

In [29]:
#us_30 = ust_df['30 YR']

In [30]:
# new = old[['A', 'C', 'D']].copy()
ust_df_2year = ust_df[['2 YR']].copy()
ust_df_2year.head()

Unnamed: 0_level_0,2 YR
Date,Unnamed: 1_level_1
1990-01-02,7.87
1990-01-03,7.94
1990-01-04,7.92
1990-01-05,7.9
1990-01-08,7.9


# Load Fear and Greed Index

In [31]:
# fear and greed index Source: https://rapidapi.com/rpi4gx/api/fear-and-greed-index
import requests

url = "https://fear-and-greed-index.p.rapidapi.com/v1/fgi"

headers = {
    'x-rapidapi-key': rapidai_key,
    'x-rapidapi-host': "fear-and-greed-index.p.rapidapi.com"
    }

response = requests.request("GET", url, headers=headers)

print(response.text)

{"fgi":{"now":{"value":43,"valueText":"Fear"},"previousClose":{"value":42,"valueText":"Fear"},"oneWeekAgo":{"value":36,"valueText":"Fear"},"oneMonthAgo":{"value":34,"valueText":"Fear"},"oneYearAgo":{"value":72,"valueText":"Greed"}},"lastUpdate":{"epochUnixSeconds":1628889420,"humanDate":"2021-08-13T21:17:00Z"}}


# Load US Dollar Index

In [32]:
#A weighted average of the foreign exchange value of the U.S. dollar against the currencies of a 
# broad group of major U.S. trading partners.
# Source: https://www.quandl.com/data/FRED/TWEXBANL-Trade-Weighted-U-S-Dollar-Index-Broad
us_dollar_index_df = quandl.get("FRED/TWEXBANL", authtoken=quandl_token)
us_dollar_index_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1995-01-01,92.5752
1996-01-01,97.4584
1997-01-01,104.3864
1998-01-01,115.9162
1999-01-01,116.1848


In [33]:
us_dollar_index_df.rename(columns={'Value':'us_dollar_index'}, inplace=True)
feature_df_dict['us_dollar_index'] = us_dollar_index_df

# Load Public Debt

In [34]:
# Public debt as % of GDP (quarterly)
# https://www.quandl.com/data/FRED/GFDEGDQ188S-Federal-Debt-Total-Public-Debt-as-Percent-of-Gross-Domestic-Product
public_debt_df = quandl.get("FRED/GFDEGDQ188S", authtoken=quandl_token)
public_debt_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1966-01-01,40.33999
1966-04-01,39.26763
1966-07-01,39.62091
1966-10-01,39.51977
1967-01-01,39.20383


In [35]:
public_debt_df.rename(columns={'Value':'public_debt'}, inplace=True)
feature_df_dict['public_debt'] = public_debt_df

# Load Personal saving rate

In [36]:
# Personal Savings Rate: https://www.quandl.com/data/FRED/PSAVERT-Personal-Saving-Rate (monthly)
psr_df = quandl.get("FRED/PSAVERT", authtoken=quandl_token)
psr_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1959-01-01,11.3
1959-02-01,10.6
1959-03-01,10.3
1959-04-01,11.2
1959-05-01,10.6


In [37]:

psr_df.rename(columns={'Value':'personal_sav_rate'}, inplace=True)
feature_df_dict['personal_sav_rate'] = psr_df

# Load crude oil price

In [38]:
# Crude Oil price (WTI): https://www.quandl.com/data/FRED/DCOILWTICO-Crude-Oil-Prices-West-Texas-Intermediate-WTI-Cushing-Oklahoma (daily)
oil_df = quandl.get("FRED/DCOILWTICO", authtoken=quandl_token)
oil_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1986-01-02,25.56
1986-01-03,26.0
1986-01-06,26.53
1986-01-07,25.85
1986-01-08,25.87


In [39]:
oil_df.tail()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2021-08-03,70.64
2021-08-04,68.19
2021-08-05,69.1
2021-08-06,68.26
2021-08-09,66.56


In [40]:
oil_df.rename(columns={'Value':'oil_price'}, inplace=True)
feature_df_dict['oil_price'] = oil_df

# Load Schiller Ratio

In [41]:
# Shiller PE Ratio by Month:  https://www.quandl.com/data/MULTPL/SHILLER_PE_RATIO_MONTH-Shiller-PE-Ratio-by-Month (monthly)
schiller_df = quandl.get("MULTPL/SHILLER_PE_RATIO_MONTH", authtoken=quandl_token)
schiller_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1871-02-01,10.92
1871-03-01,11.19
1871-04-01,12.05
1871-05-01,12.59
1871-06-01,12.59


In [42]:
schiller_df.tail()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
2021-04-30,37.56
2021-05-01,36.62
2021-06-01,36.86
2021-06-30,38.11
2021-07-01,37.86


In [43]:
schiller_df.shape

(1821, 1)

In [44]:
schiller_df.rename(columns={'Value':'schiller_ratio'}, inplace=True)
feature_df_dict['schiller_ratio'] = schiller_df

# Load consumer sentiment


In [45]:
# Consumer Sentiment: https://www.quandl.com/data/UMICH/SOC1-University-of-Michigan-Consumer-Survey-Index-of-Consumer-Sentiment (monthly)
conf_df = quandl.get("UMICH/SOC1", authtoken=quandl_token)

In [46]:
conf_df.head()

Unnamed: 0_level_0,Index
Date,Unnamed: 1_level_1
1952-11-30,86.2
1953-02-28,90.7
1953-08-31,80.8
1953-11-30,80.7
1954-02-28,82.0


In [47]:
conf_df.tail()

Unnamed: 0_level_0,Index
Date,Unnamed: 1_level_1
2021-02-28,76.8
2021-03-31,84.9
2021-04-30,88.3
2021-05-31,82.9
2021-06-30,85.5


In [48]:
conf_df.shape

(614, 1)

In [49]:
conf_df.rename(columns={'Index':'consumer_sentiment'}, inplace=True)
feature_df_dict['consumer_sentiment'] = conf_df

# Parameters to incorporate
- Investor sentiment: https://www.quandl.com/data/AAII/AAII_SENTIMENT-AAII-Investor-Sentiment-Data
- GDP: https://www.quandl.com/data/FRED/GDP-Gross-Domestic-Product
- Consumer Price Index (as proxy for inflation): https://www.quandl.com/data/FRED/CPIAUCSL-Consumer-Price-Index-for-All-Urban-Consumers-All-Items (I think these figures are derived from the same source I put in the factor spreadsheet: https://www.bls.gov/news.release/cpi.toc.htm)
- Civilian unemployment rate: https://www.quandl.com/data/FRED/UNRATE-Civilian-Unemployment-Rate

# Load investor sentiment

In [50]:
# investor sentiment
# https://www.quandl.com/data/AAII/AAII_SENTIMENT-AAII-Investor-Sentiment-Data
# weekly
inv_sent_df = quandl.get("AAII/AAII_SENTIMENT", authtoken=quandl_token)
inv_sent_df.head()

Unnamed: 0_level_0,Bullish,Neutral,Bearish,Total,Bullish 8-Week Mov Avg,Bull-Bear Spread,Bullish Average,Bullish Average + St. Dev,Bullish Average - St. Dev,S&P 500 Weekly High,S&P 500 Weekly Low,S&P 500 Weekly Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1987-06-26,,,,,,,0.380245,0.480891,0.279599,,,
1987-07-17,,,,,,,0.380245,0.480891,0.279599,314.59,307.63,314.59
1987-07-24,0.36,0.5,0.14,1.0,,0.22,0.380245,0.480891,0.279599,311.39,307.81,309.27
1987-07-31,0.26,0.48,0.26,1.0,,0.0,0.380245,0.480891,0.279599,318.66,310.65,318.66
1987-08-07,0.56,0.15,0.29,1.0,,0.27,0.380245,0.480891,0.279599,323.0,316.23,323.0


In [51]:
inv_sent_df.tail()

Unnamed: 0_level_0,Bullish,Neutral,Bearish,Total,Bullish 8-Week Mov Avg,Bull-Bear Spread,Bullish Average,Bullish Average + St. Dev,Bullish Average - St. Dev,S&P 500 Weekly High,S&P 500 Weekly Low,S&P 500 Weekly Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-03-25,0.509494,0.28481,0.205696,1.0,0.450599,0.303798,0.380245,0.480891,0.279599,3887.14,3864.63,3881.2
2021-04-01,0.458333,0.309524,0.232143,1.0,0.451054,0.22619,0.380245,0.480891,0.279599,4013.04,4013.04,4013.04
2021-04-08,0.569079,0.226974,0.203947,1.0,0.457611,0.365132,0.380245,0.480891,0.279599,4093.87,4089.89,4091.04
2021-04-15,0.538206,0.215947,0.245847,1.0,0.46025,0.292359,0.380245,0.480891,0.279599,4151.69,4120.87,4124.66
2021-04-22,0.526814,0.268139,0.205047,1.0,0.462819,0.321767,0.380245,0.480891,0.279599,4170.46,4160.11,4162.43


In [52]:
inv_sent_df.shape

(1762, 12)

In [53]:
# inv_sent_df2 = inv_sent_df[['Bullish','Bearish']].copy()
bullish_df = inv_sent_df[['Bullish']].copy()
bearish_df = inv_sent_df[['Bearish']].copy()
feature_df_dict['Bullish'] = bullish_df
feature_df_dict['Bearish'] = bearish_df

# Load GDP

In [54]:
gdp_df = quandl.get("FRED/GDP", authtoken=quandl_token)
gdp_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1947-01-01,243.164
1947-04-01,245.968
1947-07-01,249.585
1947-10-01,259.745
1948-01-01,265.742


In [55]:
gdp_df.rename(columns={'Value':'gdp'}, inplace=True)
feature_df_dict['gdp'] = gdp_df
gdp_df.head()

Unnamed: 0_level_0,gdp
Date,Unnamed: 1_level_1
1947-01-01,243.164
1947-04-01,245.968
1947-07-01,249.585
1947-10-01,259.745
1948-01-01,265.742


# Load CPI

In [56]:
cpi_df = quandl.get("FRED/CPIAUCSL", authtoken=quandl_token)
cpi_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1947-01-01,21.48
1947-02-01,21.62
1947-03-01,22.0
1947-04-01,22.0
1947-05-01,21.95


In [57]:
cpi_df.rename(columns={'Value':'cpi'}, inplace=True)
feature_df_dict['cpi'] = cpi_df

# Load VIX
- special ticker to get options as proxies of where investors think the market is heading 
- source: Yahoo
- frequency: daily

In [58]:
# VIX not available from MarketStack
'''
# brute force to just get the VIX series for the last 20 years
vix_end = datetime.now()
vix_start = datetime(end.year-max_years_back, end.month, end.day)
print("vix_start: ",vix_start)
print("vix_end: ",vix_end)
company = '^VIX'
temp_df = DataReader(company,data_source='yahoo', start=vix_start, end=vix_end)
vix_df = temp_df[["Close"]].copy()
vix_df.rename(columns={'Close':'VIX'}, inplace=True)
feature_df_dict['VIX'] = vix_df
vix_df.head()
'''

'\n# brute force to just get the VIX series for the last 20 years\nvix_end = datetime.now()\nvix_start = datetime(end.year-max_years_back, end.month, end.day)\nprint("vix_start: ",vix_start)\nprint("vix_end: ",vix_end)\ncompany = \'^VIX\'\ntemp_df = DataReader(company,data_source=\'yahoo\', start=vix_start, end=vix_end)\nvix_df = temp_df[["Close"]].copy()\nvix_df.rename(columns={\'Close\':\'VIX\'}, inplace=True)\nfeature_df_dict[\'VIX\'] = vix_df\nvix_df.head()\n'

In [59]:
#vix_df.tail()

# Load URTH

In [60]:
# Post Aug 10 getting URTH along with other tickers from MarketStack
'''
# brute force to just get the URTH series for the last 20 years
urth_end = datetime.now()
urth_start = datetime(end.year-max_years_back, end.month, end.day)
print("urth_start: ",urth_start)
print("urth_end: ",urth_end)
company = 'URTH'
temp_df = DataReader(company,data_source='yahoo', start=urth_start, end=urth_end)
urth_df = temp_df[["Close"]].copy()
urth_df.rename(columns={'Close':'URTH'}, inplace=True)
feature_df_dict['URTH'] = urth_df
urth_df.head()
'''

'\n# brute force to just get the URTH series for the last 20 years\nurth_end = datetime.now()\nurth_start = datetime(end.year-max_years_back, end.month, end.day)\nprint("urth_start: ",urth_start)\nprint("urth_end: ",urth_end)\ncompany = \'URTH\'\ntemp_df = DataReader(company,data_source=\'yahoo\', start=urth_start, end=urth_end)\nurth_df = temp_df[["Close"]].copy()\nurth_df.rename(columns={\'Close\':\'URTH\'}, inplace=True)\nfeature_df_dict[\'URTH\'] = urth_df\nurth_df.head()\n'

In [61]:
#urth_df.tail()

# Load unemployment

In [62]:
# Civilian unemployment rate: https://www.quandl.com/data/FRED/UNRATE-Civilian-Unemployment-Rate
# monthly
unemp_df = quandl.get("FRED/UNRATE")
unemp_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1948-01-01,3.4
1948-02-01,3.8
1948-03-01,4.0
1948-04-01,3.9
1948-05-01,3.5


In [63]:
unemp_df.rename(columns={'Value':'unemployment'}, inplace=True)
feature_df_dict['unemployment'] = unemp_df

# Load non-farm payrolls

In [64]:
# non-farm payroll:  https://www.quandl.com/data/FRED/PAYEMS-All-Employees-Total-Nonfarm-Payrolls
# https://en.wikipedia.org/wiki/Nonfarm_payrolls
# monthly
non_farm_df = quandl.get("FRED/PAYEMS")
non_farm_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1939-01-01,29923.0
1939-02-01,30100.0
1939-03-01,30280.0
1939-04-01,30094.0
1939-05-01,30299.0


In [65]:
non_farm_df.rename(columns={'Value':'non_farm'}, inplace=True)
feature_df_dict['non_farm'] = non_farm_df

# Load M2 Money Supply

In [66]:
# non-farm payroll:   https://www.quandl.com/data/FRED/M2V-Velocity-of-M2-Money-Stock
# quarterly
m2_money_df = quandl.get("FRED/M2V")
m2_money_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1959-01-01,1.773
1959-04-01,1.789
1959-07-01,1.773
1959-10-01,1.779
1960-01-01,1.817


In [67]:
m2_money_df.rename(columns={'Value':'m2_money'}, inplace=True)
feature_df_dict['m2_money'] = m2_money_df

# St Louis Stress Index

In [68]:
# source: https://www.quandl.com/data/FRED/STLFSI-St-Louis-Financial-Stress-Index
# https://en.wikipedia.org/wiki/St._Louis_Fed_Financial_Stress_Index
# weekly
st_louis_df = quandl.get("FRED/STLFSI")
st_louis_df.head()

Unnamed: 0_level_0,Value
Date,Unnamed: 1_level_1
1993-12-31,0.259
1994-01-07,0.303
1994-01-14,0.246
1994-01-21,0.265
1994-01-28,0.242


In [70]:
st_louis_df.rename(columns={'Value':'st_louis'}, inplace=True)
feature_df_dict['st_louis'] = st_louis_df

# Save static loads

In [115]:
# load features on the static load list
str_date = datetime.now().strftime("%Y%m%d-%H%M%S")
add_date = np.datetime64(static_load_end)
add_date
ass_feat = []
print("udf shape before: ",ust_df.shape)
for feature_df in static_feature_list:
    print("filling in df for:",feature_df)
    # set the value of the end row
    feature_df_dict[feature_df].loc[add_date] = [feature_df_dict[feature_df][feature_df].iloc[-1]]
    # fill in the daily values
    feature_df_dict[feature_df] = feature_df_dict[feature_df].resample('D').ffill()
    # truncate all the values after the specified end date of static load
    feature_df_dict[feature_df] = feature_df_dict[feature_df].truncate(after = static_load_end)
    #data = data.loc[(data["deaths_7_days"] > 0) & (data["deaths_24_hours"] > 0)]
    print("shape of df: ",feature_df_dict[feature_df].shape)
    file_name = os.path.join(get_path(),feature_df+str_date+".csv")
    print("output static load: ",file_name)
    try:
        feature_df_dict[feature_df].to_csv(file_name)
    except Exception as f:
        if debug_on:
            raise
            print("exception generated "+str(f)+" attempting to write output CSV "+file_name)
        else:
            print("exception not generated")

    

udf shape before:  (7878, 12)
filling in df for: gdp
shape of df:  (27120, 1)
output static load:  C:\personal\karma_stocks_2021\stock_investigation\static_load\gdp20210627-222520.csv
filling in df for: cpi
shape of df:  (27120, 1)
output static load:  C:\personal\karma_stocks_2021\stock_investigation\static_load\cpi20210627-222520.csv
filling in df for: oil_price
shape of df:  (12874, 1)
output static load:  C:\personal\karma_stocks_2021\stock_investigation\static_load\oil_price20210627-222520.csv


In [116]:
#['2 YR','30 YR', 'Sector_Close','VIX','URTH', 'personal_sav_rate', 'oil_price', 'schiller_ratio', 'consumer_sentiment','public_debt', 'Bullish', 'Bearish', 'gdp', 'cpi', 'unemployment']

In [117]:
ust_df.head()

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1990-01-02,,,7.83,7.89,7.81,7.87,7.9,7.87,7.98,7.94,,8.0
1990-01-03,,,7.89,7.94,7.85,7.94,7.96,7.92,8.04,7.99,,8.04
1990-01-04,,,7.84,7.9,7.82,7.92,7.93,7.91,8.02,7.98,,8.04
1990-01-05,,,7.79,7.85,7.79,7.9,7.94,7.92,8.03,7.99,,8.06
1990-01-08,,,7.79,7.88,7.81,7.9,7.95,7.92,8.05,8.02,,8.09


In [118]:
ust_df.tail()

Unnamed: 0_level_0,1 MO,2 MO,3 MO,6 MO,1 YR,2 YR,3 YR,5 YR,7 YR,10 YR,20 YR,30 YR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-06-21,0.04,0.04,0.05,0.06,0.09,0.27,0.48,0.9,1.25,1.5,2.05,2.11
2021-06-22,0.04,0.05,0.04,0.06,0.09,0.25,0.44,0.87,1.23,1.48,2.03,2.1
2021-06-23,0.04,0.04,0.05,0.05,0.08,0.26,0.47,0.9,1.25,1.5,2.04,2.11
2021-06-24,0.05,0.05,0.05,0.05,0.08,0.26,0.48,0.9,1.26,1.49,2.03,2.1
2021-06-25,0.05,0.05,0.06,0.06,0.09,0.28,0.48,0.92,1.29,1.54,2.09,2.16


# Define helper functions
- define helper functions used by main model training block