# Stage 2a: Generate Alpha Factors using Stock Universe

- Compute custom alpha factors
- Compute universal quant factors
- Compute date information
- Save Alpha Factors

In [1]:
from platform import python_version
import time
from datetime import datetime
import os
import pandas as pd
import numpy as np
import math
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Use the ggplot style sheet and a wide default figure size for the plots in this notebook.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (20, 8)

# Add the ../tools directory to the import path so the modules imported below can be found.
import sys
# Insert at position 1 rather than 0, since entry 0 is the path of this file.
sys.path.insert(1, '../tools')
import importlib
# Each local module is reloaded right after it is imported, so the source currently on
# disk is re-executed even if the kernel had already imported an earlier version.
import ameritrade_functions as amc
importlib.reload(amc)
import utils
importlib.reload(utils)

# Report the interpreter and pandas versions used for this run.
print(f'Python version: {python_version()}')
print(f'Pandas version: {pd.__version__}')

Python version: 3.8.10
Pandas version: 0.25.3


## Configure Ameritrade Information

Ameritrade credentials are stored in environment variables to keep from having unencrypted passwords stored on disk.

The module automatically masks the account numbers to protect the actual accounts. An Ameritrade user can have many investment accounts. We will be working with only one for this demonstration.

In [2]:
# Credentials are read from environment variables; os.getenv yields None for any
# variable that is not set.
username, password, client_id = (
    os.getenv(env_name)
    for env_name in ('maiotradeuser', 'maiotradepw', 'maiotradeclientid')
)

# Masked account identifier; its last four characters name the per-account portfolio file.
masked_account_number = '#---5311'
account_suffix = masked_account_number[-4:]

# Locations of the CSV files used by this notebook.
price_histories_file_name = 'data/price_histories.csv'
account_portfolios_file_name = 'data/portfolio_data.csv'
portfolio_file_name = 'data/portfolio_' + account_suffix + '.csv'

# Price History data from Stage 1

In [3]:
# Read the price histories from the CSV configured above and report the date span they cover.
price_histories = utils.read_price_histories(price_histories_file_name)
print(f'Date range for price histories: {price_histories.date.min().date()} to {price_histories.date.max().date()}')
# Derive the close-value table; its columns are counted and listed as the stock universe.
close = utils.get_close_values(price_histories)
print(f'You have {len(close.columns)} stocks')
print(close.columns.to_list())
# Display the most recent rows.
close.tail()

Date range for price histories: 2018-07-02 to 2021-07-02
You have 32 stocks
['AAPL', 'AEI', 'AIH', 'BABA', 'CAN', 'COKE', 'CONN', 'DKNG', 'EFOI', 'GMGMF', 'GOOG', 'IBM', 'JZXN', 'LEDS', 'LX', 'MGM', 'MOSY', 'MSFT', 'NMRD', 'NNXPF', 'OCG', 'OEG', 'PDYPF', 'PRTK', 'QFIN', 'RCON', 'TELL', 'TLRY', 'TSLA', 'VTNR', 'WKHS', 'ZKIN']


ticker,AAPL,AEI,AIH,BABA,CAN,COKE,CONN,DKNG,EFOI,GMGMF,...,PDYPF,PRTK,QFIN,RCON,TELL,TLRY,TSLA,VTNR,WKHS,ZKIN
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-28 00:00:00+00:00,134.78,6.11,7.96,228.59,7.95,402.0,26.43,52.71,4.03,1.85,...,189.0,6.9,41.72,4.19,4.41,18.63,688.72,9.94,16.96,4.31
2021-06-29 00:00:00+00:00,136.33,6.02,8.28,229.44,8.15,398.39,25.43,52.03,4.08,1.72,...,185.5,6.81,42.55,4.1,4.23,17.86,680.76,10.27,17.2,4.2
2021-06-30 00:00:00+00:00,136.96,5.68,7.96,226.78,8.15,402.13,25.5,52.17,3.98,1.69536,...,181.615,6.82,41.84,4.33,4.65,18.08,679.7,13.23,16.59,4.29
2021-07-01 00:00:00+00:00,137.27,5.31,8.1,221.87,7.62,397.16,25.9,51.8,3.9,1.7,...,187.375,7.24,38.79,4.2,4.66,17.835,677.92,12.04,15.64,4.23
2021-07-02 00:00:00+00:00,139.96,5.63,8.22,217.75,7.32,393.39,25.77,51.28,4.42,1.72,...,186.0,6.94,35.74,4.09,4.43,17.23,678.9,11.04,14.17,3.95


# Factors

### General
- Momentum
- Mean Reversion
- Overnight Sentiment

### Universal Quant Features
- Volatility 
- Dollar Volume
- Market Dispersion
- Market Volatility
- Date Parts

**TODO:** Add a simple moving average (SMA) for market dispersion/volatility.

In [8]:
import trading_factors as alpha_factors
importlib.reload(alpha_factors)

# General alpha factors: each is demeaned, ranked and z-scored; the mean-reversion and
# overnight-sentiment factors are additionally smoothed and then re-ranked / re-z-scored.
general_factor_frames = [
    alpha_factors.FactorMomentum(price_histories, 252).demean().rank().zscore().for_al(),
    alpha_factors.FactorMeanReversion(price_histories, 5).demean().rank().zscore().smoothed().rank().zscore().for_al(),
    alpha_factors.OvernightSentiment(price_histories, 5).demean().rank().zscore().smoothed(10).rank().zscore().for_al(),
]

# Per-stock quant features over 20- and 120-day windows, ranked and z-scored.
ranked_quant_frames = [
    factor_cls(price_histories, window).rank().zscore().for_al()
    for factor_cls in (alpha_factors.AnnualizedVolatility, alpha_factors.AverageDollarVolume)
    for window in (20, 120)
]

# Market-level features over the same windows, used without ranking or z-scoring.
market_frames = [
    factor_cls(price_histories, window).for_al()
    for factor_cls in (alpha_factors.MarketDispersion, alpha_factors.MarketVolatility)
    for window in (20, 120)
]

# Place every factor side by side as columns of one frame.
all_factors = pd.concat(general_factor_frames + ranked_quant_frames + market_frames, axis=1)

# The return value is discarded here; presumably this adds the date-part columns
# (is_January, weekday, quarter, ...) to all_factors in place -- confirm in trading_factors.
alpha_factors.FactorDateParts(all_factors)

# Order by index, drop rows that contain any missing value, and save the result.
all_factors = all_factors.sort_index().dropna()
all_factors.to_csv('data/all_factors.csv')

In [9]:
# Read the saved factors back from disk and rebuild the sorted (date, ticker) index.
all_factors = (
    pd.read_csv('data/all_factors.csv', parse_dates=['date'])
    .set_index(['date', 'ticker'])
    .sort_index()
)

# Show the most recent rows for a single ticker.
is_aapl = all_factors.index.get_level_values('ticker') == 'AAPL'
all_factors.loc[is_aapl].tail()

Unnamed: 0_level_0,Unnamed: 1_level_0,momentum_252_day_logret,mean_reversion_5_day_logret_smoothed,overnight_sentiment_5_day_smoothed,annualzed_volatility_20_day,annualzed_volatility_120_day,average_dollar_volume_20_day,average_dollar_volume_120_day,market_dispersion20_day,market_dispersion120_day,market_volatility20_day,market_volatility120_day,is_January,is_December,weekday,quarter,year,month_start,month_end,quarter_start,quarter_end
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-28 00:00:00+00:00,AAPL,-0.270765,-0.487377,-0.487377,-1.24552,-1.24552,1.570438,1.570438,0.065142,0.066004,0.308469,0.355851,0,0,0,2,2021,0,0,0,0
2021-06-29 00:00:00+00:00,AAPL,-0.162459,-0.595683,-0.812296,-1.24552,-1.353826,1.570438,1.570438,0.05904,0.065889,0.248229,0.357112,0,0,1,2,2021,0,0,0,0
2021-06-30 00:00:00+00:00,AAPL,-0.162459,-0.703989,-0.920602,-1.24552,-1.462132,1.570438,1.570438,0.05731,0.065856,0.233206,0.352731,0,0,2,2,2021,0,1,0,1
2021-07-01 00:00:00+00:00,AAPL,-0.162459,-1.028908,-0.920602,-1.24552,-1.462132,1.570438,1.570438,0.054226,0.065714,0.211132,0.353084,0,0,3,3,2021,1,0,1,0
2021-07-02 00:00:00+00:00,AAPL,-0.162459,-1.137214,-0.812296,-1.24552,-1.462132,1.570438,1.570438,0.051825,0.065527,0.214403,0.355593,0,0,4,3,2021,0,0,0,0


In [10]:
# Summary statistics (count, mean, std, quantiles) for every factor column.
all_factors.describe()

Unnamed: 0,momentum_252_day_logret,mean_reversion_5_day_logret_smoothed,overnight_sentiment_5_day_smoothed,annualzed_volatility_20_day,annualzed_volatility_120_day,average_dollar_volume_20_day,average_dollar_volume_120_day,market_dispersion20_day,market_dispersion120_day,market_volatility20_day,market_volatility120_day,is_January,is_December,weekday,quarter,year,month_start,month_end,quarter_start,quarter_end
count,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0,16160.0
mean,1.593885e-18,-7.694614999999999e-19,-5.221345999999999e-19,-3.833567e-18,-6.361798e-18,9.068653e-19,3.792346e-18,0.047965,0.044583,0.222876,0.202244,0.079208,0.085149,2.011881,2.516832,2020.0,0.041584,0.045545,0.011881,0.015842
std,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,1.000031,0.016102,0.010081,0.131909,0.07988,0.270071,0.279111,1.400134,1.115282,0.706428,0.199643,0.208502,0.108355,0.124866
min,-1.697516,-1.691818,-1.678898,-1.678744,-1.678744,-1.678744,-1.678744,0.02239,0.03067,0.05835,0.097013,0.0,0.0,0.0,1.0,2019.0,0.0,0.0,0.0,0.0
25%,-0.866528,-0.866528,-0.8394279,-0.8441167,-0.8441167,-0.8441167,-0.8441167,0.036259,0.03374,0.111941,0.124461,0.0,0.0,1.0,2.0,2020.0,0.0,0.0,0.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.04525,0.044352,0.199111,0.189831,0.0,0.0,2.0,3.0,2020.0,0.0,0.0,0.0,0.0
75%,0.8149136,0.8672437,0.866528,0.8441167,0.8441167,0.8441167,0.8441167,0.055153,0.05327,0.281006,0.276871,0.0,0.0,3.0,4.0,2020.0,0.0,0.0,0.0,0.0
max,1.697516,1.691818,1.678898,1.691818,1.691818,1.691818,1.691818,0.096061,0.066624,0.585165,0.358972,1.0,1.0,4.0,4.0,2021.0,1.0,1.0,1.0,1.0


In [11]:
# Display the first rows of the sorted factor table.
all_factors.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,momentum_252_day_logret,mean_reversion_5_day_logret_smoothed,overnight_sentiment_5_day_smoothed,annualzed_volatility_20_day,annualzed_volatility_120_day,average_dollar_volume_20_day,average_dollar_volume_120_day,market_dispersion20_day,market_dispersion120_day,market_volatility20_day,market_volatility120_day,is_January,is_December,weekday,quarter,year,month_start,month_end,quarter_start,quarter_end
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2019-07-03 00:00:00+00:00,AAPL,1.14607,-1.25522,0.487377,-0.054575,0.600323,1.691818,1.691818,0.027712,0.033048,0.108734,0.124733,0,0,2,3,2019,0,0,0,0
2019-07-03 00:00:00+00:00,AEI,0.436598,0.764047,-1.353826,-1.309795,-1.309795,-1.309795,-1.309795,0.027712,0.033048,0.108734,0.124733,0,0,2,3,2019,0,0,0,0
2019-07-03 00:00:00+00:00,AIH,0.436598,0.764047,-1.462132,-1.309795,-1.309795,-1.309795,-1.309795,0.027712,0.033048,0.108734,0.124733,0,0,2,3,2019,0,0,0,0
2019-07-03 00:00:00+00:00,BABA,-0.272874,-1.691818,1.570438,1.036921,0.163724,1.582669,1.473519,0.027712,0.033048,0.108734,0.124733,0,0,2,3,2019,0,0,0,0
2019-07-03 00:00:00+00:00,CAN,0.436598,0.764047,-1.570438,-1.309795,-1.309795,-1.309795,-1.309795,0.027712,0.033048,0.108734,0.124733,0,0,2,3,2019,0,0,0,0
