# Portfolio Workflow

In [1]:
# Notebook setup: configure logging, make the project's tools importable,
# import third-party libraries, and set plotting defaults.
import logging
import logging.config

# Logging is configured from the project's ini file before the remaining
# imports run; every message in this notebook goes through `logger`.
logging.config.fileConfig('./config/logging.ini')
logger = logging.getLogger('GenerateAlphaAndBetaFactors')

import configparser
from platform import python_version
from pathlib import Path

# Set the import path for the project tools directory
import sys
# insert at position 1 in the path, as 0 is the path of this file.
sys.path.insert(1, 'tools')

# Project imports
# importlib.reload() re-executes each project module, so edits made to the
# module files since they were first imported are picked up when this cell
# is re-run in a live kernel.
import importlib
import trading_factors_yahoo as alpha_factors
importlib.reload(alpha_factors)
import utils
importlib.reload(utils)

import time
from datetime import datetime
import os
import pandas as pd
import numpy as np
import math
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pickle

# Plotting defaults: inline rendering, ggplot style, 20x8 figure size.
%matplotlib inline
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (20, 8)

# Record the interpreter and pandas versions in the log for this run.
logger.info(f'Python version: {python_version()}')
logger.info(f'Pandas version: {pd.__version__}')

2022-05-14 13:04:06,119|numexpr.utils|INFO|NumExpr defaulting to 4 threads.
2022-05-14 13:04:09,658|GenerateAlphaAndBetaFactors|INFO|Python version: 3.8.8
2022-05-14 13:04:09,658|GenerateAlphaAndBetaFactors|INFO|Pandas version: 1.3.5


In [2]:
# Load the project configuration.
# read_file() on an explicitly opened file is used instead of read():
# read() silently skips files it cannot open, which would leave only an
# empty DEFAULT section and surface later as a KeyError on the first
# default_config[...] lookup. Opening the file ourselves makes a missing or
# unreadable config fail right here with an OSError naming the path.
config = configparser.ConfigParser()
with open('./config/config.ini') as config_file:
    config.read_file(config_file)
default_config = config["DEFAULT"]

# Price History data

In [3]:
# Locate and load the price-history CSV from the configured data directory.
# The file is read with a two-row column header and the first column as a
# date-parsed index.
data_directory = default_config["DataDirectory"]
price_histories_file_name = f'{data_directory}/{default_config["PriceHistoriesFileName"]}'
logger.info(f'PRICE_HISTORIES_FILE|{price_histories_file_name}...')
price_histories = pd.read_csv(
    price_histories_file_name,
    header=[0, 1],
    index_col=[0],
    parse_dates=True,
    low_memory=False,
)
logger.info(f'PRICE_HISTORIES|{price_histories.index.min()}|{price_histories.index.max()}')

# Keep only the most recent NumberOfYearsForAlpha years, counted back from
# the newest date present in the data.
logger.info(f'Using {default_config["NumberOfYearsForAlpha"]} years of price history data to generate alpha factors.')
latest_date = price_histories.index.max()
alpha_window = pd.DateOffset(years=int(default_config["NumberOfYearsForAlpha"]))
earliest_date = latest_date - alpha_window
not_before_start = price_histories.index >= earliest_date
not_after_end = price_histories.index <= latest_date
price_histories = price_histories[not_before_start & not_after_end]
logger.info(f'PRICE_HISTORIES_ALPHA|{price_histories.index.min()}|{price_histories.index.max()}')

# Closing prices; the resulting columns are counted as the ticker universe.
close = price_histories.Close
logger.info(f'STOCK_TICKERS|{len(close.columns)}')

# Output locations used by the alpha and beta stages below.
alpha_factors_file_name = f'{data_directory}/{default_config["AlphaFactorsFileName"]}'
beta_factors_file_name = f'{data_directory}/{default_config["BetaFactorsFileName"]}'

2022-05-14 13:04:09,704|GenerateAlphaAndBetaFactors|INFO|PRICE_HISTORIES_FILE|./data/price_histories_yahoo.csv...
2022-05-14 13:04:10,738|GenerateAlphaAndBetaFactors|INFO|PRICE_HISTORIES|2017-05-15 00:00:00|2022-05-13 00:00:00
2022-05-14 13:04:10,739|GenerateAlphaAndBetaFactors|INFO|Using 5 years of price history data to generate alpha factors.
2022-05-14 13:04:10,756|GenerateAlphaAndBetaFactors|INFO|PRICE_HISTORIES_ALPHA|2017-05-15 00:00:00|2022-05-13 00:00:00
2022-05-14 13:04:10,758|GenerateAlphaAndBetaFactors|INFO|STOCK_TICKERS|501


# Stage 2a: Generate Alpha Factors using Stock Price History data

- Compute custom alpha factors
- Save Alpha Factors

In [4]:
# Look up sector membership for the tickers in `close`; the sector grouping
# is used below to demean the momentum and mean-reversion factors.
logger.info('Gathering snp500 stock ticker sector data...')
snp_500_stocks = utils.get_snp500()
sector_helper = alpha_factors.get_sector_helper(snp_500_stocks, 'GICS Sector', close.columns)
logger.info('Stock sector information gatherd.')

# Each factor is built from the price histories, post-processed through a
# rank/zscore chain, and appended in a fixed order.
alpha_factors_list = []

logger.info('Generate Momentum alpha factors...')
alpha_factors_list.append(
    alpha_factors.FactorMomentum(price_histories, 252)
    .demean(groupby=sector_helper.values())
    .rank()
    .zscore()
    .for_al()
)

logger.info('Generate Trailing Overnight Returns alpha factors...')
alpha_factors_list.append(
    alpha_factors.TrailingOvernightReturns(price_histories, 10)
    .rank()
    .zscore()
    .smoothed(10)
    .rank()
    .zscore()
    .for_al()
)

logger.info('Mean Reversion alpha factors...')
alpha_factors_list.append(
    alpha_factors.FactorMeanReversion(price_histories, 120)
    .demean(groupby=sector_helper.values())
    .rank()
    .zscore()
    .for_al()
)

logger.info('Annulized Volatility alpha factors...')
alpha_factors_list.append(
    alpha_factors.AnnualizedVolatility(price_histories, 20)
    .rank()
    .zscore()
    .for_al()
)

# Join the factors side by side, order by index, and drop any row that has
# a missing value in any factor column.
logger.info(f'Combining {len(alpha_factors_list)} alphas into one dataframe...')
all_factors = pd.concat(alpha_factors_list, axis=1).sort_index().dropna()

# Refuse to write an empty factor file.
factor_row_count = len(all_factors)
if factor_row_count == 0:
    logger.error(f'ALPHA_FACTORS_EMPTY|{factor_row_count}')
    raise RuntimeError(f'Alpha Factors contains no data({factor_row_count})') from None

logger.info(f'ALPHA_FACTORS_FILE|{alpha_factors_file_name}')
all_factors.to_csv(alpha_factors_file_name)
logger.info('Alpha factors saved.')

# Log the name of every factor column that was written.
for alpha_factor in all_factors.columns:
    logger.info(f'ALPHA_FACTOR|{alpha_factor}')

2022-05-14 13:04:10,777|GenerateAlphaAndBetaFactors|INFO|Gathering snp500 stock ticker sector data...
2022-05-14 13:04:11,480|GenerateAlphaAndBetaFactors|INFO|Stock sector information gatherd.
2022-05-14 13:04:11,496|GenerateAlphaAndBetaFactors|INFO|Generate Momentum alpha factors...
2022-05-14 13:04:11,639|GenerateAlphaAndBetaFactors|INFO|Generate Trailing Overnight Returns alpha factors...
2022-05-14 13:04:11,982|GenerateAlphaAndBetaFactors|INFO|Mean Reversion alpha factors...
2022-05-14 13:04:12,123|GenerateAlphaAndBetaFactors|INFO|Annulized Volatility alpha factors...
2022-05-14 13:04:12,319|GenerateAlphaAndBetaFactors|INFO|Combining 4 alphas into one dataframe...
2022-05-14 13:05:02,681|GenerateAlphaAndBetaFactors|INFO|ALPHA_FACTORS_FILE|./data/all_factors.csv
2022-05-14 13:05:07,285|GenerateAlphaAndBetaFactors|INFO|Alpha factors saved.
2022-05-14 13:05:07,285|GenerateAlphaAndBetaFactors|INFO|ALPHA_FACTOR|momentum_252_day
2022-05-14 13:05:07,285|GenerateAlphaAndBetaFactors|INFO|AL

# Stage 2b: Generate Beta Factors

- Use Risk Model
- Compute Daily Betas, each fitted on the trailing 1 year of returns

In [6]:
# Build one PCA risk model per date in the returns index, starting
# (NumberOfYearsPriceHistories - 1) years before the last return date.
logger.info('Generate beta factors...')
returns = alpha_factors.FactorReturns(price_histories).factor_data.dropna()
end_date = returns.index.max()
number_of_beta_years = int(default_config["NumberOfYearsPriceHistories"]) - 1
start_date = end_date - pd.offsets.DateOffset(years=number_of_beta_years)
logger.info(f'Generating {number_of_beta_years} year Betas from {start_date} to {end_date}')

# NOTE(review): beta_dates is not read anywhere in this cell; kept in case a
# later cell depends on it - confirm and remove if unused.
beta_dates = pd.date_range(start_date, end_date, freq='D')

# Every model is fitted on the returns from one year before its date up to
# and including that date.
trailing_year = pd.offsets.DateOffset(years=1)
daily_betas = {}
for beta_date in tqdm(returns[start_date:].index, desc='Dates', unit=' Daily Beta'):
    start_of_returns = beta_date - trailing_year
    beta_returns = returns.loc[start_of_returns:beta_date]
    risk_model = alpha_factors.RiskModelPCA(beta_returns, 1, 20)
    # Keys are the model date formatted as MM/DD/YYYY.
    date_key = beta_date.strftime('%m/%d/%Y')
    daily_betas[date_key] = risk_model

# Persist the date -> risk model mapping with the highest pickle protocol
# available.
logger.info(f'BETA_FACTORS_FILE|{beta_factors_file_name}')
with open(beta_factors_file_name, 'wb') as beta_file:
    pickle.dump(daily_betas, beta_file, pickle.HIGHEST_PROTOCOL)

2022-05-14 13:07:49,548|GenerateAlphaAndBetaFactors|INFO|Generate beta factors...
2022-05-14 13:07:49,608|GenerateAlphaAndBetaFactors|INFO|Generating 4 year Betas from 2018-05-13 00:00:00 to 2022-05-13 00:00:00


Dates:   0%|          | 0/1009 [00:00<?, ? Daily Beta/s]

2022-05-14 13:08:28,526|GenerateAlphaAndBetaFactors|INFO|BETA_FACTORS_FILE|./data/daily_beta.pickle
