# Portfolio Workflow

In [1]:
import logging
import logging.config

# Configure logging from the project's ini file before the remaining imports run,
# presumably so that messages emitted while other libraries load go through the
# same handlers/format as the notebook's own messages.
logging.config.fileConfig('./config/logging.ini')
# Named logger used for every message this notebook emits.
logger = logging.getLogger('GenerateAlphaFactors')

import configparser
from platform import python_version
from pathlib import Path

# Set the import path for the project tools directory
import sys
# insert at position 1 in the path, as 0 is the path of this file.
# NOTE: 'tools' is a relative path, so it is resolved against the kernel's
# current working directory.
sys.path.insert(1, 'tools')

# Project imports
# Each project module is reloaded immediately after it is imported, so re-running
# this cell picks up edits to those modules without restarting the kernel.
import importlib
import trading_factors_yahoo as alpha_factors
importlib.reload(alpha_factors)
import utils
importlib.reload(utils)

import time
from datetime import datetime
import os
import pandas as pd
import numpy as np
import math
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import pickle

%matplotlib inline
# Figure defaults applied to every plot rendered in this notebook.
plt.style.use('ggplot')
plt.rcParams.update({'figure.figsize': (20, 8)})

# Record the interpreter and pandas versions so a run can be reproduced later.
logger.info(f'Python version: {python_version()}')
logger.info(f'Pandas version: {pd.__version__}')

2022-04-23 17:37:56,866|numexpr.utils|INFO|NumExpr defaulting to 4 threads.
2022-04-23 17:38:00,429|GenerateAlphaFactors|INFO|Python version: 3.8.8
2022-04-23 17:38:00,430|GenerateAlphaFactors|INFO|Pandas version: 1.3.5


In [2]:
# Load the project settings used by every later cell.
#
# ConfigParser.read() silently skips files it cannot open and returns the list of
# files it actually parsed. Without the check below, a missing config file would
# only surface later as a KeyError on the first option lookup (the DEFAULT section
# always exists, even when nothing was read).
config_file_name = './config/config.ini'
config = configparser.ConfigParser()
if not config.read(config_file_name):
    raise FileNotFoundError(f'Configuration file not found or unreadable: {config_file_name}')
default_config = config["DEFAULT"]

# Price History data

In [3]:
# Locate the price history CSV inside the configured data directory.
data_directory = default_config["DataDirectory"]
price_histories_file_name = f'{data_directory}/{default_config["PriceHistoriesFileName"]}'
logger.info(f'PRICE_HISTORIES_FILE|{price_histories_file_name}...')

# The file carries two header rows (read as two-level columns) and uses its first
# column, parsed as dates, as the index.
price_histories = pd.read_csv(
    price_histories_file_name,
    header=[0, 1],
    index_col=[0],
    parse_dates=True,
    low_memory=False,
)
logger.info(f'PRICE_HISTORIES|{price_histories.index.min()}|{price_histories.index.max()}')
logger.info(f'Using {default_config["NumberOfYearsForAlpha"]} years of price history data to generate alpha factors.')

# Keep only the trailing window of configured length, ending at the most recent
# date present in the file.
latest_date = price_histories.index.max()
years_of_history = int(default_config["NumberOfYearsForAlpha"])
earliest_date = latest_date - pd.DateOffset(years=years_of_history)
history_dates = price_histories.index
in_alpha_window = (history_dates >= earliest_date) & (history_dates <= latest_date)
price_histories = price_histories[in_alpha_window]
logger.info(f'PRICE_HISTORIES_ALPHA|{price_histories.index.min()}|{price_histories.index.max()}')

# Selecting the top-level 'Close' group leaves one column per ticker.
close = price_histories.Close
logger.info(f'STOCK_TICKERS|{len(close.columns)}')

# Destination of the generated factors; written by the factor generation cell.
alpha_factors_file_name = f'{data_directory}/{default_config["AlphaFactorsFileName"]}'

2022-04-23 17:38:00,473|GenerateAlphaFactors|INFO|PRICE_HISTORIES_FILE|./data/price_histories_yahoo.csv...
2022-04-23 17:38:01,366|GenerateAlphaFactors|INFO|PRICE_HISTORIES|2017-04-24 00:00:00|2022-04-22 00:00:00
2022-04-23 17:38:01,367|GenerateAlphaFactors|INFO|Using 5 years of price history data to generate alpha factors.
2022-04-23 17:38:01,382|GenerateAlphaFactors|INFO|PRICE_HISTORIES_ALPHA|2017-04-24 00:00:00|2022-04-22 00:00:00
2022-04-23 17:38:01,384|GenerateAlphaFactors|INFO|STOCK_TICKERS|431
2022-04-23 17:38:01,385|GenerateAlphaFactors|INFO|ALPHA_FACTORS_FILE|./data/all_factors.csv


# Stage 2a: Generate Alpha Factors using Stock Price History data

- Compute custom alpha factors
- Save Alpha Factors

In [5]:
# Sector lookup for the tickers in `close`; its values drive the per-sector
# demeaning of the momentum and mean reversion factors below.
logger.info('Gathering snp500 stock ticker sector data...')
snp_500_stocks = utils.get_snp500()
sector_helper = alpha_factors.get_sector_helper(snp_500_stocks, 'GICS Sector', close.columns)
logger.info('Stock sector information gatherd.')
alpha_factors_list = []

# Each factor below is built as a pipeline on the project's factor classes and
# finished with for_al() (presumably the layout expected downstream - TODO confirm).

# Momentum over a 252-day window, demeaned by sector, then ranked and z-scored.
logger.info('Generate Momentum alpha factors...')
momentum_factor = (
    alpha_factors.FactorMomentum(price_histories, 252)
    .demean(groupby=sector_helper.values())
    .rank()
    .zscore()
    .for_al()
)
alpha_factors_list.append(momentum_factor)

# Trailing overnight returns (window 10): ranked and z-scored, smoothed over 10,
# then ranked and z-scored again.
logger.info('Generate Trailing Overnight Returns alpha factors...')
overnight_returns_factor = (
    alpha_factors.TrailingOvernightReturns(price_histories, 10)
    .rank()
    .zscore()
    .smoothed(10)
    .rank()
    .zscore()
    .for_al()
)
alpha_factors_list.append(overnight_returns_factor)

# Mean reversion over a 120-day window, demeaned by sector, then ranked and z-scored.
logger.info('Mean Reversion alpha factors...')
mean_reversion_factor = (
    alpha_factors.FactorMeanReversion(price_histories, 120)
    .demean(groupby=sector_helper.values())
    .rank()
    .zscore()
    .for_al()
)
alpha_factors_list.append(mean_reversion_factor)

# Annualized volatility over a 20-day window, ranked and z-scored.
logger.info('Annulized Volatility alpha factors...')
volatility_factor = (
    alpha_factors.AnnualizedVolatility(price_histories, 20)
    .rank()
    .zscore()
    .for_al()
)
alpha_factors_list.append(volatility_factor)

# Place the factors side by side, order by index, and drop every row in which at
# least one factor is missing.
logger.info(f'Combining {len(alpha_factors_list)} alphas into one dataframe...')
all_factors = pd.concat(alpha_factors_list, axis=1).sort_index().dropna()

# Persist the combined factors for the later stages of the workflow.
logger.info(f'ALPHA_FACTORS_FILE|{alpha_factors_file_name}')
all_factors.to_csv(alpha_factors_file_name)
logger.info('Alpha factors saved.')

# List the factor columns that ended up in the saved file.
for alpha_factor in all_factors.columns:
    logger.info(f'ALPHA_FACTOR|{alpha_factor}')

2022-04-23 17:40:05,727|GenerateAlphaFactors|INFO|Gathering snp500 stock ticker sector data...
2022-04-23 17:40:06,482|GenerateAlphaFactors|INFO|Stock sector information gatherd.
2022-04-23 17:40:06,638|GenerateAlphaFactors|INFO|Generate Momentum alpha factors...
2022-04-23 17:40:06,761|GenerateAlphaFactors|INFO|Generate Trailing Overnight Returns alpha factors...
2022-04-23 17:40:07,046|GenerateAlphaFactors|INFO|Mean Reversion alpha factors...
2022-04-23 17:40:07,165|GenerateAlphaFactors|INFO|Annulized Volatility alpha factors...
2022-04-23 17:40:07,333|GenerateAlphaFactors|INFO|Combining 4 alphas into one dataframe...
2022-04-23 17:40:45,643|GenerateAlphaFactors|INFO|ALPHA_FACTORS_FILE|./data/all_factors.csv
2022-04-23 17:40:49,464|GenerateAlphaFactors|INFO|Alpha factors saved.
2022-04-23 17:40:49,464|GenerateAlphaFactors|INFO|ALPHA_FACTOR|momentum_252_day
2022-04-23 17:40:49,464|GenerateAlphaFactors|INFO|ALPHA_FACTOR|trailing_overnight_returns_10_day_smoothed
2022-04-23 17:40:49,464