<font size="6"> **Multifactor Risk Models** </font>

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
%run ../nb_config.py

In [3]:
import os

import numpy as np
import scipy

from src import utils
from src.load_data import io_utils
from src.mle_quant_utils import quant_helper, quant_factors

In [4]:
from zipline.data import bundles
from zipline.utils.calendars import get_calendar
from zipline.pipeline.factors import AverageDollarVolume, Returns, DailyReturns, SimpleMovingAverage, AnnualizedVolatility, CustomFactor
from zipline.pipeline.data import USEquityPricing
from zipline.pipeline import Pipeline
from zipline.data.data_portal import DataPortal

import alphalens as al

In [6]:
cfg = utils.read_conf()

BUNDLE_FOLDER = cfg['quantopian']['dataset2']['bundle_folder']
BUNDLE_NAME = cfg['quantopian']['dataset2']['bundle_name']
SECTOR_FOLDER = cfg['quantopian']['dataset2']['sector_folder']
SECTOR_DATA = cfg['quantopian']['dataset2']['sector_data']
SECTOR_NAMES = cfg['quantopian']['dataset2']['sector_names']

# Specify the bundle path
bundle_path = os.path.join(io_utils.raw_path, 'market_data', BUNDLE_FOLDER)
sector_path = os.path.join(io_utils.raw_path, 'market_data',SECTOR_FOLDER, SECTOR_DATA)
sector_file = os.path.join(io_utils.raw_path, 'market_data',SECTOR_FOLDER, SECTOR_NAMES)
os.path.isdir(bundle_path)

True

In [14]:
INFILE1 = cfg['output']['interim']['all_factors']

In [7]:
OUTFILE1 = cfg['output']['interim']['pricing']
OUTFILE2 = cfg['output']['interim']['volume']

In [8]:
adv_win = int(cfg['models']['universe']['window'])
adv_top = int(cfg['models']['universe']['adv_top'])
print('Universe ADV window: {} and top threshold: {}'.format(adv_win, adv_top))

Universe ADV window: 120 and top threshold: 500


In [None]:
universe_end_date =  pd.Timestamp( cfg['models']['universe']['start'], tz='UTC') # pd.Timestamp('2016-01-05', tz='UTC')
universe_start_date =  pd.Timestamp( cfg['models']['universe']['end_risk'], tz='UTC')  # universe_end_date - pd.DateOffset(years=5)
print('Universe start: {} and end: {} dates'.format(universe_end_date, universe_start_date))

In [None]:
split_col = cfg['models']['alpha_ml']['split_col']

In [None]:
MODEL_VERSION = 'v01'
print('Model version: ', MODEL_VERSION)

# Load Data

## Data Bundle

In [16]:
# Data Bundle
os.environ['ZIPLINE_ROOT'] = bundle_path
ingest_func = bundles.csvdir.csvdir_equities(['daily'], BUNDLE_NAME)
bundles.register(BUNDLE_NAME, ingest_func)
print('Data Registered')

Data Registered


In [17]:
universe = AverageDollarVolume(window_length=adv_win).top(adv_top) 
trading_calendar = get_calendar('NYSE') 
bundle_data = bundles.load(BUNDLE_NAME)
engine =  quant_helper.build_pipeline_engine(bundle_data, trading_calendar)

In [18]:
universe_tickers = engine\
    .run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date)\
    .index.get_level_values(1)\
    .values.tolist()
    
universe_tickers[:5]

[Equity(0 [A]),
 Equity(1 [AAL]),
 Equity(2 [AAP]),
 Equity(3 [AAPL]),
 Equity(4 [ABBV])]

## Data Portal
Not that we have our pipeline built, let's access the returns data. We'll start by building a data portal.

In [19]:
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_minute_reader=None,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader)

In [20]:
pricing = quant_helper.get_pricing(
        data_portal,
        trading_calendar,
        universe_tickers,
        universe_start_date,
        universe_end_date)
   
volume  = quant_helper.get_pricing(
        data_portal,
        trading_calendar,
        universe_tickers,
        universe_start_date,
        universe_end_date, field='volume')

  end_dt = pd.Timestamp(end_date.strftime('%Y-%m-%d'), tz='UTC', offset='C')
  start_dt = pd.Timestamp(start_date.strftime('%Y-%m-%d'), tz='UTC', offset='C')


# Data Partition

In [22]:
valid_dates = splits[splits=='valid'].index.get_level_values(0)
valid_start = valid_dates[0].strftime('%Y-%m')
valid_end = valid_dates[-1].strftime('%Y-%m')

In [23]:
test_dates = splits[splits=='test'].index.get_level_values(0)
test_start = test_dates[0].strftime('%Y-%m')
test_end = test_dates[-1].strftime('%Y-%m')

In [24]:
train_valid_idx = splits[(splits=='train') | (splits=='valid')].index.get_level_values(0).drop_duplicates()

# Write Data

In [25]:
pricing.columns = [x.symbol for x in pricing.columns]
pricing.index.name = 'date'
pd.concat([pricing.head(2), pricing.tail(2)], axis=0)

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACN,ADBE,ADI,ADM,ADP,ADS,ADSK,AEE,AEP,AES,AET,AFL,AGN,AIG,AIV,AIZ,AJG,AKAM,...,WBA,WDC,WEC,WFC,WHR,WM,WMB,WMT,WRK,WU,WY,WYN,WYNN,XEC,XEL,XL,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZION,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2011-01-06 00:00:00+00:00,57.014,10.822,59.209,42.399,,31.088,19.926,41.27,32.27,30.936,26.23,34.99,70.301,41.26,20.703,26.659,10.986,29.308,23.236,50.648,46.749,20.247,32.972,22.599,48.45,...,35.023,28.571,22.424,26.328,73.779,28.636,14.06,44.446,,15.49,15.583,25.603,85.93,85.815,17.834,19.045,25.095,59.697,33.386,25.945,,30.184,49.601,23.427,
2011-01-07 00:00:00+00:00,57.495,10.976,60.79,42.702,,31.15,20.009,41.338,32.04,30.756,26.445,35.07,70.026,40.76,20.703,26.555,10.986,29.371,23.145,51.44,47.314,20.088,32.596,22.458,48.69,...,34.814,28.589,22.594,25.796,73.468,28.479,14.283,44.545,,15.346,16.011,25.509,88.972,87.296,17.993,19.01,24.954,60.023,33.251,25.583,,30.56,49.592,23.182,
2016-01-04 00:00:00+00:00,130.838,39.933,151.537,100.621,52.526,97.613,40.544,96.984,91.97,51.31,33.183,77.587,267.23,60.31,39.633,53.39,8.493,106.357,27.897,300.779,57.212,36.45,74.077,37.451,52.31,...,79.032,56.158,47.153,49.176,139.143,49.421,22.642,57.521,37.972,16.153,27.113,66.912,65.687,89.186,32.915,36.226,43.194,70.617,58.018,26.056,34.923,49.3,99.714,25.983,46.398
2016-01-05 00:00:00+00:00,131.369,39.552,150.502,98.1,52.307,99.041,40.534,97.489,92.34,50.933,33.553,77.776,269.531,60.39,40.103,53.829,8.611,107.837,27.859,302.012,57.41,37.378,75.313,37.553,52.02,...,76.958,55.916,47.659,49.157,136.146,49.61,22.72,58.887,37.947,16.089,27.177,67.441,66.929,89.761,33.247,36.302,43.836,71.218,59.201,26.005,34.913,49.177,101.79,25.701,47.124


In [27]:
pricing.to_csv(io_utils.interim_path + OUTFILE1)

In [28]:
volume.columns = [x.symbol for x in volume.columns]
volume.index.name = 'date'
pd.concat([volume.head(2), volume.tail(2)], axis=0)

Unnamed: 0_level_0,A,AAL,AAP,AAPL,ABBV,ABC,ABT,ACN,ADBE,ADI,ADM,ADP,ADS,ADSK,AEE,AEP,AES,AET,AFL,AGN,AIG,AIV,AIZ,AJG,AKAM,...,WBA,WDC,WEC,WFC,WHR,WM,WMB,WMT,WRK,WU,WY,WYN,WYNN,XEC,XEL,XL,XLNX,XOM,XRAY,XRX,XYL,YUM,ZBH,ZION,ZTS
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1
2011-01-06 00:00:00+00:00,7507200.0,11968097.0,1513500.0,75107200.0,0.0,3344200.0,32471000.0,3859000.0,6230900.0,2987200.0,12048900.0,3760500.0,727700.0,2867800.0,1327800.0,3570500.0,13255400.0,5640500.0,5649000.0,1430300.0,11474200.0,1094800.0,825100.0,312400.0,2396600.0,...,8372500.0,3373700.0,2583600.0,32500000.0,1506800.0,2022000.0,3980600.0,15585500.0,0.0,6778200.0,10720000.0,1754100.0,2051200.0,1054100.0,2784400.0,6792000.0,4915000.0,22525300.0,500700.0,1782475.0,0.0,4095300.0,4292700.0,4421700.0,0.0
2011-01-07 00:00:00+00:00,6797000.0,11708043.0,1160600.0,77982800.0,0.0,2971000.0,21604400.0,2914600.0,6828500.0,3963300.0,8929400.0,3759600.0,413400.0,3329000.0,1891900.0,2727900.0,10993300.0,2570000.0,4870000.0,1170100.0,12774600.0,1098100.0,1090300.0,303700.0,2754100.0,...,6303500.0,4529700.0,2592800.0,81572300.0,1231800.0,1764300.0,7489000.0,7968600.0,0.0,4319400.0,11242200.0,1373700.0,3966000.0,664100.0,2731900.0,3849700.0,5513900.0,19297700.0,447700.0,2193150.0,0.0,3379100.0,2686900.0,5059400.0,0.0
2016-01-04 00:00:00+00:00,5719241.0,12037151.0,1935307.0,67649387.0,10308397.0,2326986.0,12227458.0,2817024.0,2993832.0,2648908.0,4040773.0,2246327.0,477420.0,2070653.0,2664386.0,4087757.0,4813485.0,4890511.0,4948306.0,3479796.0,10538617.0,1140015.0,734068.0,1163776.0,4341097.0,...,6347625.0,3024420.0,1877413.0,25984432.0,1097733.0,2067325.0,10992721.0,11988833.0,1646254.0,5887526.0,3556643.0,1626758.0,4249811.0,1109999.0,2819292.0,2922717.0,3472249.0,20399059.0,1387142.0,2280638.0,1353443.0,3466289.0,2042842.0,3523301.0,2871687.0
2016-01-05 00:00:00+00:00,3349871.0,10514164.0,1127677.0,55790992.0,7179634.0,2049940.0,8245208.0,2408961.0,1821298.0,2689457.0,3863221.0,2036731.0,362952.0,1323627.0,2295771.0,3434514.0,4393783.0,3476677.0,4099924.0,2187588.0,7832494.0,1692417.0,504992.0,1117199.0,1505033.0,...,7603141.0,1563237.0,2421124.0,14743907.0,981819.0,2401370.0,12394345.0,13325063.0,1566922.0,3689314.0,2722797.0,1692417.0,4286074.0,1557512.0,2141174.0,2223793.0,3895832.0,11992697.0,1611309.0,1620568.0,1075354.0,3791087.0,2227988.0,2653585.0,3117022.0


In [29]:
volume.to_csv(io_utils.interim_path + OUTFILE2)
