In [1]:
# Machine Learning Algo using Logistic Regression and Factor analysis to predict returns 21 days out
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Latest
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.data import morningstar
from quantopian.pipeline import CustomFactor
from quantopian.pipeline.factors import Returns
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.filters import Q500US, Q1500US
from quantopian.pipeline.data.zacks import EarningsSurprises

import pandas as pd
import numpy as np
from time import time
import math

import matplotlib.pyplot as plt
from sklearn import ensemble, preprocessing, metrics
from sklearn.linear_model import LogisticRegression

In [2]:
n_fwd_days = 21

In [9]:
bs = morningstar.balance_sheet
cfs = morningstar.cash_flow_statement
is_ = morningstar.income_statement
or_ = morningstar.operation_ratios
er = morningstar.earnings_report
v = morningstar.valuation
vr = morningstar.valuation_ratios

def make_pipeline():
    
    base_universe = Q1500US()
    
    class StdDev(CustomFactor):
        """
        
        Calculate 3 months realized volatility
        
        """
        
        def compute(self, today, asset_ids, out, values):
            # will calculate the column wise standard deviation, ignoring NaNs
            out[:] = np.nanstd(values, axis=0)
            
    class Momentum(CustomFactor):
        # Default inputs
        inputs = [USEquityPricing.close]
        """
        Calculates 1-month price momentum
        1 month closiing price rate of change
        
        Notes High value suggest momentum (shorter term)
        Equivalent to analysis of returns (1-month window)
        
        source:https://www.pnc.com/content/dam/pnc-com/pdf/personal/wealth-investments/WhitePapers/FactorAnalysisFeb2014.pdf
        
        """
        
        # Compute momentum
        def compute(self,today, assets,out,close):
            out[:] = close[-1] / close[0]
            
    class Mean_Reversion_1M(CustomFactor):
        inputs =[Returns(window_length = 21)]
        window_length = 252
        
        def compute(self, today, assets, out, monthly_rets):
            out[:] = (monthly_rets[-1] - np.nanmean(monthly_rets, axis=0)) / \
            np.nanstd(monthly_rets, axis=0)
            
    class Price_Oscillator(CustomFactor):
        """
        4/52-Week Price Oscillator:
        Average close prices over 4 weeks divided by average close prices over 52 weeks all less 1
        source: https://www.math.nyu.edu/faculty/avellane/Lo13030.pdf
        
        Notes:
        A high value suggests momentum
        
        """
        
        inputs = [USEquityPricing.close]
        window_length = 252
        
        def compute(self, today, assets, out, close):
            four_week_period = close[-20:]
            out[:] = (np.nanmean(four_week_period, axis=0) / np.nanmean(close, axis=0))-1
        
    def Earnings_Quality():
        return cfs.operating_cash_flow.latest / \
        EarningsSurprises.eps_act.latest
    
    Price_Momentum_1M = Momentum(window_length=21)
    std_dev = StdDev(inputs=[USEquityPricing.close], window_length=63, mask=base_universe)
    Price_Oscillator = Price_Oscillator()
    
    return Pipeline(
        columns={'pb_ratio': vr.pb_ratio.latest,
                 'gp_ta': is_.gross_profit.latest / bs.total_assets.latest,
                 'roe': or_.roe.latest,
                 'net_margin': or_.net_margin.latest,
                 'assets_turnover': or_.assets_turnover.latest,
                 'gearing': bs.total_debt.latest / bs.total_equity.latest,
                 'forward_earning_yeild': vr.forward_earning_yield.latest,
                 'cf_yield': vr.cf_yield.latest,
                 'dividend_yield': vr.dividend_yield.latest,
                 'market_cap': v.market_cap.latest,
                 'vol':std_dev,
                 'Price_Momentum_1M': Price_Momentum_1M,
                 'Earnings_Quality': cfs.operating_cash_flow.latest / EarningsSurprises.eps_act.latest,
                 'Price_Oscillator': Price_Oscillator,
        }, screen=base_universe
    
    )

In [15]:
start = pd.Timestamp("2018-01-01")
end = pd.Timestamp("2018-02-01")
end_1m = pd.Timestamp("2018-02-20")
test_start = pd.Timestamp("2018-03-01")

In [16]:
start_timer = time()
results = run_pipeline(make_pipeline(), start,end)
end_timer = time()
print("Time to run pipeline %.2f secs" %(end_timer - start_timer))

Time to run pipeline 10.94 secs


In [17]:
results.index.names = ['date', 'security']

In [18]:
results.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Earnings_Quality,Price_Momentum_1M,Price_Oscillator,assets_turnover,cf_yield,dividend_yield,forward_earning_yeild,gearing,gp_ta,market_cap,net_margin,pb_ratio,roe,vol
date,security,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-01-02 00:00:00+00:00,Equity(2 [ARNC]),688000000.0,1.108581,0.003372,0.168792,0.0623,0.0088,0.055,1.152824,0.03171,13116080000.0,0.031211,2.226839,0.017456,1.288816
2018-01-02 00:00:00+00:00,Equity(24 [AAPL]),7563285000.0,0.985735,0.150833,0.145953,0.0716,0.0145,0.0671,0.862981,0.053104,860882500000.0,0.20377,6.422244,0.080414,7.668757
2018-01-02 00:00:00+00:00,Equity(53 [ABMD]),98084090.0,0.962117,0.296576,0.197232,0.0178,,0.0179,0.026387,0.160074,8286548000.0,0.184479,13.699354,0.041816,9.65717
2018-01-02 00:00:00+00:00,Equity(62 [ABT]),3037879000.0,1.013482,0.162189,0.095245,0.0532,0.0186,0.049,0.750031,0.054977,99336080000.0,0.0883,3.101151,0.018952,0.929016
2018-01-02 00:00:00+00:00,Equity(67 [ADSK]),121666700.0,0.956391,0.037483,0.121162,-0.0027,0.0004,0.011,14.775396,0.105443,23090570000.0,-0.232486,215.196374,-0.566698,8.286992


In [19]:
start_timer = time()
assets = results.index.levels[1].unique()
pricing = get_pricing(assets, start_date=start, end_date=end_1m, fields='price')
end_timer = time()
print("Time to extract prices %.2f secs" %(end_timer-start_timer))

Time to extract prices 0.83 secs


In [20]:
# historical Pricing data
pricing.head()

Unnamed: 0,Equity(2 [ARNC]),Equity(24 [AAPL]),Equity(53 [ABMD]),Equity(62 [ABT]),Equity(67 [ADSK]),Equity(76 [TAP]),Equity(114 [ADBE]),Equity(122 [ADI]),Equity(128 [ADM]),Equity(161 [AEP]),...,Equity(51016 [JBGS]),Equity(51046 [BHF]),Equity(51079 [RDFN]),Equity(51091 [VNTR]),Equity(51157 [DWDP]),Equity(51205 [BKI]),Equity(51231 [ROKU]),Equity(51268 [SWCH]),Equity(51494 [DLPH]),Equity(51689 [PAGS])
2018-01-02 00:00:00+00:00,27.705,171.551,192.43,58.51,107.1,82.25,177.69,90.31,39.769,71.743,...,34.54,57.67,31.04,22.85,71.89,45.9,52.011,17.17,54.898,
2018-01-03 00:00:00+00:00,27.914,171.481,195.77,58.63,109.36,81.46,180.98,91.37,39.481,71.109,...,34.46,58.52,28.17,23.11,73.03,45.75,55.38,17.28,53.792,
2018-01-04 00:00:00+00:00,29.261,172.317,199.3,58.54,112.09,82.05,183.21,91.29,40.136,70.277,...,33.68,59.5,27.3,23.01,74.45,46.9,51.169,17.26,56.264,
2018-01-05 00:00:00+00:00,29.541,174.26,202.28,58.709,110.82,82.93,185.35,91.64,39.858,70.099,...,33.63,60.61,28.19,23.47,75.36,46.85,46.25,17.58,56.294,
2018-01-08 00:00:00+00:00,29.691,173.632,207.8,58.56,111.41,83.87,185.02,91.84,39.769,70.733,...,33.74,60.49,27.68,23.62,75.14,48.775,47.45,17.98,55.676,


In [23]:
# calculate the 1 month forward returns predictions

df = pricing.pct_change(n_fwd_days).shift(-n_fwd_days)
df = df[:end]
df.head()

Unnamed: 0,Equity(2 [ARNC]),Equity(24 [AAPL]),Equity(53 [ABMD]),Equity(62 [ABT]),Equity(67 [ADSK]),Equity(76 [TAP]),Equity(114 [ADBE]),Equity(122 [ADI]),Equity(128 [ADM]),Equity(161 [AEP]),...,Equity(51016 [JBGS]),Equity(51046 [BHF]),Equity(51079 [RDFN]),Equity(51091 [VNTR]),Equity(51157 [DWDP]),Equity(51205 [BKI]),Equity(51231 [ROKU]),Equity(51268 [SWCH]),Equity(51494 [DLPH]),Equity(51689 [PAGS])
2018-01-02 00:00:00+00:00,0.086446,-0.026418,0.312685,0.062382,0.079458,0.015684,0.122067,0.014506,0.059393,-0.063644,...,-0.043109,0.111479,-0.365657,-0.0186,0.022256,0.078976,-0.232855,-0.036692,0.013261,
2018-01-03 00:00:00+00:00,0.042846,-0.068357,0.267355,0.051509,0.020849,-0.005524,0.080948,-0.024625,0.040728,-0.056547,...,-0.040917,0.061347,-0.2989,-0.090437,-0.031357,0.06776,-0.270134,-0.08015,0.009072,
2018-01-04 00:00:00+00:00,-0.093674,-0.095533,0.201054,0.003075,-0.025872,-0.053016,0.038535,-0.064629,0.003214,-0.063278,...,-0.04899,-0.006218,-0.279121,-0.125598,-0.089322,0.018124,-0.215736,-0.097914,-0.082006,
2018-01-05 00:00:00+00:00,-0.14424,-0.068071,0.147321,0.002913,0.016243,-0.059568,0.049474,-0.033828,0.045562,-0.082797,...,-0.049955,-0.030193,-0.217453,-0.094163,-0.046178,0.034152,-0.094054,-0.141069,-0.050165,
2018-01-08 00:00:00+00:00,-0.154626,-0.085007,0.150192,0.001878,-0.007719,-0.092166,0.039671,-0.070557,0.052654,-0.097875,...,-0.058091,-0.024632,-0.169436,-0.067316,-0.060687,-0.004613,-0.036059,-0.167964,-0.029025,


In [24]:
df1 = df.stack()
df1

2018-01-02 00:00:00+00:00  Equity(2 [ARNC])        0.086446
                           Equity(24 [AAPL])      -0.026418
                           Equity(53 [ABMD])       0.312685
                           Equity(62 [ABT])        0.062382
                           Equity(67 [ADSK])       0.079458
                           Equity(76 [TAP])        0.015684
                           Equity(114 [ADBE])      0.122067
                           Equity(122 [ADI])       0.014506
                           Equity(128 [ADM])       0.059393
                           Equity(161 [AEP])      -0.063644
                           Equity(166 [AES])       0.069404
                           Equity(168 [AET])       0.040870
                           Equity(185 [AFL])       0.019552
                           Equity(197 [AGCO])      0.022740
                           Equity(216 [HES])       0.057483
                           Equity(239 [AIG])       0.077983
                           Equity(266 [A

In [25]:
df1.index.names = ['date', 'security']

In [26]:
df1

date                       security            
2018-01-02 00:00:00+00:00  Equity(2 [ARNC])        0.086446
                           Equity(24 [AAPL])      -0.026418
                           Equity(53 [ABMD])       0.312685
                           Equity(62 [ABT])        0.062382
                           Equity(67 [ADSK])       0.079458
                           Equity(76 [TAP])        0.015684
                           Equity(114 [ADBE])      0.122067
                           Equity(122 [ADI])       0.014506
                           Equity(128 [ADM])       0.059393
                           Equity(161 [AEP])      -0.063644
                           Equity(166 [AES])       0.069404
                           Equity(168 [AET])       0.040870
                           Equity(185 [AFL])       0.019552
                           Equity(197 [AGCO])      0.022740
                           Equity(216 [HES])       0.057483
                           Equity(239 [AIG])       0

In [31]:
df1.name = '1month_predicted_returns'

In [35]:
df_combined = results.join(df1)
df_combined.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Earnings_Quality,Price_Momentum_1M,Price_Oscillator,assets_turnover,cf_yield,dividend_yield,forward_earning_yeild,gearing,gp_ta,market_cap,net_margin,pb_ratio,roe,vol,1month_predicted_returns
date,security,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2018-01-02 00:00:00+00:00,Equity(2 [ARNC]),688000000.0,1.108581,0.003372,0.168792,0.0623,0.0088,0.055,1.152824,0.03171,13116080000.0,0.031211,2.226839,0.017456,1.288816,0.086446
2018-01-02 00:00:00+00:00,Equity(24 [AAPL]),7563285000.0,0.985735,0.150833,0.145953,0.0716,0.0145,0.0671,0.862981,0.053104,860882500000.0,0.20377,6.422244,0.080414,7.668757,-0.026418
2018-01-02 00:00:00+00:00,Equity(53 [ABMD]),98084090.0,0.962117,0.296576,0.197232,0.0178,,0.0179,0.026387,0.160074,8286548000.0,0.184479,13.699354,0.041816,9.65717,0.312685
2018-01-02 00:00:00+00:00,Equity(62 [ABT]),3037879000.0,1.013482,0.162189,0.095245,0.0532,0.0186,0.049,0.750031,0.054977,99336080000.0,0.0883,3.101151,0.018952,0.929016,0.062382
2018-01-02 00:00:00+00:00,Equity(67 [ADSK]),121666700.0,0.956391,0.037483,0.121162,-0.0027,0.0004,0.011,14.775396,0.105443,23090570000.0,-0.232486,215.196374,-0.566698,8.286992,0.079458
