# Predictive Analysis (Copied from Alphalens + Quantopian | How To)

For more information on how to read and interpret the plots, see:

- [Quantopian Lecture Series: Factor Analysis](https://www.quantopian.com/lectures#Factor-Analysis)

## Define our factor

In [None]:
import numpy as np
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.factset import Fundamentals as FstFundamentals
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
import alphalens
from quantopian.pipeline.data.factset.estimates import PeriodicConsensus

def signalize(df):
    """Convert values into rank-based scores, scoring missing entries 0.5.

    Every non-missing value becomes ``(rank - 0.5) / count``, where ``rank``
    is its pandas rank and ``count`` is the number of non-missing values, so
    scores fall strictly between 0 and 1. NaN results are replaced by the
    neutral midpoint 0.5.

    Ranking is done over the whole input as given (no grouping is applied
    here).
    """
    ranks = df.rank()
    valid_count = df.count()
    scores = (ranks - 0.5) / valid_count
    return scores.replace(np.nan, 0.5)

def transform(df, field, multiplier=1):
    """Return the rank-based signal of one column of ``df``.

    The column ``df[field]`` is scaled by ``multiplier`` (pass -1 to invert
    the ordering) and then handed to ``signalize``.
    """
    scaled_column = multiplier * df[field]
    return signalize(scaled_column)

class MarketCap(CustomFactor):
    """Market capitalization: latest close price times shares outstanding."""

    # Only the most recent row is read, so a one-row window suffices.
    window_length = 1
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]

    def compute(self, today, assets, out, close, shares):
        latest_close = close[-1]
        latest_shares = shares[-1]
        out[:] = latest_close * latest_shares

class LogMarketCap(CustomFactor):
    """Natural log of (latest close price times shares outstanding)."""

    # Only the most recent row is read, so a one-row window suffices.
    window_length = 1
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]

    def compute(self, today, assets, out, close, shares):
        market_cap = close[-1] * shares[-1]
        out[:] = np.log(market_cap)

class Volatility(CustomFactor):
    """Standard deviation of daily close-to-close returns, scaled by sqrt(252).

    The 20-row close window yields 19 daily returns per asset.
    """
    inputs = [USEquityPricing.close]
    window_length = 20

    def compute(self, today, assets, out, close):
        # np.diff drops one row, so divide by every close except the last
        # to pair each price change with the close that preceded it.
        price_changes = np.diff(close, axis=0)
        previous_close = close[0:-1]
        daily_returns = price_changes / previous_close
        annualization = math.sqrt(252)
        out[:] = np.std(daily_returns, axis=0) * annualization

class Industry(CustomFactor):
    """Pass through the latest ``morningstar_industry_code`` per asset."""
    inputs = [Fundamentals.morningstar_industry_code]
    window_length = 1

    def compute(self, today, assets, out, code):
        latest_code = code[-1]
        out[:] = latest_code

        
class IndustryGroup(CustomFactor):
    """Pass through the latest ``morningstar_industry_group_code`` per asset."""
    inputs = [Fundamentals.morningstar_industry_group_code]
    window_length = 1

    def compute(self, today, assets, out, code):
        latest_code = code[-1]
        out[:] = latest_code


class Sector(CustomFactor):
    """Pass through the latest ``morningstar_sector_code`` per asset."""
    inputs = [Fundamentals.morningstar_sector_code]
    window_length = 1

    def compute(self, today, assets, out, code):
        latest_code = code[-1]
        out[:] = latest_code


class Factor1(CustomFactor):
    """``pcf_ratio`` taken from the second-to-last row of a two-row window."""

    inputs = [Fundamentals.pcf_ratio]
    window_length = 2

    def compute(self, today, assets, out, var):
        # Index -2 is the older of the two rows, i.e. the value lagged by one row.
        lagged_value = var[-2]
        out[:] = lagged_value
        
class Factor2(CustomFactor):
    """``ps_ratio`` taken from the second-to-last row of a two-row window."""

    inputs = [Fundamentals.ps_ratio]
    window_length = 2

    def compute(self, today, assets, out, var):
        # Index -2 is the older of the two rows, i.e. the value lagged by one row.
        lagged_value = var[-2]
        out[:] = lagged_value
    
class Factor3(CustomFactor):
    """Free cash flow scaled by a cube-root size measure.

    The denominator is the cube root of
    enterprise value * close * shares outstanding * total assets,
    all read from the second-to-last row of the two-row window.
    """
    inputs = [
        Fundamentals.enterprise_value,
        Fundamentals.free_cash_flow,
        USEquityPricing.close,
        Fundamentals.shares_outstanding,
        Fundamentals.total_assets,
    ]
    window_length = 2

    def compute(self, today, assets, out, ev, var, close, shares, ta):
        size_product = ev[-2] * close[-2] * shares[-2] * ta[-2]
        size_scale = size_product ** (1. / 3.)
        out[:] = var[-2] / size_scale

class Factor4(CustomFactor):
    """Total assets scaled by a square-root size measure.

    The denominator is the square root of
    enterprise value * close * shares outstanding, all read from the
    second-to-last row of the two-row window. The free-cash-flow input
    (``var``) is declared but not used in the calculation.
    """
    inputs = [
        Fundamentals.enterprise_value,
        Fundamentals.free_cash_flow,
        USEquityPricing.close,
        Fundamentals.shares_outstanding,
        Fundamentals.total_assets,
    ]
    window_length = 2

    def compute(self, today, assets, out, ev, var, close, shares, ta):
        size_product = ev[-2] * close[-2] * shares[-2]
        size_scale = size_product ** (1. / 2.)
        out[:] = ta[-2] / size_scale
        
class Factor5(CustomFactor):
    """
    Standard deviation of capital_expenditure / enterprise_value over the window.

    The author's note labels this "TEM = standard deviation of past 6
    quarters' reports"; presumably the 390-row window is meant to span
    roughly six quarters of daily rows -- TODO confirm.
    """
    window_length = 390
    inputs = [Fundamentals.capital_expenditure, Fundamentals.enterprise_value]

    def compute(self, today, assets, out, capex, ev):
        capex_to_ev = capex / ev
        # Per-asset dispersion down the time axis.
        out[:] = np.std(capex_to_ev, axis=0)

class Factor6(CustomFactor):
    """``forward_earning_yield`` from the second-to-last row of a two-row window."""
    inputs = [Fundamentals.forward_earning_yield]
    window_length = 2

    def compute(self, today, assets, out, syield):
        # Index -2 is the older of the two rows, i.e. the value lagged by one row.
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor7(CustomFactor):
    """``earning_yield`` from the second-to-last row of a two-row window."""
    inputs = [Fundamentals.earning_yield]
    window_length = 2

    def compute(self, today, assets, out, syield):
        # Index -2 is the older of the two rows, i.e. the value lagged by one row.
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor8(CustomFactor):
    """``sales_yield`` from the second-to-last row of a two-row window."""
    inputs = [Fundamentals.sales_yield]
    window_length = 2

    def compute(self, today, assets, out, syield):
        # Index -2 is the older of the two rows, i.e. the value lagged by one row.
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor9(CustomFactor):
    """Price-range activity multiplied by a bear-minus-bull message score.

    Over the 21-row window this multiplies two NaN-ignoring sums per asset:
    the sum of (high - low) / close, and the sum of
    total_scanned_messages * (bear_scored_messages - bull_scored_messages).
    """
    inputs = [
        USEquityPricing.high,
        USEquityPricing.low,
        USEquityPricing.close,
        stocktwits.bull_scored_messages,
        stocktwits.bear_scored_messages,
        stocktwits.total_scanned_messages,
    ]
    window_length = 21
    window_safe = True

    def compute(self, today, assets, out, high, low, close, bull, bear, total):
        range_ratio = (high - low) / close
        weighted_sentiment = total * (bear - bull)
        v = np.nansum(range_ratio, axis=0)
        out[:] = v * np.nansum(weighted_sentiment, axis=0)

#Contest Entry#3 - 07/05/2019
class Factor10(CustomFactor):
    """Mean of capital_expenditure / cost_of_revenue over a 360-row window."""
    window_length = 360
    inputs = [Fundamentals.capital_expenditure, Fundamentals.cost_of_revenue]

    def compute(self, today, assets, out, capex, cr):
        capex_to_cost = capex / cr
        # Per-asset average down the time axis.
        out[:] = np.mean(capex_to_cost, axis=0)
        
class Factor11(CustomFactor):
    """Mean divided by standard deviation of ``revenue_growth`` over 360 rows."""
    inputs = [Fundamentals.revenue_growth]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std

        
class Factor12(CustomFactor):
    """Mean divided by standard deviation of ``gross_margin`` over 360 rows."""
    inputs = [Fundamentals.gross_margin]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std

        
class Factor13(CustomFactor):
    """Mean divided by standard deviation of ``quick_ratio`` over 360 rows."""
    inputs = [Fundamentals.quick_ratio]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std
 
        
class Factor14(CustomFactor):
    """Reciprocal of the standard deviation of ``ebitda_margin`` over 360 rows."""
    inputs = [Fundamentals.ebitda_margin]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_std = np.std(rate, axis=0)
        out[:] = 1 / window_std

        
class Factor15(CustomFactor):
    """Mean divided by standard deviation of ``current_ratio`` over 360 rows."""
    inputs = [Fundamentals.current_ratio]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std
        
class Factor16(CustomFactor):
    """Negated, volatility-scaled momentum.

    Computes the return from row -252 to row -21 minus the return from
    row -21 to row -1, divides the difference by the NaN-ignoring standard
    deviation of the 126-period ``Returns`` input over the window, and
    writes the negated result.
    """
    inputs = (USEquityPricing.close, Returns(window_length=126))
    window_length = 252
    window_safe = True

    def compute(self, today, assets, out, prices, returns):
        # Return over the older part of the window (row -252 to row -21).
        long_term_return = (prices[-21] - prices[-252]) / prices[-252]
        # Return over the most recent 21 rows. The price difference must be
        # parenthesised before dividing; without the parentheses raw prices
        # were subtracted from a return and the whole numerator was divided
        # by prices[-21], mixing units.
        recent_return = (prices[-1] - prices[-21]) / prices[-21]

        am = np.divide(
            long_term_return - recent_return,
            np.nanstd(returns, axis=0)
        )

        out[:] = -am

class Factor17(CustomFactor):
    """Latest value of the FactSet ``oper_inc_gr_ltm`` field per asset.

    A 126-row window is requested but only its last row is used.
    """
    # Alternative input noted by the author: sales_gr_ltm
    inputs = [FstFundamentals.oper_inc_gr_ltm]
    window_length = 126

    def compute(self, today, assets, out, rate):
        latest_rate = rate[-1]
        out[:] = latest_rate
        # Variants left disabled by the author:
        #     rate.mean(axis = 0)
        #     /rate.std(axis = 0)




        
# Every factor below is computed only for assets passing this filter.
base_universe = QTradableStocksUS()

# Size and volatility descriptors.
mkt_cap = MarketCap(mask=base_universe)
log_mkt_cap = LogMarketCap(mask=base_universe)
vol = Volatility(mask=base_universe)

# Classification codes at three levels of granularity.
industry = Industry(mask=base_universe)
industryGroup = IndustryGroup(mask=base_universe)
sector = Sector(mask=base_universe)

# f1 = Factor1(mask = base_universe)
# f2 = Factor2(mask = base_universe)
# f3 = Factor3(mask = base_universe)
# f4 = Factor4(mask = base_universe)
# f5 = Factor5(mask = base_universe)
# f6 = Factor6(mask = base_universe)
# f7 = Factor7(mask = base_universe)
# f8 = Factor8(mask = base_universe)
# f9 = Factor9(mask = base_universe)
# f10 = Factor10(mask = base_universe)
# f11 = Factor11(mask = base_universe)
# f12 = Factor12(mask = base_universe)
# f13 = Factor13(mask = base_universe)
# f14 = Factor14(mask = base_universe)
# f15 = Factor15(mask = base_universe)
# f16 = Factor16(mask = base_universe)
# f17 = Factor17(mask = base_universe)

# f18 = f3.demean(mask = f13.notnan(), groupby=classifiers.fundamentals.Sector())

# Consensus EPS slices at offsets 1 and 2 (presumably the next two fiscal
# quarters -- confirm against the FactSet estimates docs), each reduced to
# the latest consensus mean.
fq1_eps_cons = PeriodicConsensus.slice('EPS', 'qf', 1)
fq1_eps_mean = fq1_eps_cons.mean.latest

fq2_eps_cons = PeriodicConsensus.slice('EPS', 'qf', 2)
fq2_eps_mean = fq2_eps_cons.mean.latest

# Relative change in consensus mean EPS from the offset-1 to the offset-2 period.
estimated_growth_factor = (fq2_eps_mean - fq1_eps_mean) / fq1_eps_mean


# One output column per descriptor, restricted to the base universe.
pipe_my_alpha_factor = Pipeline(
    screen=base_universe,
    columns={
#         'f1': f1,
#         'f2': f2,
        'vol': vol,
        'log_mkt_cap': log_mkt_cap,
        'mkt_cap': mkt_cap,
        'industry': industry,
        'industryGroup': industryGroup,
        'sector': sector,
        'est_grwth_fctr': estimated_growth_factor,
    },
)

# Evaluate the pipeline over the research window.
output = run_pipeline(
    pipe_my_alpha_factor,
    '2015-06-01',
    '2018-09-01'
)
output.head()

# Market and Volatility Factors
# Rank-based signals for size (both orientations) and inverted volatility.
mk = transform(output, 'log_mkt_cap', 1)
mk_inv = transform(output, 'log_mkt_cap', -1)
vl = transform(output, 'vol', -1)

# Raw (unranked) columns, used as weights when combining alphas.
volatility = output['vol']
mkt_cap = output['mkt_cap']

# size_noextreme = mk
# size_noextreme = abs(size_noextreme.sub(size_noextreme.mean()))
# size_noextreme[size_noextreme > 0.45] = 0
# size_noextreme.head(100)

# Parenthesised form prints the same text on Python 2 and is valid on Python 3.
print("Done")


## Build the alpha factor

In [None]:
# Alpha Factors
# alpha1 = signalize(transform(output, 'f1', 1)*mk)        
# alpha2 = signalize(signalize(transform(output, 'f2', 1)*mk)*vl)

# output['mkt_cap'] = mk_inv*output['mkt_cap']
# output['weightedAlpha'] = -output['f1']*output['mkt_cap']
# print output['sector'].head(100)
# print output.index.get_level_values(1)

# output.groupby([output.index.get_level_values(0),'sector']).count()
# gk = output.groupby([output.index.get_level_values(0),'sector'])[['weightedAlpha', 'mkt_cap']].sum().reset_index()
# output['sumWeightedAlpha'] = output['weightedAlpha'].groupby([output.index.get_level_values(0), output['sector']]).transform('sum')
# output['sumWeight'] = output['mkt_cap'].groupby([output.index.get_level_values(0), output['sector']]).transform('sum')
# alpha1_s = signalize(output['sumWeightedAlpha']/output['sumWeight'])


# alpha3 = transform(output, 'f3',1)
# alpha3_o = transform(output, 'f3',1)
# alpha4 = transform(output, 'f4', -1)
# alpha4_o = transform(output, 'f4', -1)
# alpha5 = transform(output, 'f5', -1)
# alpha6 = transform(output, 'f6', -1)
# alpha6_o = signalize(signalize(transform(output, 'f6', -1)*mk)*vl)

# alpha7 = transform(output, 'f7', -1)
# alpha8 = transform(output, 'f8', -1)
# alpha9 = signalize(transform(output, 'f9', 1)*vl)
# alpha9_o = transform(output, 'f9', 1)
# alpha10 = transform(output, 'f10', 1)
# alpha10_o = signalize(signalize(transform(output, 'f10', 1)*mk)*vl)

# alpha11 = transform(output, 'f11', 1)
# alpha12 = transform(output, 'f12', 1)
# alpha13_o = transform(output, 'f13', 1)
# alpha14_o = signalize(transform(output, 'f14', 1)*mk)
# alpha15_o = transform(output, 'f15', 1)

# # New Construction
# alpha13 = signalize(transform(output, 'f13', 1)*mkt_cap)
# alpha14 = signalize(transform(output, 'f14', 1)*(1/mkt_cap))
# alpha15 = signalize(transform(output, 'f15', 1)*(1/mkt_cap))
# alpha16 = signalize(transform(output, 'f16', 1)*(1/volatility))

# alpha17 = transform(output, 'f17', 1)
# alpha17 = signalize(transform(output, 'f17', 1)*mk)
# alpha17 = signalize(transform(output, 'f17', 1)*vl)
# alpha17 = signalize(transform(output, 'f17', 1)*mkt_cap)
# alpha17 = signalize(transform(output, 'f17', 1)*(1/mkt_cap))
# alpha17 = signalize(transform(output, 'f17', 1)*(volatility))
# alpha17 = signalize(transform(output, 'f17', 1)*(1/volatility))



# alpha18 = signalize(transform(output, 'f18', 1)*(1/volatility))
# alpha18 = transform(output, 'f18', 1)

# alpha18 = signalize(signalize(output.groupby(by="industry")['f16'].transform('mean'))*(1/volatility))

# alpha18 = signalize(transform(output, 'f18', 1)*(1/volatility))


# alpha1 = alpha1.sub(alpha1.mean())
# alpha1_s = alpha1_s.sub(alpha1_s.mean())

# alpha2 = alpha2.sub(alpha2.mean())



# alpha3 = alpha3.sub(alpha3.mean())
# alpha3[alpha3 < 0] = 0

# alpha3_o = alpha3_o.sub(alpha3_o.mean())
# alpha3_o[alpha3_o > 0] = 0

# alpha4 = alpha4.sub(alpha4.mean())

# alpha4_o = alpha4_o.sub(alpha4_o.mean())
# alpha4_o[alpha4_o < 0] = 0

# alpha5 = alpha5.sub(alpha5.mean())
# alpha6 = alpha6.sub(alpha6.mean())
# alpha7 = alpha7.sub(alpha7.mean())
# alpha8 = alpha8.sub(alpha8.mean())

# alpha9 = alpha9.sub(alpha9.mean())
# alpha9_o = alpha9_o.sub(alpha9_o.mean())

# alpha10 = alpha10.sub(alpha10.mean())
# alpha10_o = alpha10_o.sub(alpha10_o.mean())

# alpha11 = alpha11.sub(alpha11.mean())
# alpha12 = alpha12.sub(alpha12.mean())

# alpha13 = alpha13.sub(alpha13.mean())
# alpha13_o = alpha13_o.sub(alpha13_o.mean())

# alpha14 = alpha14.sub(alpha14.mean())
# alpha14_o = alpha14_o.sub(alpha14_o.mean())

# alpha15 = alpha15.sub(alpha15.mean())
# alpha15_o = alpha15_o.sub(alpha15_o.mean())

# alpha16 = alpha16.sub(alpha16.mean())
# Every alpha17 construction earlier in this cell is commented out, so on a
# fresh kernel the name would be undefined here. Build it from 'f17' when the
# pipeline exposes that column; otherwise fall back to the estimated-growth
# column that the pipeline does output.
# NOTE(review): the fallback column and its +1 orientation are assumptions --
# confirm which signal this cell is meant to analyse.
signal_column = 'f17' if 'f17' in output.columns else 'est_grwth_fctr'
alpha17 = transform(output, signal_column, 1)

# Centre the signal on zero.
alpha17 = alpha17.sub(alpha17.mean())
# alpha18 = alpha18.sub(alpha18.mean())

# ra =  alpha1 + alpha2 + alpha3 + alpha4 + alpha5 + alpha6 + alpha7 + alpha8 + alpha9 + alpha10 + alpha13 + alpha14 + alpha15 + alpha16
# alpha_factor = ra + alpha17

alpha_factor = alpha17
# + alpha17
# alpha_factor_old = alpha1 + alpha2+ alpha3_o + alpha4_o + alpha5 + alpha6 + alpha7 + alpha9_o + alpha10_o + alpha11 + alpha12 + alpha13_o + alpha14_o + alpha15_o


# print ra.corr(alpha_factor)
# print alpha_factor_old.corr(alpha_factor)
# Disabled: alpha1 is not defined unless its construction earlier in this
# cell is re-enabled, so this line raised NameError.
# print(alpha17.corr(alpha1))

alpha_factor.tail(100)



## Fetch pricing data

In [None]:
# alpha_factor = alpha
# alpha_factor = 0.61*alpha1 + 0.39*alpha2 
# alpha_factor = 0.4095926617*alpha1 + 0.2560634328*alpha2 + 0.3343439055*alpha3
# alpha_factor = 0.2873660386*alpha1 + 0.1796514957*alpha2 + 0.2345722778*alpha3 + 0.2984101879*alpha4
# alpha_factor = 0.2120919374*alpha1 + 0.132592682*alpha2 + 0.1731272391*alpha3 + 0.2202431268*alpha4 + 0.2619450147*alpha5
# alpha_factor = 0.18662558*alpha1 + 0.1166719796*alpha2 + 0.1523394609*alpha3 + 0.1937980378*alpha4 + 0.2304926859*alpha5 + 0.1200722559*alpha6
# alpha_factor = 0.1606598366*alpha1 + 0.1004390779*alpha2 + 0.131144042*alpha3 + 0.16683437*alpha4 + 0.1984235882*alpha5 + 0.1033662642*alpha6 + 0.1391328211*alpha8
# alpha_factor = 0.1677838457*alpha1 + 0.1048927667*alpha2 + 0.1369592561*alpha3 + 0.1742321716*alpha4 + 0.2072221249*alpha5 + 0.1079497508*alpha6 + 0.1009600841*alpha10
# alpha_factor = alpha1 + alpha2 + alpha3 + alpha4 + alpha5 + alpha6 + alpha7 + alpha8 + alpha9 + alpha10 + alpha13 + alpha14 + alpha15 + alpha16

# Distinct assets from the second index level of the pipeline output.
assets = output.index.levels[1].unique()
# We need to get a little more pricing data than the
# length of our factor so we can compare forward returns.
# The pipeline runs through 2018-09-01, so pricing is fetched through
# 2018-10-01 (one extra month); the previous end date of 2018-05-08 stopped
# before the factor data did, leaving the last months without forward returns.
pricing = get_pricing(assets, start_date='2015-01-01', end_date='2018-10-01', fields='close_price')
# rets = returns(assets, '2015-01-01','2018-05-08', 5) 
# rets.head(10)
print("DONE Pricing")


## Run the Alphalens tear sheet

In [None]:
# Align the factor with pricing, bucket it into 5 quantiles and attach
# 5- and 10-period forward returns.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor=alpha_factor,
    prices=pricing,
    quantiles=5,
    periods=(5, 10)
)

# Other available reports:
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)

### 

## Regress returns on the alpha factor

In [None]:
# `rets` has no active definition elsewhere in the notebook (its only
# assignment is commented out), so compute it here with the arguments the
# author used in that disabled call.
# NOTE(review): the end date 2018-05-08 is earlier than the pipeline's end
# date -- confirm the intended range.
rets = returns(assets, '2015-01-01', '2018-05-08', 5)

# Stack the per-asset columns into one long Series.
df = rets.stack()
df.head(100)

In [None]:
# from linearmodels import PanelOLS
import statsmodels.api as sm

# Keep only the index entries present in both the returns and the factor.
# Index.intersection avoids materialising both indexes as Python lists of
# tuples and round-tripping them through sets.
secCommon = df.index.intersection(alpha_factor.index)

# Both objects appear to be one-dimensional Series (a stacked returns frame
# and a ranked factor column), so they are selected with a single row
# indexer; the previous `.loc[secCommon, :]` also passed a column indexer.
df = df.loc[secCommon]
alpha_factor = alpha_factor.loc[secCommon]


# NOTE(review): this only constructs the model; nothing here calls .fit(),
# so no estimates are produced yet.
regression = sm.OLS(df, alpha_factor)
