In [None]:
import numpy as np
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.factset import Fundamentals as FstFundamentals
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
from quantopian.pipeline.data import factset
import alphalens

# Date range (as strings) passed to run_pipeline and get_pricing further down.
sd = '2010-01-01'
ed = '2018-10-01'

def signalize(df):
    """Map values to rank-based scores in (0, 1).

    Each value is replaced by ``(rank - 0.5) / count`` where ``rank`` and
    ``count`` come from ``df.rank()`` and ``df.count()``.  Entries that end
    up NaN (for example missing inputs) are set to the neutral score 0.5.
    """
    centred_rank = df.rank() - 0.5
    scaled = centred_rank / df.count()
    return scaled.replace(np.nan, 0.5)

def transform(df, field, multiplier=1):
    """Return the signalized version of ``multiplier * df[field]``.

    ``multiplier`` is applied before ranking, so a negative value flips the
    ordering of the resulting scores.
    """
    scaled_column = multiplier * df[field]
    return signalize(scaled_column)

class MarketCap(CustomFactor):
    """Latest close price multiplied by latest shares outstanding."""

    # A one-row window suffices: only the newest observation is read.
    window_length = 1
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]

    def compute(self, today, assets, out, close, shares):
        latest_close = close[-1]
        latest_shares = shares[-1]
        out[:] = latest_close * latest_shares

class LogMarketCap(CustomFactor):
    """Natural log of (latest close price * latest shares outstanding)."""

    # A one-row window suffices: only the newest observation is read.
    window_length = 1
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]

    def compute(self, today, assets, out, close, shares):
        latest_cap = close[-1] * shares[-1]
        out[:] = np.log(latest_cap)

class Volatility(CustomFactor):
    """Standard deviation of daily close-to-close returns, scaled by sqrt(252)."""

    window_length = 20
    inputs = [USEquityPricing.close]

    def compute(self, today, assets, out, close):
        # np.diff returns one row fewer than `close`, so each change is
        # divided by the close it started from (every row except the last).
        price_changes = np.diff(close, axis=0)
        starting_prices = close[:-1]
        daily_returns = price_changes / starting_prices
        out[:] = daily_returns.std(axis=0) * math.sqrt(252)

class Industry(CustomFactor):
    """Expose the newest `morningstar_industry_code` value as a factor."""

    window_length = 1
    inputs = [Fundamentals.morningstar_industry_code]

    def compute(self, today, assets, out, code):
        newest_code = code[-1]
        out[:] = newest_code

        
class IndustryGroup(CustomFactor):
    """Expose the newest `morningstar_industry_group_code` value as a factor."""

    window_length = 1
    inputs = [Fundamentals.morningstar_industry_group_code]

    def compute(self, today, assets, out, code):
        newest_code = code[-1]
        out[:] = newest_code


class Sector(CustomFactor):
    """Expose the newest `morningstar_sector_code` value as a factor."""

    window_length = 1
    inputs = [Fundamentals.morningstar_sector_code]

    def compute(self, today, assets, out, code):
        newest_code = code[-1]
        out[:] = newest_code


class Factor1(CustomFactor):
    """Negated `pcf_ratio`, read from the older row of a two-row window."""

    window_length = 2
    inputs = [Fundamentals.pcf_ratio]

    def compute(self, today, assets, out, var):
        # Index -2 is the first (older) of the two rows in the window.
        lagged_ratio = var[-2]
        out[:] = -lagged_ratio
        
class Factor2(CustomFactor):
    """Negated `ps_ratio`, read from the older row of a two-row window."""

    window_length = 2
    inputs = [Fundamentals.ps_ratio]

    def compute(self, today, assets, out, var):
        # Index -2 is the first (older) of the two rows in the window.
        lagged_ratio = var[-2]
        out[:] = -lagged_ratio
    
class Factor3(CustomFactor):
    """Free cash flow over the cube root of (EV * close * shares * total assets).

    All inputs are read from the older row of a two-row window.
    """

    window_length = 2
    inputs = [
        Fundamentals.enterprise_value,
        Fundamentals.free_cash_flow,
        USEquityPricing.close,
        Fundamentals.shares_outstanding,
        Fundamentals.total_assets,
    ]

    def compute(self, today, assets, out, ev, var, close, shares, ta):
        # `var` is the free_cash_flow input (second entry of `inputs`).
        numerator = var[-2]
        # Product of the three size measures; its cube root is the scale.
        size_product = ev[-2] * close[-2] * shares[-2] * ta[-2]
        out[:] = numerator / size_product ** (1. / 3.)

class Factor4(CustomFactor):
    """Total assets over the square root of (EV * close * shares).

    All values are read from the older row of a two-row window.  The
    free_cash_flow input (`var`) is declared but does not enter the formula.
    """

    window_length = 2
    inputs = [
        Fundamentals.enterprise_value,
        Fundamentals.free_cash_flow,
        USEquityPricing.close,
        Fundamentals.shares_outstanding,
        Fundamentals.total_assets,
    ]

    def compute(self, today, assets, out, ev, var, close, shares, ta):
        lagged_assets = ta[-2]
        value_product = ev[-2] * close[-2] * shares[-2]
        out[:] = lagged_assets / value_product ** (1. / 2.)
        
class Factor5(CustomFactor):
    """
    TEM = standard deviation of past 6 quarters' reports

    Implemented as the negated standard deviation, over a 390-row window,
    of capital_expenditure divided by enterprise_value.
    """
    window_length = 390
    inputs = [Fundamentals.capital_expenditure, Fundamentals.enterprise_value]

    def compute(self, today, assets, out, capex, ev):
        capex_to_ev = capex / ev
        dispersion = capex_to_ev.std(axis=0)
        out[:] = -dispersion

class Factor6(CustomFactor):
    """`forward_earning_yield`, read from the older row of a two-row window."""

    window_length = 2
    inputs = [Fundamentals.forward_earning_yield]

    def compute(self, today, assets, out, syield):
        # Index -2 is the first (older) of the two rows in the window.
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor7(CustomFactor):
    """`earning_yield`, read from the older row of a two-row window."""

    window_length = 2
    inputs = [Fundamentals.earning_yield]

    def compute(self, today, assets, out, syield):
        # Index -2 is the first (older) of the two rows in the window.
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor8(CustomFactor):
    """Negated `sales_yield`, read from the older row of a two-row window."""

    window_length = 2
    inputs = [Fundamentals.sales_yield]

    def compute(self, today, assets, out, syield):
        # Index -2 is the first (older) of the two rows in the window.
        lagged_yield = syield[-2]
        out[:] = -lagged_yield

class Factor9(CustomFactor):
    """Price-range sum times message-weighted bull-minus-bear sum.

    Over a 21-row window this multiplies the NaN-skipping sum of
    (high - low) / close by the NaN-skipping sum of
    total_scanned_messages * (bear - bull) and negates the product, so the
    result equals range_sum * sum(total * (bull - bear)).
    """

    window_length = 21
    window_safe = True
    inputs = [
        USEquityPricing.high,
        USEquityPricing.low,
        USEquityPricing.close,
        stocktwits.bull_scored_messages,
        stocktwits.bear_scored_messages,
        stocktwits.total_scanned_messages,
    ]

    def compute(self, today, assets, out, high, low, close, bull, bear, total):
        relative_range = (high - low) / close
        range_sum = np.nansum(relative_range, axis=0)
        weighted_bearishness = total * (bear - bull)
        sentiment_sum = np.nansum(weighted_bearishness, axis=0)
        out[:] = -range_sum * sentiment_sum

#Contest Entry#3 - 07/05/2019
class Factor10(CustomFactor):
    """Mean over a 360-row window of capital_expenditure / cost_of_revenue."""

    window_length = 360
    inputs = [Fundamentals.capital_expenditure, Fundamentals.cost_of_revenue]

    def compute(self, today, assets, out, capex, cr):
        capex_to_cost = capex / cr
        out[:] = capex_to_cost.mean(axis=0)
        
class Factor11(CustomFactor):
    """Mean divided by standard deviation of `revenue_growth` over 360 rows."""

    window_length = 360
    inputs = [Fundamentals.revenue_growth]

    def compute(self, today, assets, out, rate):
        window_mean = rate.mean(axis=0)
        window_std = rate.std(axis=0)
        out[:] = window_mean / window_std

        
class Factor12(CustomFactor):
    """Mean divided by standard deviation of `gross_margin` over 360 rows."""

    window_length = 360
    inputs = [Fundamentals.gross_margin]

    def compute(self, today, assets, out, rate):
        window_mean = rate.mean(axis=0)
        window_std = rate.std(axis=0)
        out[:] = window_mean / window_std

        
class Factor13(CustomFactor):
    """Negated mean divided by standard deviation of `quick_ratio` over 360 rows."""

    window_length = 360
    inputs = [Fundamentals.quick_ratio]

    def compute(self, today, assets, out, rate):
        window_mean = rate.mean(axis=0)
        window_std = rate.std(axis=0)
        out[:] = -window_mean / window_std
 
        
class Factor14(CustomFactor):
    """Reciprocal of the standard deviation of `ebitda_margin` over 360 rows."""

    window_length = 360
    inputs = [Fundamentals.ebitda_margin]

    def compute(self, today, assets, out, rate):
        window_std = rate.std(axis=0)
        out[:] = 1 / window_std

        
class Factor15(CustomFactor):
    """Mean divided by standard deviation of `current_ratio` over 360 rows."""

    window_length = 360
    inputs = [Fundamentals.current_ratio]

    def compute(self, today, assets, out, rate):
        window_mean = rate.mean(axis=0)
        window_std = rate.std(axis=0)
        out[:] = window_mean / window_std
        
class Factor16(CustomFactor):
    """Negated, volatility-scaled momentum spread.

    The spread is the return from 252 rows ago to 21 rows ago minus the
    return over the most recent 21 rows, divided by the NaN-skipping
    standard deviation of the 126-row `Returns` input across the window.

    The previous implementation lacked parentheses around the short-term
    leg, so it subtracted raw price levels (`prices[-1]` and `prices[-21]`)
    from a return and then divided everything by `prices[-21]`.  Both legs
    are now computed as proper returns before being differenced.
    """
    inputs = (USEquityPricing.close, Returns(window_length=126))
    window_length = 252
    window_safe = True

    def compute(self, today, assets, out, prices, returns):
        # Return from the start of the window up to 21 rows ago.
        long_leg = (prices[-21] - prices[-252]) / prices[-252]
        # Return over the most recent 21 rows.
        short_leg = (prices[-1] - prices[-21]) / prices[-21]

        am = np.divide(long_leg - short_leg, np.nanstd(returns, axis=0))

        out[:] = -am

class Factor17(CustomFactor):
    """Newest value of the FactSet `oper_inc_gr_ltm` field."""

    window_length = 126
    # `sales_gr_ltm` is noted in the original as an alternative input.
    inputs = [FstFundamentals.oper_inc_gr_ltm]

    def compute(self, today, assets, out, rate):
        # Only the newest row is emitted.  A mean/std ratio over the window
        # was tried earlier and left disabled:
        #     rate.mean(axis = 0)
        #     /rate.std(axis = 0)
        newest_value = rate[-1]
        out[:] = newest_value



class FCFStd(CustomFactor):
    """Negated standard deviation of eight sampled `fcf_yield` rows.

    Rows are sampled at offsets 1, 66, 132, ..., 462 from the end of a
    528-row window.
    """
    window_length = 66*8
    window_safe = True
    inputs = [Fundamentals.fcf_yield]

    def compute(self, today, assets, out, fcf_yield):
        # Offsets counted back from the end of the window: -1, -66, ..., -66*7.
        offsets = [1] + [66*step for step in range(1, 8)]
        samples = [fcf_yield[-offset] for offset in offsets]
        out[:] = -np.std(samples, axis=0)
        
class peg_ratio_std(CustomFactor):
    """Standard deviation of eight sampled reciprocals of `peg_ratio`.

    Rows are sampled at offsets 1, 66, 132, ..., 462 from the end of a
    528-row window; each sample is inverted before the deviation is taken.
    """
    window_length = 66*8
    window_safe = True
    inputs = [Fundamentals.peg_ratio]

    def compute(self, today, assets, out, var):
        # Offsets counted back from the end of the window: -1, -66, ..., -66*7.
        offsets = [1] + [66*step for step in range(1, 8)]
        inverse_samples = [1/var[-offset] for offset in offsets]
        out[:] = np.std(inverse_samples, axis=0)

class SalesGrowthStability(CustomFactor):
    """NaN-skipping mean over standard deviation of sampled `sales_gr_qf`.

    Eight rows are sampled at offsets 1, 66, 132, ..., 462 from the end of
    a 528-row window.
    """
    window_length = 66*8
    window_safe = True
    inputs = [factset.Fundamentals.sales_gr_qf]

    def compute(self, today, assets, out, var):
        # Offsets counted back from the end of the window: -1, -66, ..., -66*7.
        offsets = [1] + [66*step for step in range(1, 8)]
        samples = [var[-offset] for offset in offsets]

        sample_mean = np.nanmean(samples, axis=0)
        sample_std = np.nanstd(samples, axis=0)

        out[:] = sample_mean/sample_std

        
class fcf_growth_mean(CustomFactor):
    """NaN-skipping mean of seven step-over-step growth rates of total FCF.

    Total free cash flow is `fcf_per_share * shares_outstanding`.  Rows are
    sampled at offsets 1, 66, 132, ..., 462 from the end of a 528-row
    window, and each growth rate is newer_sample / older_sample - 1.

    The standard deviation that used to be computed alongside the mean was
    never used and has been removed.
    """
    inputs=[
        Fundamentals.fcf_per_share,
        Fundamentals.shares_outstanding,
        Fundamentals.enterprise_value,]
    window_length = 66*8
    window_safe = True

    def compute(self, today, assets, out, fcf, shares, ev):
        # `ev` is declared as an input but does not enter the calculation.
        var = fcf*shares
        # Treat infinite totals as missing so nanmean skips them.
        var[np.isinf(var)] = np.nan

        # Offsets counted back from the end of the window: -1, -66, ..., -66*7.
        offsets = [1] + [66*step for step in range(1, 8)]
        # Pair each sample with the next older one: (1, 66), (66, 132), ...
        arr = [var[-newer]/var[-older] - 1
               for newer, older in zip(offsets[:-1], offsets[1:])]

        out[:] = np.nanmean(arr, axis=0)
        
class Altman_Z_Stability(CustomFactor):
    """Negated NaN-skipping mean over standard deviation of sampled `zscore_qf`.

    Seven rows are sampled at offsets 1, 66, 132, ..., 396 from the end of
    a 528-row window.
    """
    window_length = 66*8
    window_safe = True
    inputs=[factset.Fundamentals.zscore_qf]

    def compute(self, today, assets, out, var):
        # Offsets counted back from the end of the window: -1, -66, ..., -66*6.
        offsets = [1] + [66*step for step in range(1, 7)]
        samples = [var[-offset] for offset in offsets]

        sample_std = np.nanstd(samples, axis=0)
        sample_mean = np.nanmean(samples, axis=0)

        out[:] = -sample_mean/sample_std

        
class Current_Ratio_Std(CustomFactor):
    """Negated NaN-skipping standard deviation of sampled `current_ratio`.

    Seven rows are sampled at offsets 1, 66, 132, ..., 396 from the end of
    a 528-row window.  The mean that used to be computed alongside the
    deviation was never used and has been removed.
    """
    inputs=[Fundamentals.current_ratio]
    window_length = 66*8
    window_safe = True

    def compute(self, today, assets, out, var):
        # Offsets counted back from the end of the window: -1, -66, ..., -66*6.
        offsets = [1] + [66*step for step in range(1, 7)]
        arr = [var[-offset] for offset in offsets]

        out[:] = -np.nanstd(arr, axis=0)

# Universe filter shared by every factor below: it is passed as each factor's
# `mask` and later used as the pipeline screen.
base_universe = QTradableStocksUS()

# Size and risk descriptors.
mkt_cap = MarketCap(mask = base_universe)
log_mkt_cap = LogMarketCap(mask = base_universe)
vol = Volatility(mask = base_universe)

# Morningstar classification columns.
industry = Industry(mask = base_universe)
industryGroup = IndustryGroup(mask = base_universe)
sector = Sector(mask = base_universe)

# Candidate alpha factors f1..f16, one instance per Factor class.
f1 = Factor1(mask = base_universe)
f2 = Factor2(mask = base_universe)
f3 = Factor3(mask = base_universe)
f4 = Factor4(mask = base_universe)
f5 = Factor5(mask = base_universe)
f6 = Factor6(mask = base_universe)
f7 = Factor7(mask = base_universe)
f8 = Factor8(mask = base_universe)
f9 = Factor9(mask = base_universe)
f10 = Factor10(mask = base_universe)
f11 = Factor11(mask = base_universe)
f12 = Factor12(mask = base_universe)
f13 = Factor13(mask = base_universe)
f14 = Factor14(mask = base_universe)
f15 = Factor15(mask = base_universe)
f16 = Factor16(mask = base_universe)

# Experimental stability factors.  Only f18 is instantiated; the others are
# disabled.
# f17 = FCFStd(mask = base_universe)
f18 = peg_ratio_std(mask = base_universe)
# f19 = SalesGrowthStability(mask = base_universe)
# f20 = fcf_growth_mean(mask = base_universe)
# f21 = Altman_Z_Stability(mask = base_universe)
# f22 = Current_Ratio_Std(mask = base_universe)

# Pipeline producing one column per active factor plus the size, volatility
# and classification columns, screened to the base universe.
pipe_my_alpha_factor = Pipeline(
    columns={
        'f1': f1,
        'f2': f2,
        'f3': f3,
        'f4': f4,
        'f5': f5,
        'f6': f6,
        'f7': f7,
        'f8':f8,
        'f9': f9,
        'f10': f10,
        'f11': f11,
        'f12': f12,
        'f13': f13,
        'f14': f14,
        'f15': f15,
        'f16': f16,
       
        # Experimental columns, currently disabled.
#         'f17': f17,
#         'f18': f18,
#         'f19': f19,
#         'f20': f20,
#         'f21': f21,
#         'f22': f22,
        
        'vol': vol,
        'log_mkt_cap': log_mkt_cap,
        'mkt_cap':mkt_cap,
        'industry': industry,
        'sector': sector,
        'industryGroup': industryGroup
    },
    screen=base_universe
)




# Run the pipeline over [sd, ed] and preview the first rows of the result.
output = run_pipeline(pipe_my_alpha_factor, sd, ed)
# Parenthesised single-argument print works as both the Python 2 statement
# and the Python 3 function.
print(output.head())

In [None]:
# Show the last 20 rows of the f6 and f7 columns.  The parenthesised form
# is valid on both Python 2 and Python 3.
print(output[['f6','f7']].tail(20))

In [None]:
# Build market-cap-weighted sector averages of each factor, then map sector
# codes to sector ETFs.
#
# NOTE: `factors` below rebinds the name of the `factors` module imported
# from quantopian.pipeline at the top of the file.
# factors = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22] 
# factors = [17,18,19,20,21,22] 
factors = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16]

# Column names to aggregate: for each factor, its cap-weighted value and the
# matching market-cap column (the latter is created in the second loop).
allnames = []
#Compute Weighted Factor value (within sector)
for x in factors:
    wfname = 'weightedFactor' + str(x)
    tfname = 'f'+str(x)
    output[wfname] = output[tfname]*output['mkt_cap']
    allnames.append(wfname)
    allnames.append('mkt_cap_'+tfname)
    

#Compute Sectoral Market Cap
# agg_sector_ = output.groupby([output.index.get_level_values(0),'sector'])[allnames].sum().reset_index()
for x in factors:
    # `flag` is a scratch column reset for every factor: 1 where the factor
    # is present, 0 otherwise.  It is left on `output` after the loop.
    output['flag'] = 0
    tfname = 'f'+str(x)
    wfname = 'weightedFactor' + str(x)
    output.loc[output[tfname].notnull(), 'flag'] = 1
    # Market cap counted only for rows where this factor has a value, so the
    # denominator matches the rows contributing to the numerator.
    output['mkt_cap_'+tfname] = output['mkt_cap'] * output['flag']
    output[wfname] = output[wfname]*output['flag']
    
# Sum numerators and denominators per (first index level, sector).
agg_sector_ = output.groupby([output.index.get_level_values(0),'sector'])[allnames].sum().reset_index()

# Divide each summed weighted factor by its summed market cap to obtain the
# cap-weighted average, then drop the denominator column.
for x in factors:
    wfname = 'weightedFactor' + str(x)
    tfname = 'f'+str(x)
    agg_sector_[wfname] = agg_sector_[wfname]/agg_sector_['mkt_cap_'+tfname]
    agg_sector_.drop(['mkt_cap_'+tfname], axis=1, inplace=True)
    
# agg_sector_.rename(columns = {'mkt_cap_f':'sector_mkt_cap'}, inplace=True)
# agg_sector_.drop(['sector_mkt_cap'], axis=1, inplace=True)
# Sector codes become strings so they can be joined against the lookup table
# below, whose keys are written as '101.0', '102.0', ...
agg_sector_['sector'] = agg_sector_['sector'].astype(str)
# print agg_sector_.head()

# Lookup from sector-code string to the ETF used to represent that sector.
# NOTE(review): `symbols` is not imported in this file; presumably it is
# supplied by the research notebook environment -- confirm.
sector_code_sid = pd.DataFrame({
    'sector':['101.0','102.0','103.0','104.0','205.0','206.0','207.0','308.0','309.0','310.0','311.0'],
    'sid':symbols(['XLB','XLY','XLF','IYR','XLP','XLV','XLU','XTL','XLE','XLI','XLK'])})

# print data.head()

In [None]:
# Preview the per-sector aggregates.  The parenthesised form is valid on
# both Python 2 and Python 3.
print(agg_sector_.head())

In [None]:
# print sector_code_sid
# Attach the ETF sid to each sector row.  The outer merge followed by
# dropna() discards every row that has a NaN in any column, including
# sectors without an ETF mapping and rows with any missing weighted factor.
data = pd.merge(agg_sector_, sector_code_sid, on='sector', how='outer').dropna()  
# Index by ('level_0', 'sid'); 'level_0' is the column produced by
# reset_index() from the unnamed first index level used in the groupby.
data.set_index(['level_0', 'sid'], inplace=True)

# Disabled list of alpha column names (referenced only by commented-out code).
# factor_names = [
#     'alpha1',
#     'alpha2',
#     'alpha3',
#     'alpha4',
#     'alpha5',
#     'alpha6',
#     'alpha7',
#     'alpha8',
#     'alpha9',
#     'alpha11',
#     'alpha13',
#     'alpha14',
#     'alpha15',
#     'alpha16',
#     'alpha17',
#     'alpha18',
#     'alpha19',
#     'alpha20', 
#     'alpha21',
#     'alpha22'
# ]


In [None]:
# Preview the merged sector/ETF table.  The parenthesised form is valid on
# both Python 2 and Python 3.
print(data.head())

In [None]:
# Earlier per-factor version with hand-picked signs, kept for reference.
# data['alpha1'] = signalize(-data['weightedFactor1'])
# data['alpha2'] = signalize(-data['weightedFactor2'])
# data['alpha3'] = signalize(data['weightedFactor3'])
# data['alpha4'] = signalize(data['weightedFactor4'])
# data['alpha5'] = signalize(-data['weightedFactor5'])
# data['alpha6'] = signalize(data['weightedFactor6'])
# data['alpha7'] = signalize(data['weightedFactor7'])
# data['alpha8'] = signalize(-data['weightedFactor8'])
# data['alpha9'] = signalize(-data['weightedFactor9'])
# data['alpha11'] = signalize(data['weightedFactor11'])
# data['alpha13'] = signalize(-data['weightedFactor13'])
# data['alpha14'] = signalize(data['weightedFactor14'])
# data['alpha15'] = signalize(data['weightedFactor15'])
# data['alpha16'] = signalize(data['weightedFactor16'])
# data['alpha17'] = signalize(data['weightedFactor17'])
# data['alpha18'] = signalize(data['weightedFactor18'])
# data['alpha19'] = signalize(data['weightedFactor19'])
# data['alpha20'] = signalize(data['weightedFactor20'])
# data['alpha21'] = signalize(data['weightedFactor21'])
# data['alpha22'] = signalize(data['weightedFactor22'])

# print data
# For every active factor: convert the sector-level weighted factor into a
# rank score with signalize(), then subtract the mean of that score within
# each value of the first index level.
# NOTE(review): signalize() ranks the whole column at once, i.e. across all
# index entries together rather than separately per date -- confirm this
# pooling is intended.
for x in factors:
    wfname = 'weightedFactor' + str(x)
    # `tfname` is assigned but not used inside this loop.
    tfname = 'f'+str(x)
    alphaname = 'alpha'+str(x)
    data[alphaname] = signalize(data[wfname])
    data[alphaname] = data[alphaname].sub(data[alphaname].mean(level=0), level=0)


In [None]:
# Fetch close prices for every sid in the second index level of `data`,
# over the same [sd, ed] range that was used for the factor data.
# NOTE(review): no pricing beyond `ed` is requested here, so forward returns
# cannot be computed for the final dates of the range -- extend `end_date`
# if those rows are needed.
assets = data.index.levels[1].unique()
pricing = get_pricing(assets, start_date=sd, end_date=ed, fields='close_price')
# pricing.head(10)

In [None]:
# 5-row forward return: the percentage change over 5 rows, shifted back by 5
# so each date holds the change over the following 5 rows.
# NOTE: this rebinds `returns`, hiding the `returns` name imported from
# quantopian.research at the top of the file.
returns = pricing.pct_change(periods=5).shift(-5)
#Demean the returns
# Subtract each row's mean across columns, then stack the columns into a
# second index level to get one Series.
returns = returns.sub(returns.mean(axis=1), axis=0).stack()
# print returns

In [None]:
# Align the forward returns with the alpha table on their shared index and
# drop every row that has a NaN in any column.
data = pd.concat([returns.rename('returns'), data], axis=1).dropna()
# Flatten the index into ordinary columns and give them readable names.
data.reset_index(inplace=True)
data = data.rename(columns={'level_0':'dates', 'level_1': 'sid'})

In [None]:
#Alpha3, Alpha4, Alpha5, Alpha9 are highly correlated
# The pair printed below is alpha3 vs alpha6; change the column names to
# inspect the pairs mentioned above.  The parenthesised form is valid on
# both Python 2 and Python 3.
print(data['alpha3'].corr(data['alpha6']))
# data['alpha3459'] = (data['alpha3'] + data['alpha4'] + data['alpha5'] + data['alpha9'])/4
# print data['alpha15']
# d = data[['alpha1','alpha2']][-3002:]
# print d

In [None]:
#2014 -> 1,5 
import statsmodels.api as sm

# Candidate composite signals: equal-weight sums of selected alpha columns.
# data['final'] = data['alpha1'] + data['alpha2'] + data['alpha3'] + data['alpha4'] + data['alpha5']
data['final1'] = data['alpha3'] + data['alpha5'] + data['alpha6'] + data['alpha9'] + data['alpha11']
data['final2'] = data['alpha3'] + data['alpha5'] + data['alpha9']
# data['final2'] = data['alpha3'] + data['alpha5'] + data['alpha6'] + data['alpha9'] + data['alpha11'] + data['alpha18']
# data['final3'] = data['alpha6'] + data['alpha18']
# 'alpha3',' alpha5', 'alpha6', 'alpha9', 'alpha11', 'alpha4',

# Regress the `returns` column (scaled by 100) on `final2` only; no
# constant column is added to the regressors.
est = sm.OLS(
    100*data[['returns']], 
    data[['final2']])
# Alternative regressor sets tried previously:
#     data[['alpha3', 'alpha5', 'alpha6', 'alpha9', 'alpha11']])
#         data[factor_names])
#     data[['alpha3', 'alpha16']])
#     data[['alpha2','alpha3', 'alpha4', 'alpha9']])
#     data[['alpha3', 'alpha16']])
#     data[['alpha2', 'alpha6', 'alpha9', 'alpha13', 'alpha15']])
# Cluster the covariance estimate by date.
est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

# Parenthesised single-argument print is valid on both Python 2 and 3.
print(est2.summary())

In [None]:
# Correlation between the two composite signals.  The parenthesised form is
# valid on both Python 2 and Python 3.
print(data['final1'].corr(data['final2']))

## Evaluate the Combined Alpha with Alphalens

In [None]:
# Re-index by (dates, sid) so the combined alpha is a Series with a
# two-level index to hand to alphalens.
d = data.set_index(['dates', 'sid'])
# Earlier combinations, kept for reference.
# net_alpha = d['alpha2'] + d['alpha5'] + d['alpha6'] \
# + d['alpha9'] + d['alpha13'] + d['alpha15']

# net_alpha = d['alpha2']  + d['alpha6'] \
# + d['alpha9'] + d['alpha13'] + d['alpha15']

# net_alpha = d['alpha1'] + d['alpha5'] + d['alpha8']
# net_alpha = 0.45*d['alpha1'] + 0.6*d['alpha5'] + 0.4*d['alpha8']

# Active combination: equal-weight sum of five alpha columns.
net_alpha = d['alpha3'] + d['alpha5'] + d['alpha6'] + d['alpha9'] + d['alpha11']
# net_alpha = d['alpha3'] + d['alpha5'] + d['alpha9']
# net_alpha = d['alpha18']
# + d['alpha4'] + d['alpha9']

# net_alpha = -0.2*d['alpha2'] + 0.2*d['alpha5'] + 0.4*d['alpha6'] \
# - 0.5*d['alpha9'] - 0.7*d['alpha13'] + 0.7*d['alpha15']


# Bucket the factor into 5 quantiles and pair it with forward returns at the
# 5- and 10-period horizons, using the `pricing` table fetched earlier.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(net_alpha,
                                                                   pricing,
                                                                   quantiles=5,
                                                                   periods=(5,10))

# Only the summary tear sheet is produced; the fuller reports are disabled.
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)

### 