In [None]:
import numpy as np
import statsmodels.api as sm
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, PercentChange, SimpleMovingAverage
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import factset
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
import alphalens
from scipy.stats.mstats import winsorize
from sklearn import preprocessing
from scipy.stats.mstats import gmean
from zipline.utils.numpy_utils import (
    repeat_first_axis,
    repeat_last_axis,
)
from quantopian.pipeline.data.factset import RBICSFocus




In [None]:
# Factors taken from https://www.quantopian.com/posts/alpha-combination-via-clustering
# QL is the row stride the windowed factors below use to step back through
# their lookback data (window_length = QL*8, samples at -QL, -QL*2, ...).
# NOTE(review): presumably the number of trading days treated as one quarter — confirm.
QL = 66
# Passed as both the lower and the upper limit to winsorize() in preprocess();
# with 0 on both sides no observations are clipped.
WIN_LIMIT = 0

def signalize(df):
    """Map values to fractional ranks and fill the gaps with the mean rank.

    Every entry becomes ``(rank - 0.5) / count`` where ``count`` is the number
    of non-missing observations. Entries that are still NaN afterwards are
    replaced with the mean of the computed fractional ranks.
    """
    n_valid = df.count()
    fractional_rank = (df.rank() - 0.5) / n_valid
    fill_value = fractional_rank.mean()
    return fractional_rank.replace(np.nan, fill_value)

def preprocess(a):
    """Clean and standardize a factor array.

    The input is cast to float64, infinities are treated as missing, the
    NaN-aware mean is subtracted, remaining NaNs become 0, the result is
    winsorized with WIN_LIMIT on both tails and finally passed through
    sklearn's ``preprocessing.scale``. The caller's array is not modified.
    """
    values = a.astype(np.float64)
    # +/-inf would dominate the mean, so demote them to NaN first.
    values = np.where(np.isinf(values), np.nan, values)
    centered = values - np.nanmean(values)
    filled = np.nan_to_num(centered)
    clipped = winsorize(filled, limits=[WIN_LIMIT, WIN_LIMIT])

    return preprocessing.scale(clipped)

class Volatility(CustomFactor):
    """Standard deviation of daily simple close-to-close returns, scaled by sqrt(252).

    A 22-row window of closes yields 21 returns per asset.
    """
    inputs = [USEquityPricing.close]
    window_length = 22

    def compute(self, today, assets, out, close):
        # np.diff is one row shorter than close, so the divisor drops the last close.
        previous_close = close[:-1]
        simple_returns = np.diff(close, axis=0) / previous_close
        annualization = math.sqrt(252)
        out[:] = simple_returns.std(axis=0) * annualization
        
class MarketCap(CustomFactor):
    """Latest close price multiplied by the latest shares outstanding."""
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]
    window_length = 1

    def compute(self, today, assets, out, close, shares):
        latest_close = close[-1]
        latest_shares = shares[-1]
        out[:] = latest_close * latest_shares

class FCFStability(CustomFactor):
    """Negated dispersion of decayed free-cash-flow-yield snapshots.

    Eight rows of ``fcf_yield`` are sampled at indices -1, -QL, -QL*2, ...,
    -QL*7, scaled by weights falling from 1 to 0.3, and their per-asset
    standard deviation is negated before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.fcf_yield]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, fcf_yield):
        weights = (1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3)
        rows = [-1] + [-QL * step for step in range(1, 8)]
        snapshots = [w * fcf_yield[r] for w, r in zip(weights, rows)]
        dispersion = np.std(snapshots, axis=0)

        out[:] = preprocess(-dispersion)
        
class mean_rev(CustomFactor):
    """Score built from trailing-average typical price relative to the latest one.

    The typical price is ``(high + low + close) / 3``. For every lookback
    from 10 rows up to the full window, the NaN-aware mean typical price is
    divided by the latest typical price. That ratio, and separately its
    reciprocal, is accumulated after being multiplied by its own NaN-aware
    sum across assets. The output is the preprocessed difference between the
    two accumulators.
    """
    inputs = [USEquityPricing.high, USEquityPricing.low, USEquityPricing.close]
    window_length = 30
    window_safe = True

    def compute(self, today, assets, out, high, low, close):
        typical = (high + low + close) / 3
        n_rows, n_assets = close.shape
        latest = typical[-1, :]

        ratio_total = np.zeros(n_assets)
        inverse_total = np.zeros(n_assets)

        for lookback in range(10, n_rows + 1):
            ratio = np.nanmean(typical[-lookback:, :], axis=0) / latest
            ratio_total += np.nansum(ratio) * ratio
            inverse = 1.0 / ratio
            inverse_total += np.nansum(inverse) * inverse

        out[:] = preprocess(ratio_total - inverse_total)

class growthscorestability(CustomFactor):
    """Negated dispersion of decayed growth-score snapshots.

    Rows at indices -1, -QL, -QL*2, ..., -QL*7 are weighted 1.0 down to 0.3;
    their per-asset standard deviation is negated and preprocessed.
    """
    inputs = [Fundamentals.growth_score]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        weights = (1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3)
        rows = [-1] + [-QL * step for step in range(1, 8)]
        snapshots = [w * var[r] for w, r in zip(weights, rows)]
        dispersion = np.std(snapshots, axis=0)

        out[:] = preprocess(-dispersion)
                
class peg_ratio_stability(CustomFactor):
    """Dispersion of decayed inverse PEG-ratio snapshots.

    For rows at indices -1, -QL, -QL*2, ..., -QL*7 the value ``weight / ratio``
    is formed with weights 1 down to 0.3; the per-asset standard deviation
    of those eight values is preprocessed (not negated).
    """
    inputs = [Fundamentals.peg_ratio]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        weights = (1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3)
        rows = [-1] + [-QL * step for step in range(1, 8)]
        inverse_snapshots = [w / var[r] for w, r in zip(weights, rows)]
        dispersion = np.std(inverse_snapshots, axis=0)

        out[:] = preprocess(dispersion)
        
                
class SalesGrowth(CustomFactor):
    """Latest sales growth, standardized against each asset's own window.

    NaNs are set to 0, every column is scaled along the time axis with
    sklearn's ``preprocessing.scale``, and the last row is then preprocessed.
    """
    inputs = [factset.Fundamentals.sales_gr_qf]
    window_length = 2*252
    window_safe = True

    def compute(self, today, assets, out, sales_growth):
        filled = np.nan_to_num(sales_growth)
        standardized = preprocessing.scale(filled, axis=0)
        latest = standardized[-1]
        out[:] = preprocess(latest)

class SalesGrowthStability(CustomFactor):
    """Mean-to-dispersion ratio of decayed sales-growth snapshots.

    The latest row is used unweighted, and the rows at indices -QL, -QL*2,
    ..., -QL*7 are weighted 0.9 down to 0.3. The per-asset mean of those eight
    samples divided by their standard deviation is preprocessed.
    """
    inputs = [factset.Fundamentals.sales_gr_qf]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3)
        lagged = [w * var[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [var[-1]] + lagged

        center = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)

        out[:] = preprocess(center / spread)

        
        
class Gross_Income_Margin(CustomFactor):
    """Negated gross income margin from the latest fundamentals.

    The margin is computed as ``sales/sales - cost_of_revenue/sales`` (i.e.
    one minus the cost share of revenue wherever sales is finite and
    non-zero) and is negated before preprocessing.
    """
    inputs = [Fundamentals.cost_of_revenue, Fundamentals.total_revenue]
    window_length = 1
    window_safe = True

    def compute(self, today, assets, out, cost_of_revenue, sales):
        latest_sales = sales[-1]
        latest_cost = cost_of_revenue[-1]
        margin = latest_sales/latest_sales - latest_cost/latest_sales
        out[:] = preprocess(-margin)

        
class MaxGap(CustomFactor):
    """Largest absolute close-to-close log return inside the window.

    A 90-row window of closes gives 89 log returns per asset; the maximum of
    their absolute values is preprocessed.
    """
    inputs = [USEquityPricing.close]
    window_length = 90
    window_safe = True

    def compute(self, today, assets, out, close):
        log_close = np.log(close)
        abs_moves = np.abs(np.diff(log_close, axis=0))
        largest_move = np.max(abs_moves, axis=0)
        out[:] = preprocess(largest_move)



class fcf_growth_mean(CustomFactor):
    """Mean of decayed growth rates in total free cash flow.

    Total free cash flow is ``fcf_per_share * shares_outstanding``, with
    infinities treated as missing. Seven growth rates are formed between
    consecutive sampled rows (-1 vs -QL, -QL vs -QL*2, ..., -QL*6 vs -QL*7),
    weighted 1.0 down to 0.4, and their per-asset mean is preprocessed.

    ``enterprise_value`` is declared as an input and therefore arrives as
    ``ev``, but it does not take part in the calculation.
    """
    inputs=[
        Fundamentals.fcf_per_share,
        Fundamentals.shares_outstanding,
        Fundamentals.enterprise_value,]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, fcf, shares, ev):
        total_fcf = fcf*shares
        total_fcf[np.isinf(total_fcf)] = np.nan

        weights = (1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        newer_rows = [-1] + [-QL * step for step in range(1, 7)]
        older_rows = [-QL * step for step in range(1, 8)]

        # A zero in the older row gives +/-inf here; preprocess() later turns
        # any infinity in the averaged result back into a missing value.
        growth = [
            w * (total_fcf[newer] / total_fcf[older] - 1)
            for w, newer, older in zip(weights, newer_rows, older_rows)
        ]

        # Only the mean is needed; the previously computed standard deviation
        # was never used.
        out[:] = preprocess(np.mean(growth, axis=0))
        

class TEM_GROWTH(CustomFactor):
    """Negated dispersion of decayed growth rates in capex / total assets.

    ``capex / total_assets`` is computed for the whole window, with infinities
    treated as missing. Seven growth rates are formed between consecutive
    sampled rows (-1 vs -QL, -QL vs -QL*2, ..., -QL*6 vs -QL*7), weighted 1.0
    down to 0.4; their per-asset standard deviation is negated and
    preprocessed.

    The capex as-of date is declared as an input and therefore arrives as
    ``asof_date``, but it does not take part in the calculation.
    """
    inputs=[factset.Fundamentals.capex_qf_asof_date,
        factset.Fundamentals.capex_qf,
        factset.Fundamentals.assets]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, asof_date, capex, total_assets):
        intensity = capex/total_assets
        intensity[np.isinf(intensity)] = np.nan

        weights = (1.0, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        newer_rows = [-1] + [-QL * step for step in range(1, 7)]
        older_rows = [-QL * step for step in range(1, 8)]

        growth = [
            w * (intensity[newer] / intensity[older] - 1)
            for w, newer, older in zip(weights, newer_rows, older_rows)
        ]

        # Only the dispersion is needed; the previously computed mean was
        # never used.
        out[:] = preprocess(-np.std(growth, axis=0))
                
class STA_Stability(CustomFactor):
    """Mean-to-dispersion ratio of decayed |net income - operating cash flow| / total assets.

    Missing inputs are replaced with 0 before the ratio is formed. The latest
    row is used unweighted, and the rows at indices -QL, -QL*2, ..., -QL*6 are
    weighted 0.9 down to 0.4. The per-asset mean of those seven samples
    divided by their standard deviation is preprocessed.
    """
    inputs = [Fundamentals.operating_cash_flow,
              Fundamentals.net_income_continuous_operations,
              Fundamentals.total_assets]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, ocf, ni, ta):
        ta, ocf, ni = [np.where(np.isnan(x), 0, x) for x in (ta, ocf, ni)]
        accrual_ratio = abs(ni - ocf) / ta

        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        lagged = [w * accrual_ratio[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [accrual_ratio[-1]] + lagged

        spread = np.std(samples, axis=0)
        center = np.mean(samples, axis=0)

        out[:] = preprocess(center / spread)
        
class FCFTA(CustomFactor):
    """Indicator of positive free cash flow relative to total assets.

    Takes the value 1 where the latest ``free_cash_flow / total_assets`` is
    greater than 0 and 0 everywhere else (including where the ratio is NaN),
    then preprocesses that indicator.
    """
    inputs = [Fundamentals.free_cash_flow,
              Fundamentals.total_assets]
    window_length = 1
    window_safe = True

    def compute(self, today, assets, out, fcf, ta):
        ratio = fcf[-1] / ta[-1]
        is_positive = np.where(ratio > 0, 1, 0)
        out[:] = preprocess(is_positive)
        
class Current_Ratio_Mean(CustomFactor):
    """Mean of decayed current-ratio snapshots.

    The latest row is used unweighted, and the rows at indices -QL, -QL*2,
    ..., -QL*6 are weighted 0.9 down to 0.4. The per-asset mean of those seven
    samples is preprocessed.
    """
    inputs=[Fundamentals.current_ratio]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        lagged = [w * var[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [var[-1]] + lagged

        # Only the mean is needed; the previously computed standard deviation
        # was never used.
        out[:] = preprocess(np.mean(samples, axis=0))


class pcf_ratio_stability(CustomFactor):
    """Dispersion of decayed inverse price-to-cash-flow snapshots.

    For rows at indices -1, -QL, -QL*2, ..., -QL*7 the value ``weight / ratio``
    is formed with weights 1 down to 0.3; the per-asset standard deviation
    of those eight values is preprocessed (not negated).
    """
    inputs = [Fundamentals.pcf_ratio]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        weights = (1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3)
        rows = [-1] + [-QL * step for step in range(1, 8)]
        inverse_snapshots = [w / var[r] for w, r in zip(weights, rows)]
        dispersion = np.std(inverse_snapshots, axis=0)

        out[:] = preprocess(dispersion)
        
class ps_ratio_stability(CustomFactor):
    """Dispersion of decayed inverse price-to-sales snapshots.

    For rows at indices -1, -QL, -QL*2, ..., -QL*7 the value ``weight / ratio``
    is formed with weights 1 down to 0.3; the per-asset standard deviation
    of those eight values is preprocessed (not negated).
    """
    inputs = [Fundamentals.ps_ratio]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        weights = (1, 0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3)
        rows = [-1] + [-QL * step for step in range(1, 8)]
        inverse_snapshots = [w / var[r] for w, r in zip(weights, rows)]
        dispersion = np.std(inverse_snapshots, axis=0)

        out[:] = preprocess(dispersion)

class revenue_growth_mean(CustomFactor):
    """Mean of decayed revenue-growth snapshots.

    The latest row is used unweighted, and the rows at indices -QL, -QL*2,
    ..., -QL*6 are weighted 0.9 down to 0.4. The per-asset mean of those seven
    samples is preprocessed.
    """
    inputs = [Fundamentals.revenue_growth]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        lagged = [w * var[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [var[-1]] + lagged

        # Only the mean is needed; the previously computed standard deviation
        # was never used.
        out[:] = preprocess(np.mean(samples, axis=0))

class revenue_growth_std(CustomFactor):
    """Negated dispersion of decayed revenue-growth snapshots.

    The latest row is used unweighted, and the rows at indices -QL, -QL*2,
    ..., -QL*6 are weighted 0.9 down to 0.4. The per-asset standard deviation
    of those seven samples is negated and preprocessed.
    """
    inputs = [Fundamentals.revenue_growth]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        lagged = [w * var[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [var[-1]] + lagged

        # Only the dispersion is needed; the previously computed mean was
        # never used.
        out[:] = preprocess(-np.std(samples, axis=0))

class revenue_growth_stability(CustomFactor):
    """Mean-to-dispersion ratio of decayed revenue-growth snapshots.

    The latest row is used unweighted, and the rows at indices -QL, -QL*2,
    ..., -QL*6 are weighted 0.9 down to 0.4. The per-asset mean of those seven
    samples divided by their standard deviation is preprocessed.
    """
    inputs = [Fundamentals.revenue_growth]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        lagged = [w * var[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [var[-1]] + lagged

        spread = np.std(samples, axis=0)
        center = np.mean(samples, axis=0)

        out[:] = preprocess(center / spread)
        
class pretax_margin_stability(CustomFactor):
    """Mean-to-dispersion ratio of decayed pretax-margin snapshots.

    The latest row is used unweighted, and the rows at indices -QL, -QL*2,
    ..., -QL*6 are weighted 0.9 down to 0.4. The per-asset mean of those seven
    samples divided by their standard deviation is preprocessed.
    """
    inputs = [Fundamentals.pretax_margin]
    window_length = QL*8
    window_safe = True

    def compute(self, today, assets, out, var):
        decay = (0.9, 0.8, 0.7, 0.6, 0.5, 0.4)
        lagged = [w * var[-QL * step] for step, w in enumerate(decay, 1)]
        samples = [var[-1]] + lagged

        spread = np.std(samples, axis=0)
        center = np.mean(samples, axis=0)

        out[:] = preprocess(center / spread)
        
# Universe: tradable US stocks whose RBICS level-1 sector name is 'Finance'.
sector = RBICSFocus.l1_name.latest
base_universe = QTradableStocksUS() & sector.eq('Finance')
# base_universe = QTradableStocksUS() & sector.eq('Technology')

# Descriptive columns (not alphas).
mkt_cap = MarketCap(mask=base_universe)
vol = Volatility(mask=base_universe)

# Alpha factors, all restricted to the base universe.
alpha4 = mean_rev(mask=base_universe)
alpha12 = Gross_Income_Margin(mask=base_universe)
alpha13 = MaxGap(mask=base_universe)
alpha25 = FCFTA(mask=base_universe)
alpha43 = FCFStability(mask=base_universe)
alpha44 = growthscorestability(mask=base_universe)

# alpha45 is instantiated but is not one of the pipeline columns below.
alpha45 = peg_ratio_stability(mask=base_universe)
alpha46 = SalesGrowthStability(mask=base_universe)
alpha52 = fcf_growth_mean(mask=base_universe)

alpha54 = TEM_GROWTH(mask=base_universe)
alpha59 = STA_Stability(mask=base_universe)

alpha65 = Current_Ratio_Mean(mask=base_universe)
alpha71 = pcf_ratio_stability(mask=base_universe)
alpha72 = ps_ratio_stability(mask=base_universe)

alpha76 = revenue_growth_mean(mask=base_universe)
alpha78 = revenue_growth_stability(mask=base_universe)

alpha84 = pretax_margin_stability(mask=base_universe)


pipe_alpha_factors = Pipeline(
    columns={
        'vol': vol,
        'mkt_cap': mkt_cap,
        'alpha4': alpha4,
        'alpha12': alpha12,
        'alpha13': alpha13,
        'alpha25': alpha25,
        'alpha43': alpha43,
        'alpha44': alpha44,
        'alpha46': alpha46,
        'alpha52': alpha52,
        'alpha54': alpha54,
        'alpha59': alpha59,
        'alpha65': alpha65,
        'alpha71': alpha71,
        'alpha72': alpha72,
        'alpha76': alpha76,
        'alpha78': alpha78,
        'alpha84': alpha84,
    },
    screen=base_universe,
)

# Evaluate every column over the study period.
output = run_pipeline(pipe_alpha_factors, '2015-06-01', '2018-09-01')

In [None]:
# Assets taken from the second level of the pipeline output's index.
assets = output.index.levels[1].unique()
# Close prices for those assets over the same start/end dates that were
# passed to run_pipeline. No extra days are requested past the end date, so
# the shifted forward returns computed later are NaN for the final rows.
# NOTE(review): extend end_date if forward returns are wanted for the last pipeline dates.
pricing = get_pricing(assets, start_date='2015-06-01', end_date='2018-09-01', fields='close_price')


In [None]:
# factor_names = ['alpha1','alpha2','alpha3','alpha4','alpha5','alpha6','alpha7','alpha8','alpha9','alpha10',
# 'alpha11','alpha12','alpha13','alpha14','alpha15','alpha16','alpha17','alpha18','alpha19','alpha20',
# 'alpha21','alpha22','alpha23','alpha24','alpha25','alpha26','alpha27','alpha28','alpha29','alpha30',
# 'alpha31','alpha32','alpha33','alpha34','alpha35','alpha36','alpha37','alpha38','alpha39','alpha40',
# 'alpha41']

# factor_names = ['alpha2','alpha4','alpha11','alpha14','alpha31',
#                 'alpha35', 'alpha43', 'alpha45', 'alpha46', 'alpha52', 
#                 'alpha55', 'alpha66', 'alpha73', 'alpha77']
# # 'alpha82', 'alpha83', 'alpha84']

# Pipeline columns to demean and carry into the regression frame.
# Each name appears once ('alpha46' was previously listed twice, which only
# caused the same column to be recomputed).
factor_names = ['alpha4', 'alpha43', 'alpha46', 'alpha52', 'alpha12', 'alpha13', 'alpha25', 'alpha44',
                'alpha54', 'alpha59', 'alpha65', 'alpha71', 'alpha72', 'alpha76', 'alpha78', 'alpha84'
               ]

# for i in range(53,85):
#     if i not in [4,43,46,52,12,13,25,44]:
#         factor_names.append('alpha'+str(i))

def wmean(name):
    """Return the per-date market-cap-weighted mean of ``output[name]``.

    For every value of index level 0 this is ``sum(cap * factor) / sum(cap)``,
    where both sums run over the rows on which the cap and the factor are
    present. (The earlier version divided by the mean of the caps, which
    inflates the result by roughly the number of assets on that date.)
    """
    caps = output['mkt_cap']
    values = output[name]
    # Rows with a missing factor or cap contribute nothing to the numerator,
    # so they must not contribute to the denominator either.
    usable = caps.notnull() & values.notnull()
    weighted_total = (caps * values).sum(level=0)
    cap_total = caps.where(usable).sum(level=0)
    return weighted_total / cap_total

# Cross-sectionally demean every alpha: subtract, for each value of index
# level 0, the mean of that alpha over all rows sharing that value.
data = pd.DataFrame()
for name in factor_names:
    shadow = 's' + name
    # A copy of the raw factor is stored on `output` under the 's'-prefixed name.
    output[shadow] = output[name]
    level_mean = output[shadow].mean(level=0)
    data[name] = output[shadow].sub(level_mean, level=0)
    # Disabled alternative: subtract the cap-weighted mean instead, i.e.
    # output[name] = output[name].sub(wmean(name), level=0)



In [None]:
# output['vol_rank'] = signalize(output['vol'])
# output['mk_rank'] = signalize(output['mkt_cap']**(1.0/3.0))
# # output['weights'] = 0.3*output['vol_rank'] + 0.7*output['mk_rank']
# # output['weights'] = output['vol_rank'] 
# output['weights'] = output['vol_rank']/output['vol_rank'].sum()

In [None]:
# 5-row-ahead percentage change of the close, aligned to the row at which the
# holding period starts (hence the negative shift). Stored under its own name
# so the `returns` function imported from quantopian.research is not
# overwritten.
fwd_returns = pricing.pct_change(periods=5).shift(-5)
# Demean the returns across assets on each row, then go long-form.
fwd_returns = fwd_returns.sub(fwd_returns.mean(axis=1), axis=0).stack()
# Join with the demeaned alphas and keep only rows where every column is present.
data = pd.concat([fwd_returns.rename('returns'), data], axis=1).dropna()

In [None]:
# Turn the two index levels into ordinary columns named 'dates' and 'sid'.
data = data.reset_index().rename(columns={'level_0': 'dates', 'level_1': 'sid'})

In [None]:
# significant_factor_names = ['alpha2', 'alpha4', 'alpha11', 'alpha14', 
#                             'alpha31', 'alpha35', 'alpha43', 'alpha45', 'alpha46', 
#                             'alpha52', 'alpha55', 
#                             'alpha66', 
#                             'alpha73',
#                             'alpha77'
#                              ]

# data['alpha_k1'] = data['alpha10'] + data['alpha12'] + data['alpha13'] + data['alpha25'] + data['alpha27'] 
# data['alpha_k2'] = data['alpha17'] + data['alpha18'] + data['alpha19'] 

# data['alpha_s1'] = -(data['alpha9'] + data['alpha16'] + data['alpha20'] + \
#                      data['alpha23'] + data['alpha36'] + data['alpha37'] + \
#                      data['alpha40'] + data['alpha41'] + data['alpha47'] + \
#                      data['alpha48'] + data['alpha50'])

# data['alpha_s2'] = -(data['alpha9'] + data['alpha16'] + data['alpha20'] + \
#                      data['alpha23'] + data['alpha47'] + \
#                      data['alpha48'])


# data['alpha_k3'] = data['alpha_k1'] + data['alpha_k2'] + data['alpha44'] + data['alpha51']
# Composite "k" alphas: plain sums of the demeaned factor columns.
data['alpha_k4'] = (
    data['alpha12'] + data['alpha13'] + data['alpha25'] + data['alpha44']
)
data['alpha_k5'] = data['alpha_k4'] + data['alpha76']
data['alpha_k6'] = data['alpha_k4'] + data['alpha59'] + data['alpha76']

# Composite "s" alpha: the negated sum of alpha65, alpha71, alpha72 and alpha84.
# Earlier candidate list: 'alpha65', 'alpha71', 'alpha72', 'alpha74', 'alpha79', 'alpha84'
data['alpha_s3'] = -(
    data['alpha65'] + data['alpha71'] + data['alpha72'] + data['alpha84']
)

#78in 46out


# data['final'] = data['alpha4'] + data['alpha52'] + data['alpha_k6'] + data['alpha54'] + data['alpha78'] + data['alpha_s3']

# Regressors for the pooled OLS below; comment entries in or out to try
# different combinations.
significant_factor_names = [
    # 'alpha4',
    'alpha43',
    # 'alpha46',
    'alpha52',
    'alpha_k6',
    'alpha54',
    'alpha78',
    # 'alpha_s3',
]

# Alternative regressor sets:
# significant_factor_names = ['alpha12', 'alpha13', 'alpha25',
#                              'alpha44', 'alpha59', 'alpha76']
# significant_factor_names = ['alpha65', 'alpha71', 'alpha72', 'alpha84']

# Forward returns (scaled by 100) regressed on the selected alphas. No
# constant column is added to the regressors.
# Alternative single regressor: data[['final']]
est = sm.OLS(100 * data[['returns']], data[significant_factor_names])
# Covariance is clustered using the 'dates' column as the group label.
est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

# Call form of print: same output on Python 2 for a single argument, and it
# also parses on Python 3, unlike the bare `print x` statement.
print(est2.summary())

In [None]:
# Combined alpha #1: unweighted sum of seven alpha columns, indexed by
# (dates, sid) as alphalens expects.
d = data.set_index(['dates', 'sid'])
net_alpha = (
    d['alpha4'] + d['alpha43'] + d['alpha46'] + d['alpha52']
    + d['alpha_k6'] + d['alpha54'] + d['alpha78']
)

# Quintile buckets, evaluated on 5- and 10-period forward returns.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    net_alpha,
    pricing,
    quantiles=5,
    periods=(5, 10),
)

# Other available reports:
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)

In [None]:
# Combined alpha #2: unweighted sum of six alpha columns (including the
# negated composite alpha_s3), indexed by (dates, sid) as alphalens expects.
d = data.set_index(['dates', 'sid'])
net_alpha = (
    d['alpha4'] + d['alpha52'] + d['alpha_k6']
    + d['alpha54'] + d['alpha78'] + d['alpha_s3']
)

# Quintile buckets, evaluated on 5- and 10-period forward returns.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    net_alpha,
    pricing,
    quantiles=5,
    periods=(5, 10),
)

# Other available reports:
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)