In [None]:
import numpy as np
import statsmodels.api as sm
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, PercentChange, SimpleMovingAverage
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
import alphalens

In [None]:
def signalize(df):
    """Convert values to scaled ranks and fill gaps with the mean scaled rank.

    Every non-missing entry becomes ``(rank - 0.5) / count``, where ``rank``
    is taken from ``df.rank()`` and ``count`` from ``df.count()`` (the number
    of non-missing entries). Entries that are still NaN afterwards are
    replaced with the mean of the scaled ranks.
    """
    n_valid = df.count()
    scaled_rank = (df.rank() - 0.5) / n_valid
    fill_value = scaled_rank.mean()
    return scaled_rank.replace(np.nan, fill_value)

class Volatility(CustomFactor):
    """Standard deviation of daily close-to-close returns, scaled by sqrt(252).

    The 66-row window of closing prices yields 65 daily returns per asset.
    """
    inputs = [USEquityPricing.close]
    window_length = 66

    def compute(self, today, assets, out, close):
        # np.diff yields one row fewer than `close`, so pair each price change
        # with the close it started from (every row except the last).
        starting_close = close[:-1]
        price_change = np.diff(close, axis=0)
        daily_returns = price_change / starting_close
        # sqrt(252) scaling -- presumably annualisation over 252 trading days.
        out[:] = np.std(daily_returns, axis=0) * math.sqrt(252)
        
class MarketCap(CustomFactor):
    """Most recent close price multiplied by most recent shares outstanding."""
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]
    window_length = 1

    def compute(self, today, assets, out, close, shares):
        latest_close = close[-1]
        latest_shares = shares[-1]
        out[:] = latest_close * latest_shares
        
        
class EnterpriseValue(CustomFactor):
    """Pass-through of the most recent ``Fundamentals.enterprise_value`` row."""
    inputs = [Fundamentals.enterprise_value]
    window_length = 1

    def compute(self, today, assets, out, ev):
        latest_ev = ev[-1]
        out[:] = latest_ev
        
class LogEV(CustomFactor):
    """Natural logarithm of the most recent ``Fundamentals.enterprise_value`` row."""
    inputs = [Fundamentals.enterprise_value]
    window_length = 1

    def compute(self, today, assets, out, ev):
        latest_ev = ev[-1]
        out[:] = np.log(latest_ev)

class LogMarketCap(CustomFactor):
    """Natural logarithm of (latest close price * latest shares outstanding)."""
    inputs = [USEquityPricing.close, Fundamentals.shares_outstanding]
    window_length = 1

    def compute(self, today, assets, out, close, shares):
        market_cap = close[-1] * shares[-1]
        out[:] = np.log(market_cap)

class Factor1(CustomFactor):
    """``Fundamentals.pcf_ratio`` taken one row before the most recent one.

    With a two-row window, index -2 is the older of the two observations.
    """
    inputs = [Fundamentals.pcf_ratio]
    window_length = 2

    def compute(self, today, assets, out, var):
        lagged_value = var[-2]
        out[:] = lagged_value
        
class Factor2(CustomFactor):
    """``Fundamentals.ps_ratio`` taken one row before the most recent one.

    With a two-row window, index -2 is the older of the two observations.
    """
    inputs = [Fundamentals.ps_ratio]
    window_length = 2

    def compute(self, today, assets, out, var):
        lagged_value = var[-2]
        out[:] = lagged_value
    
class Factor3(CustomFactor):
    """Lagged free cash flow over the cube root of a size product.

    The size product is enterprise value * close * shares outstanding *
    total assets. Every input is read at index -2, i.e. the older row of the
    two-row window.
    """
    inputs = [
        Fundamentals.enterprise_value,
        Fundamentals.free_cash_flow,
        USEquityPricing.close,
        Fundamentals.shares_outstanding,
        Fundamentals.total_assets,
    ]
    window_length = 2

    def compute(self, today, assets, out, ev, var, close, shares, ta):
        # `var` carries free cash flow (second entry of `inputs`).
        size_product = ev[-2] * close[-2] * shares[-2] * ta[-2]
        out[:] = var[-2] / size_product ** (1. / 3.)
        
                
class Factor4(CustomFactor):
    """Lagged total assets over sqrt(enterprise value * close * shares).

    Every input is read at index -2, i.e. the older row of the two-row
    window. Only the fields that enter the formula are requested:
    free cash flow is not part of this factor, so it is not loaded.
    """
    inputs = [
        Fundamentals.enterprise_value,
        USEquityPricing.close,
        Fundamentals.shares_outstanding,
        Fundamentals.total_assets,
    ]
    window_length = 2

    def compute(self, today, assets, out, ev, close, shares, ta):
        # Arguments arrive positionally in the same order as `inputs`.
        size_product = ev[-2] * close[-2] * shares[-2]
        out[:] = ta[-2] / size_product ** (1. / 2.)
        
class Factor5(CustomFactor):
    """
    Standard deviation, over the window, of capital expenditure / enterprise value.

    Original note: TEM = standard deviation of past 6 quarters' reports
    (the 390-row window is presumably sized to cover those six quarters).
    """
    inputs = [Fundamentals.capital_expenditure, Fundamentals.enterprise_value]
    window_length = 390

    def compute(self, today, assets, out, capex, ev):
        capex_to_ev = capex / ev
        # Collapse the time axis (rows) to one value per asset column.
        out[:] = np.std(capex_to_ev, axis=0)

class Factor6(CustomFactor):
    """``Fundamentals.forward_earning_yield`` taken one row before the latest.

    With a two-row window, index -2 is the older of the two observations.
    """
    inputs = [Fundamentals.forward_earning_yield]
    window_length = 2

    def compute(self, today, assets, out, syield):
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor7(CustomFactor):
    """``Fundamentals.earning_yield`` taken one row before the latest.

    With a two-row window, index -2 is the older of the two observations.
    """
    inputs = [Fundamentals.earning_yield]
    window_length = 2

    def compute(self, today, assets, out, syield):
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor8(CustomFactor):
    """``Fundamentals.sales_yield`` taken one row before the latest.

    With a two-row window, index -2 is the older of the two observations.
    """
    inputs = [Fundamentals.sales_yield]
    window_length = 2

    def compute(self, today, assets, out, syield):
        lagged_yield = syield[-2]
        out[:] = lagged_yield

class Factor9(CustomFactor):
    """Summed relative price range times summed message-weighted bearishness.

    Over the 21-row window this multiplies
    ``nansum((high - low) / close)`` by
    ``nansum(total_scanned * (bear_scored - bull_scored))``,
    both taken along the time axis so the result is one value per asset.
    """
    inputs = [
        USEquityPricing.high,
        USEquityPricing.low,
        USEquityPricing.close,
        stocktwits.bull_scored_messages,
        stocktwits.bear_scored_messages,
        stocktwits.total_scanned_messages,
    ]
    window_length = 21
    window_safe = True

    def compute(self, today, assets, out, high, low, close, bull, bear, total):
        relative_range = (high - low) / close
        v = np.nansum(relative_range, axis=0)
        weighted_bearishness = total * (bear - bull)
        out[:] = v * np.nansum(weighted_bearishness, axis=0)


class Factor10(CustomFactor):
    """Mean, over a 360-row window, of capital expenditure / cost of revenue."""
    inputs = [Fundamentals.capital_expenditure, Fundamentals.cost_of_revenue]
    window_length = 360

    def compute(self, today, assets, out, capex, cr):
        capex_to_cost = capex / cr
        # Average down the time axis to get one value per asset column.
        out[:] = np.mean(capex_to_cost, axis=0)

class Factor11(CustomFactor):
    """Mean divided by standard deviation of ``Fundamentals.revenue_growth``.

    Both statistics are taken along the time axis of a 360-row window.
    """
    inputs = [Fundamentals.revenue_growth]
    # Alternative input kept from the original notebook:
    #     inputs = [Fundamentals.operation_revenue_growth3_month_avg]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std

class Factor12(CustomFactor):
    """Mean divided by standard deviation of ``Fundamentals.gross_margin``.

    Both statistics are taken along the time axis of a 360-row window.
    """
    inputs = [Fundamentals.gross_margin]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std

class Factor13(CustomFactor):
    """Mean divided by standard deviation of ``Fundamentals.quick_ratio``.

    Both statistics are taken along the time axis of a 360-row window.
    """
    inputs = [Fundamentals.quick_ratio]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std

class Factor14(CustomFactor):
    """Reciprocal of the standard deviation of ``Fundamentals.ebitda_margin``.

    The standard deviation is taken along the time axis of a 360-row window.
    """
    inputs = [Fundamentals.ebitda_margin]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_std = np.std(rate, axis=0)
        out[:] = 1 / window_std

class Factor15(CustomFactor):
    """Mean divided by standard deviation of ``Fundamentals.current_ratio``.

    Both statistics are taken along the time axis of a 360-row window.
    """
    inputs = [Fundamentals.current_ratio]
    window_length = 360

    def compute(self, today, assets, out, rate):
        window_mean = np.mean(rate, axis=0)
        window_std = np.std(rate, axis=0)
        out[:] = window_mean / window_std


# Every factor below is masked to the QTradableStocksUS universe.
base_universe = QTradableStocksUS()

# Size and risk descriptors.
mkt_cap, log_mkt_cap, vol = (
    factor_cls(mask=base_universe)
    for factor_cls in (MarketCap, LogMarketCap, Volatility)
)

# The fifteen candidate factors, instantiated in numeric order.
(f1, f2, f3, f4, f5,
 f6, f7, f8, f9, f10,
 f11, f12, f13, f14, f15) = (
    factor_cls(mask=base_universe)
    for factor_cls in (Factor1, Factor2, Factor3, Factor4, Factor5,
                       Factor6, Factor7, Factor8, Factor9, Factor10,
                       Factor11, Factor12, Factor13, Factor14, Factor15)
)

# The screen is just the base universe; no additional market-cap cut-off
# is applied here.
pipe_screen = base_universe

# Column name -> pipeline term; these names become the columns of `output`.
pipe_columns = dict(
    mkt_cap=mkt_cap,
    log_mkt_cap=log_mkt_cap,
    vol=vol,
    f1=f1, f2=f2, f3=f3, f4=f4, f5=f5,
    f6=f6, f7=f7, f8=f8, f9=f9, f10=f10,
    f11=f11, f12=f12, f13=f13, f14=f14, f15=f15,
)

pipe_alpha_factors = Pipeline(columns=pipe_columns, screen=pipe_screen)

# Run the pipeline over the study period; the result is indexed by
# (date, asset) as relied upon by the cells that follow.
output = run_pipeline(pipe_alpha_factors, '2015-06-01', '2018-09-01')


In [None]:
# Every asset present in the second level of the pipeline output index.
assets = output.index.levels[1].unique()
# Forward returns for the last factor dates need prices dated AFTER the
# pipeline's final day ('2018-09-01'), so request one extra month of closes.
# Without that extra month the forward returns at the end of the sample are
# NaN and those factor rows are silently dropped downstream.
pricing = get_pricing(assets, start_date='2015-06-01', end_date='2018-10-01', fields='close_price')

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Only the commented-out 'efficiency' experiment below uses the scaler.
scaler = MinMaxScaler(feature_range=(0.3,1))

# output['efficiency'] = scaler.fit_transform(output['eff_raw'].values)
# print output['efficiency']

# (new column, source column, negate before ranking?)
# Negating flips the direction of the ranking produced by signalize.
# NOTE(review): signalize is applied to the whole (date, asset) Series at
# once, so ranks are pooled across all dates rather than computed per day --
# confirm this is intended.
signal_specs = [
    ('mk_rank', 'log_mkt_cap', False),
    ('vol_rank', 'vol', True),
    # Alpha factors
    ('alpha1', 'f1', False),
    ('alpha2', 'f2', False),
    ('alpha3', 'f3', False),
    ('alpha4', 'f4', True),
    ('alpha5', 'f5', True),
    ('alpha6', 'f6', True),
    ('alpha7', 'f7', True),
    ('alpha8', 'f8', True),
    ('alpha9', 'f9', False),
    ('alpha10', 'f10', False),
    ('alpha11', 'f11', False),
    ('alpha12', 'f12', False),
    ('alpha13', 'f13', False),
    ('alpha14', 'f14', False),
    ('alpha15', 'f15', False),
]

for signal_name, source_name, negate in signal_specs:
    raw_values = output[source_name]
    output[signal_name] = signalize(-raw_values if negate else raw_values)


In [None]:
# Names of the fifteen signal columns: 'alpha1' ... 'alpha15'.
factor_names = ['alpha%d' % k for k in range(1, 16)]

# Demean every alpha within each date (level 0 of the index) so that each
# daily cross-section is centred on zero. groupby/transform replaces the
# per-column `Series.mean(level=0)` loop: the `level` argument of `mean` is
# deprecated in newer pandas, and one vectorised subtraction avoids growing
# the frame column by column.
alpha_values = output[factor_names]
data = alpha_values - alpha_values.groupby(level=0).transform('mean')

In [None]:
# NOTE: the name `returns` rebinds (and hides) the `returns` imported from
# quantopian.research at the top of the notebook; later cells expect this
# Series under that name.

# 5-row percentage change, shifted back 5 rows so each date carries the
# change over the FOLLOWING 5 rows.
forward_returns_5d = pricing.pct_change(periods=5).shift(-5)
# Demean the returns: subtract each date's cross-sectional average.
daily_mean_return = forward_returns_5d.mean(axis=1)
returns = forward_returns_5d.sub(daily_mean_return, axis=0).stack()


In [None]:
# Join forward returns with the demeaned alphas on the shared index, keep only
# complete rows, then expose the two index levels as ordinary columns named
# 'dates' and 'sid'.
data = (
    pd.concat([returns.rename('returns'), data], axis=1)
    .dropna()
    .reset_index()
    .rename(columns={'level_0': 'dates', 'level_1': 'sid'})
)

In [None]:
# Regressors used in the current specification. Alternatives tried earlier:
#     factor_names_e
#     ['alpha1', 'alpha2', 'alpha7', 'alpha12', 'alpha13', 'alpha14', 'alpha15']
#     ['alpha2', 'alpha7', 'alpha12', 'alpha13', 'alpha14', 'alpha15']
#     ['alpha7', 'alpha12', 'alpha13', 'alpha14']
regressors = ['alpha13', 'alpha11', 'alpha12']

# Regress returns (scaled by 100) on the selected alphas. No constant term is
# added to the design matrix.
est = sm.OLS(100 * data[['returns']], data[regressors])
# Standard errors are clustered by date.
est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

# Function-call form prints identically on Python 2 and also runs on Python 3
# (the bare `print x` statement is a syntax error there).
print(est2.summary())

In [None]:
# Combined signal: sum of the selected alpha columns, added left to right.
selected_alphas = ['alpha7', 'alpha12', 'alpha13', 'alpha14']
net_alpha = output[selected_alphas[0]]
for alpha_name in selected_alphas[1:]:
    net_alpha = net_alpha + output[alpha_name]

# Bucket the combined signal into 5 quantiles and attach 5- and 10-period
# forward returns computed from `pricing`.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    net_alpha,
    pricing,
    quantiles=5,
    periods=(5, 10),
)

# Other tear sheets that can be swapped in:
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)

In [None]:
# Combined signal: sum of the selected alpha columns, added left to right.
# alpha12 and alpha15 are not part of this combination.
selected_alphas = ['alpha1', 'alpha2', 'alpha3', 'alpha4', 'alpha5',
                   'alpha6', 'alpha7', 'alpha8', 'alpha9', 'alpha10',
                   'alpha11', 'alpha13', 'alpha14']
net_alpha = output[selected_alphas[0]]
for alpha_name in selected_alphas[1:]:
    net_alpha = net_alpha + output[alpha_name]

# Bucket the combined signal into 5 quantiles and attach 5- and 10-period
# forward returns computed from `pricing`.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    net_alpha,
    pricing,
    quantiles=5,
    periods=(5, 10),
)

# Other tear sheets that can be swapped in:
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)