In [None]:
import numpy as np
import statsmodels.api as sm
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, PercentChange, SimpleMovingAverage, BusinessDaysSincePreviousEvent, BusinessDaysUntilNextEvent
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import factset
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
import alphalens
from scipy.stats.mstats import winsorize
from sklearn import preprocessing
from scipy.stats.mstats import gmean
from zipline.utils.numpy_utils import (
    repeat_first_axis,
    repeat_last_axis,
)
# from quantopian.pipeline.data.factset import RBICSFocus
from quantopian.pipeline.classifiers.fundamentals import Sector

import quantopian.pipeline.data.factset.estimates as fe


In [None]:
#Factors taken from https://www.quantopian.com/posts/alpha-combination-via-clustering

# Fraction passed to scipy's winsorize() as both the lower and the upper limit
# inside preprocess(); with 0 no values are clipped.
WIN_LIMIT = 0
# Lookback window length assigned to `window_length` of every *_growth_q0 factor
# below, and used as the row offset for their growth calculation.
# NOTE(review): presumably meant to span about one fiscal quarter of trading
# days -- confirm the intended horizon.
QLq = 80

def signalize(df):
    """Convert values to mid-point percentile ranks in (0, 1).

    Each non-missing value is replaced by ``(rank - 0.5) / count`` where
    ``count`` is the number of non-missing observations. Missing values are
    then replaced with the mean of the resulting ranks.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Raw values to rank.

    Returns
    -------
    Same type as ``df``, holding the rank-based signal.
    """
    n_valid = df.count()
    pct_rank = (df.rank() - 0.5) / n_valid
    fill_value = pct_rank.mean()
    return pct_rank.replace(np.nan, fill_value)

def preprocess(a):
    """Clean and standardize a raw cross-section of factor values.

    Steps, in order:
      1. cast to float64 (this makes a copy, so the caller's array is untouched);
      2. turn +/-inf into NaN;
      3. subtract the NaN-ignoring mean and replace the remaining NaNs with 0,
         i.e. missing values are placed at the cross-sectional mean;
      4. winsorize both tails by ``WIN_LIMIT``;
      5. scale with ``sklearn.preprocessing.scale``.

    Parameters
    ----------
    a : array-like supporting ``astype`` and boolean-mask assignment
        Raw factor values.

    Returns
    -------
    The scaled values as returned by ``preprocessing.scale``.
    """
    values = a.astype(np.float64)
    # Selecting every non-finite entry (inf and NaN) and writing NaN leaves the
    # NaNs as they were and converts the infinities.
    values[~np.isfinite(values)] = np.nan
    centered = np.nan_to_num(values - np.nanmean(values))
    clipped = winsorize(centered, limits=[WIN_LIMIT, WIN_LIMIT])
    return preprocessing.scale(clipped)


class _PriceScaledGrowth(CustomFactor):
    """Shared implementation for the ``*_growth_q0`` factors.

    Subclasses only set ``inputs = [USEquityPricing.close, <fundamental field>]``.
    For each asset the factor is

        (field[last] / field[first] - 1) / (close[last] / close[first] - 1)

    where ``first`` and ``last`` are the first and last rows of the lookback
    window, and the resulting cross-section is passed through ``preprocess``.

    Notes
    -----
    * With the default ``window_length = QLq`` the first row of the window is
      the same row the original per-class code addressed as ``[-QLq]``, so
      results are unchanged. Indexing the window edge directly keeps the
      factor valid if ``window_length`` is overridden at instantiation.
    * Division by a zero base value or by zero price growth yields inf/NaN;
      ``preprocess`` converts those to the cross-sectional mean.
    * NOTE(review): a negative base value of the fundamental field flips the
      sign of the growth ratio -- confirm this is acceptable for the fields
      used here.
    """
    window_length = QLq
    window_safe = True

    def compute(self, today, assets, out, close, var):
        """Write the preprocessed growth ratio for every asset into ``out``.

        ``close`` and ``var`` are the lookback windows of the two ``inputs``
        (rows are days in the window, columns are assets).
        """
        close_g = close[-1] / close[0] - 1
        var_g = var[-1] / var[0] - 1
        out[:] = preprocess(var_g / close_g)


class NII_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.net_interest_income`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.net_interest_income]


class TA_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.total_assets`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.total_assets]


class TI_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.total_investments`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.total_investments]


class TS_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.trading_securities`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.trading_securities]


class CrCrd_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.credit_card`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.credit_card]


class DivIncm_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.dividend_income`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.dividend_income]


class EqtyIntrstEarng_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.earningsfrom_equity_interest_net_of_tax`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.earningsfrom_equity_interest_net_of_tax]


class FnC_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.fees_and_commissions`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.fees_and_commissions]


class Fetg_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.foreign_exchange_trading_gains`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.foreign_exchange_trading_gains]


class GainLoans_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.gainon_saleof_loans`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.gainon_saleof_loans]


class IncomeDeposits_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.interest_income_from_deposits`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.interest_income_from_deposits]


class IncomeFdrlFunds_growth_q0(_PriceScaledGrowth):
    """Growth of interest income from federal funds sold / resale agreements relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.interest_income_from_federal_funds_sold_and_securities_purchase_under_agreements_to_resell]


class LeaseIncome_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.interest_income_from_leases`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.interest_income_from_leases]


class IntrstIncmSec_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.interest_income_from_securities`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.interest_income_from_securities]


class IntrstIncmNonOper_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.interest_income_non_operating`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.interest_income_non_operating]


class NonInrstIncm_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.non_interest_income`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.non_interest_income]


class PolicyHldrIntrst_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.policyholder_interest`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.policyholder_interest]


class PolicyDiv_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.policyholder_dividends`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.policyholder_dividends]


class TotalPremium_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.total_premiums_earned`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.total_premiums_earned]


class TradingGL_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.trading_gain_loss`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.trading_gain_loss]


class Cash_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.cash`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.cash]


class CDTA_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.current_deferred_taxes_assets`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.current_deferred_taxes_assets]


class CDTL_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.current_deferred_taxes_liabilities`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.current_deferred_taxes_liabilities]


class CNP_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.current_notes_payable`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.current_notes_payable]


class CustAccnts_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.customer_accounts`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.customer_accounts]


class DepositsBank_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.depositsby_bank`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.depositsby_bank]


class FdrlsFundPurch_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.federal_funds_purchased`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.federal_funds_purchased]


class FdrlFundsSold_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.federal_funds_sold`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.federal_funds_sold]


class ForeclosedAssets_growth_q0(_PriceScaledGrowth):
    """Growth of ``Fundamentals.foreclosed_assets`` relative to price growth."""
    inputs = [USEquityPricing.close, Fundamentals.foreclosed_assets]

        
####Testing Var - banking related var growth and var/ta ratio
#ceded_premium - NO
#cost_of_revenue - WORKS (s/r)
#credit_card - WORkS_P(s)
#credit_losses_provision - NO
#credit_risk_provisions - No data
#dividend_income  WORKS(r)
#earningsfrom_equity_interest_net_of_tax - Yes (s)
#fees_and_commissions - WORKS (R)
#foreign_exchange_trading_gains - WORKS(R)
#gain_on_sale_of_security - NO
#gainon_investment_properties -  no data 
# Fundamentals.gainon_saleof_loans - WORKS (R)
#Fundamentals.interest_income - NO
#Fundamentals.interest_income_after_provision_for_loan_loss - NO
#Fundamentals.interest_income_from_deposits  - Works (s)
#Fundamentals.interest_income_from_federal_funds_sold_and_securities_purchase_under_agreements_to_resell (R)
#Fundamentals.interest_income_from_leases (R)
#Fundamentals.interest_income_from_securities (s)
#Fundamentals.interest_income_non_operating (s)
#Fundamentals.interest_income_other_operating_income - no data
#Fundamentals.investment_banking_profit - NO
#Fundamentals.non_interest_income - (s/R)
#Fundamentals.operating_income - WORKS but correlated
#Fundamentals.other_operating_income_total - no data
#Fundamentals.policyholder_dividends - WORKS
#Fundamentals.policyholder_interest - WORKS
#Fundamentals.securities_activities - NO
#Fundamentals.service_charge_on_depositor_accounts - NO
#Fundamentals.total_money_market_investments - NO
#Fundamentals.total_premiums_earned - WORKS (R)
#Fundamentals.total_revenue - WORKS  but correlated
#Fundamentals.trading_gain_loss -  WORKS but little data (Rs)
#Fundamentals.underwriting_expenses - NO
#advancesfrom_central_banks - No data
#assets_held_for_sale - NO
#bank_loans_total - No data
#cash  - WORKS (R)

# x = Fundamentals.claims_outstanding

# class Var_growth_mean(CustomFactor):  
#     inputs = [x]  
#     window_length = QL*9
#     window_safe = True
    
#     def compute(self, today, assets, out, var):  
#         arr = var[-1],
#             var[-QL],
#             var[-QL*2],
#             var[-QL*3],
#             var[-QL*4],
#             var[-QL*5],
#             var[-QL*6],
#            var[-QL*7],
#          var[-QL*8]]
        
#         arr = np.diff(arr, axis=0)/arr[1:]
#         mean = np.mean(arr, axis=0)
#         out[:] = preprocess(mean)

# class Var_growth_std(CustomFactor):  
#     inputs = [x]  
#     window_length = QL*9
#     window_safe = True
    
#     def compute(self, today, assets, out, var):  
#         arr = var[-1],
#             var[-QL],
#             var[-QL*2],
#             var[-QL*3],
#             var[-QL*4],
#             var[-QL*5],
#             var[-QL*6],
#            var[-QL*7],
#          var[-QL*8]]
        
#         arr = np.diff(arr, axis=0)/arr[1:]
#         std = np.std(arr, axis=0)
#         out[:] = preprocess(-std)

        
# class Var_growth_stability(CustomFactor):  
#     inputs = [x]  
#     window_length = QL*9
#     window_safe = True
    
#     def compute(self, today, assets, out, var):  
#         arr = var[-1],
#             var[-QL],
#             var[-QL*2],
#             var[-QL*3],
#             var[-QL*4],
#             var[-QL*5],
#             var[-QL*6],
#            var[-QL*7],
#          var[-QL*8]]
        
#         arr = np.diff(arr, axis=0)/arr[1:]
#         mean = np.mean(arr, axis=0)
#         std = np.std(arr, axis=0)
#         out[:] = preprocess(mean/std)


# class VarTa_mean(CustomFactor):  
#     inputs = [x, Fundamentals.total_assets]  
#     window_length = QL*9
#     window_safe = True
    
#     def compute(self, today, assets, out, v, ta):  
#         var  = v/ta
#         arr = var[-1],
#             var[-QL],
#             var[-QL*2],
#             var[-QL*3],
#             var[-QL*4],
#             var[-QL*5],
#             var[-QL*6],
#            var[-QL*7],
#          var[-QL*8]]
        
#         mean = np.mean(arr, axis=0)
#         out[:] = preprocess(mean)

# class VarTa_std(CustomFactor):  
#     inputs = [x, Fundamentals.total_assets]  
#     window_length = QL*9
#     window_safe = True
    
#     def compute(self, today, assets, out, v, ta):  
#         var  = v/ta
#         arr = var[-1],
#             var[-QL],
#             var[-QL*2],
#             var[-QL*3],
#             var[-QL*4],
#             var[-QL*5],
#             var[-QL*6],
#            var[-QL*7],
#          var[-QL*8]]
        
#         std = np.std(arr, axis=0)
#         out[:] = preprocess(-std)
    
# class VarTa_stability(CustomFactor):  
#     inputs = [x, Fundamentals.total_assets]  
#     window_length = QL*9
#     window_safe = True
    
#     def compute(self, today, assets, out, v, ta):  
#         var  = v/ta
#         arr = var[-1],
#             var[-QL],
#             var[-QL*2],
#             var[-QL*3],
#             var[-QL*4],
#             var[-QL*5],
#             var[-QL*6],
#            var[-QL*7],
#          var[-QL*8]]
        
#         mean = np.mean(arr, axis=0)
#         std = np.std(arr, axis=0)
#         out[:] = preprocess(mean/std)
        
        
# sector = RBICSFocus.l1_name.latest
# base_universe = QTradableStocksUS() & sector.eq('Finance')

# Universe: tradable US stocks whose Sector classifier code is 103 or 104.
# NOTE(review): presumably the financial-services and real-estate sector
# codes -- confirm against the Sector classifier documentation.
sector_codes = [103, 104]
base_universe = QTradableStocksUS() & Sector().element_of(sector_codes)

# Business days elapsed since the as-of date of the latest quarterly SALES
# actual, used further down to bucket observations by time since that event.
sales_actuals = fe.Actuals.slice('SALES', 'qf', 0)
guidance_days_prev = BusinessDaysSincePreviousEvent(inputs=[sales_actuals.asof_date])
# guidance_days_next = BusinessDaysUntilNextEvent(inputs=[fe.Actuals.slice('SALES', 'qf', 0).asof_date])


# Pure financials factors, all restricted to the universe above.
# alpha4 (TotalInv_growth_q0) and alpha6 (CR_growth_q0) have no factor class
# in this notebook, so those two numbers are skipped.
alpha1 = NII_growth_q0(mask=base_universe)
alpha2 = TA_growth_q0(mask=base_universe)
alpha3 = TI_growth_q0(mask=base_universe)
alpha5 = TS_growth_q0(mask=base_universe)
alpha7 = CrCrd_growth_q0(mask=base_universe)
alpha8 = DivIncm_growth_q0(mask=base_universe)
alpha9 = EqtyIntrstEarng_growth_q0(mask=base_universe)
alpha10 = FnC_growth_q0(mask=base_universe)
alpha11 = Fetg_growth_q0(mask=base_universe)
alpha12 = GainLoans_growth_q0(mask=base_universe)
alpha13 = IncomeDeposits_growth_q0(mask=base_universe)
alpha14 = IncomeFdrlFunds_growth_q0(mask=base_universe)
alpha15 = LeaseIncome_growth_q0(mask=base_universe)
alpha16 = IntrstIncmSec_growth_q0(mask=base_universe)
alpha17 = IntrstIncmNonOper_growth_q0(mask=base_universe)
alpha18 = NonInrstIncm_growth_q0(mask=base_universe)
alpha19 = PolicyHldrIntrst_growth_q0(mask=base_universe)
alpha20 = PolicyDiv_growth_q0(mask=base_universe)
alpha21 = TotalPremium_growth_q0(mask=base_universe)
alpha22 = TradingGL_growth_q0(mask=base_universe)
alpha23 = Cash_growth_q0(mask=base_universe)
alpha24 = CDTA_growth_q0(mask=base_universe)
alpha25 = CDTL_growth_q0(mask=base_universe)
alpha26 = CNP_growth_q0(mask=base_universe)
alpha27 = CustAccnts_growth_q0(mask=base_universe)
alpha28 = DepositsBank_growth_q0(mask=base_universe)
alpha29 = FdrlsFundPurch_growth_q0(mask=base_universe)
alpha30 = FdrlFundsSold_growth_q0(mask=base_universe)
alpha31 = ForeclosedAssets_growth_q0(mask=base_universe)


# Only alpha28-alpha31 are evaluated in this run. To test another factor, add
# its 'alphaN': alphaN entry here; the later cells look columns up by name.
pipeline_columns = {
    'alpha28': alpha28,
    'alpha29': alpha29,
    'alpha30': alpha30,
    'alpha31': alpha31,
    'guidance_days_prev': guidance_days_prev,
}

pipe_alpha_factors = Pipeline(columns=pipeline_columns, screen=base_universe)

# Sample period for the study.
sd = '2008-06-01'
ed = '2018-09-01'
output = run_pipeline(pipe_alpha_factors, start_date=sd, end_date=ed)

In [None]:
# Every asset that appears in the second index level of the pipeline output.
assets = output.index.levels[1].unique()
# We need a little more pricing data than the span of our factor so that
# forward returns can still be computed for the last factor dates, so the
# pricing window is extended one month past the pipeline end date.
pricing_end = pd.Timestamp(ed) + pd.DateOffset(months=1)
pricing = get_pricing(assets, start_date=sd, end_date=pricing_end, fields='close_price')


In [None]:
# print data['alpha87'].tail(10)

In [None]:
#YES
# '-alpha2', 'alpha10', '-alpha19', '-alpha24', 'alpha27'
#NO
#  'alpha1', 'alpha2', 'alpha3', 'alpha5', 'alpha7', 'alpha8', 'alpha9','alpha11', 'alpha12'
# 'alpha13', 'alpha14', 'alpha15', 'alpha16', 'alpha17', 'alpha18', 'alpha20', 'alpha21', 
# 'alpha22', 'alpha23', 'alpha25', 'alpha26', 'alpha28', 'alpha29', 'alpha30', 'alpha31'


# Pipeline output columns analysed in this run; each name is looked up in
# `output`, so it must be one of the columns enabled in the Pipeline.
factor_names = ['alpha28', 'alpha29', 'alpha30', 'alpha31']

def wmean(name):
    """Per-date market-cap-weighted mean of an ``output`` column.

    Parameters
    ----------
    name : str
        Column of the module-level ``output`` frame to average.

    Returns
    -------
    pandas.Series
        Indexed by the first index level of ``output`` (the date), holding
        ``sum(mkt_cap * value) / sum(mkt_cap)`` for that date.

    Notes
    -----
    Requires ``output`` to contain a ``'mkt_cap'`` column; a KeyError is
    raised otherwise. Weights of rows whose value is missing are excluded from
    the denominator so that they do not bias the mean towards zero.
    """
    values = output[name]
    # Drop the weight wherever the value itself is missing.
    weights = output['mkt_cap'].where(values.notnull())
    weighted_sum = (weights * values).groupby(level=0).sum()
    # Normalize by the total weight (not the average weight) to get a mean.
    return weighted_sum / weights.groupby(level=0).sum()

# Start from an explicit copy of the event-timing column so that the alpha
# columns added below are written to an independent frame rather than to a
# slice of `output` (which triggers pandas' SettingWithCopy warning).
data = output[['guidance_days_prev']].copy()

#Demean the alphas cross-sectionally, i.e. per date (index level 0)
for name in factor_names:
    # Scratch copy of the raw alpha kept on `output` under an 's' prefix.
    output['s'+name] = output[name]
    daily_mean = output['s'+name].groupby(level=0).mean()
    data[name] = output['s'+name].sub(daily_mean, level=0)
#     output[name] = output[name].sub(wmean(name), level=0)



In [None]:
# 5-row forward return: the change from row t to row t+5, stored at row t.
fwd_5d = pricing.pct_change(periods=5).shift(-5)
#Demean the returns across assets on each date
xs_mean = fwd_5d.mean(axis=1)
returns = fwd_5d.sub(xs_mean, axis=0).stack()
# Align returns with the factor frame and keep only complete rows.
data = pd.concat([returns.rename('returns'), data], axis=1).dropna()

In [None]:
# Move the two index levels into ordinary columns and give them readable
# names: level_0 becomes 'dates' and level_1 becomes 'sid'.
index_column_names = {'level_0': 'dates', 'level_1': 'sid'}
data = data.reset_index().rename(columns=index_column_names)

In [None]:
# Pooled OLS of demeaned 5-row forward returns (scaled by 100) on the
# demeaned alphas. No intercept is added; both sides were demeaned per date.
significant_factor_names = ['alpha28', 'alpha29', 'alpha30', 'alpha31']

est = sm.OLS(
    100*data[['returns']],
        data[significant_factor_names])

# Standard errors are clustered by date.
est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

# print() with parentheses behaves the same on Python 2 and also runs on
# Python 3, where the bare print statement is a syntax error.
print(est2.summary())

In [None]:
# Bucket boundaries, in business days since the previous SALES actual.
N1 = 10
N2 = 50

days_since = data['guidance_days_prev']

# d1-d3 are half-open, mutually exclusive buckets that cover every row:
#   d1: [0, N1)    d2: [N1, N2)    d3: [N2, inf)
# Rows exactly equal to N1 or N2 therefore fall in d2 and d3 respectively,
# instead of being left out of every bucket by strict inequalities.
data['d1'] = (days_since < N1).astype(float)
data['d2'] = ((days_since >= N1) & (days_since < N2)).astype(float)
data['d3'] = (days_since >= N2).astype(float)
# d4-d6 are the pairwise unions of the three buckets.
data['d4'] = ((data['d2'] == 1) | (data['d3'] == 1)).astype(float)
data['d5'] = ((data['d1'] == 1) | (data['d2'] == 1)).astype(float)
data['d6'] = ((data['d1'] == 1) | (data['d3'] == 1)).astype(float)


# Factor whose bucket interactions are tested. It has to be a column that was
# enabled in the pipeline run; 'alpha28' is one of the columns of this run.
K = '28'
tf = 'alpha'+str(K)
if tf not in data.columns:
    raise KeyError("%s is not a column of data; enable it in the pipeline "
                   "and in factor_names before testing it" % tf)

f1 = tf + '_d1'
f2 = tf + '_d2'
f3 = tf + '_d3'
f4 = tf + '_d4'
f5 = tf + '_d5'
f6 = tf + '_d6'
data[f1] = data[tf]*data['d1']
data[f2] = data[tf]*data['d2']
data[f3] = data[tf]*data['d3']
data[f4] = data[tf]*data['d4']
data[f5] = data[tf]*data['d5']
data[f6] = data[tf]*data['d6']

# Regress on the three mutually exclusive bucket interactions of the factor
# under test. Specifications from earlier runs, which need the corresponding
# alphas to be enabled in the pipeline, are kept below for reference.
significant_factor_names = [f1, f2, f3]
# significant_factor_names = ['alpha9_d2', 'alpha14_d3']
# significant_factor_names = ['alpha2_d4', 'alpha10_d2', 'alpha19_d2', 'alpha24_d2', 'alpha27_d3']
# significant_factor_names = [f4, 'alpha2', 'alpha19', 'alpha24', 'alpha27']

est = sm.OLS(
    100*data[['returns']], 
        data[significant_factor_names])

# Standard errors are clustered by date.
est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

print(est2.summary())


In [None]:
# d = data.set_index(['dates', 'sid'])
# # net_alpha = -d['alpha2'] - d['alpha10'] - d['alpha19'] - d['alpha24'] + d['alpha27']
# net_alpha =  d['alpha24']
# factor_data = alphalens.utils.get_clean_factor_and_forward_returns(net_alpha,
#                                                                    pricing,
#                                                                    quantiles=5,
#                                                                    periods=(5,10))

# # alphalens.tears.create_returns_tear_sheet(factor_data)
# # alphalens.tears.create_full_tear_sheet(factor_data)
# alphalens.tears.create_summary_tear_sheet(factor_data)

In [None]:
# d = data.set_index(['dates', 'sid'])
# # net_alpha = -d['alpha2_d4'] - d['alpha10_d2'] - d['alpha19_d2'] - d['alpha24_d2'] + d['alpha27_d3']
# net_alpha = d['alpha27_d3']
# factor_data = alphalens.utils.get_clean_factor_and_forward_returns(net_alpha,
#                                                                    pricing,
#                                                                    quantiles=5,
#                                                                    periods=(5,10))

# # alphalens.tears.create_returns_tear_sheet(factor_data)
# # alphalens.tears.create_full_tear_sheet(factor_data)
# alphalens.tears.create_summary_tear_sheet(factor_data)

In [None]:
# Quick look at the demeaned alphas of this run. Only the columns listed in
# factor_names exist in `data`; other alphaNN names raise KeyError.
print(data[factor_names].head())

In [None]:
# Re-index the regression frame by (date, asset) so that a single factor
# column can be handed to alphalens.
d = data.set_index(['dates', 'sid'])
net_alpha = d['alpha30']

# Quintile buckets, with forward returns over 5 and 10 periods.
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    factor=net_alpha,
    prices=pricing,
    quantiles=5,
    periods=(5, 10),
)

# Alternative reports:
# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)