In [None]:
import numpy as np
import statsmodels.api as sm
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, PercentChange, SimpleMovingAverage, BusinessDaysSincePreviousEvent, BusinessDaysUntilNextEvent
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import factset
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
import alphalens
from scipy.stats.mstats import winsorize
from sklearn import preprocessing
from scipy.stats.mstats import gmean
from zipline.utils.numpy_utils import (
    repeat_first_axis,
    repeat_last_axis,
)
from quantopian.pipeline.data.factset import RBICSFocus
import quantopian.pipeline.data.factset.estimates as fe
from quantopian.pipeline.experimental import Momentum, ShortTermReversal, Size, Value, Volatility



In [None]:
#Factors taken from https://www.quantopian.com/posts/alpha-combination-via-clustering
# Date range handed to run_pipeline() and get_pricing() further down.
sd, ed = '2014-01-01', '2018-10-01'
# Row stride between the snapshots sampled by the custom factors, and the
# unit in which their window_length is expressed.
QL = 80
# Limit passed for both tails to winsorize() inside preprocess().
WIN_LIMIT = 0

def signalize(df):
    """Turn values into fractional ranks and fill missing entries with the mean.

    Computes ``(rank - 0.5) / count`` using pandas ``rank()`` and ``count()``
    and then replaces every NaN in the result with the mean of the result.
    """
    ranks = df.rank()
    n_valid = df.count()
    scores = (ranks - 0.5) / n_valid
    fill_value = scores.mean()
    return scores.replace(np.nan, fill_value)

def preprocess(a):
    """Clean and standardize an array of raw factor values.

    Steps, in order: cast to float64 (``astype`` returns a new array), turn
    +/-inf into NaN, subtract the NaN-aware mean, replace the remaining NaNs
    with 0 via ``np.nan_to_num``, winsorize both tails by ``WIN_LIMIT`` and
    finally standardize with ``sklearn.preprocessing.scale``.
    """
    values = a.astype(np.float64)
    inf_mask = np.isinf(values)
    values[inf_mask] = np.nan
    centered = values - np.nanmean(values)
    filled = np.nan_to_num(centered)
    clipped = winsorize(filled, limits=[WIN_LIMIT, WIN_LIMIT])

    return preprocessing.scale(clipped)
        
#### Testing Var - growth and var/TA ratios of banking-related variables
#Fundamentals.operating_income - f3
#Fundamentals.total_revenue f1, -f3, f13

#cash
#tax_assets_total - no data
#total_deposits  (f7 , f15)
#total_equity  (-f8, f15)
#total_non_current_assets - NO
#trading_liabilities (f1)
#trading_assets  (-f5)
#accounts_payable -f14
#accounts_receivable f7
#accrued_investment_income f15, f16
#available_for_sale_securities - NO
#bank_loans_total - no data
#cash  f7, f12, f16
#current_assets f12, f15
#current_debt  -f1,-f14, -f16
#current_liabilities f1, f15
#dividends_payable -f3, f7
#equity_investments f2, -f14
#financial_assets f1, f3, -f4, f7
#financial_liabilities_current - NO
#goodwill - NO
#gross_loan -f7, f17
#interest_payable  f3, -f6, f8
#inventory   -f1
#invested_capital  f8
#line_of_credit  -f1, f5, f8, -f14
#long_term_debt -f16
#long_term_investments  -f6
#net_assets - no data
#net_debt  -f14, -f16
#net_loan  -f16
#Payables  f1, -f4, -f7, -f14
#properties - NO
#receivables f9 + f15 + f17
#restricted_cash  f1 f4 f15
#retained_earnings f12
#securities_and_investments f2, -f3, f12, f16 (but little data)
#tangible_book_value  f4
#total_partnership_capital  f5
#trading_assets    -(f5 + f9 + f17)
#trading_liabilities   (f1 + f8 + f2)
#working_capital  f12, f15
#cap_ex_reported f1, f4
#capital_expenditure  f10, f14
#cash_dividends_paid   -f6, -f17
#cash_flow_from_continuing_financing_activities  f2, -f4, -f7
#cash_flow_from_continuing_investing_activities  -(f1+f3) , f7
#cash_flow_from_continuing_operating_activities - NO
#change_in_account_payable   (f4) (f4+f8)
#change_in_accrued_expense   f6, f12
#change_in_accrued_investment_income    f1
#change_in_dividend_payable  -f1
#change_in_income_tax_payable  -  NO
#change_in_interest_payable -f1, -f4, f5
#change_in_inventory   #f13
#change_in_payable  #f4
#change_in_receivables   #f1, -f6, -f9, -f16
#change_in_restricted_cash  f8 -f14 -f17
#change_in_working_capital  f1, -f13 
#changein_accrued_income - no data
#changein_deferred_income - no data
#changes_in_account_receivables f1, f5, -f12, -f17
#changes_in_cash   -f1, f5
#classesof_cash_payments   f3, f7
#common_stock_dividend_paid  -f7 -f13
#common_stock_payments -f3 -f10
#dividend_paid_cfo f12
#dividend_received_cfo  - NO
#dividends_paid_direct - no data
#dividends_received_cfi -f8 
#dividends_received_direct - no data
#domestic_sales  f1, -f3
#earnings_losses_from_equity_investments   -f12, -f15
#financing_cash_flow - NO
#foreign_sales -f3 -f4 f7
#free_cash_flow  f11, f12
#gain_loss_on_investment_securities  f3, f7, -f11
#gain_loss_on_sale_of_ppe -f2, f7, f15
#gain_loss_on_sale_of_business   (f9 + f11 + f15)
#increase_decrease_in_deposit  -f7
#interest_paid_cfo  -f2
#interest_paid_direct    -f1, -f5
#interest_received_cfi    -f3
#interest_received_cfo    f3
#interest_received_direct  f2+f7
#investing_cash_flow   f4, -f17
#long_term_debt_issuance   f13
#long_term_debt_payments   -f1, f5
#net_business_purchase_and_sale   -(f14 + f18)
#net_income_from_continuing_operations  -f16, f17
#net_investment_purchase_and_sale   f3, f4, f8, -f17
#net_issuance_payments_of_debt  -(f4 + f13)
#net_outward_loans - no data
#operating_cash_flow  f17
#operating_gains_losses -f3, (f15 + f19)
#profiton_disposals  - NO
#purchase_of_business  - NO
#purchase_of_intangibles  f4, f14
#purchase_of_investment  f2
#purchaseof_subsidiaries  - no data
#receiptsfrom_customers   f3
#repurchase_of_capital_stock   -f3, -f10, -f19
#sale_of_business   -f18
#sale_of_investment  f1, f8, -f15
#unrealized_gain_loss_on_investment_securities  -f11 -f14

#--------#Income Statement----------------#
#administrative_expense   f1, -f16, -f17
#average_dilution_earn #field not present 
#changesin_inventoriesof_finished_goodsand_workin_progress - no data
#claimsand_paid_incurred - no data
#cost_of_revenue  f5
#dividend_income  NO
#earningsfrom_equity_interest_net_of_tax  -f14, f17
#ebit   -f3, f17
#ebitda  -f1, f17
#fee_revenue_and_other_income   f13
#fees_and_commissions   f13, f17
#feesand_commission_income   - no data
#foreign_exchange_trading_gains   -f3
#gain_losson_saleof_assets   -f14, -f16
#gain_on_sale_of_business  f3, -f16
#gain_on_sale_of_security  f5, f16 (f19)
#gainon_investment_properties - no data
#gainon_saleof_investment_property - no data
#gainon_saleof_loans   -(f4 + f8 + f10 + f12 + f14 + f18)
#gross_dividend_payment - no data
#gross_profit   f15
#interest_income   f7, f16
#interest_income_from_deposits - NO
#interest_income_from_leases  f8, -f10
#interest_income_from_loans    f2, f15 
#interest_income_from_securities   (f5 + f13 + f15)
#interest_income_non_operating  NO
#interest_income_other_operating_income   no data
#minority_interests   (f4 + f18)
#net_income_income_statement  f17
#net_income_common_stockholders  -f3*, f17
#net_income_continuous_operations f17
#net_income_discontinuous_operations  f3, -f6, f17
#net_income_extraordinary  f1, -f2, -f14
#net_income_from_tax_loss_carryforward   -f6, f7
#net_income_including_noncontrolling_interests   f17
#net_interest_income  NO
#net_investment_income f19
#non_interest_expense f18
#non_interest_income   f7, f18
#normalized_income  -f3, f17
#operating_expense  -f16
#operating_income    -f3, f17
#operating_revenue   -f3, f13
#pension_costs  - no data
#pretax_income   f1, -f3, f17
#reconciled_cost_of_revenue -f2, f5
#rent_and_landing_fees  f1
#research_and_development  -f7
#salaries_and_wages      -f7, f9, -f14
#securities_activities   -f7, f15
#selling_and_marketing_expense  f1, f18
#share_based_payments - no data
#staff_costs - no data
#stock_based_compensation_income_statement  - no data
#total_expenses   f1
#total_money_market_investments  - no data
#trading_gain_loss  - no data
#wagesand_salaries  - no data

#Ratios
#diluted_cont_eps_growth    f19
#diluted_eps_growth  f1, f19
#dps_growth    f6, f19
#equity_per_share_growth   f5, f12, -f14
#assets_turnover  NO
#cash_conversion_cycle   f2, f5
#common_equity_to_assets  -f1, f17
#current_ratio  -f4
#days_in_inventory  f3
#days_in_payment   f1, f3
#days_in_sales  f1, f3
#debtto_assets   -f12
#ebit_margin   -f3, f4, f17
#ebitda_margin   f17
#financial_leverage  f17
#fix_assets_turonver   -f3, -f8
#gross_margin    NO
#interest_coverage   -f12, -f14 , -(f12 + f14)
#inventory_turnover  -f2, -f14
#long_term_debt_equity_ratio  f2, f3, -f16
#long_term_debt_total_capital_ratio  f2, f3
#net_income_cont_ops_growth   -f7, f13, -f14
#net_income_growth   -f7, f13, -f14
#net_margin   f17
#normalized_net_profit_margin   f17
#operation_income_growth  -f7, -f12, -f18
#operation_margin   f12, f17
#operation_revenue_growth3_month_avg  f5
#payment_turnover   -f2, -f14
#pretax_margin   f17
#quick_ratio -f4, f7
#receivable_turnover   -f10
#revenue_growth    -f3, f5
#roa   -f3, f17
#roe   -f3, f17
#roic  NO
#sales_per_employee  f17, -f18
#total_debt_equity_ratio    -f16

# Fundamentals field under test; every custom factor class below lists it as
# its first (or only) input.
x = Fundamentals.long_term_debt_equity_ratio

class Var_growth_mean(CustomFactor):
    """Mean growth rate of ``x`` between nine snapshots of the window.

    Snapshots are taken at the latest row and at rows QL, 2*QL, ..., 8*QL
    counted from the end of the window.  Growth between two consecutive
    snapshots is ``(newer - older) / older`` -- the same sign convention as
    ``Var_growth_q0``.  The NaN-aware mean of the eight growth rates is
    cleaned with ``preprocess`` and written to ``out``.

    NOTE: the earlier implementation ordered the snapshots newest-first, so
    ``np.diff`` yielded ``(older - newer) / older``, i.e. the negated growth.
    Coefficient signs recorded against that version flip for this factor.
    """
    inputs = [x]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, var):
        # Chronological order (oldest first) so np.diff gives newer - older.
        snapshots = np.array([var[-QL*k] for k in range(8, 0, -1)] + [var[-1]])
        # Each difference is divided by the older of the two snapshots.
        growth = np.diff(snapshots, axis=0)/snapshots[:-1]
        mean = np.nanmean(growth, axis=0)
        out[:] = preprocess(mean)

class Var_growth_q0(CustomFactor):
    """Growth of ``x`` between row ``-QL`` and the last row of the window.

    Writes ``preprocess(latest / earlier - 1)`` to ``out``.
    """
    inputs = [x]
    window_length = QL
    window_safe = True

    def compute(self, today, assets, out, var):
        latest = var[-1]
        earlier = var[-QL]
        ratio = latest/earlier
        out[:] = preprocess(ratio - 1.0)

        
class Var_growth_stability(CustomFactor):
    """Mean-to-standard-deviation ratio of the growth rates of ``x``.

    Snapshots are taken at the latest row and at rows QL, 2*QL, ..., 8*QL
    counted from the end of the window.  Growth between two consecutive
    snapshots is ``(newer - older) / older`` -- the same sign convention as
    ``Var_growth_q0``.  The NaN-aware mean of the eight growth rates divided
    by their NaN-aware standard deviation is cleaned with ``preprocess`` and
    written to ``out``.

    NOTE: the earlier implementation ordered the snapshots newest-first, so
    ``np.diff`` yielded ``(older - newer) / older``, i.e. the negated growth.
    The standard deviation is unaffected, but the mean (and therefore this
    factor) changes sign relative to that version.
    """
    inputs = [x]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, var):
        # Chronological order (oldest first) so np.diff gives newer - older.
        snapshots = np.array([var[-QL*k] for k in range(8, 0, -1)] + [var[-1]])
        # Each difference is divided by the older of the two snapshots.
        growth = np.diff(snapshots, axis=0)/snapshots[:-1]
        mean = np.nanmean(growth, axis=0)
        std = np.nanstd(growth, axis=0)
        out[:] = preprocess(mean/std)


class Var_to_TA_mean(CustomFactor):
    """Mean of ``x / total_assets`` over nine snapshots of the window.

    The ratio is sampled at the latest row and at rows QL, 2*QL, ..., 8*QL
    counted from the end of the window; the NaN-aware mean of the samples is
    cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_assets]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_TA_stability(CustomFactor):
    """Mean / standard deviation of ``x / total_assets`` over nine snapshots.

    The ratio is sampled at the latest row and at rows QL, 2*QL, ..., 8*QL
    counted from the end of the window; the NaN-aware mean divided by the
    NaN-aware standard deviation of the samples is cleaned with
    ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_assets]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)

        
class Var_to_TL_mean(CustomFactor):
    """Mean of ``x / total_debt`` over nine snapshots of the window.

    Despite the ``TL`` in the class name, the denominator input is
    ``Fundamentals.total_debt`` (received by ``compute`` as ``ta``).  The
    ratio is sampled at the latest row and at rows QL, 2*QL, ..., 8*QL
    counted from the end of the window; the NaN-aware mean of the samples is
    cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_debt]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_TL_stability(CustomFactor):
    """Mean / standard deviation of ``x / total_debt`` over nine snapshots.

    Despite the ``TL`` in the class name, the denominator input is
    ``Fundamentals.total_debt`` (received by ``compute`` as ``ta``).  The
    ratio is sampled at the latest row and at rows QL, 2*QL, ..., 8*QL
    counted from the end of the window; the NaN-aware mean divided by the
    NaN-aware standard deviation of the samples is cleaned with
    ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_debt]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)
        
class Var_to_EV_mean(CustomFactor):
    """Mean of ``x / enterprise_value`` over nine snapshots of the window.

    The denominator input is ``Fundamentals.enterprise_value`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean of the samples is cleaned with ``preprocess`` and written
    to ``out``.
    """
    inputs = [x, Fundamentals.enterprise_value]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_EV_stability(CustomFactor):
    """Mean / standard deviation of ``x / enterprise_value`` over nine snapshots.

    The denominator input is ``Fundamentals.enterprise_value`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean divided by the NaN-aware standard deviation of the samples
    is cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.enterprise_value]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)
        
        
class Var_to_TC_mean(CustomFactor):
    """Mean of ``x / total_capitalization`` over nine snapshots of the window.

    The denominator input is ``Fundamentals.total_capitalization`` (received
    by ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean of the samples is cleaned with ``preprocess`` and written
    to ``out``.
    """
    inputs = [x, Fundamentals.total_capitalization]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_TC_stability(CustomFactor):
    """Mean / standard deviation of ``x / total_capitalization`` over nine snapshots.

    The denominator input is ``Fundamentals.total_capitalization`` (received
    by ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean divided by the NaN-aware standard deviation of the samples
    is cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_capitalization]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)
        

class Var_to_TE_mean(CustomFactor):
    """Mean of ``x / total_equity`` over nine snapshots of the window.

    The denominator input is ``Fundamentals.total_equity`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean of the samples is cleaned with ``preprocess`` and written
    to ``out``.
    """
    inputs = [x, Fundamentals.total_equity]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_TE_stability(CustomFactor):
    """Mean / standard deviation of ``x / total_equity`` over nine snapshots.

    The denominator input is ``Fundamentals.total_equity`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean divided by the NaN-aware standard deviation of the samples
    is cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_equity]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)
        
        
class Var_to_TR_mean(CustomFactor):
    """Mean of ``x / total_revenue`` over nine snapshots of the window.

    The denominator input is ``Fundamentals.total_revenue`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean of the samples is cleaned with ``preprocess`` and written
    to ``out``.
    """
    inputs = [x, Fundamentals.total_revenue]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_TR_stability(CustomFactor):
    """Mean / standard deviation of ``x / total_revenue`` over nine snapshots.

    The denominator input is ``Fundamentals.total_revenue`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean divided by the NaN-aware standard deviation of the samples
    is cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_revenue]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)
        
class Var_to_TInv_mean(CustomFactor):
    """Mean of ``x / total_investments`` over nine snapshots of the window.

    The denominator input is ``Fundamentals.total_investments`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean of the samples is cleaned with ``preprocess`` and written
    to ``out``.
    """
    inputs = [x, Fundamentals.total_investments]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_TInv_stability(CustomFactor):
    """Mean / standard deviation of ``x / total_investments`` over nine snapshots.

    The denominator input is ``Fundamentals.total_investments`` (received by
    ``compute`` as ``ta``).  The ratio is sampled at the latest row and at
    rows QL, 2*QL, ..., 8*QL counted from the end of the window; the
    NaN-aware mean divided by the NaN-aware standard deviation of the samples
    is cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, Fundamentals.total_investments]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v/ta
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)

class Var_to_MCap_mean(CustomFactor):
    """Mean of ``x / (close * shares_outstanding)`` over nine snapshots.

    The denominator is the product of ``USEquityPricing.close`` and
    ``Fundamentals.shares_outstanding``.  The ratio is sampled at the latest
    row and at rows QL, 2*QL, ..., 8*QL counted from the end of the window;
    the NaN-aware mean of the samples is cleaned with ``preprocess`` and
    written to ``out``.
    """
    inputs = [x, USEquityPricing.close, Fundamentals.shares_outstanding]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, close, shares):
        ratio = v/(close*shares)
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        out[:] = preprocess(np.nanmean(samples, axis=0))
    
class Var_to_MCap_stability(CustomFactor):
    """Mean / standard deviation of ``x / (close * shares_outstanding)``.

    The denominator is the product of ``USEquityPricing.close`` and
    ``Fundamentals.shares_outstanding``.  The ratio is sampled at the latest
    row and at rows QL, 2*QL, ..., 8*QL counted from the end of the window;
    the NaN-aware mean divided by the NaN-aware standard deviation of the
    samples is cleaned with ``preprocess`` and written to ``out``.
    """
    inputs = [x, USEquityPricing.close, Fundamentals.shares_outstanding]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, close, shares):
        ratio = v/(close*shares)
        # Offsets from the end of the window: 1, QL, 2*QL, ..., 8*QL.
        offsets = [1] + [QL*k for k in range(1, 9)]
        samples = [ratio[-off] for off in offsets]
        avg = np.nanmean(samples, axis=0)
        spread = np.nanstd(samples, axis=0)
        out[:] = preprocess(avg/spread)
        
       
    
#assets - liabilities = equity
#priced assets * mcap/equity 
    
# Universe used as the mask of every factor and as the pipeline screen.
base_universe = QTradableStocksUS()

# Business days since the as-of date of the FactSet 'SALES' / 'qf' actuals slice.
guidance_days_prev = BusinessDaysSincePreviousEvent(
    inputs=[fe.Actuals.slice('SALES', 'qf', 0).asof_date]
)

# Growth factors of x.
f1 = Var_growth_mean(mask=base_universe)
f2 = Var_growth_q0(mask=base_universe)
f3 = Var_growth_stability(mask=base_universe)

# x scaled by a balance-sheet / valuation quantity: mean and stability variants.
f4 = Var_to_TA_mean(mask=base_universe)
f5 = Var_to_TA_stability(mask=base_universe)
f6 = Var_to_TL_mean(mask=base_universe)
f7 = Var_to_TL_stability(mask=base_universe)
f8 = Var_to_TC_mean(mask=base_universe)
f9 = Var_to_TC_stability(mask=base_universe)
f10 = Var_to_EV_mean(mask=base_universe)
f11 = Var_to_EV_stability(mask=base_universe)
f12 = Var_to_TE_mean(mask=base_universe)
f13 = Var_to_TE_stability(mask=base_universe)
f14 = Var_to_TR_mean(mask=base_universe)
f15 = Var_to_TR_stability(mask=base_universe)
f16 = Var_to_TInv_mean(mask=base_universe)
f17 = Var_to_TInv_stability(mask=base_universe)
f18 = Var_to_MCap_mean(mask=base_universe)
f19 = Var_to_MCap_stability(mask=base_universe)

# Built-in style factors from quantopian.pipeline.experimental.
mom = Momentum(mask=base_universe)
strv = ShortTermReversal(mask=base_universe)
sz = Size(mask=base_universe)
val = Value(mask=base_universe)
vol = Volatility(mask=base_universe)

# Column name -> term; the custom factors are keyed 'f1' .. 'f19' in order.
custom_factors = [f1, f2, f3, f4, f5, f6, f7, f8, f9, f10,
                  f11, f12, f13, f14, f15, f16, f17, f18, f19]
pipeline_columns = dict(
    ('f%d' % (pos + 1), term) for pos, term in enumerate(custom_factors)
)
pipeline_columns.update({
    'momentum': mom,
    'reversal': strv,
    'size': sz,
    'value': val,
    'volatility': vol,
    'guidance_days_prev': guidance_days_prev,
})

pipe_alpha_factors = Pipeline(columns=pipeline_columns, screen=base_universe)

output = run_pipeline(pipe_alpha_factors, sd, ed)

In [None]:
assets = output.index.levels[1].unique()
# Forward returns look up to 10 rows ahead (the 5-row shift and the alphalens
# periods=(5, 10) used further down), so pricing has to extend past the last
# factor date.  Previously the request stopped at `ed`, which left the final
# factor dates without forward returns.  Fetch one extra calendar month.
pricing_end = (pd.Timestamp(ed) + pd.DateOffset(months=1)).strftime('%Y-%m-%d')
pricing = get_pricing(assets, start_date=sd, end_date=pricing_end, fields='close_price')


In [None]:
# Columns of `output` that are demeaned below: the 19 custom factors followed
# by the five built-in style factors.
factor_names = ['f%d' % num for num in range(1, 20)]
factor_names += ['momentum', 'size', 'value', 'volatility', 'reversal']

def wmean(name):
    """Per-date market-cap-weighted mean of column ``name`` of ``output``.

    Groups by the first index level of ``output`` and returns, for each
    group, ``sum(mkt_cap * value) / sum(mkt_cap)``.  Rows whose value is NaN
    do not contribute to either sum.

    The earlier version divided by the *mean* of ``mkt_cap`` rather than its
    sum, which inflated the result by the number of rows in each group.

    NOTE(review): ``output`` must contain a 'mkt_cap' column; the pipeline
    defined in this notebook does not add one -- confirm before using.
    """
    values = output[name]
    # Drop the weight wherever the value is missing so that numerator and
    # denominator cover the same rows.
    weights = output['mkt_cap'].where(values.notnull())
    weighted_sum = (weights*values).groupby(level=0).sum()
    return weighted_sum/weights.groupby(level=0).sum()

data = pd.DataFrame()
# data = output[['guidance_days_prev']]

# Demean the alphas: subtract, per first index level, the mean of each factor.
for name in factor_names:
    shadow = 's' + name
    # A copy of the raw column is kept on `output` under the 's'-prefixed name.
    output[shadow] = output[name]
    level_mean = output[shadow].groupby(level=0).mean()
    data[name] = output[shadow].sub(level_mean, level=0)
#     output[name] = output[name].sub(wmean(name), level=0)



In [None]:
# 5-row forward return: percentage change over five rows, shifted back so each
# row holds the change over the following five rows.
# NOTE: this rebinds `returns`, hiding the function of the same name imported
# from quantopian.research at the top of the notebook.
returns = pricing.pct_change(periods=5).shift(-5)
# Demean the returns: subtract each row's mean across columns, then stack
# into long form.
row_mean = returns.mean(axis=1)
returns = returns.sub(row_mean, axis=0).stack()
# Join with the demeaned factors and keep only rows with no missing value.
data = pd.concat([returns.rename('returns'), data], axis=1).dropna()

In [None]:
# Move the index levels into ordinary columns and name them 'dates' and 'sid'
# (pandas calls them level_0 / level_1 when the levels carry no names).
data = data.reset_index().rename(columns={'level_0': 'dates', 'level_1': 'sid'})

In [None]:
# Correlation between the f2 and f3 columns.
f2_f3_corr = data['f2'].corr(data['f3'])
print(f2_f3_corr)
# print data['f17'].head(10)

In [None]:
# significant_factor_names = f1, f14, f16
# Ad-hoc composite column; it is not among the regressors used below.
data['x'] = data['f12'].add(data['f14'])
# + data['f19'] + data['f9'] + data['f11']

significant_factor_names = ['f2', 'f3', 'f16'] #
# significant_factor_names = ['f1', 'momentum', 'reversal', 'size', 'value', 'volatility']

# OLS of 100 * returns on the selected factors (no constant column is added),
# with standard errors clustered on the 'dates' column.
est = sm.OLS(endog=100*data[['returns']],
             exog=data[significant_factor_names])

est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

print(est2.summary())

In [None]:
# Single-factor regressions for f1 .. f19.
#
# This replaces nineteen copy-pasted cells that differed only in the factor
# name.  For each factor: OLS of 100 * returns on that one column (no constant
# column is added), standard errors clustered on the 'dates' column, summary
# printed.  After the loop `significant_factor_names`, `est` and `est2` hold
# the f19 fit, exactly as they did after the last of the original cells.
for factor_name in ['f%d' % num for num in range(1, 20)]:
    significant_factor_names = [factor_name]

    est = sm.OLS(
        100*data[['returns']],
        data[significant_factor_names])

    est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

    print(est2.summary())

In [None]:
# Joint regression on all nineteen custom factors, f1 .. f19.
significant_factor_names = ['f%d' % num for num in range(1, 20)]

# significant_factor_names = ['f1', 'f2', 'f3', 'f4', 'f5', 'f6', 'f7',
# 'f8', 'f9', 'f10', 'f11', 'f14', 'f15']


# OLS of 100 * returns on the factors (no constant column is added), with
# standard errors clustered on the 'dates' column.
est = sm.OLS(endog=100*data[['returns']],
             exog=data[significant_factor_names])

est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

print(est2.summary())

In [None]:
# Index the joined frame by (dates, sid) and use the f12 column as the alpha
# handed to alphalens.
d = data.set_index(['dates', 'sid'])
net_alpha = d.loc[:, 'f12']

# Quintile buckets, forward-return horizons of 5 and 10 periods.
alphalens_options = dict(quantiles=5, periods=(5, 10))
factor_data = alphalens.utils.get_clean_factor_and_forward_returns(
    net_alpha, pricing, **alphalens_options)

# alphalens.tears.create_returns_tear_sheet(factor_data)
# alphalens.tears.create_full_tear_sheet(factor_data)
alphalens.tears.create_summary_tear_sheet(factor_data)