In [None]:
import numpy as np
import statsmodels.api as sm
import math
import pandas as pd
from quantopian.research import run_pipeline, returns, get_pricing
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns, PercentChange, SimpleMovingAverage, BusinessDaysSincePreviousEvent, BusinessDaysUntilNextEvent
from quantopian.pipeline.filters import QTradableStocksUS
from quantopian.pipeline.data import Fundamentals
from quantopian.pipeline.data import factset
from quantopian.pipeline.data.psychsignal import stocktwits
from quantopian.pipeline import factors, filters, classifiers
import alphalens
from scipy.stats.mstats import winsorize
from sklearn import preprocessing
from scipy.stats.mstats import gmean
from zipline.utils.numpy_utils import (
    repeat_first_axis,
    repeat_last_axis,
)
from quantopian.pipeline.data.factset import RBICSFocus
import quantopian.pipeline.data.factset.estimates as fe


In [None]:
#Factors taken from https://www.quantopian.com/posts/alpha-combination-via-clustering
# Fraction trimmed from each tail by winsorize() inside preprocess(); it is
# passed as both the lower and the upper limit.
WIN_LIMIT = 0
# Row spacing used when sampling the factor lookback windows
# (presumably about one quarter of trading days -- TODO confirm).
QL = 66

def signalize(df):
    """Map values to rank-based scores in (0, 1), filling gaps with the mean score.

    Each value becomes ``(rank - 0.5) / count`` where ``count`` is the number
    of non-missing observations; entries that end up NaN are replaced by the
    mean of the resulting scores.
    """
    ranks = df.rank()
    n_valid = df.count()
    scores = (ranks - 0.5) / n_valid
    fill_value = scores.mean()
    return scores.replace(np.nan, fill_value)

def preprocess(a):
    """Clean and scale a raw factor array.

    Works on a float64 copy of ``a``: infinities are turned into NaN, the
    NaN-aware mean is subtracted, remaining NaNs become 0 (i.e. the mean),
    the result is winsorized with ``WIN_LIMIT`` on both tails and finally
    passed through ``sklearn.preprocessing.scale``.
    """
    values = a.astype(np.float64)  # astype copies, so the caller's array is untouched
    values[np.isinf(values)] = np.nan
    centered = values - np.nanmean(values)
    filled = np.nan_to_num(centered)
    clipped = winsorize(filled, limits=[WIN_LIMIT, WIN_LIMIT])

    return preprocessing.scale(clipped)
        
####Testing Var - banking related var growth and var/ta ratio
#ceded_premium - NO
#cost_of_revenue - WORKS (s/r)
#credit_card - WORkS_P(s)
#credit_losses_provision - NO
#credit_risk_provisions - No data
#dividend_income  WORKS(r)
#earningsfrom_equity_interest_net_of_tax - Yes (s)
#fees_and_commissions - WORKS (R)
#foreign_exchange_trading_gains - WORKS(R)
#gain_on_sale_of_security - NO
#gainon_investment_properties -  no data 
# Fundamentals.gainon_saleof_loans - WORKS (R)
#Fundamentals.interest_income - NO
#Fundamentals.interest_income_after_provision_for_loan_loss - NO
#Fundamentals.interest_income_from_deposits  - Works (s)
#Fundamentals.interest_income_from_federal_funds_sold_and_securities_purchase_under_agreements_to_resell (R)
#Fundamentals.interest_income_from_leases (R)
#Fundamentals.interest_income_from_securities (s)
#Fundamentals.interest_income_non_operating (s)
#Fundamentals.interest_income_other_operating_income - no data
#Fundamentals.investment_banking_profit - NO
#Fundamentals.non_interest_income - (s/R)
#Fundamentals.operating_income - WORKS but correlated
#Fundamentals.other_operating_income_total - no data
#Fundamentals.policyholder_dividends - WORKS
#Fundamentals.policyholder_interest - WORKS
#Fundamentals.securities_activities - NO
#Fundamentals.service_charge_on_depositor_accounts - NO
#Fundamentals.total_money_market_investments - NO
#Fundamentals.total_premiums_earned - WORKS (R)
#Fundamentals.total_revenue - WORKS  but correlated
#Fundamentals.trading_gain_loss -  WORKS but little data (Rs)
#Fundamentals.underwriting_expenses - NO
#advancesfrom_central_banks - No data
#assets_held_for_sale - NO
#bank_loans_total - No data

#cash  - WORKS (R) D
#claims_outstanding - no data
#convertible_loans_current - no data
#current_assets -> (-alpha93) D
#current_deferred_taxes_assets -> (-alpha93/-alpha94/-alpha95) #D
#current_deferred_taxes_liabilities -> (alpha91/alpha95) #D
#current_notes_payable -> (alpha94) #D
#current_provisions -> (alpha95) #D
#customer_acceptances - NO
#customer_accounts - (alpha91/alpha92  - sum works (dd)) #D
#debt_securities - no data
#deferred_assets - NO
#deferred_costs - No
#deferred_tax_assets - alpha92 #D
#deferred_tax_liabilities_total - No
#deposits_madeunder_assumed_reinsurance_contract - No data
#depositsby_bank - alpha91/alpha94  #D
#derivative_assets  - NO
#dividends_payable - NO
#equity_investments - NO
#federal_funds_purchased - alpha96 #D
#federal_funds_sold - alpha91/alpha92    #D
#financial_assets - NO
#financial_liabilities_current - No data
#fixed_maturity_investments - NO
#foreclosed_assets - (-alpha96) #D
#future_policy_benefits - NO
#gains_losses_not_affecting_retained_earnings - NO
#hedging_assets_current - NO
#held_to_maturity_securities -  NO
#income_tax_payable - NO
#insurance_contract_assets - NO
#insurance_contract_liabilities - NO
#insurance_payables - NO
#interest_bearing_deposits_assets - NO
#interest_payable - NO
#investment_contract_liabilities - no data
#investment_properties - alpha93/alpha94/alpha96
#investmentin_financial_assets - alpha93/alpha96
#investments_and_advances  - alpha95
#investments_in_other_ventures_under_equity_method - NO
#investmentsin_associatesat_cost - alpha95
#investmentsin_subsidiariesat_cost - alpha94/alpha96
#investmentsin_joint_venturesat_cost - alpha96
#line_of_credit - NO
#loans_held_for_sale - NO
#loansand_advancesto_bank - NO
#loansand_advancesto_customer - NO
#long_term_investments - alpha93/alpha95
#long_term_provisions - alpha92
#minority_interest_balance_sheet - alpha93
#money_market_investments - (-alpha91)
#mortgage_and_consumerloans - NO
# mortgage_loan - NO
#policyholder_funds - alpha95
#prepaid_assets - NO
#properties - alpha94
#provisions_total - no data
#real_estate - no data
#receivables - NO
#regulatory_assets - no data
#reinsurance_assets - no data
#reinsurance_balances_payable - NO
#reinsurance_receivables - NO
#restricted_cash - NO
#restricted_investments - alpha94
#retained_earnings - NO
#securities_and_investments - alpha91
#securities_lending_collateral -NO
#securities_loaned - NO
#security_borrowed - (-alpha96)
#security_sold_not_yet_repurchased - NO
#short_term_investments_available_for_sale - NO
#short_term_investments_held_to_maturity - no data
#subordinated_liabilities - alpha91/alpha94
#tangible_book_value - NO
#tax_assets_total - no data
#total_deposits - NO
#total_equity - alpha96
#total_non_current_assets - (-alpha93)
#trading_assets - NO

#Cash to TA
class CashTa_mean(CustomFactor):
    """Preprocessed average of cash / total_assets over nine sampled rows.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.cash, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))

class CashTa_stability(CustomFactor):
    """Preprocessed mean/std of cash / total_assets over nine sampled rows.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset mean divided by the per-asset standard deviation is the signal.
    """
    inputs = [Fundamentals.cash, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)

# Current Assets to TA
class CurrentAssetsTa_stability(CustomFactor):
    """Negated preprocessed mean/std of current_assets / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the sign
    of the preprocessed mean/std score is flipped on output.
    """
    inputs = [Fundamentals.current_assets, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = -preprocess(level / spread)


        
#current_deferred_taxes_assets  to TA
class CDTATa_mean(CustomFactor):
    """Negated preprocessed average of current_deferred_taxes_assets / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset; the preprocessed score is negated on output.
    """
    inputs = [Fundamentals.current_deferred_taxes_assets, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        # Only the mean feeds the signal; the previously computed (and
        # discarded) standard deviation has been dropped.
        out[:] = -preprocess(np.mean(samples, axis=0))
        
        

class CDTATa_stability(CustomFactor):
    """Negated preprocessed mean/std of current_deferred_taxes_assets / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the sign
    of the preprocessed mean/std score is flipped on output.
    """
    inputs = [Fundamentals.current_deferred_taxes_assets, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = -preprocess(level / spread)
        
        
#current_deferred_taxes_liabilities to TA mean
#current_deferred_taxes_liabilities growth mean


class CDTLTa_mean(CustomFactor):
    """Preprocessed average of current_deferred_taxes_liabilities / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.current_deferred_taxes_liabilities, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        # Only the mean feeds the signal; the previously computed (and
        # discarded) standard deviation has been dropped.
        out[:] = preprocess(np.mean(samples, axis=0))


class CDTL_growth_mean(CustomFactor):
    """Preprocessed mean step change of current_deferred_taxes_liabilities.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``, i.e. the relative change
    measured going back in time (the negative of a forward growth rate).
    """
    inputs = [Fundamentals.current_deferred_taxes_liabilities]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = preprocess(np.mean(steps, axis=0))

        
#current_notes_payable -> (alpha94) 
class CNPTa_mean(CustomFactor):
    """Preprocessed average of current_notes_payable / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.current_notes_payable, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        # Only the mean feeds the signal; the previously computed (and
        # discarded) standard deviation has been dropped.
        out[:] = preprocess(np.mean(samples, axis=0))



#current_provisions -> (alpha95)
class CPTa_std(CustomFactor):
    """Preprocessed negated std of current_provisions / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset standard deviation is negated and passed to ``preprocess``.
    """
    # Fixed: this class previously read Fundamentals.current_notes_payable
    # (copied from CNPTa_mean), contradicting its name and header comment.
    inputs = [Fundamentals.current_provisions, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        spread = np.std(samples, axis=0)
        out[:] = preprocess(-spread)


#customer_accounts - (alpha91/alpha92  - sum works (dd))
class CustAccnts_growth_dd(CustomFactor):
    """Sum of two preprocessed scores on customer_accounts step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the output adds the
    preprocessed mean of the steps to the preprocessed negated std.
    """
    inputs = [Fundamentals.customer_accounts]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        level = np.mean(steps, axis=0)
        spread = np.std(steps, axis=0)
        out[:] = preprocess(level) + preprocess(-spread)

        
#deferred_tax_assets - alpha92
class DTA_growth_std(CustomFactor):
    """Preprocessed negated std of deferred_tax_assets step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the standard deviation of the
    steps is negated before preprocessing.
    """
    inputs = [Fundamentals.deferred_tax_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        spread = np.std(steps, axis=0)
        out[:] = preprocess(-spread)
        
    
#depositsby_bank - alpha91/alpha94
class DepositsBank_growth_mean(CustomFactor):
    """Preprocessed mean step change of depositsby_bank.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``, i.e. the relative change
    measured going back in time (the negative of a forward growth rate).
    """
    inputs = [Fundamentals.depositsby_bank]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = preprocess(np.mean(steps, axis=0))


class DepositsBankTa_mean(CustomFactor):
    """Preprocessed average of depositsby_bank / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.depositsby_bank, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))
        

#federal_funds_purchased - alpha96
class FdrlsFundPurchTa_stability(CustomFactor):
    """Preprocessed mean/std of federal_funds_purchased / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset mean divided by the per-asset standard deviation is the signal.
    """
    inputs = [Fundamentals.federal_funds_purchased, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)



#federal_funds_sold - alpha91/alpha92
class FdrlFundsSold_growth_mean(CustomFactor):
    """Preprocessed mean step change of federal_funds_sold.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``, i.e. the relative change
    measured going back in time (the negative of a forward growth rate).
    """
    inputs = [Fundamentals.federal_funds_sold]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = preprocess(np.mean(steps, axis=0))

        
class FdrlFundsSold_growth_std(CustomFactor):
    """Preprocessed negated std of federal_funds_sold step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the standard deviation of the
    steps is negated before preprocessing.
    """
    inputs = [Fundamentals.federal_funds_sold]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        spread = np.std(steps, axis=0)
        out[:] = preprocess(-spread)
        
        
#foreclosed_assets - (-alpha96)
class ForeclosedAssetsTa_stability(CustomFactor):
    """Negated preprocessed mean/std of foreclosed_assets / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the sign
    of the preprocessed mean/std score is flipped on output.
    """
    inputs = [Fundamentals.foreclosed_assets, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = -preprocess(level / spread)


#investment_properties - alpha93/alpha94/alpha96
class InvPropTa_mean(CustomFactor):
    """Preprocessed average of investment_properties / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.investment_properties, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))

class InvPropTa_std(CustomFactor):
    """Preprocessed negated std of investment_properties / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset standard deviation is negated and passed to ``preprocess``.
    """
    inputs = [Fundamentals.investment_properties, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        spread = np.std(samples, axis=0)
        out[:] = preprocess(-spread)
    
class InvPropTa_stability(CustomFactor):
    """Preprocessed mean/std of investment_properties / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset mean divided by the per-asset standard deviation is the signal.
    """
    inputs = [Fundamentals.investment_properties, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)

        
        
#investmentin_financial_assets - alpha93/alpha96
class InvFinAssts_growth_stability(CustomFactor):
    """Preprocessed mean/std of investmentin_financial_assets step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the mean of the steps divided
    by their standard deviation is the signal.
    """
    inputs = [Fundamentals.investmentin_financial_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        level = np.mean(steps, axis=0)
        spread = np.std(steps, axis=0)
        out[:] = preprocess(level / spread)

class InvFinAsstsTa_stability(CustomFactor):
    """Preprocessed mean/std of investmentin_financial_assets / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset mean divided by the per-asset standard deviation is the signal.
    """
    inputs = [Fundamentals.investmentin_financial_assets, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)


#investments_and_advances  - alpha95
class InvAndAdvTa_stability(CustomFactor):
    """Preprocessed negated std of investments_and_advances / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL. Despite
    the ``_stability`` suffix, only the (negated) standard deviation is used;
    no mean enters the signal.
    """
    inputs = [Fundamentals.investments_and_advances, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        spread = np.std(samples, axis=0)
        out[:] = preprocess(-spread)


#investmentsin_associatesat_cost - alpha95
class InvAssTa_stability(CustomFactor):
    """Preprocessed negated std of investmentsin_associatesat_cost / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL. Despite
    the ``_stability`` suffix, only the (negated) standard deviation is used;
    no mean enters the signal.
    """
    inputs = [Fundamentals.investmentsin_associatesat_cost, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        spread = np.std(samples, axis=0)
        out[:] = preprocess(-spread)


#investmentsin_subsidiariesat_cost - alpha94/alpha96
class InvSubdrTa_mean(CustomFactor):
    """Preprocessed average of investmentsin_subsidiariesat_cost / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.investmentsin_subsidiariesat_cost, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))

        
class InvSubdrTa_stability(CustomFactor):
    """Preprocessed std/mean of investmentsin_subsidiariesat_cost / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL.
    NOTE(review): this divides std by mean, whereas most ``*_stability``
    factors in this file divide mean by std -- confirm this is intended.
    """
    inputs = [Fundamentals.investmentsin_subsidiariesat_cost, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(spread / level)



#investmentsin_joint_venturesat_cost - alpha96
class InvJntVentrTa_stability(CustomFactor):
    """Preprocessed std/mean of investmentsin_joint_venturesat_cost / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL.
    NOTE(review): this divides std by mean, whereas most ``*_stability``
    factors in this file divide mean by std -- confirm this is intended.
    """
    inputs = [Fundamentals.investmentsin_joint_venturesat_cost, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(spread / level)


#long_term_investments - alpha93/alpha95
class LTI_growth_stability(CustomFactor):
    """Preprocessed mean/std of long_term_investments step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the mean of the steps divided
    by their standard deviation is the signal.
    """
    inputs = [Fundamentals.long_term_investments]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        level = np.mean(steps, axis=0)
        spread = np.std(steps, axis=0)
        out[:] = preprocess(level / spread)

        
class LTITa_std(CustomFactor):
    """Preprocessed negated std of long_term_investments / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset standard deviation is negated and passed to ``preprocess``.
    """
    inputs = [Fundamentals.long_term_investments, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        spread = np.std(samples, axis=0)
        out[:] = preprocess(-spread)


#long_term_provisions - alpha92
class LTP_growth_std(CustomFactor):
    """Preprocessed negated std of long_term_provisions step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the standard deviation of the
    steps is negated before preprocessing.
    """
    inputs = [Fundamentals.long_term_provisions]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        spread = np.std(steps, axis=0)
        out[:] = preprocess(-spread)



#minority_interest_balance_sheet - alpha93
class MIBS_growth_stability(CustomFactor):
    """Preprocessed mean/std of minority_interest_balance_sheet step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the mean of the steps divided
    by their standard deviation is the signal.
    """
    inputs = [Fundamentals.minority_interest_balance_sheet]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        level = np.mean(steps, axis=0)
        spread = np.std(steps, axis=0)
        out[:] = preprocess(level / spread)


#money_market_investments - (-alpha91)
class MMI_growth_mean(CustomFactor):
    """Negated preprocessed mean step change of money_market_investments.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the preprocessed mean of the
    steps is negated on output.
    """
    inputs = [Fundamentals.money_market_investments]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = -preprocess(np.mean(steps, axis=0))
        

#policyholder_funds - alpha95
class PolicyFndsTa_std(CustomFactor):
    """Preprocessed negated std of policyholder_funds / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset standard deviation is negated and passed to ``preprocess``.
    """
    inputs = [Fundamentals.policyholder_funds, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        spread = np.std(samples, axis=0)
        out[:] = preprocess(-spread)


#properties - alpha94
class PropertiesTa_mean(CustomFactor):
    """Preprocessed average of properties / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.properties, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))
        
#restricted_investments - alpha94
class RestInvTa_mean(CustomFactor):
    """Preprocessed average of restricted_investments / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.restricted_investments, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))

#securities_and_investments - alpha91
class SecInv_growth_mean(CustomFactor):
    """Preprocessed mean step change of securities_and_investments.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``, i.e. the relative change
    measured going back in time (the negative of a forward growth rate).
    """
    inputs = [Fundamentals.securities_and_investments]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = preprocess(np.mean(steps, axis=0))


#security_borrowed - (-alpha96)
class SecuritiesBorrowedTa_stability(CustomFactor):
    """Preprocessed mean/std of security_borrowed / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL.
    NOTE(review): the header comment tags this field as (-alpha96), yet the
    score is not negated here (ForeclosedAssetsTa_stability, tagged the same
    way, does negate) -- confirm the intended sign.
    """
    inputs = [Fundamentals.security_borrowed, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)


#subordinated_liabilities - alpha91/alpha94
class SubordLiab_growth_mean(CustomFactor):
    """Preprocessed mean step change of subordinated_liabilities.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``, i.e. the relative change
    measured going back in time (the negative of a forward growth rate).
    """
    inputs = [Fundamentals.subordinated_liabilities]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = preprocess(np.mean(steps, axis=0))

class SubordLiabTa_mean(CustomFactor):
    """Preprocessed average of subordinated_liabilities / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL and
    averaged per asset before being passed to ``preprocess``.
    """
    inputs = [Fundamentals.subordinated_liabilities, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        out[:] = preprocess(np.mean(samples, axis=0))

#total_equity - alpha96
class TotEqTa_stability(CustomFactor):
    """Preprocessed mean/std of total_equity / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset mean divided by the per-asset standard deviation is the signal.
    """
    inputs = [Fundamentals.total_equity, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)


#total_non_current_assets - (-alpha93)
class TotNCA_growth_stability(CustomFactor):
    """Preprocessed mean/std of total_non_current_assets step changes.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``.
    NOTE(review): the header comment tags this field as (-alpha93), yet no
    explicit negation is applied here -- confirm the intended sign.
    """
    inputs = [Fundamentals.total_non_current_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        level = np.mean(steps, axis=0)
        spread = np.std(steps, axis=0)
        out[:] = preprocess(level / spread)

class TrdngLiabTa_stability(CustomFactor):
    """Preprocessed mean/std of trading_liabilities / total_assets.

    The ratio is read at window offsets -1, -QL, -2*QL, ..., -8*QL; the
    per-asset mean divided by the per-asset standard deviation is the signal.
    """
    inputs = [Fundamentals.trading_liabilities, Fundamentals.total_assets]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        # Offsets from the end of the window, newest first.
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = ratio[offsets]

        level = np.mean(samples, axis=0)
        spread = np.std(samples, axis=0)
        out[:] = preprocess(level / spread)
        
        
# Field consumed by the generic Var_* / VarTa_* factor classes below via
# `inputs = [x]` (presumably swapped by hand to screen other fields -- confirm).
x = Fundamentals.trading_liabilities

class Var_growth_mean(CustomFactor):
    """Preprocessed mean step change of the module-level field ``x``.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``, i.e. the relative change
    measured going back in time (the negative of a forward growth rate).
    """
    inputs = [x]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        out[:] = preprocess(np.mean(steps, axis=0))

class Var_growth_std(CustomFactor):
    """Preprocessed negated std of step changes in the module-level field ``x``.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the standard deviation of the
    steps is negated before preprocessing.
    """
    inputs = [x]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        spread = np.std(steps, axis=0)
        out[:] = preprocess(-spread)

        
class Var_growth_stability(CustomFactor):
    """Preprocessed mean/std of step changes in the module-level field ``x``.

    The field is read at window offsets -1, -QL, ..., -8*QL (newest first).
    Each step is ``(older - newer) / older``; the mean of the steps divided
    by their standard deviation is the signal.
    """
    inputs = [x]
    window_length = QL * 9
    window_safe = True

    def compute(self, today, assets, out, var):
        offsets = [-1] + [-QL * k for k in range(1, 9)]
        samples = var[offsets]

        # np.diff subtracts the newer row from the next older one.
        steps = np.diff(samples, axis=0) / samples[1:]
        level = np.mean(steps, axis=0)
        spread = np.std(steps, axis=0)
        out[:] = preprocess(level / spread)


class VarTa_mean(CustomFactor):
    """Mean of ``x / total_assets`` over nine QL-spaced samples.

    Rows are sampled at offset 1 from the end of the window, then QL, 2*QL,
    ..., 8*QL.  The mean across samples goes through ``preprocess`` into
    ``out``.
    """
    inputs = [x, Fundamentals.total_assets]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        lookbacks = [1] + [QL * k for k in range(1, 9)]
        samples = np.array([ratio[-back] for back in lookbacks])

        out[:] = preprocess(samples.mean(axis=0))

class VarTa_std(CustomFactor):
    """Negated std of ``x / total_assets`` over nine QL-spaced samples.

    Rows are sampled at offset 1 from the end of the window, then QL, 2*QL,
    ..., 8*QL.  The negated standard deviation across samples goes through
    ``preprocess`` into ``out``.
    """
    inputs = [x, Fundamentals.total_assets]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        lookbacks = [1] + [QL * k for k in range(1, 9)]
        samples = np.array([ratio[-back] for back in lookbacks])

        out[:] = preprocess(-samples.std(axis=0))
    
class VarTa_stability(CustomFactor):
    """Mean / std of ``x / total_assets`` over nine QL-spaced samples.

    Rows are sampled at offset 1 from the end of the window, then QL, 2*QL,
    ..., 8*QL.  The ratio of the mean to the standard deviation across
    samples goes through ``preprocess`` into ``out``.
    """
    inputs = [x, Fundamentals.total_assets]
    window_length = QL*9
    window_safe = True

    def compute(self, today, assets, out, v, ta):
        ratio = v / ta
        lookbacks = [1] + [QL * k for k in range(1, 9)]
        samples = np.array([ratio[-back] for back in lookbacks])

        center = samples.mean(axis=0)
        spread = samples.std(axis=0)
        out[:] = preprocess(center / spread)
        
        
# Universe: QTradableStocksUS members whose RBICS level-1 name is 'Finance'.
sector = RBICSFocus.l1_name.latest
is_finance = sector.eq('Finance')
base_universe = QTradableStocksUS() & is_finance

# Business days since the as-of date of the ('SALES', 'qf', 0) actuals slice.
sales_asof_date = fe.Actuals.slice('SALES', 'qf', 0).asof_date
guidance_days_prev = BusinessDaysSincePreviousEvent(inputs=[sales_asof_date])

        
# One instance per selected factor, each masked to the finance universe.
f1a = CashTa_mean(mask=base_universe)
f1b = CashTa_stability(mask=base_universe)
f2 = CurrentAssetsTa_stability(mask=base_universe)
f3a = CDTATa_mean(mask=base_universe)
f3b = CDTATa_stability(mask=base_universe)
f4a = CDTLTa_mean(mask=base_universe)
f4b = CDTL_growth_mean(mask=base_universe)
f5 = CNPTa_mean(mask=base_universe)
f6 = CPTa_std(mask=base_universe)
f7 = CustAccnts_growth_dd(mask=base_universe)
f8 = DTA_growth_std(mask=base_universe)
f9a = DepositsBank_growth_mean(mask=base_universe)
f9b = DepositsBankTa_mean(mask=base_universe)
f10 = FdrlsFundPurchTa_stability(mask=base_universe)
f11a = FdrlFundsSold_growth_mean(mask=base_universe)
f11b = FdrlFundsSold_growth_std(mask=base_universe)
f12 = ForeclosedAssetsTa_stability(mask=base_universe)
f13a = InvPropTa_mean(mask=base_universe)
f13b = InvPropTa_std(mask=base_universe)
f13c = InvPropTa_stability(mask=base_universe)
f14a = InvFinAssts_growth_stability(mask=base_universe)
f14b = InvFinAsstsTa_stability(mask=base_universe)
f15 = InvAndAdvTa_stability(mask=base_universe)
f16 = InvAssTa_stability(mask=base_universe)
f17a = InvSubdrTa_mean(mask=base_universe)
f17b = InvSubdrTa_stability(mask=base_universe)
f18 = InvJntVentrTa_stability(mask=base_universe)
f19a = LTI_growth_stability(mask=base_universe)
f19b = LTITa_std(mask=base_universe)
# NOTE(review): f20 is instantiated here but does not appear in the pipeline
# columns or in the factor name lists further down -- confirm the omission
# is intentional.
f20 = LTP_growth_std(mask=base_universe)
f21 = MIBS_growth_stability(mask=base_universe)
f22 = MMI_growth_mean(mask=base_universe)
f23 = PolicyFndsTa_std(mask=base_universe)
f24 = PropertiesTa_mean(mask=base_universe)
f25 = RestInvTa_mean(mask=base_universe)
f26 = SecInv_growth_mean(mask=base_universe)
f27 = SecuritiesBorrowedTa_stability(mask=base_universe)
f28a = SubordLiab_growth_mean(mask=base_universe)
f28b = SubordLiabTa_mean(mask=base_universe)
f29 = TotEqTa_stability(mask=base_universe)
f30 = TotNCA_growth_stability(mask=base_universe)
f31 = TrdngLiabTa_stability(mask=base_universe)

# Factors built from the field currently under test (`x`): growth based ...
alpha91 = Var_growth_mean(mask=base_universe)
alpha92 = Var_growth_std(mask=base_universe)
alpha93 = Var_growth_stability(mask=base_universe)

# ... and scaled by total assets.
alpha94 = VarTa_mean(mask=base_universe)
alpha95 = VarTa_std(mask=base_universe)
alpha96 = VarTa_stability(mask=base_universe)


# Output column name -> pipeline term.  Each key is the name of the variable
# that holds the term.
alpha_columns = {
    'f1a': f1a,
    'f1b': f1b,
    'f2': f2,
    'f3a': f3a,
    'f3b': f3b,
    'f4a': f4a,
    'f4b': f4b,
    'f5': f5,
    'f6': f6,
    'f7': f7,
    'f8': f8,
    'f9a': f9a,
    'f9b': f9b,
    'f10': f10,
    'f11a': f11a,
    'f11b': f11b,
    'f12': f12,
    'f13a': f13a,
    'f13b': f13b,
    'f13c': f13c,
    'f14a': f14a,
    'f14b': f14b,
    'f15': f15,
    'f16': f16,
    'f17a': f17a,
    'f17b': f17b,
    'f18': f18,
    'f19a': f19a,
    'f19b': f19b,
    'f21': f21,
    'f22': f22,
    'f23': f23,
    'f24': f24,
    'f25': f25,
    'f26': f26,
    'f27': f27,
    'f28a': f28a,
    'f28b': f28b,
    'f29': f29,
    'f30': f30,
    'f31': f31,
    'alpha91': alpha91,
    'alpha92': alpha92,
    'alpha93': alpha93,
    'alpha94': alpha94,
    'alpha95': alpha95,
    'alpha96': alpha96,
    'guidance_days_prev': guidance_days_prev,
}

pipe_alpha_factors = Pipeline(columns=alpha_columns, screen=base_universe)

# Run the pipeline over the study window; `output` is what the later cells
# index by column name.
output = run_pipeline(pipe_alpha_factors, '2015-06-01', '2018-09-01')

In [None]:
# Every asset present in the second index level of the pipeline output.
assets = output.index.levels[1].unique()
# Forward returns for the last factor dates need prices from after the end of
# the pipeline window, so the price request runs one month past the
# '2018-09-01' end date used for run_pipeline.  (Previously both windows ended
# on the same day, leaving the final factor dates without a forward return.)
pricing = get_pricing(assets, start_date='2015-06-01', end_date='2018-10-01', fields='close_price')


In [None]:
# print data['alpha87'].tail(10)

In [None]:
# Pipeline columns to demean and carry into the regression frame.
factor_names = [
    'f1a', 'f1b', 'f2', 'f3a', 'f3b', 'f4a', 'f4b', 'f5', 'f6', 'f7',
    'f8', 'f9a', 'f9b', 'f10', 'f11a', 'f11b', 'f12',
    'f13a', 'f13b', 'f13c', 'f14a', 'f14b', 'f15', 'f16',
    'f17a', 'f17b', 'f18', 'f19a', 'f19b',
    'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27',
    'f28a', 'f28b', 'f29', 'f30', 'f31',
    # Factors built from the field currently under test.
    'alpha91', 'alpha92', 'alpha93', 'alpha94', 'alpha95', 'alpha96',
]

def wmean(name, frame=None, weight_col='mkt_cap'):
    """Return the weighted mean of column ``name`` per value of index level 0.

    Parameters
    ----------
    name : str
        Column to average.
    frame : DataFrame, optional
        Frame holding both ``name`` and ``weight_col``; defaults to the
        module-level ``output``.
    weight_col : str, optional
        Column supplying the weights (default ``'mkt_cap'``).

    Returns
    -------
    Series indexed by the level-0 values, sum(w * x) / sum(w) within each group.
    """
    if frame is None:
        frame = output
    values = frame[name]
    # Drop the weight of rows whose value is missing so they do not inflate
    # the denominator (the weighted sum already skips them).
    weights = frame[weight_col].where(values.notnull())
    weighted_total = (weights * values).groupby(level=0).sum()
    # Normalise by the SUM of the weights; dividing by their mean would scale
    # the result by the number of rows in each group.
    return weighted_total / weights.groupby(level=0).sum()

# Start from an explicit copy: columns are added below, and assigning into a
# bare column selection of `output` triggers pandas' SettingWithCopyWarning.
data = output[['guidance_days_prev']].copy()

# Demean the alphas per value of index level 0.
# (A weighted variant based on wmean() was tried and is disabled; wmean reads
# a 'mkt_cap' column from `output`.)
for name in factor_names:
    # NOTE(review): the 's'-prefixed copy written back to `output` looks like
    # a leftover from an earlier transform; it is kept so `output` ends up
    # with the same columns as before -- confirm nothing else reads it.
    output['s'+name] = output[name]
    raw = output['s'+name]
    data[name] = raw - raw.groupby(level=0).transform('mean')



In [None]:
# Forward return over the next FWD_RETURN_DAYS rows of `pricing`, shifted back
# so each value sits on the row it is measured from.
FWD_RETURN_DAYS = 5
forward = pricing.pct_change(periods=FWD_RETURN_DAYS).shift(-FWD_RETURN_DAYS)

# Demean the returns across columns on each row, then stack to long form.
row_mean = forward.mean(axis=1)
returns = forward.sub(row_mean, axis=0).stack()

# Put the returns next to the factor columns and keep only complete rows.
labelled_returns = returns.rename('returns')
data = pd.concat([labelled_returns, data], axis=1).dropna()

In [None]:
# Turn the unnamed index levels into regular columns with readable names.
index_labels = {'level_0': 'dates', 'level_1': 'sid'}
data = data.reset_index().rename(columns=index_labels)

In [None]:
# print data['alpha91'].corr(data['alpha92'])
# print data['alpha94'].head(10)

In [None]:
# Sum of alpha91 and alpha92 (not among the regressors below).
data['dd'] = data['alpha91'] + data['alpha92']

# Columns used as regressors.
significant_factor_names = [
    'f1a', 'f1b', 'f2', 'f3a', 'f3b', 'f4a', 'f4b',
    'f5', 'f6', 'f7', 'f8', 'f9a', 'f9b', 'f10', 'f11a', 'f11b',
    'f12', 'f13a', 'f13b', 'f13c',
    'f14a', 'f14b', 'f15', 'f16', 'f17a', 'f17b', 'f18', 'f19a', 'f19b',
    'f21', 'f22', 'f23', 'f24', 'f25', 'f26', 'f27',
    'f28a', 'f28b', 'f29', 'f30', 'f31',
]

# Returns are scaled by 100; no constant column is added to the regressors.
est = sm.OLS(100 * data[['returns']], data[significant_factor_names])

# Standard errors are clustered on the 'dates' column.
est2 = est.fit(cov_type='cluster', cov_kwds={'groups': data['dates']})

# Call form of print: valid under both Python 2 and Python 3.
print(est2.summary())