# Classifiers
### Example: Built-in vs. Generic Classifiers

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Returns
from quantopian.research import run_pipeline

In [2]:
from quantopian.pipeline.data.morningstar import asset_classification, valuation
from quantopian.pipeline.classifiers.morningstar import Sector

# These produce teh same data, but Sector has symbolic constants and hand-written docs:
sector_generic = asset_classification.morningstar_sector_code
sector_builtin = Sector()

In [3]:
sector_generic

asset_classification<US>.morningstar_sector_code::int64

In [4]:
sector_builtin

Sector([asset_classification<US>.morningstar_sector_code], 1)

In [5]:
print("Docs for built-in Sector class:\n" + sector_builtin.__doc__)

Docs for built-in Sector class:

        Classifier that groups assets by Morningstar Sector Code.

        There are 11 possible classifications:

        * 101 - Basic Materials
        * 102 - Consumer Cyclical
        * 103 - Financial Services
        * 104 - Real Estate
        * 205 - Consumer Defensive
        * 206 - Healthcare
        * 207 - Utilities
        * 308 - Communication Services
        * 309 - Energy
        * 310 - Industrials
        * 311 - Technology

        These values are provided as integer constants on the class.

        For more information on morningstar classification codes, see:
        https://www.quantopian.com/help/fundamentals#industry-sector.
        


In [6]:
print "Symbolic Constants:"
dir(sector_builtin)[1:12]

Symbolic Constants:


['BASIC_MATERIALS',
 'COMMUNICATION_SERVICES',
 'CONSUMER_CYCLICAL',
 'CONSUMER_DEFENSIVE',
 'ENERGY',
 'FINANCIAL_SERVICES',
 'HEALTHCARE',
 'INDUSTRIALS',
 'REAL_ESTATE',
 'SECTOR_NAMES',
 'TECHNOLOGY']

In [7]:
from quantopian.pipeline.data.morningstar import valuation_ratios
# https://www.quantopian.com/posts/pipeline-classifiers-are-here

def grouped_earnings_yield_example():
    sector = Sector()
    # earning_yield = valuation_ratios.earning_yield.latest
    
    # zscored_naive = earning_yield.zscore()
    # zscored_grouped = earning_yield.zscore(groupby=sector)
    return Pipeline(
        columns={
            'sector': sector,
            # 'yield': earning_yield,
            #'yield_zscored': zscored_naive,
            #'yield_zscored_grouped': zscored_grouped,
        },
        # screen=zscored_grouped.notnull(),
    )
    
yields = run_pipeline(grouped_earnings_yield_example(), '2019-07-26', '2019-07-26')
yields.head()



Unnamed: 0,Unnamed: 1,sector
2019-07-26 00:00:00+00:00,Equity(2 [ARNC]),310
2019-07-26 00:00:00+00:00,Equity(21 [AAME]),103
2019-07-26 00:00:00+00:00,Equity(24 [AAPL]),311
2019-07-26 00:00:00+00:00,Equity(25 [ARNC_PR]),101
2019-07-26 00:00:00+00:00,Equity(41 [ARCB]),310


In [8]:
sector_df = yields.sector.unstack().fillna(0)
sector_df

Unnamed: 0,Equity(2 [ARNC]),Equity(21 [AAME]),Equity(24 [AAPL]),Equity(25 [ARNC_PR]),Equity(41 [ARCB]),Equity(52 [ABM]),Equity(53 [ABMD]),Equity(62 [ABT]),Equity(64 [GOLD]),Equity(66 [AB]),...,Equity(53369 [CNBS]),Equity(53370 [HCAT]),Equity(53371 [PDEV]),Equity(53372 [PLC]),Equity(53373 [PSM]),Equity(53374 [CSTL]),Equity(53379 [LVGO]),Equity(53380 [TOKE]),Equity(53381 [RCB]),Equity(53382 [FLLC_U])
2019-07-26 00:00:00+00:00,310,103,311,101,310,310,206,206,101,103,...,-1,311,-1,-1,-1,206,206,-1,-1,310


### Quiz 1
How many unique sectors are in the yields variable?

In [9]:
from scipy.stats import itemfreq
itemfreq(yields.sector)

array([[  -1, 2728],
       [ 101,  297],
       [ 102,  603],
       [ 103, 1531],
       [ 104,  453],
       [ 205,  219],
       [ 206,  908],
       [ 207,  138],
       [ 308,   90],
       [ 309,  317],
       [ 310,  813],
       [ 311,  732]])

In [10]:
unique, counts = np.unique(yields.sector, return_counts = True)
print(np.asarray((unique, counts)).T)

[[  -1 2728]
 [ 101  297]
 [ 102  603]
 [ 103 1531]
 [ 104  453]
 [ 205  219]
 [ 206  908]
 [ 207  138]
 [ 308   90]
 [ 309  317]
 [ 310  813]
 [ 311  732]]


In [11]:
len(unique)

12

In [12]:
len(counts)

12

In [13]:
yields.shape

(8829, 1)

In [14]:
from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.filters.morningstar import IsPrimaryShare

def masked_zscore_returns_example():
    returns = Returns(window_length=252)
    
    is_liquid = AverageDollarVolume(window_length=30).percentile_between(75, 100)
    is_primary = IsPrimaryShare()
    no_returns_outliers = returns.percentile_between(2, 98)
    base_universe = is_liquid & no_returns_outliers & is_primary
    
    masked_zscored = returns.zscore(mask=base_universe)
    
    return Pipeline(
        columns={'1YearReturns': returns}, 
        screen=masked_zscored.notnull()
    )

results2 = run_pipeline(masked_zscore_returns_example(), '2017-07-26', '2019-07-26')
results2.head()



Unnamed: 0,Unnamed: 1,1YearReturns
2017-07-26 00:00:00+00:00,Equity(2 [ARNC]),-0.179684
2017-07-26 00:00:00+00:00,Equity(24 [AAPL]),0.610342
2017-07-26 00:00:00+00:00,Equity(53 [ABMD]),0.297803
2017-07-26 00:00:00+00:00,Equity(62 [ABT]),0.194298
2017-07-26 00:00:00+00:00,Equity(67 [ADSK]),0.911319


In [15]:
returns_df = results2['1YearReturns'].unstack().fillna(0)
returns_df

Unnamed: 0,Equity(2 [ARNC]),Equity(24 [AAPL]),Equity(31 [ABAX]),Equity(41 [ARCB]),Equity(52 [ABM]),Equity(53 [ABMD]),Equity(62 [ABT]),Equity(66 [AB]),Equity(67 [ADSK]),Equity(76 [TAP]),...,Equity(52084 [VNE]),Equity(52100 [AVLR]),Equity(52121 [EPRT]),Equity(52144 [LOVE]),Equity(52159 [BJ]),Equity(52165 [DOMO]),Equity(52200 [RPAY]),Equity(52209 [ALLK]),Equity(52211 [TLRY]),Equity(52233 [BE])
2017-07-26 00:00:00+00:00,-0.179684,0.610342,0.0,0.0,0.000000,0.297803,0.194298,0.0,0.911319,-0.082025,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-07-27 00:00:00+00:00,-0.196155,0.519862,0.0,0.0,0.000000,0.251034,0.182805,0.0,0.914108,-0.027925,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-07-28 00:00:00+00:00,-0.205803,0.470509,0.0,0.0,0.000000,0.243652,0.155659,0.0,0.876682,-0.068139,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-07-31 00:00:00+00:00,-0.206236,0.462187,0.0,0.0,0.000000,0.239702,0.136459,0.0,0.875526,-0.113254,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-08-01 00:00:00+00:00,-0.201164,0.429557,0.0,0.0,0.000000,0.238645,0.113174,0.0,0.854728,-0.102743,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-08-02 00:00:00+00:00,-0.178297,0.463801,0.0,0.0,0.000000,0.234621,0.117307,0.0,0.924659,-0.100695,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-08-03 00:00:00+00:00,-0.193450,0.513793,0.0,0.0,0.000000,0.234610,0.122259,0.0,0.883490,-0.062769,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-08-04 00:00:00+00:00,-0.177343,0.488850,0.0,0.0,0.000000,0.218040,0.123206,0.0,0.852017,-0.074945,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-08-07 00:00:00+00:00,-0.192261,0.474601,0.0,0.0,0.000000,0.256363,0.115271,0.0,0.826871,-0.071113,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2017-08-08 00:00:00+00:00,-0.182619,0.485271,0.0,0.0,0.000000,0.275237,0.122004,0.0,0.849250,-0.077038,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [16]:
from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.filters.morningstar import IsPrimaryShare

def masked_zscore_returns_example():
    returns = Returns(window_length=252)
    sector = Sector()
    
    is_liquid = AverageDollarVolume(window_length=30).percentile_between(75, 100)
    is_primary = IsPrimaryShare()
    no_returns_outliers = returns.percentile_between(2, 98)
    base_universe = is_liquid & no_returns_outliers & is_primary
    
    masked_zscored = returns.demean(groupby=Sector()).zscore(mask=base_universe)
    
    return Pipeline(
        columns={'1YearReturns': returns, 'sector': sector}, 
        screen=masked_zscored.notnull()
    )

results2 = run_pipeline(masked_zscore_returns_example(), '2017-07-26', '2019-07-26')
results2.head()



Unnamed: 0,Unnamed: 1,1YearReturns,sector
2017-07-26 00:00:00+00:00,Equity(2 [ARNC]),-0.179684,310
2017-07-26 00:00:00+00:00,Equity(24 [AAPL]),0.610342,311
2017-07-26 00:00:00+00:00,Equity(53 [ABMD]),0.297803,206
2017-07-26 00:00:00+00:00,Equity(62 [ABT]),0.194298,206
2017-07-26 00:00:00+00:00,Equity(67 [ADSK]),0.911319,311


### Ranked returns

In [28]:
from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.filters.morningstar import IsPrimaryShare

def masked_zscore_returns_example():
    returns = Returns(window_length=252)
    sector = Sector()
    
    is_liquid = AverageDollarVolume(window_length=30).percentile_between(25, 100)
    is_primary = IsPrimaryShare()
    no_returns_outliers = returns.percentile_between(2, 98)
    base_universe = is_liquid & no_returns_outliers & is_primary
    
    masked_zscored = returns.demean(groupby=Sector()).zscore(mask=base_universe)
    ranked_return = returns.rank()
    
    return Pipeline(
        columns={'1YearReturns': returns, 'sector': sector, 'ranked_return': ranked_return}, 
        screen=masked_zscored.notnull()
    )

results2 = run_pipeline(masked_zscore_returns_example(), '2017-07-26', '2019-07-26')
results2.head()



Unnamed: 0,Unnamed: 1,1YearReturns,ranked_return,sector
2017-07-26 00:00:00+00:00,Equity(2 [ARNC]),-0.179684,946.0,310
2017-07-26 00:00:00+00:00,Equity(24 [AAPL]),0.610342,6456.0,311
2017-07-26 00:00:00+00:00,Equity(31 [ABAX]),0.076093,3104.0,206
2017-07-26 00:00:00+00:00,Equity(41 [ARCB]),0.265384,5160.0,310
2017-07-26 00:00:00+00:00,Equity(52 [ABM]),0.201596,4641.0,310


In [29]:
results2 = results2.sort_values(by=['ranked_return'], ascending = False)

In [30]:
results2.filter(like='2019-07-26', axis = 0).head(50)

Unnamed: 0,Unnamed: 1,1YearReturns,ranked_return,sector
2019-07-26 00:00:00+00:00,Equity(22364 [EXAS]),0.854083,7097.0,206
2019-07-26 00:00:00+00:00,Equity(50967 [SAFE]),0.852824,7096.0,104
2019-07-26 00:00:00+00:00,Equity(6449 [RGEN]),0.85271,7095.0,206
2019-07-26 00:00:00+00:00,Equity(52115 [EIDX]),0.851832,7094.0,206
2019-07-26 00:00:00+00:00,Equity(44757 [NRC]),0.846499,7093.0,206
2019-07-26 00:00:00+00:00,Equity(40399 [IPHI]),0.844657,7092.0,311
2019-07-26 00:00:00+00:00,Equity(51746 [VCTR]),0.843233,7091.0,103
2019-07-26 00:00:00+00:00,Equity(46498 [REPH]),0.828678,7088.0,206
2019-07-26 00:00:00+00:00,Equity(35259 [LRN]),0.809294,7087.0,205
2019-07-26 00:00:00+00:00,Equity(24827 [RCII]),0.807875,7086.0,310
