## 正規化

Factor: RSI/Bollinger Band などのテクニカル分析指標や、PER/EPS などのファンダメンタル指標などの数値のこと

多くのFactorは、他のFactorと比べる事が出来ない。
でも比較してモデルに盛り込みたい
そういう時は正規化だ

### demean

`demean()` は、同じ行(同じ日付)に並んでいる数値の平均を求め、各数値からその平均を引き算した数値を返します。


例えば、dfがこのようなDataFrameであれば

DATE|      AAPL|  MSFT|  MCD|   BK
---|---|---|---|---
2017-03-13|1|2|3|4
2017-03-14|1.5|2.5|3.5|1
2017-03-15|2|3|4|1.5
2017-03-16|2.5|3.5|1|2

df.demean() はこれを返します

date|  AAPL| MSFT|  MCD|    BK
---|---|---|---|---
2017-03-13|-1.500|-0.5|0.5|1.5
2017-03-14|-0.625|0.375|1.375|-1.125
2017-03-15|-0.625|0.375|1.375|-1.125
2017-03-16|0.25|1.25|-1.25|-0.25





In [None]:
import numpy as np
import pandas as pd
from scipy import stats

import seaborn as sns
import matplotlib.pyplot as plt

from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import Returns,BollingerBands, SimpleMovingAverage,CustomFactor
from quantopian.research import run_pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.filters import StaticAssets, Q500US, Q1500US, Q3000US, QTradableStocksUS
from quantopian.pipeline.classifiers.fundamentals import Sector  

In [None]:
def demean_example():
    """Build a pipeline comparing raw 30-day returns with their demeaned form.

    Columns:
        vanilla: ``Returns(window_length=30)``.
        demeaned: the same factor after ``demean()``.
        adj_close: ``USEquityPricing.close.latest``.
        return_mean: ``vanilla - demeaned``, i.e. the mean that was removed.

    The pipeline is screened to ``Q500US()``.
    """
    raw = Returns(window_length=30)  # == close.pct_change(29)
    # How far each asset's return sits from the same-day mean of the others.
    centered = raw.demean()

    pipe_columns = dict(
        vanilla=raw,
        demeaned=centered,
        adj_close=USEquityPricing.close.latest,
        # Subtracting the demeaned value from the raw value recovers the mean.
        return_mean=raw - centered,
    )
    return Pipeline(columns=pipe_columns, screen=Q500US())

# Run the demean example pipeline between '2014' and '2014-03-01',
# then preview the first rows of the result.
results0 = run_pipeline(demean_example(), '2014', '2014-03-01')
results0.head()

In [None]:
# The nice thing about demean is that the values end up centered near 0.
# Draw a 2x2 grid of 100-bin histograms of the 'demeaned' column,
# one subplot per selected date.
fig, ax = plt.subplots(2,2)
ax[0,0].hist(results0.loc["2014-01-02"]["demeaned"], bins=100)
ax[0,1].hist(results0.loc["2014-02-03"]["demeaned"], bins=100)
ax[1,0].hist(results0.loc["2014-02-21"]["demeaned"], bins=100)
ax[1,1].hist(results0.loc["2014-01-21"]["demeaned"], bins=100)


### zscore 

`demean()` は、同日の他の銘柄の平均と自分を比較したが

`zscore()` は、それを「標準偏差で割る」という計算をいれます。

よって、`demean()` / `zscore()`  はその日の標準偏差が出る(逆の `zscore()` / `demean()` は標準偏差の逆数になる)



In [None]:
def zscore_example():
    """Build a pipeline showing 30-day returns, their z-score and the implied std.

    Columns:
        vanilla: ``Returns(window_length=30)``.
        zscored: the same factor after ``zscore()``.
        sd: ``demeaned / zscored``. Because a z-score is the demeaned value
            divided by the standard deviation, this ratio recovers that
            day's standard deviation.

    The screen keeps ``Q500US()`` members whose return is not null.
    """
    returns = Returns(window_length=30)
    demeaned_returns = returns.demean()
    zscore_returns = returns.zscore()
    universe = Q500US()

    return Pipeline(
        columns={
            'vanilla': returns,
            'zscored': zscore_returns,
            # zscore = demeaned / sd, hence sd = demeaned / zscore.
            # The reverse ratio (zscore / demeaned) would be 1 / sd.
            'sd': demeaned_returns / zscore_returns,
        },
        screen=universe & returns.notnull(),
    )

# Run the zscore example pipeline between '2014' and '2014-03-01',
# then preview the first rows of the result.
results1 = run_pipeline(zscore_example(), '2014', '2014-03-01')
results1.head()
    

In [None]:
# Draw a 2x2 grid of 100-bin histograms of the 'zscored' column,
# one subplot per selected date.
fig, ax = plt.subplots(2,2)
ax[0,0].hist(results1.loc["2014-01-02"]["zscored"], bins=100)
ax[0,1].hist(results1.loc["2014-02-03"]["zscored"], bins=100)
ax[1,0].hist(results1.loc["2014-02-21"]["zscored"], bins=100)
ax[1,1].hist(results1.loc["2014-01-21"]["zscored"], bins=100)


### mask 

`zscore()`, `demean()` どちらも `mask=` オプションを持つ。`mask=` には Filter を渡し、計算対象とする銘柄を絞り込むために使う。





In [None]:

from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.filters.morningstar import IsPrimaryShare

def masked_zscore_returns_example():
    """Build a pipeline whose return z-score is computed under a mask.

    The mask is the conjunction of four filters: average dollar volume
    (30-day window) between the 25th and 100th percentile, ``IsPrimaryShare()``,
    30-day returns between the 2nd and 98th percentile, and ``Q500US()``.

    Columns:
        masked_zscore: ``Returns(window_length=30).zscore(mask=<that filter>)``.
        returns: the raw 30-day returns.

    Rows where the masked z-score is null are screened out.
    """
    rets = Returns(window_length=30)

    dollar_volume = AverageDollarVolume(window_length=30)
    liquid = dollar_volume.percentile_between(25, 100)
    primary = IsPrimaryShare()
    # Drop the extreme 2% on each side of the return distribution.
    within_tails = rets.percentile_between(2, 98)
    eligible = liquid & primary & within_tails & Q500US()

    zscored = rets.zscore(mask=eligible)

    return Pipeline(
        columns={'masked_zscore': zscored, 'returns': rets},
        screen=zscored.notnull(),
    )
    
# Run the masked z-score pipeline between '2014' and '2014-03-01',
# then preview the first rows of the result.
results2 = run_pipeline(masked_zscore_returns_example(), '2014', '2014-03-01')
results2.head()    

In [None]:
# Summary statistics of the masked z-score results.
results2.describe()

In [None]:
def my_bollinger_band():
    """Build a pipeline of Bollinger-band-to-close ratios and their z-scores.

    Bollinger bands use ``USEquityPricing.close`` with ``window_length=20``
    and ``k=2``. Each band is divided by the latest close and the resulting
    ratio is z-scored.

    Columns: ``adj_close``, ``ratio_upper_and_close``,
    ``ratio_lower_and_close``, ``zscored_ratio_upper_and_close``,
    ``zscored_ratio_lower_and_close``.

    The screen keeps ``QTradableStocksUS()`` members for which both z-scored
    ratios are not null.
    """
    close = USEquityPricing.close.latest
    bands = BollingerBands(inputs=[USEquityPricing.close], window_length=20, k=2)

    # How tightly each band hugs the latest close.
    upper_ratio = bands.upper / close
    lower_ratio = bands.lower / close
    upper_z = upper_ratio.zscore()
    lower_z = lower_ratio.zscore()

    return Pipeline(
        columns={
            'adj_close': close,
            'ratio_upper_and_close': upper_ratio,
            'ratio_lower_and_close': lower_ratio,
            'zscored_ratio_upper_and_close': upper_z,
            'zscored_ratio_lower_and_close': lower_z,
        },
        screen=QTradableStocksUS() & upper_z.notnull() & lower_z.notnull(),
    )
  
# Run the Bollinger band pipeline between '2014' and '2018-11-01',
# then preview the first rows of the result.
results3 = run_pipeline(my_bollinger_band(), '2014', '2018-11-01')
results3.head()      
    

In [None]:
# Take the cross-section of results3 for symbols('V') on index level 1.
# NOTE(review): `symbols` is not imported in the visible part of this file;
# presumably it is provided by the research environment — confirm.
df = results3.xs(symbols('V'), level=1) 
# 100-bin histograms of the lower and upper z-scored ratios.
df["zscored_ratio_lower_and_close"].hist(bins=100)
df["zscored_ratio_upper_and_close"].hist(bins=100)


In [None]:
class Zscore(CustomFactor):
    """Z-score of the most recent close relative to its own 20-row window.

    ``scipy.stats.zscore`` standardizes ``close`` along axis 0 and the last
    row of the result is written to ``out``.
    NOTE(review): assumes ``close`` is laid out as (window row, asset) —
    confirm against the CustomFactor contract.
    """
    inputs = [USEquityPricing.close]
    window_length = 20

    def compute(self, today, assets, out, close):
        # Standardize along axis 0, then keep only the final row.
        window_scores = stats.zscore(close, axis=0)
        out[:] = window_scores[-1]
        
        

def my_sma():
    """Build a pipeline relating the latest close to its 20-day moving average.

    Columns:
        zscore: the ``Zscore()`` custom factor.
        zscored_ratio: ``(latest close / SMA)`` z-scored with
            ``mask=QTradableStocksUS()``.
        adj_close: ``USEquityPricing.close.latest``.
        sma: ``SimpleMovingAverage`` of close with ``window_length=20``.

    Rows where ``zscored_ratio`` is null are screened out.
    """
    tradable = QTradableStocksUS()
    close = USEquityPricing.close.latest
    moving_avg = SimpleMovingAverage(
        inputs=[USEquityPricing.close],
        window_length=20,
    )

    # Latest close relative to its moving average, z-scored under the mask.
    close_to_sma = close / moving_avg
    ratio_z = close_to_sma.zscore(mask=tradable)

    return Pipeline(
        columns=dict(
            zscore=Zscore(),
            zscored_ratio=ratio_z,
            adj_close=close,
            sma=moving_avg,
        ),
        screen=ratio_z.notnull(),
    )

    
# Run the SMA pipeline between '2014' and '2018-11-01',
# then preview the first rows of the result.
results4 = run_pipeline(my_sma(), '2014', '2018-11-01')
results4.head()         
    

In [None]:
# 100-bin histogram of the 'zscore' column across all rows of results4.
results4["zscore"].hist(bins=100)

In [None]:
# Take the cross-section of results4 for symbols('KO') on index level 1.
# NOTE(review): `symbols` is not imported in the visible part of this file;
# presumably it is provided by the research environment — confirm.
df = results4.xs(symbols('KO'), level=1) 
# Flag rows whose 'zscore' value is greater than 2.
df["over2"] = df["zscore"] > 2
df


In [None]:
# Plot adj_close together with the over2 flag, putting over2 on a secondary y-axis.
df[["adj_close", "over2"]].plot(secondary_y = "over2")

In [None]:
import alphalens as al


In [None]:
# Lookup table from integer sector code to a human-readable sector name.
# NOTE(review): presumably these are the codes produced by the `Sector`
# classifier imported at the top of the file — confirm.
MORNINGSTAR_SECTOR_CODES = {
     -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology' ,    
}
