In [None]:
import numpy as np
import pandas as pd
from scipy import stats

import seaborn as sns
import matplotlib.pyplot as plt

from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import CustomFactor, Returns,SimpleMovingAverage
from quantopian.pipeline.classifiers.fundamentals import Sector  
from quantopian.pipeline.data import Fundamentals

from quantopian.pipeline.data.psychsignal import stocktwits

from quantopian.pipeline.filters import QTradableStocksUS
from time import time

import alphalens as al 

# Display names for the integer sector codes, used as `groupby_labels` when the
# pipeline's sector column is handed to alphalens.
# NOTE(review): -1 is presumably the classifier's "missing" code -- confirm.
MORNINGSTAR_SECTOR_CODES = {
    -1: 'Misc',
    101: 'Basic Materials',
    102: 'Consumer Cyclical',
    103: 'Financial Services',
    104: 'Real Estate',
    205: 'Consumer Defensive',
    206: 'Healthcare',
    207: 'Utilities',
    308: 'Communication Services',
    309: 'Energy',
    310: 'Industrials',
    311: 'Technology',
}


In [None]:
def mypipe():
    """Assemble the research pipeline that exposes the raw factor inputs.

    The pipeline is screened to the QTradableStocksUS universe and produces
    these columns:

    * 'ev' / 'ebit'      -- latest `Fundamentals.enterprise_value` and
                            `Fundamentals.ebit` values.
    * 'value'            -- ebit divided by enterprise value.
    * 'quality'          -- latest `Fundamentals.roe` value.
    * 'sentiment_score'  -- SimpleMovingAverage (window_length=3) of the
                            stocktwits `bull_minus_bear` signal.
    * 'setctor'          -- output of the `Sector` classifier.

    Returns:
        Pipeline: the configured pipeline, ready to pass to `run_pipeline`.
    """
    latest_ebit = Fundamentals.ebit.latest
    latest_ev = Fundamentals.enterprise_value.latest

    smoothed_sentiment = SimpleMovingAverage(
        inputs=[stocktwits.bull_minus_bear],
        window_length=3,
    )

    # NOTE(review): 'setctor' is a misspelling of 'sector', but later cells
    # read results["setctor"], so the key must stay exactly as it is.
    pipeline_columns = {
        'ev': latest_ev,
        'ebit': latest_ebit,
        'value': latest_ebit / latest_ev,
        'quality': Fundamentals.roe.latest,
        'sentiment_score': smoothed_sentiment,
        'setctor': Sector(),
    }

    return Pipeline(columns=pipeline_columns, screen=QTradableStocksUS())

# bull_minus_bear: subtracts the bearish intensity from the bullish intensity [BULL - BEAR] to provide an immediate net score.

In [None]:
# Study window for the pipeline run; `start` and `end` are reused further down
# when fetching prices.
start = '2016-11-01'
end = '2018-11-20'

# Run the pipeline and measure how long the run itself takes.
start_timer = time()
results = run_pipeline(mypipe(), start, end)
end_timer = time()

# The earlier `results.fillna(value=0);` was removed: without assignment or
# inplace=True, fillna returns a new frame that was thrown away, so `results`
# was never modified. Missing values are deliberately left as NaN rather than
# zero-filled, because a 0 would be treated as a real factor value downstream.
print("Time to run pipeline %.2f secs" % (end_timer - start_timer))


In [None]:
# Preview the first rows of the pipeline output to sanity-check its columns.
results.head()

In [None]:
# Gather the assets that appear in the pipeline output across the whole period,
# so pricing can be requested for all of them at once.
# levels[1] is the second level of the results index (presumably the asset
# level of a (date, asset) MultiIndex -- confirm).
asset_list = results.index.levels[1]
# Displayed as the cell output: how many distinct assets were collected.
len(asset_list)

In [None]:
# Close prices for every collected asset over the same start/end window as the
# pipeline run; this frame is what alphalens receives as `prices`.
prices = get_pricing(
    asset_list,
    start_date=start,
    end_date=end,
    fields='close_price',
)
prices.head()

In [None]:
# Forward-return horizons (in days) that alphalens evaluates.
periods = (1, 5, 10)

# Run the 'value' factor through alphalens: align it with prices, compute
# forward returns for each horizon, bucket into deciles, and label each asset
# with its sector name.
# The timer is restarted here so the printed duration covers only this step;
# previously it reused the start time recorded in the pipeline cell.
start_timer = time()
myfactor = results["value"]
factor_data = al.utils.get_clean_factor_and_forward_returns(
    factor=myfactor,
    prices=prices,
    groupby=results["setctor"],  # column key is misspelled in the pipeline
    groupby_labels=MORNINGSTAR_SECTOR_CODES,
    periods=periods,
    quantiles=10,
)
end_timer = time()

# The copy-pasted `results.fillna(value=0);` was removed: its result was
# discarded, so it had no effect. The message now names the step it measures.
print("Time to run alphalens %.2f secs" % (end_timer - start_timer))



## Algorithmでフィルターがかかっている箇所

```python
# We winsorize our factor values in order to lessen the impact of outliers
# For more information on winsorization, please see
# https://en.wikipedia.org/wiki/Winsorizing
value_winsorized = value.winsorize(min_percentile=0.05, max_percentile=0.95)
quality_winsorized = quality.winsorize(min_percentile=0.05, max_percentile=0.95)
sentiment_score_winsorized = sentiment_score.winsorize(min_percentile=0.05,
                                                       max_percentile=0.95)

# Here we combine our winsorized factors, z-scoring them to equalize their influence
combined_factor = (
    value_winsorized.zscore() + 
    quality_winsorized.zscore() + 
    sentiment_score_winsorized.zscore()
)

# Build Filters representing the top and bottom baskets of stocks by our
# combined ranking system. We'll use these as our tradeable universe each
# day.
longs = combined_factor.top(TOTAL_POSITIONS//2, mask=universe)
shorts = combined_factor.bottom(TOTAL_POSITIONS//2, mask=universe)
```

+ `value` のwinsorizeで、

In [None]:
# Preview the first rows of the alphalens factor data built above.
factor_data.head()

In [None]:
# Render the alphalens returns tear sheet for the factor currently held in
# `factor_data`; by_group=True requests the per-group breakdown as well.
al.tears.create_returns_tear_sheet(factor_data, by_group=True)

In [None]:
# Repeat the alphalens analysis for the 'sentiment_score' factor, reusing the
# `prices` and `periods` defined in earlier cells.
# The timer is restarted here so the printed duration covers only this step;
# previously it reused the start time recorded in the pipeline cell.
start_timer = time()
myfactor = results["sentiment_score"]
factor_data = al.utils.get_clean_factor_and_forward_returns(
    factor=myfactor,
    prices=prices,
    groupby=results["setctor"],  # column key is misspelled in the pipeline
    groupby_labels=MORNINGSTAR_SECTOR_CODES,
    periods=periods,
    quantiles=10,
)
end_timer = time()

# The copy-pasted `results.fillna(value=0);` was removed: its result was
# discarded, so it had no effect. The message now names the step it measures.
print("Time to run alphalens %.2f secs" % (end_timer - start_timer))

# Returns tear sheet for the sentiment factor, including the per-group view.
al.tears.create_returns_tear_sheet(factor_data, by_group=True)