In [None]:

from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import SimpleMovingAverage,RSI
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline,CustomFilter
from quantopian.pipeline.factors import CustomFactor,RSI
from quantopian.pipeline.data import morningstar
from zipline import TradingAlgorithm  
from quantopian.pipeline.filters import Q1500US, Q500US
from quantopian.pipeline.factors import AverageDollarVolume
from quantopian.pipeline.classifiers.morningstar import Sector, SuperSector
#
#from quantopian.pipeline.data.alpha_vertex import precog_top_500 as precog
#
import numpy as np
import talib 
import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib.cm as cm


### 過去データを作成

```python
start_date='2017-5-15'
end_date='2017-6-1'
pipeline_results = build_pipeline(start_date, end_date)
```

を実行して得られるpipeline_resultsは，日付とsid（銘柄id）の2つをマルチインデックスとして持つDataFrame.

make_pipelineの中で ```columns=``` に指定した辞書のキーを，カラム（列）として持つ．


In [None]:

class PrevClose(CustomFactor):
    """Factor that outputs the last row of a 2-row window of close prices.

    NOTE(review): presumably this is the previous trading day's close as seen
    by the pipeline on each date -- confirm against the engine's window
    semantics.
    """
    inputs = [USEquityPricing.close]
    window_length = 2
    def compute(self, today, assets, out, close):
        # close[-1] is the final row of the window.
        out[:] = close[-1]

class PrevVolume(CustomFactor):
    """Factor that outputs the last row of a 2-row window of traded volume.

    NOTE(review): presumably this is the previous trading day's volume as seen
    by the pipeline on each date -- confirm against the engine's window
    semantics.
    """
    inputs = [USEquityPricing.volume]
    window_length = 2
    def compute(self, today, assets, out, close):
        # NOTE: the parameter is named `close`, but because `inputs` is
        # USEquityPricing.volume it receives the volume window.
        out[:] = close[-1]

class FinancialFactor(CustomFactor):
    """Base class for the single-input fundamental factors defined below.

    Subclasses only set `inputs`; this class supplies the window length and a
    compute() that copies one row of the window to the output.
    """
    window_length = 2
    def compute(self, today, assets, out, v): 
        # v[0] is the FIRST row of the 2-row window, not the last one.
        # NOTE(review): v[-1] would be the most recent row -- confirm that
        # taking the older row is intentional.
        out[:] = v[0]
        
class MarketCap(FinancialFactor):
    """Natural log of morningstar.valuation.market_cap.

    Overrides FinancialFactor.compute to apply np.log to the first row of the
    window before writing it to the output.
    """
    inputs = [morningstar.valuation.market_cap]
    def compute(self, today, assets, out, v): 
        # Log-transform the raw market cap value.
        out[:] = np.log(v[0])
    
class ROA(FinancialFactor):
    """FinancialFactor over morningstar.operation_ratios.roa."""
    inputs = [morningstar.operation_ratios.roa]
     
class ROE(FinancialFactor):
    """FinancialFactor over morningstar.operation_ratios.roe."""
    inputs = [morningstar.operation_ratios.roe]

class NormalizedBasicEps(FinancialFactor):
    """FinancialFactor over morningstar.earnings_report.normalized_basic_eps."""
    inputs = [morningstar.earnings_report.normalized_basic_eps]

class NetIncomeGrowth(FinancialFactor):
    """FinancialFactor over morningstar.operation_ratios.net_income_growth."""
    inputs = [morningstar.operation_ratios.net_income_growth]

class PE(FinancialFactor):
    """FinancialFactor over morningstar.valuation_ratios.pe_ratio."""
    inputs = [morningstar.valuation_ratios.pe_ratio]

class BookValueYield(FinancialFactor):
    """FinancialFactor over morningstar.valuation_ratios.book_value_yield."""
    inputs = [morningstar.valuation_ratios.book_value_yield]

class DividendYield(FinancialFactor):
    """FinancialFactor over morningstar.valuation_ratios.dividend_yield."""
    inputs = [morningstar.valuation_ratios.dividend_yield]

# class ShortName(FinancialFactor):
#     inputs = [morningstar.company_reference.short_name]
#     def compute(self, today, assets, out, v): 
#         out[:] = np.array(v[0])

class PeriodEndingDate(FinancialFactor):
    """FinancialFactor over morningstar.financial_statement_filing.period_ending_date."""
    inputs = [morningstar.financial_statement_filing.period_ending_date]
    
    
def make_pipeline(pred_quality_thresh=0.65):
    """Assemble the pipeline of price, volume, RSI, sector and fundamental columns.

    The screen keeps assets that are members of Q500US and whose 30-day
    average dollar volume falls in the 98th-100th percentile band.

    pred_quality_thresh: accepted for the caller's convenience but not used
        anywhere in this function.

    Return: Pipeline whose output columns are the keys passed to `columns`.
    """
    prev_close = PrevClose()
    prev_volume = PrevVolume()
    avg_dollar_volume = AverageDollarVolume(window_length=30)
    # TODO: experiment with different percentile ranges here.
    in_top_dollar_volume = avg_dollar_volume.percentile_between(98, 100)

    pipeline_columns = dict(
        yesterday_close=prev_close,
        yesterday_volume=prev_volume,
        yesterday_turnover=prev_close * prev_volume,
        dollar_volume=avg_dollar_volume,
        high_dollar_volume=in_top_dollar_volume,
        sector=Sector(),
        rsi=RSI(inputs=[USEquityPricing.close]),
        market_cap=MarketCap(),
        roa=ROA(),
        roe=ROE(),
        normalized_basic_eps=NormalizedBasicEps(),
        net_income_growth=NetIncomeGrowth(),
        pe=PE(),
        book_value_yield=BookValueYield(),
        dividend_yield=DividendYield(),
        period_ending_date=PeriodEndingDate(),
    )

    return Pipeline(
        columns=pipeline_columns,
        screen=Q500US() & in_top_dollar_volume,
    )

def build_pipeline(start_date, end_date):
    """Run the pipeline from make_pipeline() between start_date and end_date.

    Return: whatever run_pipeline returns for that pipeline and date range.
    """
    pipe = make_pipeline()
    return run_pipeline(pipe, start_date=start_date, end_date=end_date)
        

### 当日の一分足データを作成

#### get_prices の返り値

```
<class 'pandas.core.panel.Panel'>
Dimensions: 7 (items) x 390 (major_axis) x 112 (minor_axis)
Items axis: open_price to turnover
Major_axis axis: 2017-05-15 13:31:00+00:00 to 2017-05-15 20:00:00+00:00
Minor_axis axis: Equity(24 [AAPL]) to Equity(49242 [PYPL])
```

#### calc_gap の 返り値

ID|book_value_yield|dividend_yield|gap|latest_turnover|market_cap|net_income_growth|normalized_basic_eps|pe|roa|roe|rsi|sector|turnover_ratio
---|---|---|---|---|---|---|---|---|---|---|---|---|---
Equity(24 [AAPL])|0.167|0.0152|-0.0041|7627489.44|27.411211|0.048783|2.11|17.9429|0.033136|0.082778|85.578218|311|0.00174
Equity(62 [ABT])|0.4121|0.0239|0.005119|321991.575|25.055432|0.325949|0.22|49.2809|0.006783|0.016146|46.619217|206|0.001503


#### get_minutewise の返り値

日付をキーとする辞書を2つ，タプルとして返す．1つ目 (dict_daily) の値は calc_gap の返り値，2つ目 (dict_pan_today_1m) の値は get_prices の返り値．


In [None]:
def get_prices(pipeline_results, date):
    """
    Fetch one-minute bars for `date` for the assets the pipeline kept that day.

    pipeline_results: return value of build_pipeline
    date: date used both to select rows of pipeline_results and as the
          start/end of the pricing request

    Return: the get_pricing result with an extra 'turnover' item
            (price * volume).
    """
    # Assets that passed the pipeline screen on this date.
    day_sids = pipeline_results.ix[date].index
    minute_bars = get_pricing(
        day_sids,
        start_date=date,
        end_date=date,
        frequency='minute',
    )
    minute_bars['turnover'] = minute_bars.price * minute_bars.volume
    return minute_bars


def calc_gap(df_pipeline_results_prevday, pan_today_1m, observe_timing=1):
    """
    Build the per-asset indicator table for one day.

    df_pipeline_results_prevday: DataFrame obtained via pipeline_results.ix[date]
    pan_today_1m: return value of get_prices
    observe_timing: [optional] selector passed as the second index of
        pan_today_1m['price', observe_timing, :]; default 1.
        NOTE(review): presumably a positional minute offset from the first bar
        of the session -- confirm.

    Return: DataFrame indexed by asset (sid) holding 'gap', 'turnover_ratio',
            'latest_turnover' plus columns copied from the pipeline output.
    """
    prevday = df_pipeline_results_prevday

    # Price and turnover of the bar selected by observe_timing.
    price_at_obs = pan_today_1m['price', observe_timing, :]
    turnover_at_obs = pan_today_1m['turnover', observe_timing, :]

    indicator_columns = {
        'gap': price_at_obs / prevday.yesterday_close - 1.0,
        'turnover_ratio': turnover_at_obs / prevday.yesterday_turnover,
        'rsi': prevday.rsi,
        'sector': prevday.sector,
        'latest_turnover': turnover_at_obs,
    }

    # Fundamental columns are carried over from the pipeline output unchanged.
    passthrough = (
        'market_cap',
        'roa',
        'roe',
        'normalized_basic_eps',
        'net_income_growth',
        'pe',
        'book_value_yield',
        'dividend_yield',
    )
    for column_name in passthrough:
        indicator_columns[column_name] = prevday[column_name]

    return pd.DataFrame(indicator_columns)


def get_minutewise(pipeline_results):
    """
    get_pricesとcalc_gapを使って，各日付の一分足データとその分足データを使って，インディケータDataFrameを作成
    
    """
    dict_daily = dict()
    dict_pan_today_1m = dict()
    
    # pipeline_resultsから日付データを取り出し
    dates = pipeline_results.index.get_level_values(0).unique()
    observe_timing = 40
    for date in dates:
        print date.strftime("%Y-%m-%d"),
        df_pipeline_results_prevday = pipeline_results.ix[date]
        pan_today_1m = get_prices(pipeline_results, date)
        df_eligibles = calc_gap(df_pipeline_results_prevday, pan_today_1m, observe_timing,)
        dict_daily[date] = df_eligibles
        dict_pan_today_1m[date] = pan_today_1m
    return dict_daily, dict_pan_today_1m



### GAP UP 銘柄のみを探す

In [None]:
def get_daily_gapups(dict_daily, 
               gapup_min_turnover_ratio,
               gapup_max_turnover_ratio,
               gapup_min_gap,
               gapup_max_gap):
    """
    Filter each day's indicator DataFrame down to the rows whose
    turnover_ratio and gap both lie strictly inside the given bounds.

    dict_daily: dict keyed by date (first element returned by get_minutewise)
    gapup_min_turnover_ratio / gapup_max_turnover_ratio: exclusive bounds on
        the 'turnover_ratio' column
    gapup_min_gap / gapup_max_gap: exclusive bounds on the 'gap' column

    Return: dict with the same keys; each value is the filtered DataFrame
            sorted by 'gap' in descending order.
    """

    def _select(df_eligibles):
        # Keep rows that satisfy both range conditions, largest gap first.
        ratio = df_eligibles.turnover_ratio
        gap = df_eligibles.gap
        ratio_in_range = (ratio > gapup_min_turnover_ratio) & (ratio < gapup_max_turnover_ratio)
        gap_in_range = (gap > gapup_min_gap) & (gap < gapup_max_gap)
        selected = df_eligibles[ratio_in_range & gap_in_range]
        return selected.sort_values(by=['gap'], ascending=[False])

    return {date: _select(df_day) for date, df_day in dict_daily.items()}



#### その他

spy の日足データを取得し，前日終値に対する当日始値のギャップ（gap 列）を計算する

In [None]:
def get_spy(pipeline_results):
    """
    Fetch daily 'spy' pricing over the date span of pipeline_results and add
    a 'gap' column: open_price divided by the previous row's close_price,
    minus 1. The first row has no previous close, so its gap is NaN.

    pipeline_results: return value of build_pipeline; only the first level of
        its index (the dates) is used.

    Return: the get_pricing result with the extra 'gap' column.
    """
    all_dates = pipeline_results.index.get_level_values(0).unique()
    first_date, last_date = all_dates[0], all_dates[-1]
    df_spy_daily = get_pricing(
        symbols('spy'),
        start_date=first_date,
        end_date=last_date,
        frequency='daily',
    )
    prev_close = df_spy_daily.close_price.shift(1)
    df_spy_daily['gap'] = df_spy_daily.open_price / prev_close - 1
    return df_spy_daily

In [None]:

def get_performance_data(dict_daily_gaps, dict_pan_today_1m, df_spy=None):
    """
    Build one long DataFrame (one row per date and asset) combining each
    asset's indicators with its cumulative log return at 5-minute marks.

    dict_daily_gaps: dict keyed by date whose values are filtered indicator
        DataFrames, e.g. the return value of get_daily_gapups
    dict_pan_today_1m: dict keyed by date of one-minute price data (second
        element returned by get_minutewise)
    df_spy: [optional] object with a 'gap' column indexed by date, e.g. the
        return value of get_spy. When omitted, the notebook-level variable
        `spy` is used, which is what this function always did before the
        parameter existed.

    Return: concatenation of the per-date DataFrames. Columns '05m' ... '50m'
            hold the cumulative log return at rows 5, 10, ..., 50 of the day's
            price table (row 0 being the first bar). An empty DataFrame is
            returned when no date yields any rows.
    """
    if df_spy is None:
        # Backward compatibility: fall back to the global defined in the notebook.
        df_spy = spy

    frames = []
    for date, r in dict_daily_gaps.items():
        df = dict_pan_today_1m[date]['price', :, r.index]
        if df.empty:
            continue

        # Replace timestamps with 0..n-1 so rows can be addressed by minute offset.
        df = df.reset_index(drop=True)
        # Cumulative sum of per-minute log returns.
        df = (df.pct_change() + 1.0).apply(np.log).cumsum()

        spy_gap_today = df_spy['gap'].loc[date]
        columns = {
            'date': date,
            'sector': r.sector,
            'gap': r.gap,
            'spy': spy_gap_today,
            'spy_gap': spy_gap_today - r.gap,
            'latest_turnover': r.latest_turnover.apply(np.log),
            'turnover_ratio': r.turnover_ratio,
            'market_cap': r.market_cap,
            'roa': r.roa,
            'roe': r.roe,
            'normalized_basic_eps': r.normalized_basic_eps,
            'net_income_growth': r.net_income_growth,
            'pe': r.pe,
            'book_value_yield': r.book_value_yield,
            'dividend_yield': r.dividend_yield,
        }
        # '05m', '10m', ..., '50m'
        for minute in range(5, 55, 5):
            columns['%02dm' % minute] = df.iloc[minute]
        frames.append(pd.DataFrame(columns))

    if not frames:
        # pd.concat raises on an empty list; return an empty frame instead.
        return pd.DataFrame()
    return pd.concat(frames)


In [None]:
# Run the whole data-preparation chain in this cell so that the notebook works
# from a fresh kernel (Restart & Run All); previously these steps were
# commented out and the last line relied on leftover kernel state.
start_date='2017-5-15'
end_date='2017-6-1'
pipeline_results = build_pipeline(start_date, end_date)
dict_daily, dict_pan_today_1m = get_minutewise(pipeline_results)

# Bounds wide enough that the filter keeps nearly every row.
gapup_min_turnover_ratio = 0.0
gapup_max_turnover_ratio = 1.0
gapup_min_gap = -1.0
gapup_max_gap = 1.0

dict_daily_gapups = get_daily_gapups(dict_daily, 
               gapup_min_turnover_ratio,
               gapup_max_turnover_ratio,
               gapup_min_gap,
               gapup_max_gap)

# get_performance_data reads the notebook-level `spy`, so it must exist first.
spy = get_spy(pipeline_results)


x = get_performance_data(dict_daily_gapups,dict_pan_today_1m)

In [None]:
# Keep only rows where the SPY gap exceeds `thresh` in absolute value.
thresh = 0.005
x = x[x.spy.abs() > thresh]
# Drop rows whose sector code is 102, 105 or 301.
x = x[~x.sector.isin([102, 105, 301])]

# Column of x plotted on the vertical axis.
Y = '20m'
sx = x['gap'] - x['spy'] * 1.02
sy = x[Y]
# Third variable: determines the colour of each point.
sz = np.log(x['turnover_ratio'])

fig = plt.figure()
plt.xlabel('gap-sp/gap')
plt.ylabel(Y)

im = plt.scatter(
    sx,
    sy,
    c=sz,
    linewidths=0,
    alpha=1,
    cmap=cm.coolwarm,  # colour map used for `c`
)
fig.colorbar(im)

In [None]:
import pytz

# Time zone used to relabel the minute timestamps below.
tz_ny = pytz.timezone("US/Eastern")

# One-minute data for three symbols over the first trading week of 2017.
pan_historical_data = get_pricing(
    symbols(["spy", "fb", "aapl"]),
    start_date="2017-1-2",
    end_date="2017-1-6",
    frequency='minute',
)
# Convert the major axis (timestamps) to US/Eastern so that at_time() calls
# can use exchange-local clock times.
pan_historical_data.major_axis = pan_historical_data.major_axis.tz_convert(tz_ny)

In [None]:
# Forward-fill missing minutes, then take one row per day at a fixed clock time:
# the open price at 09:32 and the price at 16:00.
df_open = pan_historical_data['open_price'].ffill().at_time("09:32")
df_price = pan_historical_data['price'].ffill().at_time("16:00")

In [None]:
# Gap = 09:32 open / previous row's 16:00 price - 1.
# Both indices are reset to 0..n-1 so the division pairs rows by position
# rather than by their (different) timestamps.
df_gap = (
    df_open.reset_index(drop=True)
    .div(df_price.shift(1).reset_index(drop=True))
    .sub(1)
)

In [None]:
# Return over the last hour: 16:00 price / 15:00 price - 1.
# The two at_time() slices carry different timestamps (16:00 vs 15:00), so
# dividing them directly aligns on labels that never match and yields only NaN.
# Reset both indices so rows are paired by position (row k = k-th day).
# NOTE(review): this assumes every day in the range has both a 15:00 and a
# 16:00 bar -- confirm for shortened sessions.
(
    pan_historical_data['price'].at_time("16:00").reset_index(drop=True)
    / pan_historical_data['price'].at_time("15:00").reset_index(drop=True)
    - 1
)