+ 2017/07/27　Updated
+ pipeline のデータはアジャストされていないので，フィルタリングにだけ使うように Update


In [None]:
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.factors import SimpleMovingAverage
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline,CustomFilter
from quantopian.pipeline.factors import CustomFactor,RSI
from quantopian.pipeline.data import morningstar
from zipline import TradingAlgorithm  
from quantopian.pipeline.filters import Q1500US, Q500US
from quantopian.pipeline.factors import AverageDollarVolume
import numpy as np
import talib 
import pandas as pd 
import matplotlib.pyplot as plt 
import matplotlib.cm as cm

from quantopian.pipeline.filters.morningstar import IsPrimaryShare
from quantopian.pipeline.classifiers.morningstar import Sector, SuperSector


# class PrevClose(CustomFactor):
#     inputs = [USEquityPricing.close]
#     window_length = 2
#     def compute(self, today, assets, out, close):
#         out[:] = close[-2]
        
# class PrevVolume(CustomFactor):
#     inputs = [USEquityPricing.volume]
#     window_length = 2
#     def compute(self, today, assets, out, close):
#         out[:] = close[-2]
        
def make_pipeline():
    """Build the screening pipeline used throughout this notebook.

    The pipeline outputs two columns:

    * ``dollar_volume`` -- ``AverageDollarVolume`` over a 30-day window.
    * ``high_dollar_volume`` -- whether that value lies between the 98th
      and 100th percentile.

    Rows are screened to Q1500US members that pass ``high_dollar_volume``.

    Returns:
        The ``Pipeline`` object, ready to be handed to ``run_pipeline``.
    """
    # The Q500US alternative is switched off; Q1500US is the base universe.
    base_universe = Q1500US()

    # TODO: try different values for this window / percentile range.
    dollar_volume = AverageDollarVolume(window_length=30)
    high_dollar_volume = dollar_volume.percentile_between(98, 100)

    share_class = morningstar.share_class_reference

    primary_share = IsPrimaryShare()
    # Common stock (as opposed to, say, preferred stock);
    # 'ST00000001' indicates common stock.
    common_stock = share_class.security_type.latest.eq('ST00000001')
    # Not a depositary receipt (~ inverts a filter).
    not_depositary = ~share_class.is_depositary_receipt.latest
    # Not trading over-the-counter.
    not_otc = ~share_class.exchange_id.latest.startswith('OTC')
    # Not a when-issued equity.
    not_wi = ~share_class.symbol.latest.endswith('.WI')
    # No "LP" suffix in the company name (regular-expression match).
    not_lp_name = ~morningstar.company_reference.standard_name.latest.matches('.* L[. ]?P.?$')
    # Null limited_partnership field in the Morningstar balance sheet.
    not_lp_balance_sheet = morningstar.balance_sheet.limited_partnership.latest.isnull()
    # A non-null market cap means fundamental data exists, i.e. not an ETF.
    have_market_cap = morningstar.valuation.market_cap.latest.notnull()

    is_cyclical = SuperSector().eq(SuperSector.CYCLICAL)
    is_defensive = SuperSector().eq(SuperSector.DEFENSIVE)
    is_sensitive = SuperSector().eq(SuperSector.SENSITIVE)
    in_known_super_sector = is_cyclical | is_defensive | is_sensitive

    # NOTE: tradeable_stocks is assembled here but is not part of the
    # screen below; only base_universe & high_dollar_volume are applied.
    tradeable_stocks = primary_share
    for check in (common_stock,
                  not_depositary,
                  not_otc,
                  not_wi,
                  not_lp_name,
                  not_lp_balance_sheet,
                  have_market_cap,
                  in_known_super_sector):
        tradeable_stocks = tradeable_stocks & check

    columns = {
        'dollar_volume': dollar_volume,
        'high_dollar_volume': high_dollar_volume,
    }
    # An extra "& rsi_under_60" term was previously considered for the screen.
    return Pipeline(columns=columns, screen=base_universe & high_dollar_volume)

def my_get_pricing(results, start, end=None):
    """Fetch minute bars for the assets that passed the screen on ``end``.

    Args:
        results: pipeline output; ``results.ix[date]`` must give the rows
            (indexed by asset) for that date.
        start: first date of the requested range.
        end: last date of the requested range; ``start`` is used when this
            is omitted or otherwise falsy.

    Returns:
        The object returned by ``get_pricing`` at minute frequency, with an
        additional ``'turnover'`` item equal to ``price * volume``.
        Callers in this notebook index it along three axes.
    """
    last_day = end or start
    # The asset list is taken from the end date.
    assets = results.ix[last_day].index
    minute_bars = get_pricing(assets, start_date=start, end_date=last_day, frequency='minute')
    minute_bars['turnover'] = minute_bars.price * minute_bars.volume
    return minute_bars

def find_gapup(results, pan, date, turnover_threshold=0.05, gapup_threshold = 0.0):
    """Select the assets that gapped up on the first bar of ``pan``.

    For every asset two ratios are computed from the first row of the
    minute data:

    * ``gap`` -- first price divided by ``yesterday_close``, minus 1.
    * ``turnover`` -- first-bar turnover divided by ``yesterday_turnover``.

    Args:
        results: pipeline output with a (date, asset) index; the rows for
            ``date`` must carry ``yesterday_close`` and
            ``yesterday_turnover`` columns.
        pan: minute data exposing ``price`` and ``turnover`` attributes,
            each a frame of bars (rows) by asset (columns).
        date: date label used to pick the rows of ``results``.
        turnover_threshold: rows are kept only when ``turnover`` exceeds it.
        gapup_threshold: rows are kept only when ``gap`` exceeds it.

    Returns:
        DataFrame indexed by asset with ``gap`` and ``turnover`` columns,
        sorted ascending by gap and then turnover, and restricted to the
        rows above both thresholds.
    """
    # Label lookup for the date and positional lookup for the first bar are
    # spelled out with .loc / .iloc instead of the ambiguous .ix.
    day = results.loc[date]
    first_price = pan.price.iloc[0]
    first_turnover = pan.turnover.iloc[0]

    df = pd.DataFrame({'gap': first_price / day.yesterday_close - 1,
                       'turnover': first_turnover / day.yesterday_turnover})
    df = df.sort_values(by=['gap', 'turnover'], ascending=[True, True])
    return df[(df.turnover > turnover_threshold) & (df.gap > gapup_threshold)]

def get_gapup_data(pan, df_gapups):
    """Return ``first_price / price - 1`` for the last five rows of ``df_gapups``.

    Args:
        pan: three-axis minute data with a ``price`` item; the third axis
            is indexed by asset.
        df_gapups: frame indexed by asset (``find_gapup`` returns it sorted
            ascending by gap, so its tail holds the largest gaps).

    Returns:
        DataFrame with one column per selected asset and a fresh 0..n
        integer index, limited to the leading ``.ix[:60]`` slice of bars.
    """
    selected_assets = df_gapups.tail(5).index
    prices = pan[:, :, selected_assets].price
    first_over_current = prices.ix[0] / prices - 1
    return first_over_current.ix[:60].reset_index(drop=True)

def get_gapup_data2(pan, df_gapups):

    top_gapup_sids = df_gapups.index 
    pan = pan[:,:,top_gapup_sids]
    
    prevdate = pan.price.ix[:390]
    today = pan.price.ix[390:]

    dscrb = prevdate.fillna(method='ffill').fillna(method='backfill').pct_change().dropna().describe()
    top_gapup_sids = dscrb.loc[:, dscrb.loc['mean'] > 0].T.index
    
    if top_gapup_sids.any():
        print sorted([s.symbol for s in top_gapup_sids]), 
        pan_sids = today[top_gapup_sids]
        # short 
        df_gapup_sids_data_for_an_hour = (pan_sids.ix[0] / pan_sids -1).ix[:60].reset_index(drop=True)
        return df_gapup_sids_data_for_an_hour
    else:
        return None 

In [None]:
# Run the screen, then build one frame per date comparing the day's prices
# with those from two pipeline dates earlier, plus the 09:31 -> 10:10 return.
results = run_pipeline(make_pipeline(), start_date='2017-6-1', end_date='2017-7-20')
dates = results.index.get_level_values(0).unique()

print(dates)
l = []
m = 0
for i, date in enumerate(dates):
    # Progress marker: print the first date seen in each month.
    month = date.strftime("%m")
    if month != m:
        print(date)
        m = month

    # The reference day is dates[i-2], so the first two dates are skipped.
    if i <= 1:
        continue

    reference_date = dates[i-2]
    sids = results.ix[date].index
    target_date = get_pricing(sids, start_date=date, end_date=date,
                              fields=["price", "volume", "close_price", "open_price"])
    day_before_yesterday = get_pricing(sids, start_date=reference_date, end_date=reference_date,
                                       fields=["close_price", "volume", "price"])

    target_date['turnover'] = target_date["price"] * target_date["volume"]
    day_before_yesterday['turnover'] = day_before_yesterday["close_price"] * day_before_yesterday["volume"]

    target_date_minute_data = get_pricing(sids, start_date=date, end_date=date,
                                          fields="price", frequency='minute')
    # After this conversion the at_time lookups below use US/Eastern times.
    target_date_minute_data.index = target_date_minute_data.index.tz_convert("US/Eastern")

    # Entry is the 09:31 bar and exit the 10:10 bar (the column name says
    # 0930, the lookup uses 09:31).
    entry_price = target_date_minute_data.at_time("09:31").iloc[0]
    exit_price = target_date_minute_data.at_time("10:10").iloc[0]

    today_price = target_date['price'].iloc[0]
    today_open = target_date['open_price'].iloc[0]
    reference_close = day_before_yesterday['close_price'].iloc[0]
    reference_price = day_before_yesterday['price'].iloc[0]

    df = pd.DataFrame({
        'gap': today_price / reference_close - 1,
        'gap2': today_price / reference_price - 1,
        'gap3': today_open / reference_price - 1,
        'turnover_ratio': target_date['turnover'].iloc[0] / day_before_yesterday['turnover'].iloc[0],
        'return_between_0930_1010': exit_price / entry_price - 1,
        'today': date,
        'day_before_yesterday': reference_date,
        'close_day_before_yesterday': reference_close,
        'entry_price': entry_price,
        'exit_price': exit_price,
    })

    l.append(df)
        

In [None]:
# Scatter of gap vs. the 09:31 -> 10:10 return, coloured by turnover ratio.
x = pd.concat(l)
# Optional filter kept for experimentation:
# x = x[
#     (x['gap'] > 0.10)
#       & (x['gap'] < 1.0)
#       & (x['turnover_ratio'] > 0)
#       & (x['turnover_ratio'] < 0.6)
#       ]
fig = plt.figure()
marker_style = dict(
    linewidths=0,
    alpha=1,
    cmap=cm.hot,  # the colormap is chosen here
)
# c is the third data series; it determines the point colours.
im = plt.scatter(x.gap, x.return_between_0930_1010, c=x.turnover_ratio, **marker_style)
fig.colorbar(im)

            

In [None]:
# Show the AMD row among the rows dated 2017-07-06.
rows_on_0706 = x[x.today == "2017-07-06"]
rows_on_0706.ix[symbols("AMD")]


In [None]:
# Minute prices for AMD over the study window, re-indexed to US/Eastern.
amd = get_pricing(
    symbols("AMD"),
    start_date='2017-6-1',
    end_date='2017-7-20',
    frequency='minute',
    fields='price',
)
eastern_index = amd.index.tz_convert("US/Eastern")
amd.index = eastern_index
def at_two_times(df, time1, time2, time3):
    """Return the rows of ``df`` that fall on any of three times of day.

    Despite the name, three times are taken. Rows are returned in their
    original index order.

    Args:
        df: Series or DataFrame indexed by a ``DatetimeIndex``.
        time1: time of day accepted by ``DatetimeIndex.indexer_at_time``
            (e.g. ``"09:31"``).
        time2: second time of day, same format.
        time3: third time of day, same format.

    Returns:
        The matching subset of ``df`` (empty when nothing matches).
    """
    index = df.index
    positions = np.sort(np.concatenate(
        [index.indexer_at_time(t) for t in (time1, time2, time3)]))
    # indexer_at_time yields integer positions, so select strictly by
    # position rather than relying on .ix falling back to positional mode.
    return df.iloc[positions]

In [None]:
# AMD prices at 09:31, 10:11 and 16:00, from 2017-6-15 onwards.
amd_at_key_times = at_two_times(amd, "09:31", "10:11", "16:00")
amd_at_key_times.ix["2017-6-15":]


In [None]:
#results = run_pipeline(make_pipeline(), start_date='2016-1-1', end_date='2017-1-10')

dates = results.index.get_level_values(0).unique()

l = list()
for i, date in enumerate(dates):
    if i > 0: 
        print date.strftime("%Y-%m-%d"),
        df = results.ix[date]
        pan = my_get_pricing(results, dates[i-1], date)
        prevday = pan[:,:390,:]
        today   = pan[:,390:,:]
        prev_dscr=prevday.price.fillna(method='ffill').fillna(method='backfill').pct_change().dropna().describe().T
        prev_dscr = prev_dscr.rename(columns=dict([(c, "yesterday_"+c) for c in prev_dscr.columns]))
        today_dscr = today.price.ix[:60].fillna(method='ffill').fillna(method='backfill').pct_change().dropna().describe().T
        today_dscr = today_dscr.rename(columns=dict([(c, "today_"+c) for c in today_dscr.columns]))
        gap = pd.DataFrame({"gap":today.price.ix[0] / prevday.price.ix[-1] -1})
        turnover = pd.DataFrame({"turnover_impact":today.turnover.ix[0] / df.yesterday_turnover })
        df_concat = pd.concat([df, prev_dscr, today_dscr,gap,turnover], axis=1)        
        l.append(df_concat)


In [None]:
# Column names of the first per-date frame.
list(l[0].columns)



In [None]:
# Scatter of gap vs. today's mean bar return for the rows whose
# yesterday_turnover is above its 98% quantile, coloured by yesterday_std.
df = pd.concat(l)
import matplotlib.cm as cm
fig = plt.figure()

turnover_cutoff = df.yesterday_turnover.dropna().quantile(0.98)
# A further "& (df.yesterday_mean > 0.001)" condition was tried here.
x = df[df.yesterday_turnover > turnover_cutoff]
#x.plot.scatter(x = "gap", y = "today_mean", ylim=[-0.003, 0.003])#xlim= [0,0.0025], ylim=[-0.002, 0.002]

im = plt.scatter(
    x.gap,
    x['today_mean'],
    c=x['yesterday_std'],  # third data series; determines the point colours
    linewidths=0,
    alpha=1,
    cmap=cm.seismic,
)
# plt.ylim(-0.002, 0.003)
fig.colorbar(im)



In [None]:
results = run_pipeline(make_pipeline(), start_date='2015-1-1', end_date='2016-1-10')
dates = results.index.get_level_values(0).unique()
turnover_thresholds = [0]#range(0,6)
gapup_thresholds = [0]#range(0,6)
xys = [(t,g) for t in turnover_thresholds for g in gapup_thresholds]

l = list()
d = dict()
for i, date in enumerate(dates): 
    if i > 0:
        print date.strftime("%Y-%m-%d"),
        pan = my_get_pricing(results, dates[i-1], date)
        for t, g in xys:
            df_gapups = find_gapup(results, pan[:,:390,:], date, t/100.0, g/100.0)
            df_gapup_sids_data_for_an_hour = get_gapup_data2(pan, df_gapups)
            if df_gapup_sids_data_for_an_hour is not None:
                if not df_gapup_sids_data_for_an_hour.empty :
                    k = "%s_%s" % (t/100.0, g/100.0)
                    print len(df_gapup_sids_data_for_an_hour.columns)
                    if k not in d.keys():
                        d[k] = df_gapup_sids_data_for_an_hour
                    else:
                        d[k] = pd.concat([d[k],df_gapup_sids_data_for_an_hour], axis=1)
                #l.append(("%s_%s" % (t/100.0, g/-100.0), df_gapup_sids_data_for_an_hour))



In [None]:
# Stack the last previous-day statistics on top of the transposed df.
df_transposed = df.T
pd.concat([prev_dscr, df_transposed])

In [None]:
results = run_pipeline(make_pipeline(), start_date='2015-1-1', end_date='2016-1-7')
dates = results.index.get_level_values(0).unique()
turnover_thresholds = [0] #range(0,6)
gapup_thresholds = [0] #range(0,6)
xys = [(t,g) for t in turnover_thresholds for g in gapup_thresholds]

l = list()
d = dict()
for date in dates: 
    print date.strftime("%Y-%m-%d"),
    pan = my_get_pricing(results, date)
    for t, g in xys:
        df_gapups = find_gapup(results, pan, date, t/100.0, g/100.0)
        df_gapup_sids_data_for_an_hour = get_gapup_data(pan, df_gapups)
        if not df_gapup_sids_data_for_an_hour.empty:
            k = "%s_%s" % (t/100.0, g/100.0)
            if k not in d.keys():
                d[k] = df_gapup_sids_data_for_an_hour
            else:
                d[k] = pd.concat([d[k],df_gapup_sids_data_for_an_hour], axis=1)
        #l.append(("%s_%s" % (t/100.0, g/-100.0), df_gapup_sids_data_for_an_hour))


In [None]:
# Plot the 25% quantile across all collected paths, bar by bar.
path_stats = d['0.0_0.0'].T.describe()
path_stats.ix['25%'].plot()
        

In [None]:
# Columns of `prevdate` whose mean bar-to-bar return is positive.
# NOTE(review): in the visible notebook `prevdate` only exists as a local
# inside get_gapup_data2 -- confirm it is defined in the notebook namespace.
filled_prevdate = prevdate.fillna(method='ffill').fillna(method='backfill')
dscrb = filled_prevdate.pct_change().dropna().describe()
has_positive_mean = dscrb.loc['mean'] > 0
dscrb.loc[:, has_positive_mean].T.index
