In [None]:
# Import necessary Pipeline modules
from quantopian.pipeline import Pipeline, CustomFactor
from quantopian.research import run_pipeline
from quantopian.pipeline.filters.morningstar import Q500US
from quantopian.pipeline.factors import AverageDollarVolume, Latest,Returns
from quantopian.pipeline.factors.morningstar import MarketCap
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.pipeline.classifiers.morningstar import Sector
from quantopian.pipeline.data import morningstar
from quantopian.interactive.data.eventvestor import dividends_free  as dataset
#
from zipline.utils.tradingcalendar import trading_day  
import numpy as np
import pandas as pd 
from odo import odo
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Morningstar sector codes mapped to human-readable sector names.
# Keys are floats because the sector code turns into a float once it has been
# averaged in a groupby before being used as an index for relabelling.
sector_mappings = {
    101.0: "Basic Materials",
    102.0: "Consumer Cyclical",
    103.0: "Financial Services",
    104.0: "Real Estate",
    205.0: "Consumer Defensive",
    206.0: "Healthcare",
    207.0: "Utilities",  # was misspelled "Utilites"
    308.0: "Communication Services",
    309.0: "Energy",
    310.0: "Industrials",
    311.0: "Technology",
}

In [None]:
# Leamanショック頃
start = "2010-05-07"#"2008-9-29"
end = "2010-08-07"#"2009-3-10"
#end = "2008-12-10"
period = 30
spy = get_pricing("spy", start_date=start, end_date=end, fields='price', frequency='daily')
research_start_date = spy.pct_change().sort_values().index[0].date()
print "SPYが一番下がった日： ", research_start_date
research_end_date = pd.date_range(research_start_date, periods=period, freq=trading_day)[-1].date()
print "調査期間：%s ~ %s" % (research_start_date, research_end_date)


In [None]:
class DividendYield(CustomFactor):
    """Expose Morningstar's dividend yield as a pipeline factor.

    Nothing is calculated here: the value supplied for each asset is copied
    straight into the output.
    """
    window_length = 1
    inputs = [morningstar.valuation_ratios.dividend_yield]

    def compute(self, today, assets, out, dividend_yield):
        # With window_length = 1 the input window carries a single row, so the
        # last row is the whole input.
        out[:] = dividend_yield[-1]
        
# Build the pipeline: one output column per quantity used by the later cells.
pipe = Pipeline()
pipe.add(Returns(window_length=2), "Returns")
pipe.add(USEquityPricing.close.latest, "Latest")
pipe.add(MarketCap(), "MarketCap")
pipe.add(Sector(), "Sector")
pipe.add(morningstar.valuation_ratios.pe_ratio.latest, "PER")
# The column is labelled PBR (price-to-book ratio), so read the price-to-book
# field. The previous source, book_value_yield, is book value over price,
# i.e. the inverse of what the label promises.
pipe.add(morningstar.valuation_ratios.pb_ratio.latest, "PBR")
pipe.add(DividendYield(), 'DividendYield')

# Restrict the universe to the Q500US filter.
pipe.set_screen(Q500US())

In [None]:
# Evaluate the pipeline over the research window and keep the Returns column
# under its own name.
result = run_pipeline(pipe, research_start_date, research_end_date)
returns = result["Returns"]

In [None]:
# Hand-picked start dates; each one opens a research window of `period`
# entries on the trading-day calendar.
start_date_list = [
    "2008-01-17",
    "2008-07-02",
    "2008-09-09",
    "2010-05-20",
    "2011-08-04",
    "2011-11-21",
    "2015-08-24",
    "2015-09-28",
    "2016-01-13",
]
period = 30
research_dates = []

for start in start_date_list:
    research_start_date = pd.to_datetime(start).date()
    window = pd.date_range(research_start_date, periods=period, freq=trading_day)
    research_end_date = window[-1].date()
    # Store the window as a (first day, last day) pair.
    research_dates += [(research_start_date, research_end_date)]
    



In [None]:
# List every research window as "<first day> <last day>".
for start, end in research_dates:
    print("%s %s" % (start, end))

In [None]:
# Run the pipeline once per research window and collect the outputs in order.
# After the loop `result` stays bound to the output of the final window.
l = []
for start, end in research_dates:
    print((start, end))
    result = run_pipeline(pipe, start, end)
    l += [result]

In [None]:
# Scratch inspection. Only the final expression of a cell is echoed, so the
# column means are evaluated but the cell displays the whole list `l`.
first_window = l[0]
first_window["PBR"].reset_index().mean()
l


In [None]:
# For every research window, plot the ten assets with the highest cumulative
# return over that window.
for returns in l:
    # Each element of `l` is a full pipeline output, so compound the Returns
    # column only. Compounding every column would also push MarketCap, Sector,
    # PER, ... through log/exp and plot them next to the returns.
    log_returns = np.log1p(returns[['Returns']])
    # Summing log returns per asset and exponentiating gives the compounded
    # simple return of that asset over the window.
    log_change_by_asset = (
        log_returns.reset_index()
        .groupby(['level_1'])
        .sum()
        .sort_values(by='Returns', ascending=False)
    )
    change_by_asset = np.expm1(log_change_by_asset)
    # Rows are sorted best-first; .iloc makes the positional "first ten" explicit.
    change_by_asset.iloc[:10].plot(kind='bar', legend=False)
    
    

In [None]:
returns = pd.concat(l)
log_returns = returns.apply(lambda x: np.log(x+1))
log_change_by_asset = log_returns.reset_index().groupby(['level_1']).sum().sort_values(by='Returns', ascending=False)
change_by_asset = log_change_by_asset.apply(lambda x: np.exp(x)-1)
change_by_asset.plot(kind='bar', legend=False)
print "調査期間でのSP500っぽい銘柄の銘柄別累積変化率"

In [None]:
print "調査期間内で変化率がポジティブにだったSP500銘柄数", 
len(change_by_asset[change_by_asset.Returns > 0])

In [None]:
# Sector-level view of the most recent pipeline output held in `result`.
# Work on a copy: adding columns to a slice of `result` triggers
# SettingWithCopyWarning and may not stick.
return_by_sector = result[['Returns', 'Sector', 'MarketCap']].copy()
return_by_sector['LogReturn'] = np.log1p(return_by_sector['Returns'])
# Daily return multiplied by market cap.
return_by_sector['CapChange'] = return_by_sector['Returns'] * return_by_sector['MarketCap']

# Average per asset first, then average those per-asset means within each
# sector, and finally replace the sector codes by their names.
sum_returns_by_sector = (
    return_by_sector.reset_index()
    .groupby(['level_1']).mean()
    .groupby(['Sector']).mean()
    .rename(index=sector_mappings)
)

fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(1, 2, 1)
bx = fig.add_subplot(1, 2, 2)
sum_returns_by_sector['LogReturn'].sort_values(ascending=False).plot(kind='bar', ax=ax)
sum_returns_by_sector['CapChange'].sort_values(ascending=False).plot(kind='bar', ax=bx)
# Label each panel, and title the figure as a whole (plt.title would only
# label the last-created axes).
ax.set_title("LogReturn")
bx.set_title("CapChange")
fig.suptitle("by sector")


In [None]:
# Summed daily returns of every asset in sector code 207, sorted ascending
# and drawn as bars.
utilities = return_by_sector[return_by_sector.Sector == 207]
utilities.reset_index().groupby("level_1").sum().sort_values(by="Returns")['Returns'].plot(kind='bar')
plt.title("Utilities sector")

# Optional: list the securities behind the bars.
# for sid in return_by_sector[return_by_sector.Sector==207].reset_index().groupby("level_1").sum().index:
#     print sid, sid.security_name

In [None]:
# Per-asset feature table built from the most recent pipeline output.
# Copy first so the new columns are written to an independent frame and not
# to a slice of `result`.
r = result[['Returns', 'DividendYield', 'PER', 'PBR', 'MarketCap', 'Sector']].copy()

# log(1 + x) transform of every numeric input; stored as ln<column>.
for source_column in ['Returns', 'DividendYield', 'PER', 'PBR', 'MarketCap']:
    r['ln' + source_column] = np.log1p(r[source_column])

# Sector-group flags. isin is vectorised and, unlike int(v), does not raise
# when a sector value is missing.
r['Robust'] = r['Sector'].isin([207, 206, 308, 309])
r['Fragile'] = r['Sector'].isin([103, 104, 311])

# Collapse the daily rows into one row of means per asset.
r = r.reset_index().groupby(['level_1']).mean()

# One scatter panel per feature against the mean log return.
fig = plt.figure()
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=0.4)
for i, c in enumerate(['lnDividendYield', 'lnPER', 'lnPBR', 'lnMarketCap']):
    ax = fig.add_subplot(1, 4, i + 1)
    ax.scatter(r[c], r.lnReturns, alpha=0.2)
    ax.set_title(c)


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Fit a shallow random forest on 60% of the assets and compare predictions
# with the actual mean log returns on the remaining 40%.
regr = RandomForestRegressor(max_depth=5, random_state=0)

# Shuffle with a dedicated seeded generator so the train/test split, and with
# it every downstream result, is identical on each run. The forest was already
# seeded; the shuffle was not.
df = r.dropna()
shuffled_positions = np.random.RandomState(0).permutation(len(df))
df = df.iloc[shuffled_positions]

X = df[['lnDividendYield', 'lnPER', 'lnPBR', 'lnMarketCap', 'Robust', 'Fragile']]
y = df['lnReturns']

n = int(len(X) * 0.6)
X_train = X[:n]
y_train = y[:n]
X_test = X[n:]
y_test = y[n:]

regr.fit(X_train, y_train)
plt.scatter(regr.predict(X_test), y_test)
plt.xlabel("predicted lnReturns")
plt.ylabel("actual lnReturns")

In [None]:
# Test-set features with the model prediction and the actual value appended
# as the last two columns, ordered from lowest to highest prediction.
df_test = X_test.copy()
df_test['pred'] = regr.predict(X_test)
df_test['actual'] = y_test.values  # values taken by position, row for row
df_test = df_test.sort_values(by='pred')

In [None]:
# Display the test-set table (features, 'pred' and 'actual'), sorted by 'pred'.
df_test

In [None]:
n = 20
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)

# df_test is sorted by predicted return, so head(n) holds the lowest
# predictions (drawn in red) and tail(n) the highest (drawn in blue).
# Each price path is rebased to 1 at its first observation.
for subset, line_color in ((df_test.head(n), 'red'), (df_test.tail(n), 'blue')):
    for e in subset.index:
        ps = result.xs([e], level=[1])['Latest']
        ps = ps / ps.iloc[0]
        ps.plot(color=line_color, alpha=0.5)

In [None]:
from quantopian.interactive.data.quandl import cboe_vix
from odo import odo

# Materialise the CBOE VIX dataset as a DataFrame, then index it by its
# as-of date in chronological order.
vix_raw = odo(cboe_vix, pd.DataFrame)
vix_raw['asof_date'] = pd.to_datetime(vix_raw['asof_date'])
df = vix_raw.set_index('asof_date').sort_index()

In [None]:
# Rows on which the VIX closed above 40.
df.loc[df['vix_close'] > 40]

In [None]:
import talib

# Price SPY over the full span covered by the VIX frame.
start = df.index[0]
end = df.index[-1]
spy = get_pricing('spy', start_date=start, end_date=end, frequency='daily', fields=['price', 'high'])

# Moving averages and the relative distance of price from the 30-period one.
spy['MA10'] = talib.MA(spy['price'], timeperiod=10)
spy['MA30'] = talib.MA(spy['price'], timeperiod=30)
spy['diff_30'] = spy['price'] / spy['MA30'] - 1

# NOTE(review): the VIX closes are attached by position, not by date. This
# assumes the VIX frame and the SPY frame have exactly the same rows in the
# same order -- confirm, or align on the index instead.
spy['VIX'] = df['vix_close'].values

# Relative distance of price below the 90-row rolling maximum of the high.
spy['RollingMax90'] = pd.rolling_max(spy['high'], 90)
spy['diff_rm90'] = spy['price'] / spy['RollingMax90'] - 1

# Target days: VIX above 25 while price sits more than 10% under that maximum.
vix_elevated = spy['VIX'] > 25
deep_drawdown = spy['diff_rm90'] < -0.10
spy['target'] = vix_elevated & deep_drawdown


In [None]:
# Scatter of the VIX level against the drawdown from the 90-row rolling high.
spy.plot.scatter(x='diff_rm90', y='VIX')

In [None]:
# Index labels of the rows flagged as targets.
dates = spy.loc[spy['target']].index


In [None]:
# Spot check: is flagged date 43 more than 30 calendar days after flagged date 42?
dates[43] > dates[42] + pd.DateOffset(30)


In [None]:
# `period` days after `start`. A stray trailing comma used to turn this into
# a one-element tuple.
end_date = pd.to_datetime(start) + pd.DateOffset(period)

# Print every flagged date that comes more than 30 days after the flagged date
# immediately before it, i.e. the first day of each new cluster of flagged
# days. Pairing each date with its direct predecessor fixes an off-by-one: the
# old enumerate(dates[1:]) / dates[i-1] combination compared against the date
# two positions back and wrapped around to the last date on the first pass.
for previous_date, date in zip(dates[:-1], dates[1:]):
    if previous_date + pd.DateOffset(30) < date:
        print(date)
    

In [None]:
# Display the frame with every value moved down by one row.
df.shift(periods=1)