In [None]:
from quantopian.pipeline.data.builtin import USEquityPricing
from quantopian.research import run_pipeline
from quantopian.pipeline import Pipeline
from quantopian.pipeline.factors import DailyReturns,SimpleMovingAverage
from quantopian.pipeline.experimental import QTradableStocksUS

from collections import Counter
import pandas as pd
import matplotlib.pyplot as plt 
import matplotlib.cm as cm

def make_pipeline():
    """Build the pipeline used by this notebook.

    Returns
    -------
    Pipeline
        One column, ``'dayreturn'`` (``DailyReturns`` computed from
        ``USEquityPricing.close``), screened to assets that are in
        ``QTradableStocksUS`` and whose 30-day simple moving average of the
        close is above 10.
    """
    close = [USEquityPricing.close]

    # Only test on the tradable universe that Quantopian maintains. Details:
    # https://www.quantopian.com/posts/working-on-our-best-universe-yet-qtradablestocksus
    tradable = QTradableStocksUS()

    # Also exclude penny stocks; results may differ without this filter.
    above_penny = SimpleMovingAverage(inputs=close, window_length=30) > 10

    return Pipeline(
        columns={'dayreturn': DailyReturns(inputs=close)},
        screen=tradable & above_penny,
    )



In [None]:
results = run_pipeline(make_pipeline(),start_date="2010-1-1", end_date="2018-2-22" )
#results["initial"] = [sym.symbol[0]  for sym in results.index.get_level_values(1)]
#results["count"] = [len(sym.symbol) for sym in results.index.get_level_values(1)]

#by_initial = results.groupby(by="initial")
#by_initial.median().plot(kind="bar")

# by_initial.median().loc[list("CSAPEMFBITD")].plot(kind="bar")
# by_count = results.groupby(by="count")
# by_count.median().plot(kind="bar")


In [None]:
results["initial"] = [sym.symbol[0]  for sym in results.index.get_level_values(1)]
results["count"] = [len(sym.symbol) for sym in results.index.get_level_values(1)]

In [None]:
# How many distinct tickers (over the whole result set) start with each letter.
unique_assets = results.index.get_level_values(1).unique()
syms = [asset.symbol for asset in unique_assets]
data = [ticker[0] for ticker in syms]

counter = Counter(data)
df_initials = pd.DataFrame(counter.most_common(), columns=["initial", "count"])
total = df_initials["count"].sum()
df_initials["pct"] = df_initials["count"] / total
df_initials.sort_values(by="pct", ascending=False)



In [None]:
results["initial"] = [sym.symbol[0]  for sym in results.index.get_level_values(1)]
results["count"] = [len(sym.symbol) for sym in results.index.get_level_values(1)]

by_initial = results.groupby(by="initial")
by_initial["dayreturn"].median().plot(kind="bar")

In [None]:
by_initial.median()[list("CASMPTBENR")].plot(kind="bar")

In [None]:
by_count = results.groupby(by="count")
by_count.median().plot(kind="bar")

In [None]:
syms = results.loc["2018-1-2"].index.get_level_values(1).unique()
syms = [sym.symbol for sym in syms]

In [None]:
from collections import Counter
data = [sym[0] for sym in syms]

counter = Counter(data)
df_initials = pd.DataFrame(counter.most_common(), columns=["initial", "count"])
df_initials["pct"] = df_initials["count"] / df_initials["count"].sum()
df_initials.sort_values(by="pct", ascending=False)

In [None]:
df_initials["count"].sum()

In [None]:
# NOTE(review): "sector" and "five_days_return" are not produced by the
# make_pipeline() shown in this notebook (it only adds "dayreturn"); these
# cells presumably relied on a different pipeline run -- confirm before re-running.
# Histograms of "five_days_return" for the rows whose "sector" equals 206 and 311
# (presumably sector codes -- TODO confirm their meaning).
results[results["sector"] == 206]["five_days_return"].dropna().hist(bins=100)
results[results["sector"] == 311]["five_days_return"].dropna().hist(bins=100)

In [None]:
# Day of week of each row's date (level 0 of the index); pandas numbers Monday as 0.
results["dayofweek"] = results.index.get_level_values(0).dayofweek


In [None]:
# Compare dayofweek == 2 (Wednesday, blue) with dayofweek == 4 (Friday, red, half transparent).
results[results['dayofweek'] == 2]["five_days_return"].dropna().hist(bins=500, color="#5F9BFF",)
results[results['dayofweek'] == 4]["five_days_return"].dropna().hist(bins=500, alpha=.5, color="#F8766D",)


In [None]:
# Same comparison, limited to rows whose "sector" equals 311.
results[(results['dayofweek'] == 2) & (results["sector"] == 311)]["five_days_return"].dropna().hist(bins=100, color="#5F9BFF",)
results[(results['dayofweek'] == 4) & (results["sector"] == 311)]["five_days_return"].dropna().hist(bins=100, alpha=.5, color="#F8766D",)


In [None]:
df = get_pricing(["UCO", "USO"], fields='price')
df.columns =map(lambda x: x.symbol, df.columns)

In [None]:
(df.UCO / df.USO ).plot()

In [None]:
df = get_pricing(["NUGT", "DUST"], fields='price', frequency='minute', start_date='2018-01-03', end_date='2018-10-20',)


In [None]:
#df.index = df.index.apply(lambda x: x.tz_localize("Japan").tz_convert('US/Eastern'))
def convert_to_ustime(df):
    """Return a copy of ``df`` whose index is expressed in US/Eastern time.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a timezone-aware ``DatetimeIndex``. It is left untouched.

    Returns
    -------
    pandas.DataFrame
        Same columns and values as ``df``; the index is converted to the
        ``US/Eastern`` timezone and named ``"date"``.

    Raises
    ------
    TypeError
        If the index is timezone-naive (raised by ``tz_convert``).
    """
    # Work on a copy so the caller's frame does not silently gain a "date"
    # column, and convert the whole index at once instead of per timestamp.
    converted = df.copy()
    converted.index = converted.index.tz_convert('US/Eastern')
    # Downstream helpers expect the index to be called "date".
    converted.index.name = "date"
    return converted


In [None]:
df_ustime = convert_to_ustime(df)


In [None]:
s = """
 BRZU
 CURE
 DFEN
 DPK
 DPST
 DRIP
 DRN
 DRV
 DUSL
 DUST
 DZK
 EDC
 EDZ
 ERX
 ERY
 EURL
 EUXL
 FAS
 FAZ
 GASL
 GASX
 GUSH
 INDL
 JDST
 JNUG
 JPNL
 KORU
 LABD
 LABU
 LBJ
 MEXX
 MIDU
 MIDZ
 NAIL
 NUGT
 PILL
 RETL
 RUSL
 RUSS
 SOXL
 SOXS
 SPXL
 SPXS
 TECL
 TECS
 TMF
 TMV
 TNA
 TPOR
 TYD
 TYO
 TZA
 UBOT
 UTSL
 WDRW
 YANG
 YINN
"""
direxion_3x = [ticker.strip() for ticker in s.split()]
df = get_pricing(direxion_3x, fields='price', frequency='minute', start_date='2017-10-03', end_date='2018-10-20',)

df = convert_to_ustime(df)
    

In [None]:
df_1600 = df.at_time("16:00")
df_1545 = df.at_time("15:45")
df_1550 = df.at_time("15:50")


In [None]:
df_return = df_1600.reset_index().drop("date", axis=1) / df_1550.reset_index().drop("date", axis=1) - 1 

In [None]:
df_overnight = df_1600.reset_index().drop("date", axis=1).shift(1) / df_1550.reset_index().drop("date", axis=1) - 1 

In [None]:
df_overnight.head()

In [None]:
df_return.head()

In [None]:
# One scatter panel per ticker: x = df_overnight, y = df_return.
fig = plt.figure(figsize=(20, 20))
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=0.4)
n_rows = 10
# Enough columns for every ticker to get a panel in a grid of n_rows rows.
n_cols = len(df.columns) // n_rows + 1
for panel, asset in enumerate(df.columns, start=1):
    ax = fig.add_subplot(n_rows, n_cols, panel)
    ax.set_title(asset.symbol)
    key = symbols(asset)
    ax.scatter(df_overnight[key], df_return[key], alpha=0.3)
    
    

In [None]:
df_vol = get_pricing(direxion_3x, fields='volume', frequency='minute', start_date='2017-10-03', end_date='2018-10-20',)
df_vol = convert_to_ustime(df_vol)
df_vol["minute"] = df_vol.index.strftime("%H:%M") 

In [None]:
df_vol.groupby("minute").sum()
q = df_vol.groupby("minute").quantile(0.5)

grouped = df_vol.groupby("minute").sum()

In [None]:
# One bar-chart panel per ticker showing total volume per clock minute.
fig = plt.figure(figsize=(40, 100))
plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=0.2, hspace=0.4)
n_rows = 40
# Iterate the columns of the frame that is actually plotted (grouped) rather
# than those of the separate price frame.
n_cols = len(grouped.columns) // n_rows + 1

# x positions of the bars: one per clock minute.
positions = list(range(len(grouped)))

for panel, asset in enumerate(grouped.columns, start=1):
    ax = fig.add_subplot(n_rows, n_cols, panel)
    ax.set_title(asset.symbol)
    # Positions are passed positionally: the keyword was called `left` in old
    # matplotlib and `x` in current releases.
    ax.bar(positions, grouped[asset])
    
    

In [None]:
#grouped = df_vol.groupby("minute").sum()
c = grouped.columns


In [None]:
grouped[c[28]].plot(kind="bar", title=c[28], color="red")

In [None]:
i = 28
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i = 9
grouped[c[i]].plot(kind="bar", title=c[i], color="red")

In [None]:
i = 9
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =11
grouped[c[i]].plot(kind="bar", title=c[i], color='red')


In [None]:
i = 11
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =12
grouped[c[i]].plot(kind="bar", title=c[i], color='red')


In [None]:
i = 12
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =13
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 13
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =14
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 14
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =17
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 17
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =18
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 18
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =21
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 21
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =24
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 24
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =27
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 27
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =34
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 34
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =37
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 37
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =39
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 39
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =40
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 40
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =41
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 41
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =42
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
01i = 42
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =43
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 43
q[c[i]][-30:].plot(kind="bar", title=c[i])

In [None]:
i =44
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 44
q[c[i]][-30:].plot(kind="bar", title=c[i])

In [None]:
i =45
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 45
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =46
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 46
q[c[i]][-70:].plot(kind="bar", title=c[i])

In [None]:
i =47
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 47
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =51
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 51
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =55
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 55
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =56
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 56
q[c[i]][-20:].plot(kind="bar", title=c[i])

In [None]:
i =40
grouped[c[i]].plot(kind="bar", title=c[i], color='red')

In [None]:
i = 39
q[c[i]][-60:].plot(kind="bar", title=c[i])

In [None]:
def make_date_index(s, name):
    """Re-index a timestamp-indexed series by ``YYYYMMDD`` date strings.

    Parameters
    ----------
    s : pandas.Series
        Series with a ``DatetimeIndex``. The index may carry any name (or none).
    name : str
        Label of the single column in the returned frame.

    Returns
    -------
    pandas.DataFrame
        One column ``name`` holding the values of ``s`` in their original
        order; the index is named ``"dates"`` and holds each timestamp
        formatted as ``"%Y%m%d"``.
    """
    # Build the frame directly instead of resetting and dropping the old index,
    # which only worked when that index happened to be named "date".
    dates = pd.Index(s.index.strftime("%Y%m%d"), name="dates")
    return pd.DataFrame({name: s.values}, index=dates)

def make_std(s, start="9:31", end="15:30"):
    """Per-day standard deviation of intraday percent changes.

    Parameters
    ----------
    s : pandas.Series
        Intraday observations indexed by a ``DatetimeIndex``.
    start, end : str
        Clock times bounding the part of each day that is used
        (passed to ``between_time``).

    Returns
    -------
    pandas.DataFrame
        One column ``"std"``; the index is named ``"date"`` and holds the day
        formatted as ``"%Y%m%d"``. Days whose standard deviation is NaN or not
        positive are left out. The frame is empty when no day qualifies.
    """
    rows = []
    # pd.Grouper(freq='D') is the supported spelling of the removed pd.TimeGrouper.
    for date, g in s.groupby(pd.Grouper(freq='D')):
        std = g.between_time(start, end).pct_change().std()
        # NaN (too few observations) compares False here, so such days are skipped.
        if std > 0:
            rows.append((date.strftime("%Y%m%d"), std))
    # Naming the columns up front keeps set_index working when rows is empty.
    return pd.DataFrame(rows, columns=["date", "std"]).set_index("date")



x = df[symbols("TECS")].copy()

# Clock times whose daily snapshot becomes a column of df_test; each column is
# labelled with the time without its colon (e.g. "15:50" -> "1550").
snapshot_times = ["09:31", "15:41", "15:46", "15:49", "15:50", "15:51", "15:59", "16:00"]
snapshots = [
    make_date_index(x.at_time(clock), clock.replace(":", ""))
    for clock in snapshot_times
]

x_std = make_std(x)

df_test = pd.concat(snapshots + [x_std], axis=1)

# Previous row's 16:00 price.
df_test["pre_1600"] = df_test["1600"].shift(1)

# Returns from the previous 16:00 price to several late-session times.
for late in ["1541", "1546", "1549", "1550", "1551"]:
    df_test["pre_1600-" + late] = df_test[late] / df_test["pre_1600"] - 1

# Same-day returns, labelled "<from>-<to>".
same_day_legs = [
    ("0931", "1550"),
    ("1541", "1550"),
    ("1549", "1550"),
    ("1550", "1551"),
    ("1551", "1559"),
    ("1550", "1600"),
    ("1551", "1600"),
]
for begin, finish in same_day_legs:
    df_test[begin + "-" + finish] = df_test[finish] / df_test[begin] - 1

# Scatter of the 15:51->15:59 return against the return from the previous
# 16:00 price to 15:50, coloured by the 15:41->15:50 return.
fig = plt.figure()
im = plt.scatter(
    x=df_test["pre_1600-1550"],
    y=df_test["1551-1559"],
    c=df_test["1541-1550"],
    linewidths=0,
    alpha=1,
    cmap=cm.Reds,
)
fig.colorbar(im)


In [None]:

((df_test["pre_1600-1550"]>0.05)*1.0 * df_test["1551-1559"]).cumsum().plot()

In [None]:

x = df[symbols("SOXS")].copy()



In [None]:
df_test

In [None]:
spy = get_pricing("SPY", start_date="2002-01-02", end_date="2018-1-1")
spy.head()

In [None]:
#spy["week num"] = spy.index.isocalendar()[1]

spy["date"] = spy.index
spy["week num"] =spy["date"].apply(lambda x: x.isocalendar()[1])
spy["day return"] = spy["close_price"].pct_change()
spy.groupby(by="week num").sum()["day return"].plot(kind="bar")

In [None]:
spy.groupby(by="week num").mean()["day return"].plot(kind="bar")


In [None]:
import pandas as pd
#spy[spy["week num"]==48]["day return"].groupby(pd.TimeGrouper(freq='12M')).sum().plot(kind="bar")
spy[spy["week num"]==49]["day return"].groupby(pd.TimeGrouper(freq='12M')).sum().plot(kind="bar", color="green")


In [None]:
spy.loc["2002"]["close_price"].plot()

In [None]:
# Tickers collected under the name `vol`, one per line, split into a list.
vol = """
VXX
TVIX
SVXY
UVXY
VXXB
VIXY
ZIV
VIXM
VXZ
UOCT
VIIX
BOCT
POCT
UJUL
VXZB
EXIV
XVZ
PJUL
EVIX
BJUL
""".split()

# Tickers collected under the name `etf`, one per line, split into a list.
etf = """
SPY
IVV
VOO
VTI
VEA
QQQ
EFA
VWO
IEFA
IEMG
IJH
VTV
IWM
IWF
IJR
IWD
VUG
VIG
EEM
XLF
VO
VB
VYM
VEU
DIA
IVW
XLV
VGT
USMV
XLK
MDY
IWB
DVY
IWR
SDY
EWJ
ITOT
SCHF
IVE
XLE
RSP
SCHX
VGK
XLY
VV
SCHB
VBR
IXUS
""".split()

In [None]:
syms = list(set(etf) - set(vol))


df = get_pricing(syms, fields='close_price', start_date="2008-1-1", end_date="2010-1-1")

In [None]:
print syms

In [None]:
df = df.dropna(axis=1)

In [None]:
df_describe = df.pct_change().dropna().describe()

In [None]:
import matplotlib.pyplot as plt 
df_describe.loc['mean'].plot(kind="bar")



In [None]:
df_describe.loc['std'].plot(kind="bar")


In [None]:
df_vwo = get_pricing("VWO", start_date="2008-1-1", end_date="2010-1-1")
df_vwo[["price", "volume"]].plot(secondary_y="volume")


In [None]:

df_xlf = get_pricing("XLF", start_date="2008-1-1", end_date="2010-1-1")
df_xlf[["price", "volume"]].plot(secondary_y="volume")

In [None]:
df_describe.columns[0].symbol