In [None]:
import pandas as pd
import numpy as np
import statsmodels.api as sm 
import matplotlib.pyplot as plt

# Price history for the EWA / EWC pair.
# NOTE(review): get_pricing is not defined in this notebook — presumably
# provided by the hosting research environment; confirm.
df = get_pricing(
    ["EWA", "EWC"],
    start_date="11/27/2010",
    end_date="11/27/2017",
    fields="price",
)
# Relabel each column with the `.symbol` attribute of its current label.
df.columns = [asset.symbol for asset in df.columns]
# Row-over-row percentage change of every price column.
df_change = df.pct_change()

## ボリンジャーバンドでペアトレード
    

In [None]:
# Three stacked panels:
#   1. scatter of EWA vs EWC returns
#   2. raw price series
#   3. EWA/EWC price ratio with its 20-row rolling mean and +/- 1 std bands
fig = plt.figure(figsize=(20, 20))
ax1 = fig.add_subplot(311)
ax2 = fig.add_subplot(312)
ax3 = fig.add_subplot(313)

df_change.plot(kind="scatter", x="EWA", y="EWC", ax=ax1)
df.plot(ax=ax2)

s_ratio = df["EWA"] / df["EWC"]
s_ratio_mean = s_ratio.rolling(20).mean()
# Stored under its own name: assigning to `s_ratio.std` would replace the
# Series' std() method on this object.
s_ratio_std = s_ratio.rolling(20).std()

s_ratio.plot(ax=ax3)
s_ratio_mean.plot(ax=ax3)
(s_ratio_mean + s_ratio_std).plot(ax=ax3)
(s_ratio_mean - s_ratio_std).plot(ax=ax3)

# Horizontal reference line at the full-sample mean of the ratio.
ax3.axhline(s_ratio.mean())



In [None]:
# df_tmp is a second name for df (no copy is made), so every column added
# below also appears on df.
df_tmp = df

# Next-row return for each leg: pct_change shifted back by one row.
for _leg in ("EWA", "EWC"):
    df_tmp[_leg + "_dayreturn"] = df_tmp[_leg].pct_change().shift(-1)

# Price ratio with its 20-row rolling mean and standard deviation.
df_tmp["EWA/EWC"] = df_tmp["EWA"] / df_tmp["EWC"]
_ratio_window = df_tmp["EWA/EWC"].rolling(20)
df_tmp["EWA/EWC ma20"] = _ratio_window.mean()
df_tmp["EWA/EWC std20"] = _ratio_window.std()

# Bands sit one rolling standard deviation either side of the rolling mean.
df_tmp["upper band"] = df_tmp["EWA/EWC ma20"] + df_tmp["EWA/EWC std20"]
df_tmp["lower band"] = df_tmp["EWA/EWC ma20"] - df_tmp["EWA/EWC std20"]

_band_columns = ["EWA/EWC", "EWA/EWC ma20", "upper band", "lower band"]
df_tmp[_band_columns].plot()


In [None]:
# Zoom in on 2011-2012. `.loc` does the label-based date slice; the `.ix`
# indexer is no longer available in current pandas.
df_tmp.loc["2011":"2012", ["EWA/EWC", "EWA/EWC ma20", "upper band", "lower band"]].plot()

In [None]:
# Entry flags: ratio above the upper band -> short; below the lower band -> long.
# Bands are read from df_tmp (the frame they were written to) so this cell does
# not depend on df_tmp and df being the same object.
df_tmp["GoShort"] = df_tmp["EWA/EWC"] > df_tmp["upper band"]
df_tmp["GoLong"] = df_tmp["EWA/EWC"] < df_tmp["lower band"]

# Next-row P&L of each side; the boolean flag zeroes rows with no position.
# Short side: short EWA, long EWC.  Long side: long EWA, short EWC.
df_tmp["PLShort"] = df_tmp["EWA_dayreturn"]*df_tmp["GoShort"]*-1 + df_tmp["EWC_dayreturn"]*df_tmp["GoShort"]
df_tmp["PLLong"] = df_tmp["EWA_dayreturn"]*df_tmp["GoLong"] + df_tmp["EWC_dayreturn"]*df_tmp["GoLong"]*-1

df_tmp["PL"] = df_tmp["PLShort"] + df_tmp["PLLong"]



In [None]:
# Running total of the per-row P&L column, drawn as a line chart.
df_tmp.loc[:, "PL"].cumsum().plot()

## 過去二十日間の最小二乗法

1. USOとGLDの過去二十日間の最小二乗法で傾きを取得
1. 取得した傾きをGLDにかけて，期待値よりもどのくらい外れているかを port_value に格納
1. port_value の過去二十日間の zscore に応じてポジションを決定
    + -1 < zscore < 1 であれば，No Position
    + zscore < -1 であれば，Long Spread（Long USO, Short GLD）
    + 1 < zscore であれば，Short Spread（Long GLD, Short USO）
        

In [None]:
# Fresh copy of the EWA / EWC prices (drops the columns added by earlier cells).
df = get_pricing(
    ["EWA", "EWC"],
    start_date="11/27/2010",
    end_date="11/27/2017",
    fields="price",
)
df.columns = [asset.symbol for asset in df.columns]
df_change = df.pct_change()

# Next-row return for each leg: pct_change shifted back by one row.
for _leg in ("EWA", "EWC"):
    df[_leg + "_dayreturn"] = df[_leg].pct_change().shift(-1)



In [None]:
# 1
def get_hedge_ratio(df):
    """Fit EWA = a + hedge_ratio * EWC by ordinary least squares.

    Parameters
    ----------
    df : frame with "EWA" and "EWC" columns; all of its rows are used.

    Returns
    -------
    (a, hedge_ratio) : the two fitted parameters, constant term first.
    """
    # add_constant supplies the intercept column for the regressor.
    regressors = sm.add_constant(df["EWC"].values)
    fitted = sm.OLS(df["EWA"].values, regressors).fit()
    a, hedge_ratio = fitted.params
    return a, hedge_ratio

# Rolling hedge ratio: for each row, regress over the `num` rows strictly
# before it (the row itself is excluded by the `[:i]` slice).
num = 20
hedge_ratios = []
for i in range(len(df)):
    # Local name for the window so the notebook-level df_tmp is left untouched.
    window = df.iloc[:i].tail(num)

    if len(window) == num:
        hedge_ratios.append(get_hedge_ratio(window)[1])
    else:
        # Not enough history yet: mark as missing instead of using 0, which
        # would read as a real hedge ratio in the spread and rolling stats.
        hedge_ratios.append(np.nan)
df["hedge_ratio"] = hedge_ratios


In [None]:
# 2 
# Spread between EWA and the hedge-ratio-scaled EWC, then its 20-row z-score.
df["port_value"] = df["EWA"] - df["EWC"] * df["hedge_ratio"]
_port_window = df["port_value"].rolling(20)
df["port_value_ma20"] = _port_window.mean()
df["port_value_std20"] = _port_window.std()
df["port_value_zscore"] = (df["port_value"] - df["port_value_ma20"]).div(df["port_value_std20"])



In [None]:
# Entry flags from the z-score: above +1 -> short, below -1 -> long.
df["GoShort"] = df["port_value_zscore"].gt(1)
df["GoLong"] = df["port_value_zscore"].lt(-1)

In [None]:
# Next-row P&L per side; the boolean flag zeroes rows with no position and the
# EWC leg is scaled by the hedge ratio.
df["PLShort"] = (
    (df["EWA_dayreturn"] * df["GoShort"] * -1)
    + (df["EWC_dayreturn"] * df["GoShort"] * df["hedge_ratio"])
)
df["PLLong"] = (
    (df["EWA_dayreturn"] * df["GoLong"])
    + (df["EWC_dayreturn"] * df["GoLong"] * df["hedge_ratio"] * -1)
)
df["PL"] = df["PLShort"] + df["PLLong"]


In [None]:
# Cumulative P&L, skipping the first 40 rows.
df.iloc[40:]["PL"].cumsum().plot()

In [None]:
# Return arrays for both legs, dropping row 0 of each.
a1 = df_change["EWA"].values[1:]
a2 = df_change["EWC"].values[1:]

# Design matrix: a column of ones (intercept) next to the EWA returns.
nsample = a1.size
X = np.column_stack((np.full(nsample, 1), a1))

# OLS of EWC returns on EWA returns.
model = sm.OLS(a2, X)
results = model.fit()

In [None]:
# Fetch the estimated parameters (intercept first, then slope).
a, b = results.params
_fitted_line = a + b * a1
_caption = "a={:8.3f}, b={:8.3f}".format(a, b)

# Show the scatter of the data together with the fitted line.
plt.plot(a1, a2, 'o')
plt.plot(a1, _fitted_line)
plt.title(_caption)

plt.show()



In [None]:
def get_hedge_ratio(x, y):
    """Fit y = a + b * x by ordinary least squares.

    Parameters
    ----------
    x : array used as the single regressor (its `.size` sets the row count).
    y : array used as the dependent variable.

    Returns
    -------
    (a, b) : the two fitted parameters, constant term first.
    """
    # Column of ones for the intercept, placed before the regressor.
    design = np.column_stack((np.full(x.size, 1), x))
    a, b = sm.OLS(y, design).fit().params
    return a, b

# Rolling slope of EWC returns on EWA returns over the `num` rows strictly
# before each row.
num = 20
slopes = []
for i in range(len(df_change)):
    # Local name for the window so the notebook-level df_tmp is left untouched.
    window = df_change.iloc[:i].tail(num)

    # Fit only on a full window with no missing returns; a window holding a
    # NaN return cannot be fitted.
    has_full_history = len(window) >= num
    if has_full_history and window[["EWA", "EWC"]].notnull().values.all():
        x = window["EWA"].values
        y = window["EWC"].values
        slopes.append(get_hedge_ratio(x, y)[1])
    else:
        # Missing rather than 0, which would read as a real hedge ratio.
        slopes.append(np.nan)

# Index comes from the frame that was iterated, so the lengths always match.
s = pd.Series(slopes, name="hedge_ratio", index=df_change.index)
s.plot()
        

In [None]:
## Compute the 20-day moving average, standard deviation and z-score, then:
## -1 < zscore < 1 : no position
## -1 > zscore : long
## 1 < zscore : short


In [None]:
# 20-row rolling mean / std of every column, and the resulting z-score frame.
_window_20 = df.rolling(20)
df_mean = _window_20.mean()
df_std = _window_20.std()
df_zscore = (df - df_mean).div(df_std)

In [None]:
# Scatter of the EWA z-score against the EWC z-score.
df_zscore.plot.scatter(x="EWA", y="EWC")

In [None]:


# Last 20 rows of the frame (taken after skipping the first 30).
df_tmp = df.iloc[30:].tail(20)

# Price series of both legs, and the OLS fit of EWC on EWA over that window.
a1 = df_tmp["EWA"]
a2 = df_tmp["EWC"]
a, b = get_hedge_ratio(a1.values, a2.values)

# Scatter of the window with the fitted line on top.
_fitted_line = a + b * a1
plt.plot(a1, a2, 'o')
plt.plot(a1, _fitted_line)
plt.title("a={:8.3f}, b={:8.3f}".format(a, b))

plt.show()




In [None]:
# Fetch the estimated parameters (intercept first, then slope).
a, b = results.params

# Plot against the data the model was actually fitted on. The notebook-level
# a1 / a2 may have been rebound by another cell since `results` was computed,
# which would draw these coefficients over unrelated data.
fit_x = results.model.exog[:, 1]  # column 0 of the design matrix is the constant
fit_y = results.model.endog

# Show the scatter of the data together with the fitted line.
plt.plot(fit_x, fit_y, 'o')
plt.plot(fit_x, a + b * fit_x)
plt.title("a={:8.3f}, b={:8.3f}".format(a, b))

plt.show()



In [None]:
# Store the rolling slope as the frame's single "hedge_ratio" column.
# Assigning (aligned on the index) replaces any existing column of that name;
# concatenating would add a second one, making df["hedge_ratio"] a two-column
# frame, and re-running the cell would keep adding more.
df["hedge_ratio"] = s
df["port_value"] = df["EWC"] * df["hedge_ratio"]

In [None]:
# Full-sample z-score of port_value (whole-column mean and std), as a line chart.
df["port_value"].sub(df["port_value"].mean()).div(df["port_value"].std()).plot()

In [None]:

import pandas as pd
import numpy as np
import statsmodels.api as sm 
import matplotlib.pyplot as plt

# Price history for USO, GLD and SPY.
df = get_pricing(
    ["USO", "GLD", "SPY"],
    start_date="5/26/2006",
    end_date="4/9/2012",
    fields="price",
)
df.columns = [asset.symbol for asset in df.columns]
df_change = df.pct_change()

# Next-row return for each symbol: pct_change shifted back by one row.
for _ticker in ("USO", "GLD", "SPY"):
    df[_ticker + "_dayreturn"] = df[_ticker].pct_change().shift(-1)

# 1
def get_hedge_ratio(df):
    """Fit USO = a + hedge_ratio * GLD by ordinary least squares.

    Parameters
    ----------
    df : frame with "USO" and "GLD" columns; all of its rows are used.

    Returns
    -------
    (a, hedge_ratio) : the two fitted parameters, constant term first.
    """
    # add_constant supplies the intercept column for the regressor.
    regressors = sm.add_constant(df["GLD"].values)
    fitted = sm.OLS(df["USO"].values, regressors).fit()
    a, hedge_ratio = fitted.params
    return a, hedge_ratio

# Rolling hedge ratio: for each row, regress over the `num` rows strictly
# before it (the row itself is excluded by the `[:i]` slice).
num = 20
hedge_ratios = []
for i in range(len(df)):
    # Local name for the window so the notebook-level df_tmp is left untouched.
    window = df.iloc[:i].tail(num)

    if len(window) == num:
        hedge_ratios.append(get_hedge_ratio(window)[1])
    else:
        # Not enough history yet: mark as missing instead of using 0, which
        # would read as a real hedge ratio in the spread and rolling stats.
        hedge_ratios.append(np.nan)
df["hedge_ratio"] = hedge_ratios


# 2 
# Spread between USO and the hedge-ratio-scaled GLD, then its 20-row z-score.
df["port_value"] = df["USO"] - df["GLD"] * df["hedge_ratio"]
_port_window = df["port_value"].rolling(20)
df["port_value_ma20"] = _port_window.mean()
df["port_value_std20"] = _port_window.std()
df["port_value_zscore"] = (df["port_value"] - df["port_value_ma20"]).div(df["port_value_std20"])

# Entry flags from the z-score: above +1 -> short, below -1 -> long.
df["GoShort"] = df["port_value_zscore"].gt(1)
df["GoLong"] = df["port_value_zscore"].lt(-1)

# Next-row P&L per side; the GLD leg is scaled by the hedge ratio.
df["PLShort"] = (
    (df["USO_dayreturn"] * df["GoShort"] * -1)
    + (df["GLD_dayreturn"] * df["GoShort"] * df["hedge_ratio"])
)
df["PLLong"] = (
    (df["USO_dayreturn"] * df["GoLong"])
    + (df["GLD_dayreturn"] * df["GoLong"] * df["hedge_ratio"] * -1)
)
df["PL"] = df["PLShort"] + df["PLLong"]

# Cumulative strategy P&L next to cumulative SPY returns, skipping the first 40 rows.
df.iloc[40:][["PL", "SPY_dayreturn"]].cumsum().plot()


# メモ

+ そもそもどうしてペアトレードなのか？
+ ペアトレードとは（mean reversion）
+ ペアの選び方
+ 良いトレードとは
+ （相場の状況に影響されずに利益が得られますよ）
+ ペアの終焉


In [None]:
# Display the column labels currently on df.
df.columns

In [None]:
# Daily EWA / EWC prices from 2006 through 2017.
df = get_pricing(
    ["EWA", "EWC"],
    fields="price",
    frequency="daily",
    start_date="2006-1-1",
    end_date="2017-12-31",
)
df.columns = [asset.symbol for asset in df.columns]

In [None]:
# Price ratio of the two legs.
df["EWA/EWC"] = df["EWA"].div(df["EWC"])

In [None]:
# Top panel: both price series. Bottom panel: their ratio with its mean.
fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)

for _ticker in ("EWA", "EWC"):
    ax1.plot(df.index, df[_ticker], label=_ticker)
ax2.plot(df.index, df["EWA/EWC"], c="red", label="EWA/EWC")

for _axis in (ax1, ax2):
    _axis.legend()

# Dashed reference line at the full-sample mean of the ratio.
ax2.axhline(df["EWA/EWC"].mean(), linestyle="--")



In [None]:
# Next-row return for each leg: pct_change shifted back by one row.
for _leg in ("EWA", "EWC"):
    df[_leg + "_dayreturn"] = df[_leg].pct_change().shift(-1)

# Fixed-threshold flags on the price ratio.
df["Short"] = df["EWA/EWC"].gt(0.75)
df["Long"] = df["EWA/EWC"].lt(0.6)


In [None]:
# Cumulative P&L of the short side (short EWA, long EWC) on rows flagged
# "Short", restricted to rows up to and including 2012.
_through_2012 = df.loc[:"2012"]
_short_rows = _through_2012["Short"]
(_through_2012["EWA_dayreturn"][_short_rows] * -1 + _through_2012["EWC_dayreturn"][_short_rows]).cumsum().plot()

In [None]:
# Cumulative P&L of the short side (short EWA, long EWC) on every row flagged "Short".
(df.loc[df["Short"], "EWA_dayreturn"] * -1 + df.loc[df["Short"], "EWC_dayreturn"]).cumsum().plot()

In [None]:
# Display the first rows of df.
df.head()

In [None]:
term = 20
# Rolling mean of the price ratio, stored on df so the copy below carries it.
df["EWA/EWC_MA20"] = df["EWA/EWC"].rolling(term).mean()
# To restrict the sample instead, use: df_tmp = df.loc[:"2010"].copy()
df_tmp = df.copy()
df_tmp["EWA/EWC"].rolling(term).mean().plot()
df_tmp["EWA/EWC"].plot()

In [None]:
# Flag rows where the ratio sits above its `term`-row rolling mean. The mean is
# computed here so this cell does not rely on a precomputed column.
df_tmp["upper"] = df_tmp["EWA/EWC"] > df_tmp["EWA/EWC"].rolling(term).mean()

In [None]:
# Rolling mean of the ratio, computed locally so the cell does not depend on an
# "EWA/EWC_MA20" column having been created elsewhere.
_ratio_ma = df_tmp["EWA/EWC"].rolling(term).mean()

# Rows where the mean is still NaN compare False on both sides (no position).
df_tmp["upper"] = df_tmp["EWA/EWC"] > _ratio_ma
df_tmp["lower"] = df_tmp["EWA/EWC"] < _ratio_ma

# Ratio above its mean: short EWA, long EWC. Ratio below: long EWA, short EWC.
# NOTE(review): the column names look swapped relative to the positions taken;
# the summed plot below is unaffected. Confirm the intended labels.
df_tmp["longSpread"] = df_tmp["EWA_dayreturn"] * -1 * df_tmp["upper"] + df_tmp["EWC_dayreturn"] * df_tmp["upper"]
df_tmp["shortSpread"] = df_tmp["EWA_dayreturn"] * df_tmp["lower"] + df_tmp["EWC_dayreturn"] * -1 * df_tmp["lower"]

# Combined cumulative P&L of both sides.
df_tmp[["longSpread", "shortSpread"]].cumsum().sum(axis=1).plot()


In [None]:
# Parameters for a generic pair: the two symbols, the rolling window length and
# the date range.
sym1 = "GDX"
sym2 = "ABX"
term = 20
start = "11/27/2010"
end = "11/27/2017"

# `end` is passed explicitly; leaving `end_date=` without a value is a syntax error.
df = get_pricing([sym1, sym2], start_date=start, end_date=end, fields="price")
df.columns = [asset.symbol for asset in df.columns]

# Next-row return for each leg: pct_change shifted back by one row.
df["{}_dayreturn".format(sym1)] = df[sym1].pct_change().shift(-1)
df["{}_dayreturn".format(sym2)] = df[sym2].pct_change().shift(-1)

df["ratio"] = df[sym1] / df[sym2]

# Rolling mean / std of the ratio and the resulting z-score.
df["ma"] = df["ratio"].rolling(term).mean()
df["std"] = df["ratio"].rolling(term).std()
df["zscore"] = (df["ratio"] - df["ma"]) / df["std"]

# True where the ratio is above its rolling mean (False while the mean is NaN).
df["flag"] = df["ratio"] > df["ma"]
df["upper band"] = df["ma"] + df["std"]
df["lower band"] = df["ma"] - df["std"]


In [None]:
# Box plot of the row-over-row percentage change of the ratio.
df["ratio"].pct_change().plot(kind="box")

In [None]:
# Ratio, rolling mean and bands from 2016 onward, with the z-score on a secondary y-axis.
_plot_columns = ["ratio", "ma", "upper band", "lower band", "zscore"]
df.loc["2016":, _plot_columns].plot(secondary_y="zscore")

In [None]:
# Average z-score over the rows from 2016 onward.
df.loc["2016":, "zscore"].mean()

In [None]:
# Next-row returns of the two legs of the current pair. The column names are
# built from sym1 / sym2, matching how the columns were created; the EWA / EWC
# columns do not exist on this frame.
_ret1 = df["{}_dayreturn".format(sym1)]
_ret2 = df["{}_dayreturn".format(sym2)]

# Ratio above its rolling mean: short sym1, long sym2.
df["pl long"] = df["flag"] * (_ret1 * -1 + _ret2)
# Otherwise the opposite position, but only once the rolling mean exists
# (while it is NaN, `flag` is False, so `~flag` alone would open a position).
df["pl short"] = (~df["flag"] & df["ma"].notnull()) * (_ret1 + _ret2 * -1)
    

In [None]:
# Total per-row P&L of both sides, then its running sum as a line chart.
df["pl"] = df["pl long"].add(df["pl short"])
df.loc[:, "pl"].cumsum().plot()