
```
-- Data queries
-- Find the futures that can be traded in a given month.
-- Logic: the target month must lie between the month of the first trading
-- date (ftdate) and the month of the last trading date (lasttrade_date).
SELECT
    Name,
    Date(ftdate) AS start,
    Date(lasttrade_date) AS end
FROM future
WHERE Name LIKE 'IH%'
    AND toYYYYMM(Date('2021-12-01')) BETWEEN toYYYYMM(ftdate) AND toYYYYMM(lasttrade_date)
ORDER BY Name;

-- Find the options that can be traded in a given month.
-- Logic:
--   1. Keep every 50ETF option whose startdate..lasttradingdate month range covers the target month.
--   2. Call and put options sharing the same strike price are later matched into a pair.
--   3. Pair columns: C = call option code, P = put option code, K = strike price,
--      Pair = 4 characters of the call code (from position 7) + '-' + the same slice of the put code,
--      begin / end = start date and last trading date.
-- directionprice carries the strike with a sign: negative when Name contains '沽' (put), positive otherwise.
CREATE TEMPORARY TABLE opt AS
SELECT
    Name,
    startdate,
    lasttradingdate,
    concat(splitByChar('.', Code)[2], splitByChar('.', Code)[1]) AS code,
    toInt64(extract(Name, '月(\d+)')) AS price,
    IF(match(Name, '沽'), -toInt64(extract(Name, '月(\d+)')), toInt64(extract(Name, '月(\d+)'))) AS directionprice
FROM option
WHERE Name LIKE '50ETF%'
    AND toYYYYMM(Date('2021-12-01')) BETWEEN toYYYYMM(Date(startdate)) AND toYYYYMM(Date(lasttradingdate))
ORDER BY code;;

-- Pair every call option (directionprice > 0) with the put option
-- (directionprice < 0) that has the same start date, last trading date and strike.
-- All columns are qualified with their side of the self-join so that C / K
-- never depend on how the engine resolves an unqualified name, and the
-- ordering has a tie-breaker so that LIMIT returns a deterministic set.
SELECT
    a.code AS C,
    b.code AS P,
    a.price AS K,
    concat(substring(a.code, 7, 4), '-', substring(b.code, 7, 4)) AS Pair,
    date(a.startdate) AS begin,
    date(a.lasttradingdate) AS end
FROM
    (SELECT code, price, startdate, lasttradingdate FROM opt WHERE directionprice > 0) AS a
INNER JOIN
    (SELECT code, price, startdate, lasttradingdate FROM opt WHERE directionprice < 0) AS b
    ON a.startdate = b.startdate
    AND a.lasttradingdate = b.lasttradingdate
    AND a.price = b.price
ORDER BY K, C
LIMIT 30
```

In [8]:
import pandas as pd
import pymysql
from clickhouse_driver import Client


class pickup_db(object):
    """Context manager that opens a database connection and yields a query handle.

    ``storage`` selects the backend:

    * ``"mysql"``      - connects through ``pymysql`` (autocommit on) and yields a cursor.
    * ``"clickhouse"`` - creates a ``clickhouse_driver.Client`` and yields the client itself.

    On exit the cursor (when it is a separate object) and the connection are
    released. Failures while releasing are logged instead of raised so that an
    exception coming out of the ``with`` body is never masked.

    Raises:
        ValueError: if ``storage`` is neither ``"mysql"`` nor ``"clickhouse"``.
    """

    def __init__(self, storage):
        if storage == "mysql":
            self.client = pymysql.connect(host='127.0.0.1', user='root', password='root', database='stock_arbitrage',
                                          autocommit=True)
            self.cursor = self.client.cursor()
        elif storage == "clickhouse":
            self.client = Client('127.0.0.1', database='stock_arbitrage')
            self.cursor = self.client
        else:
            # ValueError is still an Exception, so existing handlers keep working.
            raise ValueError("error storage")

    def __enter__(self):
        return self.cursor

    def __exit__(self, type, value, traceback):
        import logging

        log = logging.getLogger(__name__)
        # For MySQL the cursor is a distinct object and must be closed on its own.
        if self.cursor is not self.client:
            self._release(self.cursor, ("close",), log)
        # clickhouse_driver.Client offers disconnect() rather than close(); the
        # pymysql connection offers close(). Use whichever the client provides.
        self._release(self.client, ("disconnect", "close"), log)

    @staticmethod
    def _release(resource, method_names, log):
        """Call the first available method in ``method_names`` on ``resource``; log failures."""
        for method_name in method_names:
            method = getattr(resource, method_name, None)
            if callable(method):
                try:
                    method()
                except Exception:
                    log.warning("failed to release %r via %s()", resource, method_name, exc_info=True)
                return
        log.warning("%r has none of the release methods %s", resource, ", ".join(method_names))


# load price
def load(type, codes, start, end):
    """Load aggregated price rows for a set of instrument codes from ClickHouse.

    The query selects ``Code``, ``toStartOfDay(toDateTime(Time))`` (as ``Time``)
    and the averages of ``Price``, ``Volume`` and ``Amount``, grouped and ordered
    by ``Code, Time``.

    Args:
        type: Instrument category; picks the source table
            ("stock" -> sh_sz_bin, "future" -> cfe, "option" -> ashr_option).
        codes: Iterable of instrument codes matched against the ``Code`` column.
        start: Lower bound of the ``Time between`` filter.
        end: Upper bound of the ``Time between`` filter.

    Returns:
        pandas.DataFrame indexed by ``Time`` with the columns
        ``Code``, ``Price``, ``Volume`` and ``Amount``. Empty when ``codes`` is empty.

    Raises:
        KeyError: if ``type`` is not one of the known categories.
    """
    types_table = {
        "stock": "sh_sz_bin",
        "future": "cfe",
        "option": "ashr_option",
    }
    columns = ['Code', 'Time', 'Price', 'Volume', 'Amount']

    # The table name cannot be bound as a parameter; it only ever comes from the fixed map above.
    table = types_table[type]

    code_params = tuple(str(c) for c in codes)
    if not code_params:
        # "Code in ()" is not valid SQL, and nothing could match anyway.
        return pd.DataFrame(columns=columns).set_index("Time")

    # Values are handed to the driver as parameters so that it does the quoting;
    # a tuple is rendered as a parenthesised list, which is what IN expects.
    sql = (
        "SELECT Code, toStartOfDay(toDateTime(Time)) as Time, avg(Price) as Price, "
        "avg(Volume) as Volume, avg(Amount) as Amount"
        " FROM {table} WHERE Code in %(codes)s AND Time between %(start)s and %(end)s "
        "group by Code, Time  order by Code, Time "
    ).format(table=table)

    with pickup_db("clickhouse") as db:
        rows = db.execute(sql, {"codes": code_params, "start": start, "end": end})

    return pd.DataFrame(list(rows), columns=columns).set_index("Time")


def load_options():
    """Return the call/put pairs of 50ETF options tradable in December 2021.

    Two statements, separated by ``;;`` in the SQL text, are run on the same
    ClickHouse client: the first creates the temporary table ``opt`` and the
    second joins calls and puts that share start date, last trading date and
    strike. The target month is hard-coded in the SQL as ``'2021-12-01'``.

    Returns:
        list of ``[C, P, K, Pair]`` lists taken from the first four columns of
        the final select (call code, put code, strike, pair label), ordered by K.
    """
    # Raw string: the regex escape in the SQL must reach ClickHouse unchanged
    # and is not a valid Python string escape.
    sql = r"""
-- 查询可交易的期权
CREATE temporary TABLE opt AS
    SELECT Name,
       startdate,
       lasttradingdate,
       concat(splitByChar('.', Code)[2], splitByChar('.', Code)[1]) AS code,
       toInt64(extract(Name, '月(\d+)')) AS price,
       IF(match(Name, '沽'), -toInt64(extract(Name, '月(\d+)')), toInt64(extract(Name, '月(\d+)'))) AS directionprice
       FROM option WHERE Name LIKE '50ETF%'
       AND toYYYYMM(Date('2021-12-01')) between toYYYYMM(Date(startdate)) and toYYYYMM(Date(lasttradingdate))
    ORDER BY code;;

select
    code as C, b.code as P, price as K,
    concat(substring(code, 7, 4), '-', substring(b.code, 7, 4)) as Pair,
    date(a.startdate) as begin, date(a.lasttradingdate) as end
from
    (select * from opt where directionprice > 0) a
    join
    (select * from opt where directionprice < 0) b
on a.startdate = b.startdate and a.lasttradingdate = b.lasttradingdate
and a.price = b.price
order by K
"""
    # Skip blank chunks so a stray separator never sends an empty query.
    statements = [chunk for chunk in sql.split(";;") if chunk.strip()]

    with pickup_db("clickhouse") as db:
        # Everything before the last statement only prepares state (the temporary table).
        for setup_statement in statements[:-1]:
            db.execute(setup_statement)
        rows = db.execute(statements[-1])

    return [[row[0], row[1], row[2], row[3]] for row in rows]


# 50ETF options -> 50ETF -> SSE 50 index futures (IH) returns
start, end = "2021-12-01 00:00:00", "2021-12-31 23:59:59"
# 50ETF price data
stock_50etf = load("stock", ["SH510050"], start, end)
# SSE 50 index price data
stock_50index = load("stock", ["SH000016"], start, end)
# Futures price data
future_names = ['IH2112', 'IH2201', 'IH2202', 'IH2203', 'IH2206']
stock_50future = load("future", future_names, start, end)

# Every option pair tradable during December 2021; each item is [call code, put code, strike, pair label]
options = load_options()
# Option price data for all call and put codes
stock_50option = load("option", [opt[0] for opt in options] + [opt[1] for opt in options], start, end)
# Put the option prices side by side, aligned on a daily time grid
freq = '1d'
time = pd.date_range(pd.Timestamp(start), pd.Timestamp(end), freq=freq)
stock_50options = pd.DataFrame({'Tm': time})
stock_50options = stock_50options.set_index("Tm")

for opt in options:
    for code in opt[:2]:
        if code in stock_50options.columns:
            # The same contract can appear in more than one pair; one price column is enough
            # and a second one would make stock_50options[code] ambiguous.
            continue
        right = stock_50option.loc[stock_50option['Code'] == code, 'Price']
        right = right[~right.index.duplicated(keep='first')]
        # Name the series after the contract before joining so the column arrives correctly labelled.
        stock_50options = stock_50options.join(right.rename(code))

# Drop the days on which no option has a price at all
stock_50options.dropna(axis=0, how='all', inplace=True)
# One "50etf_copy_<pair>.<strike>" column per pair: call price + put price
for opt in options:
    stock_50options['50etf_copy_%s.%d' % (opt[3], opt[2])] = stock_50options[opt[0]] + \
                                                             stock_50options[opt[1]]


In [9]:
import numpy as np

# The previous step produced the base prices of several instruments:
#     stock_50options  every option pair tradable this month
#     stock_50future   the futures listed in future_names
#     stock_50etf      50ETF price
#     stock_50index    SSE 50 index price
# They are aligned on a daily time grid and combined here.
# 50index_returns is derived from the 50index price and then used to rescale
# the 50etf and 50etf_copy_xxx prices:
#     50etf          = 50etf * (1 + 50index_returns) * 1000
#     50etf_copy_xxx = 50etf_copy_xxx * (1 + 50index_returns) * 1000 + pair_K (strike of the pair)
# The resulting prices are collected in stocks_price.

time = pd.date_range(pd.Timestamp(start), pd.Timestamp(end), freq=freq)
stocks_price = pd.DataFrame({'Tm': time})
stocks_price = stocks_price.set_index("Tm")

# One column per futures contract
for name in future_names:
    right = stock_50future.loc[stock_50future['Code'] == name, 'Price']
    right = right[~right.index.duplicated(keep='first')]
    stocks_price = stocks_price.join(right.rename(name))

right = stock_50index['Price']
right = right[~right.index.duplicated(keep='first')]
stocks_price = stocks_price.join(right.rename("50index"))

right = stock_50etf['Price']
right = right[~right.index.duplicated(keep='first')]
stocks_price = stocks_price.join(right.rename("50etf"))

# One column per option pair (call + put price computed in the previous cell)
for opt in options:
    right = stock_50options['50etf_copy_%s.%d' % (opt[3], opt[2])]
    right = right[~right.index.duplicated(keep='first')]
    stocks_price = stocks_price.join(right)

stocks_price.dropna(axis=0, how='all', inplace=True)

# Log return of the index against the previous row. The first row has no
# predecessor, so its return is set to 0. This is done on the Series before it
# is stored, instead of a chained `frame[col][0] = ...` assignment, which
# pandas does not guarantee to write back into the frame.
index_returns = np.log(stocks_price['50index'] / stocks_price['50index'].shift(1))
if len(index_returns) > 0:
    index_returns.iloc[0] = 0
stocks_price['50index_returns'] = index_returns
stocks_price["50etf"] = stocks_price["50etf"] * (1 + stocks_price['50index_returns']) * 1000

for opt in options:
    name = '50etf_copy_%s.%d' % (opt[3], opt[2])
    stocks_price[name] = stocks_price[name] * (1 + stocks_price['50index_returns']) * 1000 + opt[2]

# print(stocks_price)

# print(stocks_price)


invalid value encountered in log



In [11]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

# Straight segments in the chart correspond to non-trading days or non-trading hours.
# Far-dated contracts are priced above near-dated ones; the near contract is close to expiry.
stocks_price['id'] = stocks_price.index
colors = px.colors.qualitative.Plotly
fig = go.Figure()

# Futures: dashed lines. The colour index wraps around so that a longer
# future_names list cannot run past the end of the palette.
for i, name in enumerate(future_names):
    fig.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price[name],
                             mode='lines', line=dict(color=colors[(3 + i) % len(colors)], dash='dash'),
                             name=name))

fig.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price['50index'],
                         mode='lines', line=dict(color=colors[0]), name="50index"))
fig.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price['50etf'],
                         mode='lines', line=dict(color=colors[1]), name="50etf"))

# Option pairs: thin long-dashed lines, all in the same colour
for opt in options:
    name = '50etf_copy_%s.%d' % (opt[3], opt[2])
    fig.add_trace(go.Scatter(x=stocks_price['id'],
                             y=stocks_price[name],
                             mode='lines', line=dict(color=colors[2], dash='longdash', width=0.5), name=name))

# The figure size (in pixels) is defined once, in the layout; show() gets no
# competing width/height override.
fig.update_layout(autosize=False, width=1200, height=800,
                  margin=dict(l=50, r=50, b=100, t=100, pad=4))
fig.show()

![img](https://picx.zhimg.com/80/v2-950a21371ecee537647849ac3e739d0f_1440w.png)

In [12]:
# Split the column names of stocks_price into two groups by prefix:
# "IH..." futures columns and "50etf_copy..." option-pair columns.
columns = stocks_price.columns
ih_columns = [label for label in columns if label.startswith("IH")]
etf_copy_columns = [label for label in columns if label.startswith("50etf_copy")]

# early_future: pick the earliest-expiring future
def early_future(row):
    """Return the first non-NaN value of ``row`` among the IH columns.

    The columns listed in the module-level ``ih_columns`` are scanned in sorted
    name order, so for names such as IH2112 / IH2201 the nearest contract month
    is tried first.

    Args:
        row: Mapping-like row (e.g. a pandas Series) indexed by column name.

    Returns:
        The first value that is not NaN, or NaN when every IH column is NaN
        (or there are no IH columns).
    """
    for col in sorted(ih_columns):
        price = row[col]
        if not np.isnan(price):
            return price
    # np.nan is the spelling that exists in every NumPy release; the np.NAN alias was removed in NumPy 2.0.
    return np.nan

# lower_option: pick the cheapest option-pair price
def lower_option(row):
    """Return the minimum of ``row`` over the columns in ``etf_copy_columns``."""
    pair_prices = row[etf_copy_columns]
    return np.min(pair_prices)

# IH_early:          price of the earliest-expiring future, per row
# 50etf_copy_lower:  lowest option-pair ("50etf_copy") price, per row
# From these two, compute the per-row return (floored at 0) and its running sum.
stocks_price['IH_early'] = stocks_price.apply(early_future, axis=1)
stocks_price['50etf_copy_lower'] = stocks_price.apply(lower_option, axis=1)

price_gap = stocks_price['IH_early'] - stocks_price['50etf_copy_lower']
price_sum = stocks_price['IH_early'] + stocks_price['50etf_copy_lower']
stocks_price['returns'] = np.maximum(price_gap / price_sum, 0)
stocks_price['cum_returns'] = stocks_price['returns'].cumsum()

In [13]:
colors = px.colors.qualitative.Plotly
import plotly.subplots as sp

# Three stacked panels: prices (row 1), per-row returns (row 2), cumulative returns (row 3).
figure = sp.make_subplots(rows=3, cols=1)

# add_trace(..., row=, col=) is the supported way to place a trace in a
# subplot; append_trace is deprecated.
figure.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price['IH_early'],
                            mode='lines', line=dict(color=colors[0], dash='dash'), name="IH_early"),
                 row=1, col=1)
figure.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price['50etf_copy_lower'],
                            mode='lines', line=dict(color=colors[1], dash='dash'), name="50etf_copy_lower"),
                 row=1, col=1)

figure.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price['returns'],
                            mode='lines', line=dict(color=colors[2]), name="returns"),
                 row=2, col=1)
figure.add_trace(go.Scatter(x=stocks_price['id'], y=stocks_price['cum_returns'],
                            mode='lines', line=dict(color=colors[3]), name="cum_returns"),
                 row=3, col=1)

# The figure size (in pixels) is defined once, in the layout; show() gets no
# competing width/height override.
figure.update_layout(autosize=False, width=1200, height=800, margin=dict(l=50, r=50, b=100, t=100, pad=4))
figure.show()

![img](https://pica.zhimg.com/80/v2-254f859144b638033176151a863de723_1440w.png)