In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
from pandas.errors import EmptyDataError

# Row labels used in the aggregated result table (kept in Chinese because
# they are displayed as-is).
WEIGHT_MEAN_PRICE = '成交均价滑差'  # "mean trade price slippage"
TRADE_VOLUME = '成交手数'  # "traded lots"

# Show floats with two decimals in every rendered frame.
pd.set_option('display.float_format', '{:.2f}'.format)

def calc_mean_price_and_vol(df):
    """Reduce one group of trades to its volume-weighted mean price and total volume.

    Intended as a ``groupby(...).apply`` callback.

    Parameters
    ----------
    df : pandas.DataFrame
        Trades of a single group; must provide numeric ``price`` and
        ``volume`` columns. The frame is only read, never modified.

    Returns
    -------
    pandas.Series
        Two entries: ``WEIGHT_MEAN_PRICE`` holds
        ``sum(price * volume) / sum(volume)`` and ``TRADE_VOLUME`` holds
        ``sum(volume)``. The mean price is NaN when the total volume is zero.
    """
    volume = df.volume.sum()
    if volume == 0:
        # No traded volume: the weighted mean is undefined. Return NaN
        # explicitly instead of dividing by zero.
        mean_price = np.nan
    else:
        turnover = np.dot(df.price, df.volume)
        mean_price = turnover / volume
    return pd.Series({WEIGHT_MEAN_PRICE: mean_price, TRADE_VOLUME: volume})
    

In [None]:
# Trades to exclude from the analysis.
# Each id has the form '<source_account>.<date>.<tradeid>', matching the
# `filter_id` column built from those three columns when the data is loaded.
filter_list = [
    '811887.20191226.110427',
    '811887.20191227.58998'
]

# Trade-record folders, one per follower instance.
# NOTE(review): these are machine-specific absolute paths; adjust them (or
# move them into a config cell) before running elsewhere.
p1 = Path(r'D:\vnpy-2.0.7\examples\rohon_follower\.vntrader\trade')
p2 = Path(r'D:\vnpy-2.0.7\examples\ruida_follower\.vntrader\trade')
p3 = Path(r'D:\vnpy-2.0.7\examples\guomao_follower\.vntrader\trade')
folders = [p1, p2, p3]

# Directory listing order depends on the filesystem, so sort each folder's
# CSV files to make the processing order (and any printed report) reproducible.
group_files = [sorted(p.glob('*.csv')) for p in folders]
# Flatten the per-folder lists into one list, keeping folder order.
files = [f for g in group_files for f in g]
files

In [None]:
# Sanity-check each daily trade file: the two accounts should have traded the
# same volume. The comparison is made between the first two `gateway_name`
# columns, separately for every direction.
for file in files:
    try:
        df = pd.read_csv(file)
    except EmptyDataError:
        # Completely empty file: nothing to check. Skipping here also keeps a
        # stale `df` from a previous iteration from being checked again.
        continue
    if df.empty:
        continue

    # Rows: direction, columns: gateway_name, values: total traded volume.
    res = df.groupby(['gateway_name', 'direction'])['volume'].sum().unstack().T
    try:
        comp = res.iloc[:, 0] - res.iloc[:, 1]
    except Exception:
        # Typically fewer than two gateways are present in the file, so the
        # second column does not exist. Report it and move on; there is no
        # valid `comp` to evaluate for this file.
        print('unknown error')
        print(file)
        print(res)
        print("=" * 50)
        continue

    # Test every direction on its own: summing the differences would let
    # opposite-signed mismatches cancel out. NaN (a direction missing on one
    # side) compares unequal to 0 and is therefore reported as well.
    if (comp != 0).any():
        print("Trade count is not identical:")
        print(file)
        print(res)
        print("=" * 50)


In [None]:
# Read every trade file and stack the records into a single frame.
dflist = []
for file in files:
    try:
        dflist.append(pd.read_csv(file))
    except EmptyDataError:
        # Only completely empty files are skipped; any other read problem
        # should surface instead of silently dropping data.
        continue

df = pd.concat(dflist, ignore_index=True)
# The id parts are concatenated below, so they must all be strings.
id_columns = ['date', 'source_account', 'tradeid']
df[id_columns] = df[id_columns].astype('str')

# Drop duplicated trade records: when the market-data connection drops and
# reconnects, the same trades are pushed again.
df.drop_duplicates(inplace=True)

# Remove the trades listed in filter_list.
# filter_id has the form '<source_account>.<date>.<tradeid>'.
df['filter_id'] = df['source_account'] + '.' + df['date'] + '.' + df['tradeid']
df = df[~df['filter_id'].isin(filter_list)].copy()


In [None]:
# Aggregate per group: volume-weighted mean price and total traded volume for
# every (source_account, gateway_name, direction) combination.
group_keys = ['source_account', 'gateway_name', 'direction']
per_group = df.groupby(group_keys).apply(calc_mean_price_and_vol)
# Move the innermost key (direction) into the columns, then transpose so the
# (metric, direction) pairs become rows and each (source_account,
# gateway_name) pair becomes a column.
res = per_group.unstack().T
res

In [None]:
# Compute the slippage per account: within each pair of adjacent columns,
# the second column minus the first.
# NOTE(review): the pairing is positional. It assumes `res` has exactly six
# columns, two per account, in the order implied by the labels below. The
# groupby sorts its keys and source_account was cast to str, so '10032518'
# may sort before '811887' -- confirm the labels against the displayed `res`.
for label, first_col in (('811887S', 0), ('918068S', 2), ('10032518S', 4)):
    res[label] = res.iloc[:, first_col + 1] - res.iloc[:, first_col]

# Placeholder for the combined figures. It is created as float because float
# values are written into it later; writing floats into an int column is
# deprecated in recent pandas versions.
res['all'] = 0.0
res

In [None]:
# Derive each account's weight from its traded volume.
# Rows 2 and 3 of `res` hold the traded-volume figures (presumably the long
# and short direction, going by the variable names -- confirm against `res`).
# Columns 1, 3 and 5 are the second column of each account pair.
weighted_cols = [1, 3, 5]

# Sum only the columns that are being weighted, not the whole row. The derived
# columns (per-account slippage and 'all') are overwritten by later cells, so
# including them would change the result when this cell is re-run. Summing
# exactly the weighted columns also guarantees the weights add up to 1.
long_vol = res.iloc[2, weighted_cols].sum()
short_vol = res.iloc[3, weighted_cols].sum()

long_weights = [res.iloc[2, col] / long_vol for col in weighted_cols]
short_weights = [res.iloc[3, col] / short_vol for col in weighted_cols]
# long_weights, short_weights, sum(long_weights), sum(short_weights)

In [None]:
# Fill the 'all' column (position 9) with the volume-weighted average
# tracking slippage and the total volumes.
slippage_cols = slice(6, 9)  # the three per-account slippage columns
all_col = 9

# Rows 0 and 1: weighted average of the three per-account slippages.
for row, weights in ((0, long_weights), (1, short_weights)):
    res.iloc[row, all_col] = np.dot(weights, res.iloc[row, slippage_cols])

# Rows 2 and 3: the total volumes the weights were derived from.
for row, total in ((2, long_vol), (3, short_vol)):
    res.iloc[row, all_col] = total
res

In [None]:
# Prepare the data to display: in the two volume rows, overwrite the
# slippage columns (positions 6-8) with the values from columns 0, 2 and 4,
# i.e. the first column of each account pair.
first_of_pair = [0, 2, 4]
for volume_row in (2, 3):
    res.iloc[volume_row, 6:9] = res.iloc[volume_row, first_of_pair].values

# Keep only the derived columns (slippage per account plus 'all') as an
# independent copy and name the two column levels for presentation.
n_res = res.iloc[:, 6:].copy()
n_res.columns.names = ['account', '']
n_res

In [None]:
# df2 = df.groupby(['source_account', 'gateway_name', 'direction'])['price'].agg(['mean', 'count']).unstack().T
# df2['slippage'] =  np.abs(df2['RPC'] - df2['ROHON'])
# df2
