In [None]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional
from copy import copy
# Draw every figure in this notebook with matplotlib's 'ggplot' style.
plt.style.use('ggplot')

# `multis` holds the parsed contents of multiplier.json; add_multiplier()
# queries it with the upper-cased product code of a symbol.
with open('multiplier.json') as f:
    multis = json.load(f)
    
# Direction labels as they appear in the trade files ('多' = long, '空' = short).
DIR_LONG = '多'
DIR_SHORT = '空'

# Offset labels: opening a position, and the three ways of closing one
# (plain close, close-today, close-yesterday).
OFFSET_OPEN = '开'
OFFSET_CLOSE = '平'
OFFSET_CLOSE_TODAY = '平今'
OFFSET_CLOSE_YESTERDAY = '平昨'

# Every offset label that counts as a closing trade.
CLOSE_SET = {OFFSET_CLOSE, OFFSET_CLOSE_TODAY, OFFSET_CLOSE_YESTERDAY}

In [None]:
def strip_digt(symbol: str) -> str:
    """Return the part of ``symbol`` that precedes its first digit.

    Everything from the first digit onwards is discarded, e.g.
    ``'rb2010' -> 'rb'``.  A symbol without any digit is returned unchanged,
    and a symbol that starts with a digit yields an empty string.
    """
    for position, char in enumerate(symbol):
        if char.isdigit():
            # Cut at the first digit; later non-digit characters are dropped too.
            return symbol[:position]
    return symbol


def add_multiplier(symbol: str) -> Optional[float]:
    """Look up the contract multiplier for ``symbol`` in ``multis``.

    The lookup key is the symbol's leading non-digit prefix, upper-cased.
    Returns whatever ``multis.get`` yields for that key, i.e. ``None`` when
    the key is not present.
    """
    product_code = strip_digt(symbol).upper()
    multiplier = multis.get(product_code)
    return multiplier


def load_trade_file(filename: str) -> pd.DataFrame:
    """Read a header-less trade CSV and prepare it for PnL calculation.

    Parameters
    ----------
    filename : str
        Path of a CSV file without a header row whose nine columns are, in
        order: datetime, exchange, trade_id, order_id, symbol, direction,
        offset, price, volume.

    Returns
    -------
    pd.DataFrame
        The trades without ``trade_id`` / ``order_id``, plus two derived
        columns: ``underlying`` (symbol prefix before the first digit) and
        ``multiplier`` (looked up via ``add_multiplier``).  ``volume`` is
        negated on rows whose direction equals ``DIR_SHORT``.
    """
    raw_columns = [
        'datetime', 'exchange', 'trade_id', 'order_id', 'symbol',
        'direction', 'offset', 'price', 'volume',
    ]
    trades = pd.read_csv(filename, header=None, names=raw_columns)
    trades = trades.drop(columns=['trade_id', 'order_id'])

    symbols = trades['symbol']
    trades['underlying'] = symbols.map(strip_digt)
    trades['multiplier'] = symbols.map(add_multiplier)

    # Give short trades a negative volume so the sign carries the direction.
    is_short = trades['direction'] == DIR_SHORT
    trades.loc[is_short, 'volume'] *= -1
    return trades


def cal_trading_pnl(df: pd.DataFrame) -> pd.DataFrame:
    """Pair opening trades with closing trades by position and compute profit.

    Parameters
    ----------
    df : pd.DataFrame
        Trades carrying at least the columns ``datetime``, ``direction``,
        ``offset``, ``price``, ``volume`` and ``multiplier``.  Must contain at
        least one row.  The caller's frame is not modified.

    Returns
    -------
    pd.DataFrame
        One row per (i-th open, i-th close) pair: every column of the opening
        trade, the close's ``datetime`` / ``direction`` / ``offset`` /
        ``price`` under an ``out_`` prefix, and
        ``profit = (out_price - price) * volume * multiplier``.

    Notes
    -----
    NOTE(review): pairing is purely positional, so this presumably expects
    opens and closes to alternate one-to-one within ``df`` -- confirm against
    the data.  Only a single leading close and a single trailing open are
    trimmed.
    """
    # Work on a re-indexed copy: modifying the source frame in place
    # (inplace=True) could cause unexpected problems for the caller.
    trades = df.reset_index(drop=True)
    last_pos = len(trades) - 1

    # A close in the first row has no matching open; an open in the last row
    # has no matching close.  Both checks run before anything is removed.
    unmatched = []
    if trades.iloc[0].offset != OFFSET_OPEN:
        unmatched.append(0)
    if trades.iloc[last_pos].offset == OFFSET_OPEN:
        unmatched.append(last_pos)
    if unmatched:
        trades = trades.drop(unmatched)

    is_open = trades['offset'] == OFFSET_OPEN
    is_close = trades['offset'].isin(CLOSE_SET)

    opens = trades[is_open].reset_index(drop=True)
    closes = (
        trades.loc[is_close, ['datetime', 'direction', 'offset', 'price']]
        .add_prefix('out_')
        .reset_index(drop=True)
    )

    # Both sides now have a fresh 0..n-1 index, so the column-wise concat
    # lines up the i-th open with the i-th close.
    paired = pd.concat([opens, closes], axis=1)
    price_move = paired['out_price'] - paired['price']
    paired['profit'] = price_move * paired['volume'] * paired['multiplier']
    return paired

def trade_stats(df: pd.DataFrame) -> pd.DataFrame:
    """Summarise win/loss statistics for a frame of per-trade profits.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain a numeric ``profit`` column, one row per trade.

    Returns
    -------
    pd.DataFrame
        A single-row frame with the columns:

        * ``profit_num``    -- number of rows with ``profit > 0``
        * ``loss_num``      -- number of rows with ``profit <= 0``
        * ``all_num``       -- total number of rows in ``df``
        * ``accuracy_rate`` -- ``profit_num / all_num`` (NaN for an empty frame)
        * ``profit_mean``   -- mean profit of the winning rows
        * ``loss_mean``     -- mean profit of the non-winning rows
        * ``ratio``         -- ``profit_mean / abs(loss_mean)``

    Notes
    -----
    Rows whose profit is NaN satisfy neither comparison, so they are counted
    in ``all_num`` only.
    """
    pos = df[df['profit'] > 0]
    neg = df[df['profit'] <= 0]
    total = len(df)

    # Means of an empty selection are NaN, which then propagates into `ratio`.
    profit_mean = pos['profit'].mean()
    loss_mean = neg['profit'].mean()

    # An empty frame (e.g. an empty group) used to raise ZeroDivisionError
    # here; report the rate as undefined instead.
    accuracy_rate = len(pos) / total if total else float('nan')

    d = {
        'profit_num': len(pos),
        'loss_num': len(neg),
        'all_num': total,
        'accuracy_rate': accuracy_rate,
        'profit_mean': profit_mean,
        'loss_mean': loss_mean,
        'ratio': profit_mean / abs(loss_mean)
    }
    return pd.DataFrame([d])


def t_df(df):
    """Debug helper: print ``df`` and return its top-left corner.

    The returned frame holds at most the first two rows and the first five
    columns of ``df``.
    """
    print(df)
    corner = df.iloc[:2, :5]
    return corner

In [None]:
# Trade files to process; `res_lst` collects one PnL frame per file, in this order.
fnames = ['trade_20.csv', 'trade_50.csv']
res_lst = []
for fname in fnames:
    df = load_trade_file(fname)
    # Compute PnL separately for each underlying; group_keys=False keeps the
    # group label out of the result's index.
    by_underlying = df.groupby('underlying', group_keys=False)
    r = by_underlying.apply(cal_trading_pnl)
    # Written next to the input as '<fname>_pnl.csv'; utf-8-sig adds a BOM.
    r.to_csv(f'{fname}_pnl.csv', encoding='utf-8-sig')
    res_lst.append(r)

In [None]:
# Unpack the per-file PnL frames; the order follows `fnames`
# (trade_20.csv first, then trade_50.csv).
t20, t50 = res_lst
# Per-underlying win/loss summary for the trade_20.csv results.
# Left as a bare expression so the notebook displays it as the cell output.
t20.groupby('underlying').apply(trade_stats)

In [None]:
# Per-underlying win/loss summary for the trade_50.csv results.
# Left as a bare expression so the notebook displays it as the cell output.
t50.groupby('underlying').apply(trade_stats)