In [None]:
import json
import pandas as pd
import numpy as np
from pathlib import Path
from datetime import datetime
# Render floats with two decimal places wherever pandas formats them for display.
pd.set_option('display.float_format', '{:.2f}'.format)

In [None]:
# Lookup table loaded from JSON; calc_trade maps it onto the 'account_id'
# column and uses the result as a per-account multiplier on the commission.
with open('commission_fol.json', 'r') as f:
    commission_multiple = json.load(f)

# vt_orderid values that calc_trade removes before computing statistics.
# Starts empty; the driver cells fill it via load_exclude_trades().
excluded_trades = set()
# Multiplier per product prefix (the part of the symbol before its first
# digit); calc_trade multiplies price * volume by it to obtain turnover.
# NOTE(review): presumably the futures contract multipliers -- confirm.
size_dict = {
    'IF': 300,
    'IH': 300,
    'IC': 200
}

def strip_digit(symbol: str):
    """Return the part of ``symbol`` that precedes its first digit.

    The whole string is returned when it contains no digit, and an empty
    string when it starts with one (for example ``"IF2009"`` -> ``"IF"``).
    """
    for position, char in enumerate(symbol):
        if char.isdigit():
            # Everything from the first digit onwards is discarded.
            return symbol[:position]
    return symbol

def open_trade_file(file):
    """Read a CSV file into a DataFrame, parsing its first column as dates.

    The file is decoded as UTF-8 first; if that raises ``UnicodeDecodeError``
    it is read again as GB2312.
    """
    read_options = {'parse_dates': [0]}
    try:
        return pd.read_csv(file, encoding='utf-8', **read_options)
    except UnicodeDecodeError:
        # Not valid UTF-8: retry with the GB2312 code page.
        return pd.read_csv(file, encoding='gb2312', **read_options)

def calc_trade(df: pd.DataFrame, account_type: str,
               commission_rate: float = 0.23 / 10000,
               fee_per_order: float = 1):
    """Summarise the trades of one account type into a single-row DataFrame.

    Rows are kept when their 'account_type' equals ``account_type`` and their
    'vt_orderid' is not in the module-level ``excluded_trades`` set.

    Args:
        df: Trade records. Reads the columns 'account_type', 'vt_orderid',
            'symbol', 'price', 'volume', 'account_id', 'direction' and 'date'.
        account_type: Value of the 'account_type' column to select.
        commission_rate: Fraction of turnover charged as commission, before
            the per-account multiplier from ``commission_multiple``.
        fee_per_order: Flat amount added to the commission for every
            selected row.

    Returns:
        A one-row DataFrame with the columns 'date', 'account', 'comission',
        'long_volume', 'long_turnover', 'long_cost', 'short_volume',
        'short_turnover', 'short_cost', 'trade_pnl', 'net_pnl' and
        'all_volume'. 'date' and 'account' are taken from the first selected
        row. 'long_cost' / 'short_cost' are volume-weighted average prices
        and are NaN for a side with zero volume.

    Raises:
        IndexError: If no row is left after filtering.
    """
    import warnings

    keep = (df['account_type'] == account_type) & ~df['vt_orderid'].isin(excluded_trades)
    df_sel = df[keep].copy()
    if df_sel.empty:
        # Previously surfaced as an opaque "single positional indexer is
        # out-of-bounds" from iloc[0]; same exception type, clearer message.
        raise IndexError(
            f"no trades left for account_type={account_type!r} "
            "after applying excluded_trades"
        )

    df_sel['size'] = df_sel['symbol'].map(strip_digit).map(size_dict)
    df_sel['com_multiple'] = df_sel['account_id'].map(commission_multiple)
    # A key missing from either lookup table yields NaN, which the .sum()
    # totals below skip silently. Surface that instead of under-reporting.
    for column, table_name in (('size', 'size_dict'),
                               ('com_multiple', 'commission_multiple')):
        unmapped = int(df_sel[column].isna().sum())
        if unmapped:
            warnings.warn(
                f"{unmapped} trade row(s) for account_type={account_type!r} have no "
                f"entry in {table_name}; their NaN values are skipped by the totals",
                stacklevel=2,
            )

    df_sel['turnover'] = df_sel['price'] * df_sel['volume'] * df_sel['size']
    # The 'comission' spelling is kept: it is the column/key name of the result.
    df_sel['comission'] = df_sel['turnover'] * df_sel['com_multiple'] * commission_rate

    long_side = df_sel[df_sel['direction'] == "多"]
    short_side = df_sel[df_sel['direction'] == "空"]

    def _avg_price(side: pd.DataFrame):
        # Volume-weighted average price of one side; NaN when nothing traded.
        volume = side['volume'].sum()
        if volume == 0:
            return float('nan')
        # skipna=False keeps a NaN price visible in the average rather than
        # dropping it from the numerator only.
        return (side['price'] * side['volume']).sum(skipna=False) / volume

    summary = {
        'date': df_sel['date'].iloc[0],
        'account': df_sel['account_id'].iloc[0],
        'comission': df_sel['comission'].sum() + len(df_sel) * fee_per_order,
        'long_volume': long_side['volume'].sum(),
        'long_turnover': long_side['turnover'].sum(),
        'long_cost': _avg_price(long_side),
        'short_volume': short_side['volume'].sum(),
        'short_turnover': short_side['turnover'].sum(),
        'short_cost': _avg_price(short_side),
    }
    # Trade PnL is short turnover minus long turnover.
    summary['trade_pnl'] = summary['short_turnover'] - summary['long_turnover']
    summary['net_pnl'] = summary['trade_pnl'] - summary['comission']
    summary['all_volume'] = summary['long_volume'] + summary['short_volume']

    return pd.DataFrame([summary])

def stats_trade_by_date(file):
    """Compare the 'source' and 'target' accounts for one trade file.

    The file is read with ``open_trade_file``, each account type is
    summarised with ``calc_trade``, and the two summaries are outer-merged on
    'date'. Difference columns are then derived and a subset of columns is
    returned under Chinese headers.

    Returns:
        DataFrame with the columns 日期, 标准户, 跟单户, 标准户笔数,
        跟单户笔数, 标准户净盈亏, 跟单户净盈亏, 净盈亏差 and 滑点差.
    """
    trades = open_trade_file(file)
    id_columns = ['account_id', 'tradeid']
    trades[id_columns] = trades[id_columns].astype('str')

    merged = pd.merge(
        calc_trade(trades, 'source'),
        calc_trade(trades, 'target'),
        how='outer',
        on='date',
        suffixes=('_source', '_target'),
    ).assign(
        net_pnl_diff=lambda m: m['net_pnl_source'] - m['net_pnl_target'],
        trade_pnl_diff=lambda m: m['trade_pnl_source'] - m['trade_pnl_target'],
        # Both cost differences are signed so that a positive value means the
        # target traded at the worse price (bought higher / sold lower).
        long_diff=lambda m: m['long_cost_target'] - m['long_cost_source'],
        short_diff=lambda m: m['short_cost_source'] - m['short_cost_target'],
        trade_diff=lambda m: (m['long_diff'] + m['short_diff']) / 2,
    )

    # Exported columns, in output order, with their Chinese headers.
    # The trade_pnl_* columns are computed above but currently not exported.
    exported = {
        'date': '日期',
        'account_source': '标准户',
        'account_target': '跟单户',
        'all_volume_source': '标准户笔数',
        'all_volume_target': '跟单户笔数',
        'net_pnl_source': '标准户净盈亏',
        'net_pnl_target': '跟单户净盈亏',
        'net_pnl_diff': '净盈亏差',
        'trade_diff': '滑点差',
    }
    return merged[list(exported)].rename(columns=exported)

def load_exclude_trades(folder):
    """Collect the order ids listed in ``<folder>/filter_traded/trade*.csv``.

    Args:
        folder: ``pathlib.Path`` of an account folder.

    Returns:
        List of the 'vt_orderid' values found across all matching files.
        An empty list is returned when the ``filter_traded`` sub-folder does
        not exist or contains no CSV file whose name starts with 'trade'.
    """
    filter_folder = folder.joinpath('filter_traded')
    if not filter_folder.exists():
        return []

    frames = [
        open_trade_file(file)
        for file in filter_folder.glob('*.csv')
        if file.name.startswith('trade')
    ]
    if not frames:
        # pd.concat raises ValueError on an empty list, so an existing but
        # empty filter folder must be handled here.
        return []

    return pd.concat(frames, axis=0)['vt_orderid'].to_list()


def stats_trade_by_folder(folder):
    """Build or refresh ``<folder>/pnl_result.csv`` from the trade files.

    Every ``trade*.csv`` in ``folder`` whose date (the text between the first
    '_' and the first '.' of the file name) is not already present in
    ``pnl_result.csv`` is processed with ``stats_trade_by_date``. New and
    existing rows are combined, sorted by 日期, and the running totals and the
    running mean of 滑点差 are recomputed over the whole table before it is
    written back.

    Args:
        folder: ``pathlib.Path`` of an account folder.

    Returns:
        The combined DataFrame, including the cumulative columns. When the
        folder has neither trade files nor an existing ``pnl_result.csv`` an
        empty DataFrame is returned and no file is written.
    """
    pnl_file = folder.joinpath("pnl_result.csv")

    pnl_df = None
    existed_records = set()
    if pnl_file.exists():
        pnl_df = pd.read_csv(pnl_file, parse_dates=[0], encoding='utf-8')
        # Dates already processed, in the same YYYYMMDD form as the file names.
        existed_records = {dt.strftime('%Y%m%d') for dt in pnl_df['日期']}

    frames = []
    for file in folder.glob('*.csv'):
        if not file.name.startswith('trade'):
            continue

        trade_date = file.name.split('.')[0].split('_')[1]
        if trade_date in existed_records:
            print(f"{trade_date}数据已存在")
            continue

        frames.append(stats_trade_by_date(file))

    if pnl_df is not None:
        frames.append(pnl_df)
    if not frames:
        # Nothing to aggregate. The previous code fell through with
        # new_pnl_df = None and crashed on the first column assignment.
        return pd.DataFrame()

    # join="inner" drops the cumulative columns that exist only in the stored
    # file whenever new rows are present; they are rebuilt below.
    new_pnl_df = pd.concat(frames, join="inner")
    # concat leaves duplicate index labels; use a clean positional index.
    new_pnl_df = new_pnl_df.sort_values(by="日期").reset_index(drop=True)

    items_to_process = [
        ('标准户笔数', '标准户累计笔数'),
        ('跟单户笔数', '跟单户累计笔数'),
        ('标准户净盈亏', '标准户累计净盈亏'),
        ('跟单户净盈亏', '跟单户累计净盈亏'),
        ('净盈亏差', '累计净盈亏差')
    ]
    # An expanding window is the same as rolling(window=len(df), min_periods=1)
    # but also works on a zero-row frame.
    for (item, item_cum) in items_to_process:
        new_pnl_df[item_cum] = new_pnl_df[item].expanding(min_periods=1).sum()
    new_pnl_df['平均滑点差'] = new_pnl_df['滑点差'].expanding(min_periods=1).mean()

    new_pnl_df.to_csv(pnl_file, index=False)
    return new_pnl_df

def stats_all(folders, all_pnl_file=None):
    """Combine the ``pnl_result.csv`` of several folders into one table.

    Args:
        folders: Iterable of ``pathlib.Path`` account folders. Folders
            without a ``pnl_result.csv`` are skipped.
        all_pnl_file: Where to write the combined CSV. Defaults to
            ``D:\\work\\all_pnl_result.csv``.

    Returns:
        The combined DataFrame sorted by 日期. The PnL columns are cast to
        int (``astype('int')`` truncates toward zero) and the two slippage
        columns are rounded to two decimals.

    Raises:
        ValueError: If none of the folders contains a ``pnl_result.csv``.
    """
    if all_pnl_file is None:
        all_pnl_file = Path(r'D:\work\all_pnl_result.csv')

    candidate_files = (folder.joinpath("pnl_result.csv") for folder in folders)
    pnls = [
        pd.read_csv(pnl_file, parse_dates=[0], encoding='utf-8')
        for pnl_file in candidate_files
        if pnl_file.exists()
    ]
    if not pnls:
        # Same exception type pd.concat would raise, with an actionable message.
        raise ValueError("no pnl_result.csv found in any of the given folders")

    all_pnl_df = pd.concat(pnls, join="inner")
    # concat leaves duplicate index labels; use a clean positional index.
    all_pnl_df = all_pnl_df.sort_values(by="日期").reset_index(drop=True)

    int_items = [
        '标准户净盈亏',
        '跟单户净盈亏',
        '净盈亏差',
        '标准户累计净盈亏',
        '跟单户累计净盈亏',
        '累计净盈亏差'
    ]
    all_pnl_df[int_items] = all_pnl_df[int_items].astype('int')

    round_float_items = ['滑点差', '平均滑点差']
    all_pnl_df[round_float_items] = all_pnl_df[round_float_items].round(2)

    all_pnl_df.to_csv(all_pnl_file, index=False)
    return all_pnl_df


def merge_trades(trade_folder, to_merge_days):
    """Merge the trade files of several days into one file.

    Every ``trade*.csv`` in ``trade_folder`` whose name contains one of
    ``to_merge_days`` is read, the rows are concatenated and sorted by 'dt'
    in descending order, the source files are renamed by appending
    '.automerge', and the result is written to
    ``trade_<last day in to_merge_days>.csv``.

    Args:
        trade_folder: ``pathlib.Path`` of the folder holding the trade files.
        to_merge_days: Sequence of day strings to look for in file names.

    Raises:
        ValueError: If no file matches any of the days.
        FileExistsError: If a '.automerge' backup already exists for a
            matching file (for example when the merge is run a second time).
    """
    # Materialise the listing up front: the files are renamed further down and
    # the directory must not change underneath a live glob iterator. any()
    # also ensures a file matching two days is handled only once.
    matched = [
        file for file in sorted(trade_folder.glob(r'trade*.csv'))
        if any(day in file.name for day in to_merge_days)
    ]
    if not matched:
        raise ValueError(
            f"no trade*.csv file in {trade_folder} matches {list(to_merge_days)}"
        )

    backups = [file.parent.joinpath(file.name + '.automerge') for file in matched]
    clashes = [backup.name for backup in backups if backup.exists()]
    if clashes:
        # Path.rename silently replaces an existing target on POSIX, which
        # would destroy the original per-day data on a repeated run.
        raise FileExistsError(f"backup file(s) already exist: {clashes}")

    # Build the merged table completely before touching any file, so a failure
    # while reading or sorting leaves the folder unchanged.
    merged = pd.concat([open_trade_file(file) for file in matched], axis=0)
    merged = merged.sort_values(by="dt", ascending=False)

    # The output name equals the last day's source file, so the sources have
    # to be moved out of the way before the merged file is written.
    for file, backup in zip(matched, backups):
        file.rename(backup)

    new_fp = trade_folder.joinpath(f"trade_{to_merge_days[-1]}.csv")
    merged.to_csv(new_fp, index=False)
    print("合并完成")

In [None]:
# Display the current exclusion set; it is only filled by the folder cells below.
excluded_trades

In [None]:
# First account folder: add its filtered order ids to the shared exclusion
# set (the set keeps accumulating across cells), then refresh and display
# the folder's PnL table.
folder1 = Path(r'D:\work\rd_liu')
excluded_trades.update(load_exclude_trades(folder1))
print(excluded_trades)
df = stats_trade_by_folder(folder1)
df

In [None]:
# Second account folder: same steps as for the first one. The exclusion set
# already holds whatever earlier cells added to it.
folder2 = Path(r'D:\work\rd_huang')
excluded_trades.update(load_exclude_trades(folder2))
print(excluded_trades)
df = stats_trade_by_folder(folder2)
df

In [None]:
# Day to inspect in the combined table; None is replaced by today's date below.
compare_day = None
# compare_day = datetime(2020, 7, 31)
folders = [folder1, folder2]
# folders = [folder1]

# Combine the pnl_result.csv of every folder (stats_all also writes the
# combined table to disk).
all_pnl_df = stats_all(folders)

if compare_day is None:
    compare_day = datetime.today()

# Show only the rows whose date equals compare_day (time of day dropped).
all_pnl_df[all_pnl_df['日期'] == pd.Timestamp(compare_day.date())]


In [None]:
# Single-file test: run the per-day statistics on one trade file.
file = Path(r'D:\work\rd_liu\trade_20200804.csv')
df = stats_trade_by_date(file)
df

In [None]:
# 合并多日成交记录
trade_folder = Path(r'D:\work\rd_huang')
to_merge_days = ['20200811', '20200812']

merge_trades(trade_folder, to_merge_days)