In [12]:
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Optional
from copy import copy
plt.style.use('ggplot')

# Contract multipliers; add_multiplier() looks them up by upper-cased product code.
with open('multiplier.json', 'r') as f:
    multis = json.load(f)

# Direction labels compared against the `direction` column of the trade files.
DIR_LONG = '多'
DIR_SHORT = '空'

# Offset labels compared against the `offset` column of the trade files.
OFFSET_OPEN = '开'
OFFSET_CLOSE = '平'
OFFSET_CLOSE_TODAY = '平今'
OFFSET_CLOSE_YESTERDAY = '平昨'

# Every offset label that is treated as closing a position.
CLOSE_SET = {OFFSET_CLOSE, OFFSET_CLOSE_TODAY, OFFSET_CLOSE_YESTERDAY}

In [13]:
def strip_digt(symbol: str) -> str:
    """Return the leading part of ``symbol`` that precedes its first digit.

    Args:
        symbol: Any string, e.g. a contract code such as ``"rb2010"``.

    Returns:
        Everything before the first character for which ``str.isdigit()``
        is true. The whole string is returned when it contains no digit,
        and an empty string when it starts with one.
    """
    for pos, char in enumerate(symbol):
        if char.isdigit():
            # First digit found: everything before it is the prefix.
            return symbol[:pos]
    return symbol


def add_multiplier(symbol: str) -> Optional[float]:
    """Look up the contract multiplier for ``symbol``.

    The symbol is reduced to its non-digit prefix with ``strip_digt`` and
    upper-cased before being used as the key into the module-level
    ``multis`` mapping.

    Args:
        symbol: Contract code, e.g. ``"rb2010"``.

    Returns:
        The value stored in ``multis`` for the upper-cased prefix, or
        ``None`` when the prefix is not a key of ``multis``.
    """
    product_code = strip_digt(symbol).upper()
    return multis.get(product_code, None)


def load_trade_file(filename: str) -> pd.DataFrame:
    """Read a header-less trade CSV and prepare it for PnL calculation.

    Args:
        filename: Path of a CSV file with no header row whose columns are,
            in order: datetime, exchange, trade_id, order_id, symbol,
            direction, offset, price, volume.

    Returns:
        A DataFrame without the ``trade_id`` and ``order_id`` columns and
        with two added columns: ``underlying`` (``strip_digt`` of the
        symbol) and ``multiplier`` (``add_multiplier`` of the symbol).
        ``volume`` is multiplied by -1 on rows whose direction equals
        ``DIR_SHORT``.
    """
    column_names = [
        'datetime', 'exchange', 'trade_id', 'order_id', 'symbol', 'direction',
        'offset', 'price', 'volume'
    ]
    trades = pd.read_csv(filename, header=None, names=column_names)
    trades = trades.drop(columns=['trade_id', 'order_id'])

    trades['underlying'] = trades['symbol'].map(strip_digt)
    trades['multiplier'] = trades['symbol'].map(add_multiplier)

    # Signed volume: short-side trades carry a negative quantity.
    is_short = trades['direction'] == DIR_SHORT
    trades.loc[is_short, 'volume'] *= -1
    return trades


def cal_trading_pnl(df: pd.DataFrame) -> pd.DataFrame:
    """Pair opening and closing trades by position and compute their profit.

    The first row is discarded when it is not an opening trade and the last
    row is discarded when it is an opening trade. The remaining opening rows
    and closing rows (offset in ``CLOSE_SET``) are each renumbered from zero
    and placed side by side, so the i-th open is matched with the i-th close.

    Args:
        df: Trades for a single instrument group with at least the columns
            ``datetime``, ``direction``, ``offset``, ``price``, ``volume``
            and ``multiplier``. Must contain at least one row. The frame is
            not modified.
            NOTE(review): positional pairing presumably relies on opens and
            closes alternating with matching sizes - confirm against the
            trade files.

    Returns:
        A DataFrame holding the opening rows, the matched closing fields
        prefixed with ``out_`` (``out_datetime``, ``out_direction``,
        ``out_offset``, ``out_price``) and a ``profit`` column equal to
        ``(out_price - price) * volume * multiplier``. ``volume`` is used
        as given; ``load_trade_file`` in this file stores short-side volume
        as a negative number.
    """
    # Work on a re-indexed copy: modifying the caller's frame in place
    # (inplace=True) may cause unexpected problems.
    trades = df.reset_index(drop=True)

    # Trim an unmatched leading non-open row and an unmatched trailing open row.
    lo = 0 if trades.iloc[0].offset == OFFSET_OPEN else 1
    hi = len(trades) - 1 if trades.iloc[-1].offset == OFFSET_OPEN else len(trades)
    trades = trades.iloc[lo:hi]

    opens = trades[trades['offset'] == OFFSET_OPEN].reset_index(drop=True)

    close_fields = ['datetime', 'direction', 'offset', 'price']
    closes = (
        trades.loc[trades['offset'].isin(CLOSE_SET), close_fields]
        .add_prefix('out_')
        .reset_index(drop=True)
    )

    # Column-wise concat aligns on the fresh 0..n-1 indexes of both sides.
    paired = pd.concat([opens, closes], axis=1)
    price_move = paired['out_price'] - paired['price']
    paired['profit'] = price_move * paired['volume'] * paired['multiplier']
    return paired

def t_df(df):
    """Debug helper: print ``df`` and return its top-left corner.

    Args:
        df: Any DataFrame.

    Returns:
        The first two rows and first five columns of ``df`` (fewer when
        ``df`` is smaller), selected by position.
    """
    print(df)
    return df.iloc[:2, :5]

In [16]:
# For each trade file: compute paired-trade PnL per underlying, save the
# detail rows to CSV, and print the per-underlying totals.
fnames = ['trade_20.csv', 'trade_50.csv']
for fname in fnames:
    df = load_trade_file(fname)
    # Run the open/close pairing separately for every underlying;
    # group_keys=False keeps the group key out of the result index.
    r = df.groupby('underlying', group_keys=False).apply(cal_trading_pnl)
    # The output name appends to the full input name, e.g. 'trade_20.csv_pnl.csv'.
    r.to_csv(f'{fname}_pnl.csv', encoding='utf-8-sig')
    # Total profit per underlying, followed by the grand total for the file.
    pnl = r.groupby('underlying')['profit'].sum()
    print(pnl, pnl.sum())

underlying
MA    -5230.0
RM   -10480.0
SM     -980.0
ZC    -4080.0
bu     5060.0
jd    -3720.0
p     -4780.0
pp     4130.0
rb    -5330.0
Name: profit, dtype: float64 -25409.99999999997
underlying
AP    -2640.0
CF    -5525.0
FG    -4680.0
MA    -9410.0
SM     3780.0
SR     5280.0
TA   -19060.0
ZC   -20680.0
a    -13050.0
ag   -10395.0
bu     4520.0
cs    -5130.0
cu    44300.0
jd   -11090.0
p      3040.0
pp    -7960.0
rb     5270.0
ru     6000.0
sp    13680.0
Name: profit, dtype: float64 -23750.00000000016


In [4]:
# Scratch cell: run the PnL pairing group by group for a single trade file.
fname = 'trade_20.csv'
df = load_trade_file(fname)

# One result frame per underlying; the group key `name` is not used.
dfs = [cal_trading_pnl(sub_df) for name, sub_df in df.groupby('underlying')]
# --- Disabled experiments below, kept for reference ---
# Combine the per-group results, save them and total the profit:
# r = pd.concat(dfs)
# r.to_csv(f'{fname}_pnl.csv', encoding='utf-8-sig')
# r.groupby('underlying')['profit'].sum()
# Inspect a single underlying ('pp'):
# ma_df = df[df.underlying == 'pp'].copy()
# # ma_df
# ma_df = cal_trading_pnl(ma_df)
# ma_df


# Net signed volume per underlying:
# df.groupby('underlying')['volume'].sum()
# NOTE(review): `data` is not defined in the cells visible here - confirm
# before re-enabling the two lines below.
# r = data.groupby('underlying').apply(t_df)
# r = data.groupby('underlying').apply(cal_trading_pnl)
# r
# r.to_csv(f'{fname}_pnl.csv', encoding='utf-8-sig')
# r.groupby('underlying')['profit'].sum()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
