In [None]:
import re
import pandas as pd
import numpy as np
from pathlib import Path, PurePath
from typing import List
from datetime import datetime, timedelta
from vnpy.trader.utility import round_to

from utility import get_output_path, get_output_folder, strip_digt, load_data

# Reference table read from 'future_basic_data.csv'; its first column becomes
# the index (presumably commodity codes -- TODO confirm against the file).
future_basic_data = pd.read_csv('future_basic_data.csv', index_col=0)
# The 'pricetick' column, used by clean_folder_logs as a lookup keyed by commodity.
pricetick = future_basic_data['pricetick']
# Lower-case the index labels so they line up with the lower-cased commodity
# codes produced by clean_vt_log and load_trade_file.
pricetick.index = pricetick.index.map(lambda x: x.lower())

# Translation table from the Chinese direction/offset labels to English names.
# compare_slip_level applies it with Series.map to the 'direction' and 'offset'
# columns of the merged frame.
ZH_TO_EN_DICT = {
    '多': 'long',
    '空': 'short',
    '开': 'open',
    '平昨': 'close_yesterday',
    '平': 'close',
    '平今': 'close_today'
}


def clean_vt_log(file_path: PurePath) -> List[dict]:
    """Extract the stop-order records from a single log file.

    Only lines containing '[Pre-stop Order]' or '[Stop Order]' are parsed.
    Each such line is split on commas and whitespace and the tokens are read
    by position, so a matching line with fewer tokens than expected raises
    IndexError.

    Args:
        file_path: Path of the UTF-8 encoded log file.

    Returns:
        One dict per matching line with the string-valued keys 'datetime',
        'commodity', 'order_type', 'direction', 'offset', 'test_price' and
        'volume', in file order.
    """
    order_markers = ('[Pre-stop Order]', '[Stop Order]')
    # Removes the '60m:' marker (or a bare ':') from the commodity token.
    strip_commodity = re.compile(r'(60m:)|(:)')
    # Tokens are separated by a comma, newline or whitespace character,
    # followed by any amount of extra whitespace.
    tokenizer = re.compile(r'(?:,|\n|\s)\s*')

    records = []
    with open(file_path, 'r', encoding='utf-8') as log_file:
        for raw_line in log_file:
            if not any(marker in raw_line for marker in order_markers):
                continue

            tokens = tokenizer.split(raw_line)
            records.append({
                # Date and time are the first two tokens of the line.
                'datetime': f"{tokens[0]} {tokens[1]}",
                'commodity': strip_commodity.sub(r'', tokens[4]).lower(),
                # The bracketed marker was split in two; rejoin it and drop the brackets.
                'order_type': f"{tokens[5]} {tokens[6]}".strip('[]'),
                'direction': tokens[7].replace('Direction:', ''),
                'offset': tokens[8].replace('Offset:', ''),
                'test_price': tokens[9].replace('Price:', ''),
                'volume': tokens[10].replace('Volume:', ''),
            })
    return records


def clean_folder_logs(folder: Path) -> pd.DataFrame:
    """Parse every '*.log' file in a folder into one order DataFrame.

    Args:
        folder: Directory holding the log files. It must be a concrete
            ``Path`` because ``glob`` is called on it.

    Returns:
        A DataFrame with the columns produced by clean_vt_log ('datetime',
        'commodity', 'order_type', 'direction', 'offset', 'test_price',
        'volume') plus:
            * 'pricetick' -- looked up from the module-level ``pricetick``
              series by commodity (NaN when the commodity is not listed),
            * 'price'     -- ``round_to(test_price, pricetick)``,
            * 'dt_flag'   -- 'datetime' floored to the hour, used as a merge key.
        The frame is empty, but still has all columns, when the folder holds
        no matching order lines.
    """
    log_columns = [
        'datetime', 'commodity', 'order_type', 'direction', 'offset',
        'test_price', 'volume'
    ]

    records = []
    # glob order depends on the filesystem; sorting keeps the row order reproducible.
    for log_file in sorted(folder.glob('*.log')):
        records.extend(clean_vt_log(log_file))

    # Naming the columns explicitly keeps the frame well-formed when no order
    # line was found, instead of failing on the column selection below.
    df = pd.DataFrame(records, columns=log_columns)
    df[['test_price', 'volume']] = df[['test_price', 'volume']].astype('float')
    # Vectorised parse; also yields a datetime64 column for an empty frame.
    df['datetime'] = pd.to_datetime(df['datetime'])
    df['pricetick'] = df['commodity'].map(pricetick)
    # round_to comes from vnpy.trader.utility; presumably it snaps the price to
    # a multiple of the tick -- confirm against vnpy.
    df['price'] = [
        round_to(test_price, tick)
        for test_price, tick in zip(df['test_price'], df['pricetick'])
    ]
    # Floor to the hour so sub-second components cannot leak into the merge key.
    df['dt_flag'] = df['datetime'].dt.floor('h')

    return df


def load_trade_file(file_path: PurePath) -> pd.DataFrame:
    """Load a header-less trade CSV and prepare it for merging with the logs.

    Args:
        file_path: Path of the CSV file. Its columns are read positionally as
            datetime, exchange, trade_id, order_id, symbol, direction, offset,
            price, volume.

    Returns:
        A DataFrame without 'trade_id' / 'order_id' and with two added columns:
            * 'commodity' -- ``strip_digt(symbol)`` lower-cased (strip_digt is a
              project helper; presumably it removes the digits of the contract
              code -- confirm),
            * 'dt_flag'   -- 'datetime' floored to the hour, used as a merge key.
        'volume' is negated on rows whose direction is '空'.
    """
    trade_columns = [
        'datetime', 'exchange', 'trade_id', 'order_id', 'symbol', 'direction',
        'offset', 'price', 'volume'
    ]
    trade_df = pd.read_csv(file_path, header=None, names=trade_columns, parse_dates=[0])
    trade_df = trade_df.drop(columns=['trade_id', 'order_id'])
    trade_df['commodity'] = trade_df['symbol'].map(lambda x: strip_digt(x).lower())
    # Signed volume: rows with direction '空' carry a negative volume.
    trade_df.loc[trade_df['direction'] == '空', 'volume'] *= -1
    # Floor to the hour. Clearing only minute and second would keep fractional
    # seconds, and such rows would never match the log's hour bucket.
    trade_df['dt_flag'] = trade_df['datetime'].dt.floor('h')
    return trade_df


def is_open(delta) -> int:
    """Return 1 when ``delta`` is at most five seconds, otherwise 0.

    compare_slip_level maps this over the difference between the trade time
    and the log time of each merged row.

    Args:
        delta: A time difference comparable with ``datetime.timedelta``.

    Returns:
        1 if ``delta <= 5 seconds`` (negative differences included), else 0.
    """
    threshold = timedelta(seconds=5)
    if delta <= threshold:
        return 1
    return 0


def _open_adjusted_slip(row: pd.Series) -> tuple:
    """Return ``(open_price, use_open, actual_slip)`` for one merged row.

    Rows not flagged ``is_open`` keep their merged ``slip_level`` and report an
    open price of 0. For flagged rows the hourly bar starting at ``dt_flag`` is
    loaded; when the bar's open is beyond the log's ``test_price`` in the trade
    direction, slippage is measured against that open instead.
    """
    if not row['is_open']:
        return 0, 0, row['slip_level']

    vt_symbol = f"{row['symbol']}.{row['exchange']}"
    bar_dt = row['dt_flag'].to_pydatetime()
    # load_data is a project helper; the first row of its result is read as the
    # bar, so an empty result presumably fails here -- confirm.
    k_line = load_data(vt_symbol, '1h', bar_dt, bar_dt)
    open_price = k_line.iloc[0]['open']

    if row['direction'] == 'long' and open_price > row['test_price']:
        return open_price, 1, (open_price - row['price_x']) / row['pricetick']
    if row['direction'] == 'short' and open_price < row['test_price']:
        return open_price, 1, (row['price_x'] - open_price) / row['pricetick']
    return open_price, 0, row['slip_level']


def compare_slip_level(trade: pd.DataFrame, backtest: pd.DataFrame, filename: str) -> pd.DataFrame:
    """Merge trades with logged orders and compute slippage in price ticks.

    Args:
        trade: Trades as returned by load_trade_file. The columns read here are
            'dt_flag', 'commodity', 'datetime', 'price', 'volume', 'direction',
            'offset', 'symbol' and 'exchange'.
        backtest: Logged orders with at least 'dt_flag', 'commodity',
            'datetime', 'test_price', 'price' and 'pricetick'.
        filename: Name of the CSV written through
            ``get_output_path(filename, root_default='server_log')``.

    Returns:
        The merged frame after dropping rows that contain any NaN. The
        overlapping 'price' / 'datetime' columns get pandas' default suffixes
        (``_x`` from ``trade``, ``_y`` from ``backtest``). Added columns:
            * 'slip_level'  -- (price_y - price_x) * sign(volume) / pricetick,
            * 'is_open'     -- 1 when the trade is at most 5 s after the log
              entry and falls in the 9 or 21 o'clock hour, else 0,
            * 'open', 'use_open', 'actual_slip' -- see _open_adjusted_slip.
    """
    # Join against the frame that was passed in, not a notebook-level global,
    # so the result depends only on the arguments.
    merge_df = pd.merge(trade, backtest, on=['dt_flag', 'commodity'], how='left')

    # +1 / -1 per row, taken from the signed trade volume.
    side = merge_df['volume'] / np.abs(merge_df['volume'])
    merge_df['slip_level'] = (merge_df['price_y'] - merge_df['price_x']) * side / merge_df['pricetick']
    merge_df['direction'] = merge_df['direction'].map(ZH_TO_EN_DICT)
    merge_df['offset'] = merge_df['offset'].map(ZH_TO_EN_DICT)

    merge_df['is_open'] = (merge_df['datetime_x'] - merge_df['datetime_y']).map(is_open)
    # Only the 9 and 21 o'clock buckets may keep the is_open flag.
    open_hours = (9, 21)
    non_open_filter = merge_df['dt_flag'].map(lambda dt: dt.hour not in open_hours)
    merge_df.loc[non_open_filter, 'is_open'] = 0

    adjusted = [_open_adjusted_slip(row) for _, row in merge_df.iterrows()]
    merge_df['open'] = [item[0] for item in adjusted]
    merge_df['use_open'] = [item[1] for item in adjusted]
    merge_df['actual_slip'] = [item[2] for item in adjusted]

    # Trades without a matching log entry carry NaN from the left join and are
    # removed here, together with any other incomplete row.
    merge_df = merge_df.dropna()
    merge_df.to_csv(get_output_path(filename, root_default='server_log'))
    return merge_df


In [None]:
# Window parameter of the run under review; it selects the input trade file
# and names the comparison output.
window = 55
filename = f"compare_slip_entry_{window}.csv"

file_path = get_output_path(f"trade_turtle_entry_{window}.csv", root_default='server_log')
# Load the trades and discard exact duplicate rows in one step.
trade_df = load_trade_file(file_path).drop_duplicates()
print(len(trade_df))
trade_df.head()

In [None]:
# Columns of the parsed log frame that the slippage comparison needs.
keep_items = ['commodity', 'dt_flag', 'datetime', 'test_price', 'price', 'pricetick']

folder = get_output_folder(f"turtle_entry_{window}", root_default='server_log')
# Parse every log in the folder and keep only the columns listed above.
log_df = clean_folder_logs(folder)[keep_items]
print(len(log_df))
log_df.head()


In [None]:
# Run the slippage comparison; the result is also written to `filename`.
df = compare_slip_level(trade=trade_df, backtest=log_df, filename=filename)
df

In [None]:
# Reuse compare_slip_level instead of repeating its body inline: the inline
# version referenced an undefined name (`trade`) and failed on a fresh kernel.
# The result is written to 'slip_compare.csv'.
merge_df = compare_slip_level(trade_df, log_df, 'slip_compare.csv')
merge_df
