In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from pathlib import Path

# Let pandas print up to 200 rows before truncating a displayed frame.
pd.options.display.max_rows = 200

# Contract codes that analyze_result always adds to its skip list of
# abnormally priced contracts, on top of the ones read from the exception file.
excep_code_list = ['10000633.XSHG', '10000634.XSHG']

def backtest(df: pd.DataFrame, price: float = 0.002, drawback_mode: str = 'close'):
    """Simulate one entry at ``price`` on a contract's bars and hold to the last bar.

    The bars are walked in the order they appear in ``df``. The position is
    opened on the first bar whose ``low`` is not above ``price`` while its
    ``high`` is above ``price``. From then on the highest price seen is tracked:
    on the entry bar it is the ``close`` when ``drawback_mode`` is ``'close'``
    and the ``high`` for any other mode; on later bars it is always the ``high``.

    Args:
        df: Bars of one contract with the columns ``code``, ``trading_code``,
            ``date``, ``open``, ``high``, ``low`` and ``close``.
        price: Entry price threshold.
        drawback_mode: ``'close'`` to use the entry bar's close as that day's
            high; anything else uses the entry bar's high.

    Returns:
        ``None`` when the position is never opened. Otherwise a tuple
        ``(result_dict, exception_dict)``. ``result_dict`` holds ``start``,
        ``entry``, ``code``, ``trading_code``, ``end``, ``exit``, ``max_high``,
        ``duration``, ``max_db`` and ``long_date_gap`` (prices scaled by 10000).
        ``exception_dict`` is empty unless open, high and close of the entry
        bar are all more than 500 times its low; then it is a copy of
        ``result_dict`` plus the mean ratio under ``multiple``.
    """
    summary = {}
    anomaly = {}
    in_position = False
    peak = 0
    gap_seen = False
    prev_date = None

    for _, bar in df.iterrows():
        if not in_position:
            # Enter only when the bar straddles the threshold.
            if bar['low'] > price or not (bar['high'] > price):
                continue
            summary['start'] = bar['date']
            summary['entry'] = price * 10000
            in_position = True
            prev_date = bar['date']
            if drawback_mode == 'close':
                entry_day_peak = bar['close']
            else:
                entry_day_peak = bar['high']
            peak = max(peak, entry_day_peak)
            continue

        peak = max(peak, bar['high'])
        if gap_seen:
            # Once a gap has been recorded there is nothing left to detect.
            continue
        if prev_date and bar['date'] - prev_date > timedelta(days=1):
            gap_seen = True
        prev_date = bar['date']

    if not in_position:
        return

    # The position is closed on the last bar of the frame.
    last_bar = df.iloc[-1]
    summary['code'] = last_bar['code']
    summary['trading_code'] = last_bar['trading_code']
    summary['end'] = last_bar['date']
    summary['exit'] = last_bar['close'] * 10000
    summary['max_high'] = peak * 10000
    summary['duration'] = (summary['end'] - summary['start']).days
    summary['max_db'] = min(0, summary['entry'] - summary['max_high'])
    summary['long_date_gap'] = gap_seen

    # Flag entry bars whose open/high/close dwarf the low.
    entry_bar = df[df['date'] == summary['start']].iloc[0]
    ratios = [entry_bar[field] / entry_bar['low'] for field in ('open', 'high', 'close')]
    if all(ratio > 500 for ratio in ratios):
        anomaly.update(summary)
        anomaly['multiple'] = np.mean(ratios)

    return summary, anomaly


def load_hist_data():
    """Load, merge and filter the daily-bar CSV files under ``./daily_bar``.

    Every ``*.csv`` file in the ``daily_bar`` directory of the current working
    directory is read with ``index_col=0`` and ``parse_dates=[4]``. The frames
    are concatenated in sorted file order and reduced to the columns listed in
    ``keep_items``.

    Only rows whose ``trading_code`` has ``'M'`` at index 11 and whose
    ``status`` is ``'DELIST'`` are kept; this removes adjusted contracts and
    contracts that are still trading. A missing or too-short trading code is
    treated as a non-match instead of raising.

    Returns:
        A new DataFrame (a copy) holding the filtered rows.

    Raises:
        ValueError: If the ``daily_bar`` directory contains no CSV file.
    """
    keep_items = ['code', 'date', 'open', 'high', 'low', 'close', 'volume', 'position', 'trading_code', 'status']

    data_path = Path.cwd().joinpath('daily_bar')
    # Sort so the row order does not depend on the file system's listing order.
    files = sorted(data_path.glob('*.csv'))
    if not files:
        raise ValueError(f'No CSV files found in {data_path}')

    df_list = [pd.read_csv(file, index_col=0, parse_dates=[4]) for file in files]
    df = pd.concat(df_list, ignore_index=True)
    df = df[keep_items]

    def _is_unadjusted(trading_code) -> bool:
        # Slicing never raises on short strings; non-strings (e.g. NaN) never match.
        return isinstance(trading_code, str) and trading_code[11:12] == 'M'

    unadjusted = df['trading_code'].map(_is_unadjusted).astype(bool)
    delisted = df['status'] == 'DELIST'
    return df[unadjusted & delisted].copy()


def run_batch_backtest(price: float = 0.002, drawback_mode: str = 'close'):
    """Backtest every contract in the historical data and save the results.

    The data from ``load_hist_data`` is grouped by ``code`` and each group is
    passed to ``backtest``. Non-empty result and exception dicts are collected
    into two frames, which are written to the ``result`` directory under the
    current working directory. The directory is created when missing.

    About the maximum drawdown: a daily bar does not reveal whether the trade
    was triggered before or after that day's high, so on the order day the
    closing price is taken as the day's high, and later trading days use the
    real high.

    Args:
        price: Entry price threshold forwarded to ``backtest``.
        drawback_mode: Drawback mode forwarded to ``backtest``; also part of
            the output file names.

    Returns:
        ``(res_df, excep_df)``: the per-contract results and the contracts
        flagged by ``backtest`` as exceptions. Both frames always carry their
        full set of columns, even when they have no rows.
    """
    data = load_hist_data()
    res_list = []
    excep_list = []
    for _code, bar_df in data.groupby('code'):
        res = backtest(bar_df, price, drawback_mode)
        if not res:
            # backtest returns None when the position was never opened.
            continue
        res_dict, excep_dict = res
        if res_dict:
            res_list.append(res_dict)
        if excep_dict:
            excep_list.append(excep_dict)

    excep_order = ['code', 'trading_code', 'start', 'end', 'duration', 'multiple']
    new_order = ['code', 'trading_code', 'start', 'end', 'duration', 'entry', 'exit', 'max_high', 'max_db', 'long_date_gap']
    # Passing ``columns`` selects and orders the columns and, unlike indexing
    # afterwards, still works when a list is empty (e.g. no exception found).
    res_df = pd.DataFrame(res_list, columns=new_order)
    excep_df = pd.DataFrame(excep_list, columns=excep_order)

    # Save the results.
    result_dir = Path.cwd().joinpath('result')
    result_dir.mkdir(parents=True, exist_ok=True)
    res_df.to_csv(result_dir.joinpath(f'low_premium_{drawback_mode}_{str(int(price * 10000))}.csv'))
    excep_df.to_csv(result_dir.joinpath(f'low_premium_{drawback_mode}_{str(int(price * 10000))}_exception.csv'))
    return res_df, excep_df
    
    
def analyze_result(price: float = 0.002, day: int = 2, drawback_mode: str = 'close'):
    """Print summary statistics of the batch backtest for one price and mode.

    The result and exception CSV files in the ``result`` directory are loaded
    when both exist; otherwise ``run_batch_backtest`` is executed. Contracts
    whose ``duration`` is at most ``day`` are selected, and the codes from the
    exception frame and from ``excep_code_list`` are removed. The function then
    prints the number and share of contracts whose ``exit`` equals 1, the
    number and share whose ``max_db`` equals 0, and ``describe()`` of the
    selection.

    Args:
        price: Entry price threshold; also part of the file names.
        day: Maximum holding duration, in days, of the contracts to analyse.
        drawback_mode: Drawback mode; also part of the file names.

    Returns:
        None. All output is printed. When no contract is left after filtering,
        both shares are printed as ``nan`` instead of raising.
    """
    # Analysis.
    filename = f'low_premium_{drawback_mode}_{str(int(price * 10000))}.csv'
    excep_filename = f'low_premium_{drawback_mode}_{str(int(price * 10000))}_exception.csv'
    filepath = Path.cwd().joinpath('result', filename)
    excep_filepath = Path.cwd().joinpath('result', excep_filename)
    # Both files are needed; re-run the backtest when either one is missing.
    if filepath.exists() and excep_filepath.exists():
        print(f'{price}回测结果已存在，从文件载入')
        # The first CSV column is the saved index, not data; reading it as the
        # index keeps the cached frame identical in shape to a fresh one.
        df = pd.read_csv(filepath, index_col=0, parse_dates=['start', 'end'])
        excep_df = pd.read_csv(excep_filepath, index_col=0)
    else:
        print(f'{price}回测结果不存在，需要执行运算，请稍候..')
        df, excep_df = run_batch_backtest(price, drawback_mode)

    print("=" * 50)
    print(f"计算下单当日回撤基于{drawback_mode}模式")

    skip_code_list = excep_df['code'].to_list()
    skip_code_list.extend(excep_code_list)
    print("价格异常合约代码：")
    print(skip_code_list)

    sel_df = df[df['duration'] <= day]
    print("所有合约数量:", len(sel_df))

    sel_df = sel_df[~(sel_df['code'].isin(skip_code_list))]
    print("排除异常价格数量：", len(sel_df))

    sel_df_no_db = sel_df[sel_df['max_db'] == 0]
    sel_df_none = sel_df[sel_df['exit'] == 1]

    total = len(sel_df)

    def _share(count: int) -> float:
        # Avoid ZeroDivisionError when the selection is empty.
        return count / total if total else float('nan')

    print('最终归零合约数量:', len(sel_df_none))
    print('归零率:', _share(len(sel_df_none)))

    print('持有期间没有回撤合约数量:', len(sel_df_no_db))
    print('无回撤率:', _share(len(sel_df_no_db)))

    print("=" * 50)
    print("统计数据：")
    print(sel_df.describe())

In [None]:
# Run the backtest analysis: entry threshold 0.0030, holding duration of at
# most 2 days, entry-day high taken from the bar's high ('high' mode).
price = 0.0030
day = 2
drawback_mode = 'high'
analyze_result(price, day, drawback_mode)

In [None]:
# Test code: load the filtered history and keep only the bars of one contract.
df = load_hist_data()
code = '10001346.XSHG'
test_df = df[df['code'] == code]
# Bare expression so the notebook displays the selected bars.
test_df

In [None]:
# Backtest the single-contract frame with the default price and drawback mode;
# res is None when the position is never opened, otherwise a (result, exception) tuple.
res = backtest(test_df)
res

In [None]:
# NOTE(review): presumably fetches daily bars of SSE option contract 10001494
# from Sina through akshare — confirm against the akshare documentation.
# This rebinds the notebook-level name `df` used by the earlier test cell.
import akshare as ak
df = ak.option_sina_sse_daily(code="10001494")
df