In [1]:
def normalize_dates(df_pivot_old, dates, calendar='df'):
    """Expand a date-indexed frame onto ``dates`` and forward-fill within groups.

    Parameters
    ----------
    df_pivot_old : pandas.DataFrame
        Frame indexed by date (the index must be unique). It is not modified;
        the function works on a copy.
    dates : pandas.Index
        Dates to add to the index. Dates already present are kept as they are.
    calendar : str, default 'df'
        * ``'month'`` or ``'day'`` (both behave identically): rows are grouped
          by a "<year>-<quarter>" label derived from the index and shifted back
          by one row, and values are forward-filled inside each group.
        * anything else: every original row starts its own group, so each
          original row is forward-filled until the next original row.

    Returns
    -------
    pandas.DataFrame
        The expanded, forward-filled frame without the helper ``calendar``
        column and without rows that are entirely NaN.
    """
    df_pivot = df_pivot_old.copy()
    by_quarter = calendar in ('month', 'day')

    if not by_quarter:
        # Number the original rows *before* expanding the index so that every
        # original row becomes the start of its own forward-fill group.
        df_pivot['calendar'] = range(len(df_pivot))

    # Add the missing dates as all-NaN rows. reindex replaces the former
    # "concat with an all-NaN object frame" idiom, whose dtype handling is
    # deprecated in recent pandas versions.
    df_pivot = df_pivot.reindex(df_pivot.index.union(dates)).sort_index()

    if by_quarter:
        # Label each row "<year>-<quarter>", then take the label of the next
        # row (the last row reuses the previous label via ffill).
        df_pivot['calendar'] = df_pivot.index.map(lambda x: str(x)[:5] + str((x.month + 2) // 3))
        df_pivot['calendar'] = df_pivot['calendar'].shift(-1).ffill()
    else:
        df_pivot['calendar'] = df_pivot['calendar'].ffill()

    # 'calendar' is always the last column at this point.
    value_cols = df_pivot.columns[:-1]
    df_pivot[value_cols] = df_pivot.groupby('calendar').ffill()
    return df_pivot[value_cols].dropna(how='all')


def get_port_ts_old(score, score_num):
    """Build lagged equal-weight holdings for one fixed-width score bucket.

    An asset belongs to the bucket on a given row when its score lies in
    ``(score_num, score_num + 1]``. Membership is shifted down by one row, so
    the weights on a row come from the previous row's scores.

    Parameters
    ----------
    score : pandas.DataFrame
        Scores, one row per date and one column per asset.
    score_num : int
        Zero-based bucket number; also used to name the count series.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The weights (1 / member count for members, 0 for the others, NaN where
        the row has no members) and the member count per row, named
        ``'num_<score_num + 1>'``.
    """
    lower, upper = score_num, score_num + 1
    # A score of exactly 0 is never treated as a member (NaN is not either).
    in_bucket = (score > lower) & (score <= upper) & (score != 0)
    lagged = in_bucket.astype(float).shift(1)
    port_count = lagged.sum(axis=1).rename('num_' + str(score_num + 1))
    port_ts = lagged.div(port_count, axis=0)
    return port_ts, port_count


def get_backtest2_old(score, price_df, price_df_index, path_name,
                      start_date='2017-01-01', end_date='2023-08-17'):
    """Backtest five fixed-width score buckets and write the results to disk.

    For each bucket 0..4 the lagged equal-weight holdings from
    ``get_port_ts_old`` are saved as ``<n>_port_ts.feather`` and multiplied by
    the asset returns to get the bucket return. Bucket returns, member counts,
    benchmark returns and three spread columns are written to
    ``backtest2_detail.csv`` in the same directory.

    Parameters
    ----------
    score : pandas.DataFrame
        Scores, dates in rows and assets in columns.
    price_df : pandas.DataFrame
        Asset prices with the same layout; only assets present in both frames
        are used.
    price_df_index : pandas.DataFrame
        Benchmark prices; must contain the columns '000300.SH' and '000905.SH'.
    path_name : str
        Output directory, relative to the current directory. A trailing slash
        is optional. The directory is created if it does not exist.
    start_date, end_date : str
        Inclusive bounds of the reported window.

    Returns
    -------
    None
    """
    # Local import: this helper cell does not import os itself.
    import os

    # normpath makes 'dir', 'dir/' and 'dir//' resolve to the same directory,
    # so both output files always end up inside it.
    out_dir = os.path.normpath(f'./{path_name}')
    os.makedirs(out_dir, exist_ok=True)

    cols = score.columns.intersection(price_df.columns)
    price_df_new = price_df[cols]
    score_new = score[cols]

    # NOTE(review): both calls rely on pct_change's default fill behaviour,
    # which newer pandas versions deprecate - confirm against the pinned version.
    ret_df = price_df_new.pct_change()
    ret_df_index = price_df_index.pct_change()

    pieces = [pd.DataFrame(index=score.index)]
    for score_num in range(5):
        port_ts, port_count = get_port_ts_old(score_new, score_num)
        port_ts.reset_index().to_feather(os.path.join(out_dir, f'{score_num}_port_ts.feather'))
        ret_attr = (port_ts * ret_df).sum(axis=1).rename('score_' + str(score_num + 1))
        pieces.extend([ret_attr, port_count])
    ret_port = pd.concat(pieces, axis=1)

    score_cols = ret_port.columns[ret_port.columns.str.startswith('score_')]
    num_cols = ret_port.columns[ret_port.columns.str.startswith('num_')]
    ret_port = pd.concat([ret_port, ret_df_index], axis=1)
    ret_port['excess_300'] = ret_port['score_5'] - ret_port['000300.SH']
    ret_port['excess_500'] = ret_port['score_5'] - ret_port['000905.SH']
    # Long-short spread: top bucket minus bottom bucket. This used to repeat
    # the excess_500 formula.
    ret_port['ls'] = ret_port['score_5'] - ret_port['score_1']
    cols2 = ['excess_300', 'excess_500', 'ls']

    ret_port = ret_port[(ret_port.index >= start_date) & (ret_port.index <= end_date)]
    # Keep only dates for which asset prices exist.
    ret_port = ret_port.loc[price_df.index.intersection(ret_port.index)]
    out_cols = list(num_cols) + list(score_cols) + list(ret_df_index.columns) + cols2
    ret_port[out_cols].to_csv(os.path.join(out_dir, 'backtest2_detail.csv'))


def get_port_ts(score, score_percent_last, score_percent_next, score_num):
    """Build lagged equal-weight holdings for one quantile bucket of scores.

    On every row the bucket holds the assets whose score lies between the
    row-wise quantiles ``score_percent_last`` and ``score_percent_next``. The
    upper bound is inclusive. The lower bound is exclusive, except for a
    bucket starting at quantile 0, where it is inclusive so that the
    lowest-scored asset is not left out of every bucket. Membership is shifted
    down by one row, so the weights on a row come from the previous row's
    scores.

    Parameters
    ----------
    score : pandas.DataFrame
        Scores, one row per date and one column per asset.
    score_percent_last, score_percent_next : float
        Lower and upper quantile (0..1) of the bucket.
    score_num : int
        Zero-based bucket number; only used to name the count series.

    Returns
    -------
    (pandas.DataFrame, pandas.Series)
        The weights (1 / member count for members, 0 for the others, NaN where
        the row has no members) and the member count per row, named
        ``'num_<score_num + 1>'``.
    """
    lower = score.quantile(score_percent_last, axis=1)
    upper = score.quantile(score_percent_next, axis=1)

    if score_percent_last <= 0:
        # Quantile 0 is the row minimum; a strict ">" would drop that asset.
        above_lower = score.ge(lower, axis=0)
    else:
        above_lower = score.gt(lower, axis=0)

    # Membership is kept as a boolean mask so that a legitimate score of 0 is
    # not mistaken for "not in the bucket". Comparisons with NaN are False.
    members = above_lower & score.le(upper, axis=0)

    port_ts = members.astype(float).shift(1)
    port_count = port_ts.sum(axis=1).rename('num_' + str(score_num + 1))
    port_ts = port_ts.div(port_count, axis=0)
    return port_ts, port_count


def get_backtest2(score, price_df, price_df_index, path_name,
                  start_date='2017-01-01', end_date='2023-08-17'):
    """Backtest five quantile score buckets and write the results to disk.

    The buckets are delimited by the row-wise score quantiles
    0, 0.1, 0.25, 0.45, 0.7 and 1. For each bucket the lagged equal-weight
    holdings from ``get_port_ts`` are saved as ``<n>_port_ts.feather`` and
    multiplied by the asset returns to get the bucket return. Bucket returns,
    member counts, benchmark returns and three spread columns are written to
    ``backtest2_detail.csv`` in the same directory.

    Parameters
    ----------
    score : pandas.DataFrame
        Scores, dates in rows and assets in columns.
    price_df : pandas.DataFrame
        Asset prices with the same layout; only assets present in both frames
        are used.
    price_df_index : pandas.DataFrame
        Benchmark prices; must contain the columns '000300.SH' and '000905.SH'.
    path_name : str
        Output directory, relative to the current directory. A trailing slash
        is optional. The directory is created if it does not exist.
    start_date, end_date : str
        Inclusive bounds of the reported window.

    Returns
    -------
    None
    """
    # Local import: this helper cell does not import os itself.
    import os

    # normpath makes 'dir', 'dir/' and 'dir//' resolve to the same directory.
    out_dir = os.path.normpath(f'./{path_name}')
    os.makedirs(out_dir, exist_ok=True)

    cols = score.columns.intersection(price_df.columns)
    price_df_new = price_df[cols]
    score_new = score[cols]

    # NOTE(review): both calls rely on pct_change's default fill behaviour,
    # which newer pandas versions deprecate - confirm against the pinned version.
    ret_df = price_df_new.pct_change()
    ret_df_index = price_df_index.pct_change()

    score_percent_lst = [0, 0.1, 0.25, 0.45, 0.7, 1]
    # score_percent_lst = [0, 0.02, 0.32, 0.7, 0.95, 1]
    pieces = [pd.DataFrame(index=score.index)]
    for score_num in range(5):
        score_percent_last = score_percent_lst[score_num]
        score_percent_next = score_percent_lst[score_num + 1]
        port_ts, port_count = get_port_ts(score_new, score_percent_last, score_percent_next, score_num)
        port_ts.reset_index().to_feather(os.path.join(out_dir, f'{score_num}_port_ts.feather'))
        ret_attr = (port_ts * ret_df).sum(axis=1).rename('score_' + str(score_num + 1))
        pieces.extend([ret_attr, port_count])
    ret_port = pd.concat(pieces, axis=1)

    score_cols = ret_port.columns[ret_port.columns.str.startswith('score_')]
    num_cols = ret_port.columns[ret_port.columns.str.startswith('num_')]
    ret_port = pd.concat([ret_port, ret_df_index], axis=1)
    ret_port['excess_300'] = ret_port['score_5'] - ret_port['000300.SH']
    ret_port['excess_500'] = ret_port['score_5'] - ret_port['000905.SH']
    # Long-short spread: top bucket minus bottom bucket. This used to repeat
    # the excess_500 formula.
    ret_port['ls'] = ret_port['score_5'] - ret_port['score_1']
    cols2 = ['excess_300', 'excess_500', 'ls']

    ret_port = ret_port[(ret_port.index >= start_date) & (ret_port.index <= end_date)]
    # Keep only dates for which asset prices exist.
    ret_port = ret_port.loc[price_df.index.intersection(ret_port.index)]
    out_cols = list(num_cols) + list(score_cols) + list(ret_df_index.columns) + cols2
    ret_port[out_cols].to_csv(os.path.join(out_dir, 'backtest2_detail.csv'))


In [14]:
import pandas as pd
import numpy as np
import os

# Load the daily bond panel and the bond master table from local feather files.
all_daily_cbond_new = pd.read_feather('BondDailyData.feather')
bond_info = pd.read_feather(r'BondInfo.feather')
# Parse the two date columns and drop master rows that have no start date.
bond_info['cbond_info_pub_startdate']=pd.to_datetime(bond_info['cbond_info_pub_startdate'])
bond_info['cbond_info_pub_enddate']=pd.to_datetime(bond_info['cbond_info_pub_enddate'])
bond_info=bond_info.dropna(subset=['cbond_info_pub_startdate']).reset_index(drop=True)

# Attach resale price and interest per (bond_code, datetime). bond_code is the
# symbol without its last three characters (presumably the exchange suffix -
# verify against the data).
price_interest = pd.read_feather(r'price_interest.feather').rename(columns={'date': 'datetime'})
all_daily_cbond_new['bond_code'] = all_daily_cbond_new['symbol'].str[:-3]
all_daily_cbond_new = all_daily_cbond_new.merge(price_interest[['bond_code', 'datetime', 'resale_price', 'interest']], on=['bond_code', 'datetime'], how='left')
# Rows without a match fall back to a resale price of 100 and zero interest.
all_daily_cbond_new['resale_price'] = all_daily_cbond_new['resale_price'].fillna(100)
all_daily_cbond_new['interest'] = all_daily_cbond_new['interest'].fillna(0)
all_daily_cbond_new['resale'] = all_daily_cbond_new['resale_price'] + all_daily_cbond_new['interest']

# Where a transaction end date is available it replaces the published end date.
bond_info_new = bond_info.copy()
bond_info_new.loc[~bond_info_new['cbond_info_transaction_enddate'].isna(), 'cbond_info_pub_enddate'] = bond_info_new['cbond_info_transaction_enddate']
bond_info_new['cbond_info_pub_enddate'] = pd.to_datetime(bond_info_new['cbond_info_pub_enddate'])

# Inner merge: daily rows whose symbol is missing from the master table are dropped.
all_daily_cbond_new = all_daily_cbond_new.merge(bond_info_new[['cbond_info_symbol', 'cbond_info_display_name', 'cbond_info_stock_symbol', 'cbond_info_pub_enddate']], left_on='symbol', right_on='cbond_info_symbol', how='inner')
# Remaining life in years (calendar days / 365) from the row date to the end date.
all_daily_cbond_new['left_years'] = (pd.DatetimeIndex(all_daily_cbond_new['cbond_info_pub_enddate']) - pd.DatetimeIndex(all_daily_cbond_new['datetime'])).days/365
# Stock close relative to the conversion price, minus 1.
all_daily_cbond_new['convert_divide_paper'] = all_daily_cbond_new['stock_close'] / all_daily_cbond_new['convert_price'] - 1
# Same ratio, scaled by 100 and divided by the resale value (resale price + interest).
all_daily_cbond_new['convert_divide_paper_new'] = all_daily_cbond_new['stock_close'] / all_daily_cbond_new['convert_price'] * 100 / all_daily_cbond_new['resale'] - 1
# Keep only rows dated on or before the bond's end date.
all_daily_cbond_new = all_daily_cbond_new[all_daily_cbond_new['left_years'] >= 0]
# all_daily_cbond_new = all_daily_cbond_new[['datetime', 'symbol', 'stock_symbol', 'cbond_info_display_name', 'convert_premium', 'convert_divide_paper', 'left_years', 'stock_close', 'close']]


# %%
# For every indicator in `inds`, derive three percentile columns:
#   <ind>_cross_percent - rank among all bonds on the same date / non-null count that date
#   <ind>_ts_percent    - percentile rank within the bond's own trailing 252-row window
#   <ind>_percent       - mean of the two
inds = ['convert_premium', 'convert_divide_paper', 'convert_divide_paper_new', 'left_years']
inds_cross_rank = [ind+'_cross_rank' for ind in inds]
inds_cross_count = [ind+'_cross_count' for ind in inds]
inds_cross_percent = [ind+'_cross_percent' for ind in inds]

# Only the *_ts_percent names are used in the loop below; the *_ts_rank and
# *_ts_count names are unpacked but never written to the frame.
inds_ts_rank = [ind+'_ts_rank' for ind in inds]
inds_ts_count = [ind+'_ts_count' for ind in inds]
inds_ts_percent = [ind+'_ts_percent' for ind in inds]

# Likewise, only the *_percent names are used below.
inds_rank = [ind+'_rank' for ind in inds]
inds_count = [ind+'_count' for ind in inds]
inds_percent = [ind+'_percent' for ind in inds]

for i in range(len(inds)):
    ind, ind_cross_rank, ind_cross_count, ind_cross_percent = inds[i], inds_cross_rank[i], inds_cross_count[i], inds_cross_percent[i]
    ind_ts_rank, ind_ts_count, ind_ts_percent = inds_ts_rank[i], inds_ts_count[i], inds_ts_percent[i]
    ind_rank, ind_count, ind_percent = inds_rank[i], inds_count[i], inds_percent[i]
    # Cross-section: rank within each date, divided by that date's non-null count.
    all_daily_cbond_new = all_daily_cbond_new.sort_values(['datetime', ind])
    all_daily_cbond_new[ind_cross_rank] = all_daily_cbond_new.groupby('datetime')[ind].rank().rename(ind_cross_rank)
    all_daily_cbond_new = all_daily_cbond_new.merge(all_daily_cbond_new.groupby('datetime')[ind].count().rename(ind_cross_count), on=['datetime'], how='inner')
    all_daily_cbond_new[ind_cross_percent] = all_daily_cbond_new[ind_cross_rank] / all_daily_cbond_new[ind_cross_count]

    # Time series: the frame is sorted by (symbol, datetime) and re-indexed 0..n-1
    # first, because the rolling result is assigned back by position after its
    # own index is dropped - the row order of both sides has to match.
    all_daily_cbond_new = all_daily_cbond_new.sort_values(['symbol', 'datetime']).reset_index(drop=True)
    all_daily_cbond_new[ind_ts_percent] = all_daily_cbond_new.groupby('symbol')[ind].rolling(252, min_periods=1).rank(pct=True).rename(ind_ts_percent).reset_index(drop=True)

    # Combined percentile: plain average of the two views.
    all_daily_cbond_new[ind_percent] = all_daily_cbond_new[[ind_cross_percent, ind_ts_percent]].mean(axis=1)
# Reshape left_years_cross_percent. This runs after the loop, so the
# left_years_percent computed above still uses the untransformed value.
# Alternative on [0, 1]: acceleration increasing
# all_daily_cbond_new['left_years_cross_percent'] = np.log2(all_daily_cbond_new['left_years_cross_percent'] + 1)
# Active transform: log(m * x + 1) / log(m + 1), which maps 0 -> 0 and 1 -> 1.
m = 80
all_daily_cbond_new['left_years_cross_percent'] = np.log(m * all_daily_cbond_new['left_years_cross_percent'] + 1) / np.log(m+1)
# all_daily_cbond_new['left_years_cross_percent'] = np.log(2 * all_daily_cbond_new['left_years_cross_percent'] + 1) / np.log(3)
# Alternatives on [0, 1]: acceleration constant
# all_daily_cbond_new['left_years_cross_percent'] = 1 - (all_daily_cbond_new['left_years_cross_percent'] - 1) ** 2
# all_daily_cbond_new['left_years_cross_percent'] = (3 * all_daily_cbond_new['left_years_cross_percent'] - all_daily_cbond_new['left_years_cross_percent'] ** 3) / 2
# all_daily_cbond_new['left_years_cross_percent'] = np.sqrt(2 * all_daily_cbond_new['left_years_cross_percent'] - all_daily_cbond_new['left_years_cross_percent'] ** 2)
# Alternative on [0, 1]: acceleration decreasing
# all_daily_cbond_new['left_years_cross_percent'] = 2 - 2 ** (1 - all_daily_cbond_new['left_years_cross_percent'])
# Work on a copy from here on so the engineered frame itself stays untouched.
all_daily_cbond = all_daily_cbond_new.copy()


# %%
# Column of `all_daily_cbond` that is backtested; also the name of the output folder.
path_name = 'left_years_cross_percent'

# Composite score, built from the percentiles *before* they are rescaled below.
# all_daily_cbond['game_score'] = (1 - all_daily_cbond['convert_premium_percent']) * 2.5 + (1 - all_daily_cbond['convert_divide_paper_percent']) * 1.25 + all_daily_cbond['left_years_cross_percent'] * 1.25
all_daily_cbond['game_score'] = (1 - all_daily_cbond['convert_premium_percent']) * 10 / 3 + (1 - all_daily_cbond['left_years_cross_percent']) * 5 / 3
# Flip each percentile and stretch it by 5, so a low percentile gives a high score.
all_daily_cbond['convert_premium_percent'] = (1 - all_daily_cbond['convert_premium_percent']) * 5
all_daily_cbond['convert_premium_cross_percent'] = (1 - all_daily_cbond['convert_premium_cross_percent']) * 5
all_daily_cbond['convert_premium_ts_percent'] = (1 - all_daily_cbond['convert_premium_ts_percent']) * 5
all_daily_cbond['convert_divide_paper_percent'] = (1 - all_daily_cbond['convert_divide_paper_percent']) * 5
all_daily_cbond['convert_divide_paper_cross_percent'] = (1 - all_daily_cbond['convert_divide_paper_cross_percent']) * 5
all_daily_cbond['convert_divide_paper_ts_percent'] = (1 - all_daily_cbond['convert_divide_paper_ts_percent']) * 5

all_daily_cbond['left_years_cross_percent'] = (1 - all_daily_cbond['left_years_cross_percent']) * 5

# Long tables for the pivots. Explicit copies, so renaming the columns never
# acts on a view of `all_daily_cbond`.
score = all_daily_cbond[['datetime', 'symbol', path_name]].copy()
score.columns = ['date', 'stock_code', 'score']
close = all_daily_cbond[['datetime', 'symbol', 'close']].copy()
close.columns = ['date', 'stock_code', 'close']


# Score pivot: dates in rows, bonds in columns.
factor_df3 = score.set_index(['date', 'stock_code'])['score'].unstack()
factor_df3.index = pd.to_datetime(factor_df3.index)

# Trading calendar. dates1 takes every 21st row (offset 1); dates2..dates5 take
# every 5th row at offsets 2, 3, 4 and 0. Only dates1 is used in this cell.
dates = pd.read_feather('tradingdays.feather').rename(columns={'tradingday': 0})
dates1 = dates.loc[dates.index % 21 == 1]
dates1 = pd.DatetimeIndex(dates1[0])
dates2 = dates.loc[dates.index % 5 == 2]
dates2 = pd.DatetimeIndex(dates2[0])
dates3 = dates.loc[dates.index % 5 == 3]
dates3 = pd.DatetimeIndex(dates3[0])
dates4 = dates.loc[dates.index % 5 == 4]
dates4 = pd.DatetimeIndex(dates4[0])
dates5 = dates.loc[dates.index % 5 == 0]
dates5 = pd.DatetimeIndex(dates5[0])
dates = pd.DatetimeIndex(dates[0])


price_df_index = pd.read_feather(r'price_df_index.feather').set_index('index')
# path_name = 'game_score'
print(f'start_{path_name}')

# Price pivot with the same layout and index type as the score pivot.
price_df = close.set_index(['date', 'stock_code'])['close'].unstack()
price_df.index = pd.to_datetime(price_df.index)
# price_df.columns = price_df.columns.map(dict(zip(price_df.columns, pd.Series(price_df.columns).str[:-3])))

# Output folder for this run; makedirs also creates the parent `path_name`
# folder and is a no-op when the folders already exist.
path_name_new = path_name + '/1/'
os.makedirs(f'./{path_name_new}', exist_ok=True)

# Sample the score on the dates1 rebalance dates, carry each sample forward
# over the trading calendar, then run the fixed-bucket backtest.
score_new = normalize_dates(factor_df3.loc[dates1.intersection(factor_df3.index).tolist()], dates)
get_backtest2_old(score_new, price_df, price_df_index, path_name_new)

start_left_years_cross_percent


In [1]:
###