In [3]:
import os, pickle, re
from tqdm import tqdm
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.cm import ScalarMappable

# 设置显示所有行
pd.set_option('display.max_rows', None)

from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix

from dl_helper.tool import adjust_class_weights_df

In [4]:
%%html
<style>
  th,td { border: 1px solid black !important; }
</style>

In [5]:
test_data_folder = r'D:\L2_DATA_T0_ETF\train_data\base_top5_filter_time\test'
label_idx = 4

In [6]:
def class_f1_score_sklearn(y_true, y_pred):
    """
    计算每个类别的F1 score
    
    参数:
    y_true: 真实标签,numpy数组或列表
    y_pred: 预测标签,numpy数组或列表
    
    返回:
    numpy数组,包含每个类别的F1 score
    """
    # 确保输入是numpy数组
    if isinstance(y_true, (list, tuple)):
        y_true = np.array(y_true)
    if isinstance(y_pred, (list, tuple)):
        y_pred = np.array(y_pred)
        
    # 如果输入是概率,转换为类别
    if len(y_pred.shape) > 1 and y_pred.shape[1] > 1:
        y_pred = np.argmax(y_pred, axis=1)
    
    # 计算每个类别的F1 score
    class_f1 = f1_score(y_true, y_pred, average=None)
    
    return class_f1

def class_accuracy_sklearn(y_true, y_pred):
    # 计算混淆矩阵
    cm = confusion_matrix(y_true, y_pred)

    # 计算每个类别的准确率
    num_classes = len(cm)
    accuracies = []
    for i in range(num_classes):
        class_accuracy = cm[i, i] / cm[i, :].sum()  # True Positives / Total samples in the class
        accuracies.append(class_accuracy)

    return accuracies

def class_precision_sklearn(y_true, y_pred):
    # 计算混淆矩阵
    cm = confusion_matrix(y_true, y_pred)

    # 计算每个类别的精确率
    precision_per_class = cm.diagonal() / cm.sum(axis=0)
    if (cm.sum(axis=0) == 0).any():
         print(cm)
         print(pd.Series(y_true).value_counts())
         print(pd.Series(y_pred).value_counts())
    assert not (cm.sum(axis=0) == 0).any(), f"分母为0:\n{cm}"
    return precision_per_class 

In [7]:
def cal_data(df, threshold):
    data = {}

    labels = df['target'].value_counts()
    if len(labels) != 3:
        return None

    # # 计算标签类别分布占比
    # pct = labels / len(df)
    # for i in range(len(pct)):
    #     data[f'{i}'] = round(pct[i], 2)

    # # 读取测试数据 获取原始 y
    # raw_data = pd.DataFrame()
    # for test_file in os.listdir(test_data_fodler):
    #     ids,_, _, y, _ = pickle.load(open(os.path.join(test_data_fodler, test_file), 'rb'))
    #     _data = pd.DataFrame({
    #         # 'code': [i.split('_')[0] for i in ids],
    #         # 'timestamp': [int(i.split('_')[1]) for i in ids],
    #         'id': ids,
    #         'y': [i[label_idx] for i in y],
    #     })
    #     raw_data = pd.concat([raw_data, _data], ignore_index=True)
    # # 筛选过滤
    # raw_data['code'] = raw_data['id'].apply(lambda x: x.split('_')[0])
    # raw_data['timestamp'] = raw_data['id'].apply(lambda x: int(x.split('_')[1]))
    # raw_data = raw_data.loc[raw_data['code'] == code, :].reset_index(drop=True)
    # raw_data = raw_data[raw_data['timestamp'].isin(df['timestamp'])]
    # raw_data = raw_data.sort_values('timestamp').reset_index(drop=True)
    # assert (raw_data['timestamp'] == df['timestamp']).all(), f'len(df)!= len(raw_data)'
    # target_value = raw_data['y']
    # class_0_mean_value = target_value[df[df['target'] == 0].index].mean()

    # 随机模型性能作为基准
    random_predict = np.random.randint(0, len(labels), len(df))
    
    # 保证随机预测的结果包含所有的类别
    if len(set(random_predict)) != len(labels):
        # print(f'随机预测丢失类别: {set(labels.index.to_list()) - set(random_predict)}')
        return None

    if threshold is None:
        # 根据最大值计算
        df['predict'] = df.iloc[:, 2:].idxmax(axis=1).astype(int)
    else:
        predict_bool = df.iloc[:, 2:] >= threshold
        predict_bool.iloc[:, -1] = True
        df['predict'] = predict_bool.idxmax(axis=1).astype(int)
    # 检查是否有未预测出的类别
    if len(df['predict'].unique())!= len(labels):
        # print(f'未预测出的类别: {set(range(len(labels))) - set(df["predict"].unique())}')
        return None

    # 样本数量
    data[f'samples'] = len(df)

    # 样本数量比例
    target_value_counts = df['target'].value_counts().sort_index()
    target_value_counts /= len(df)
    for i in range(len(target_value_counts)):
        data[f'class_{i}_pct'] = target_value_counts[i]

    class_f1 = class_f1_score_sklearn(df['target'], df['predict'])
    # class_f1_bm = class_f1_score_sklearn(df['target'], random_predict)
    for i in range(len(class_f1)):
        # bm = class_f1_bm[i]
        # diff_pct = round((class_f1[i] - bm) / bm * 100, 2)
        data[f'class_f1_{i}'] = class_f1[i]
        # data[f'class_f1_{i}'] = str(round(class_f1[i], 3)) + f'({diff_pct}%)'
        # data[f'class_f1_{i}_bm'] = bm

    data['mean_class_f1'] = (sum(class_f1) - class_f1[-1]) / (len(class_f1) -1)
    # mean_class_f1_bm = (sum(class_f1_bm) - class_f1_bm[-1]) / (len(class_f1_bm) -1)
    # diff_pct = round((data['mean_class_f1'] - mean_class_f1_bm) / mean_class_f1_bm * 100, 2)
    data['mean_class_f1'] = data['mean_class_f1']
    # data['mean_class_f1'] = str(round(data['mean_class_f1'], 3)) + f'({diff_pct}%)'
    # data['mean_class_f1_bm'] = mean_class_f1_bm

    class_acc = class_accuracy_sklearn(df['target'], df['predict'])
    class_acc_bm = class_accuracy_sklearn(df['target'], random_predict)
    for i in range(len(class_acc)):
        # bm = class_acc_bm[i]
        # diff_pct = round((class_acc[i] - bm) / bm * 100, 2)
        data[f'class_acc_{i}'] = class_acc[i]
        # data[f'class_acc_{i}'] = str(round(class_acc[i], 3)) + f'({diff_pct}%)'
        # data[f'class_acc_{i}_bm'] = bm

    data['mean_class_acc'] = (sum(class_acc) - class_acc[-1]) / (len(class_acc) -1)
    # mean_class_acc_bm = (sum(class_acc_bm) - class_acc_bm[-1]) / (len(class_acc_bm) -1)

    class_pre = class_precision_sklearn(df['target'], df['predict'])
    class_pre_bm = class_precision_sklearn(df['target'], random_predict)
    for i in range(len(class_pre)):
        # bm = class_pre_bm[i]
        # diff_pct = round((class_pre[i] - bm) / bm * 100, 2)
        data[f'class_pre_{i}'] =  class_pre[i]
        # data[f'class_pre_{i}'] =  str(round(class_pre[i], 3)) + f'({diff_pct}%)'
        # data[f'class_pre_{i}_bm'] = bm
    # class_pre_0 = class_pre[0]

    data['mean_class_pre'] = (sum(class_pre) - class_pre[-1]) / (len(class_pre) -1)
    # mean_class_pre_bm = (sum(class_pre_bm) - class_pre_bm[-1]) / (len(class_pre_bm) -1)
    # diff_pct = round((data['mean_class_pre'] - mean_class_pre_bm) / mean_class_pre_bm * 100, 2)
    data['mean_class_pre'] = data['mean_class_pre']
    # data['mean_class_pre'] = str(round(data['mean_class_pre'], 3)) + f'({diff_pct}%)'
    # data['mean_class_pre_bm'] = mean_class_pre_bm

    # data['class_0_mean_value'] = class_0_mean_value
    # data['class_0_ep'] = class_0_mean_value * class_pre_0

    return data

def handle_predict_file(predict_file, test_data_fodler, label_idx, threshold=None):
    """
    处理预测文件
    """
    # print(f'handle: {predict_file}')

    # threshold = [0.411,0.432,0.202]
    # predict_file = r"D:\\code\\forecast_model\\notebook\\20241031_1_model_stack\\train_model_stack_base_top9_seed0_T4x2_fp16\\model_final\\159941_1724895312_1725519117.csv"
    code, begin, end = os.path.basename(predict_file).split('_')

    _df = pd.read_csv(predict_file)

    datas = []
    # 采样10次 取平均值
    for i in range(10):

        # 按照最少数量的类别进行降采样
        df = adjust_class_weights_df(_df)
        
        # 限制样本数量
        if len(df) < 600:
            continue

        data = cal_data(df, threshold)
        if None is data:
            continue
        datas.append(data)

    data = {}
    if len(datas) == 0:
        return None
    for k in datas[0].keys():
        if k in ['samples'] or 'pct' in k:
            continue
        data[f'a_{k}'] = np.mean([i[k] for i in datas])

    no_adj_data = cal_data(_df, threshold)
    if None in no_adj_data:
        return None

    for k in no_adj_data.keys():
        data[f'{k}'] = no_adj_data[k]

    return data

In [8]:
wait_handle_folders = [
    r'D:\code\forecast_model\notebook\20250811_base',
]

In [9]:
if not wait_handle_folders:
    for train_folder in os.listdir(r'D:\code\forecast_model\notebook'):

        if train_folder == 'ago' or 'reg' in train_folder:
            continue

        # train_folder = r'D:\code\forecast_model\notebook\20241105_1_of_bincatbl'
        train_folder = os.path.join(r'D:\code\forecast_model\notebook', train_folder)

        # 非文件夹
        if not os.path.isdir(train_folder):
            continue

        wait_handle_folders.append(train_folder)

In [10]:
res = pd.DataFrame()    
for train_folder in wait_handle_folders:

    print(train_folder)

    cal_data_result_file = os.path.join(train_folder, 'train_result.csv')
    if not os.path.exists(cal_data_result_file):
        each_train_folder_list = [os.path.join(train_folder, i) for i in os.listdir(train_folder)]
        each_train_folder_list = [i for i in each_train_folder_list if os.path.isdir(i)]
        len(each_train_folder_list)

        res_list= []
        for i in tqdm(each_train_folder_list):
            for _model_type in ['model_best', 'model_final']:
                _model_folder = os.path.join(i, _model_type)

                # 读取阈值 第一行
                threshold = os.path.join(_model_folder, 'threshold.txt')
                with open(threshold, 'r') as f:
                    threshold = f.readline().strip()
                threshold = [float(i) for i in threshold.split(',')]

                for predict_file in os.listdir(_model_folder):
                    infos = predict_file.split('.')[0].split('_')
                    if len(infos) == 3:
                        code, begin, end = infos
                        if len(code) == 6 and len(begin) == len('1724895312') and len(end) == len('1724895312'):
                            # 打开标的预测文件
                            predict_file = os.path.join(_model_folder, predict_file)
                            
                            for _title, _threshold in zip(
                                ['', 'max'],
                                [threshold, None]
                            ):
                                res_dict = handle_predict_file(predict_file ,test_data_folder,label_idx,  _threshold)
                                if res_dict is None:
                                    print(f'{predict_file} {_threshold} 无结果')
                                    continue
                                res_dict['code'] = code
                                # 提取 i 的最后一个文件夹名
                                train_title = os.path.basename(i)
                                # 匹配 _seed0 并删除
                                seed= re.search(r'_seed(\d+)', train_title)
                                if seed is None:
                                    seed = 0
                                else:
                                    seed = seed.group(1)
                                    train_title = re.sub(r'_seed\d+', '', train_title)

                                res_dict['seed'] = int(seed)
                                res_dict['title'] = train_title
                                res_dict['type'] = _model_type
                                res_dict['threshold'] = _title

                                res_list.append(res_dict)
        
        if len(res_list) > 0:
            _res = pd.DataFrame(res_list)
            _res = _res.groupby(['title', 'type', 'code', 'threshold']).mean()

            _res.to_csv(cal_data_result_file)
    
    _res = pd.read_csv(cal_data_result_file)
    print(len(_res))
    res = pd.concat([res, _res], ignore_index=True)

# # 过滤
# res = res.loc[~res['title'].str.contains('label_1')]

res = res.set_index(['title', 'type', 'code', 'threshold'])
len(res)

D:\code\forecast_model\notebook\20250811_base


100%|██████████| 7/7 [00:00<00:00, 2341.50it/s]




FileNotFoundError: [Errno 2] No such file or directory: 'D:\\code\\forecast_model\\notebook\\20250811_base\\train_result.csv'

In [9]:
need_cols_raw = ['mean_class_f1','class_pre_0','class_pre_1','class_pre_2','mean_class_pre', 'class_acc_0','class_acc_1','class_acc_2','mean_class_acc']
need_cols_adj = [f'a_{i}' for i in need_cols_raw]
class_pct_cols = [i for i in list(res) if 'pct' in i]
_need_cols = need_cols_adj + need_cols_raw
_head_need_cols = ['a_mean_class_f1', 'mean_class_f1', 'a_mean_class_pre', 'mean_class_pre', 'a_mean_class_acc', 'mean_class_acc']
_other_need_cols = [i for i in _need_cols if i not in _head_need_cols]
need_cols = class_pct_cols + _head_need_cols + _other_need_cols
print(need_cols)

['class_0_pct', 'class_1_pct', 'class_2_pct', 'a_mean_class_f1', 'mean_class_f1', 'a_mean_class_pre', 'mean_class_pre', 'a_mean_class_acc', 'mean_class_acc', 'a_class_pre_0', 'a_class_pre_1', 'a_class_pre_2', 'a_class_acc_0', 'a_class_acc_1', 'a_class_acc_2', 'class_pre_0', 'class_pre_1', 'class_pre_2', 'class_acc_0', 'class_acc_1', 'class_acc_2']


In [10]:
res = res.reset_index()

# 提取 predict_n
try:
    res['predict_n'] = res['title'].apply(lambda x: int(re.search(r'predict_n(\d+)', x).group(1)))
    res['title'] = res['title'].apply(lambda x: re.sub(r'_predict_n\d+', '', x))
except:
    print(res['title'])
    p_n = input('请输入预测周期:')
    res['predict_n'] = int(p_n)

# 提取 top
try:
    res['top'] = res['title'].apply(lambda x: int(re.search(r'top(\d+)', x).group(1)))
    res['title'] = res['title'].apply(lambda x: re.sub(r'_top\d+', '', x))
except:
    top = input('请输入top:')
    res['top'] = int(top)

# 过滤掉 title 中的 _IDX\d+ 
res['title'] = res['title'].apply(lambda x: re.sub(r'_IDX\d+', '', x))

In [11]:
# 考察模型对各个标的的性能差异
groupbys = ['code']
groupbys = ['top'] + groupbys
d = res.reset_index().loc[:, groupbys + ['samples'] + need_cols].groupby(groupbys).mean()
d.sort_values(by=['top', 'a_mean_class_f1', 'a_mean_class_pre'], ascending=False).style.background_gradient(cmap=plt.cm.OrRd, subset=_head_need_cols, vmin=0, vmax=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,class_0_pct,class_1_pct,class_2_pct,a_mean_class_f1,mean_class_f1,a_mean_class_pre,mean_class_pre,a_mean_class_acc,mean_class_acc,a_class_pre_0,a_class_pre_1,a_class_pre_2,a_class_acc_0,a_class_acc_1,a_class_acc_2,class_pre_0,class_pre_1,class_pre_2,class_acc_0,class_acc_1,class_acc_2
top,code,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
5,159941,26558.0,0.013273,0.017283,0.969444,0.461193,0.199594,0.702634,0.143127,0.348878,0.348862,0.731301,0.673967,0.473984,0.325228,0.372529,0.941232,0.128304,0.157949,0.982621,0.325228,0.372496,0.939772
5,513180,27155.0,0.034542,0.021469,0.943988,0.430546,0.20519,0.702597,0.154695,0.315159,0.314451,0.655423,0.749771,0.43726,0.298336,0.331983,0.906237,0.146591,0.1628,0.963391,0.29692,0.331983,0.907855
5,513050,26840.0,0.080142,0.072532,0.847327,0.418455,0.206337,0.463658,0.146125,0.386855,0.386771,0.387843,0.539472,0.511663,0.335201,0.438509,0.745966,0.118856,0.173393,0.911916,0.335032,0.438509,0.744654
5,518880,26710.0,0.195324,0.20064,0.604037,0.339003,0.246005,0.452333,0.299461,0.397539,0.397528,0.387614,0.517053,0.495936,0.328366,0.466712,0.453221,0.266139,0.332783,0.734445,0.328362,0.466694,0.453567


In [12]:
# 考察模型的性能差异 (不同top, 不同数据规模)
groupbys = ['top', 'title', 'type', 'threshold']
d = res.reset_index().loc[res['type'] == 'model_final', groupbys + ['samples'] + need_cols].groupby(groupbys).mean()
d.sort_values(by=['top', 'a_mean_class_f1', 'a_mean_class_pre'], ascending=False).style.background_gradient(cmap=plt.cm.OrRd, subset=_head_need_cols, vmin=0, vmax=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,samples,class_0_pct,class_1_pct,class_2_pct,a_mean_class_f1,mean_class_f1,a_mean_class_pre,mean_class_pre,a_mean_class_acc,mean_class_acc,a_class_pre_0,a_class_pre_1,a_class_pre_2,a_class_acc_0,a_class_acc_1,a_class_acc_2,class_pre_0,class_pre_1,class_pre_2,class_acc_0,class_acc_1,class_acc_2
top,title,type,threshold,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
5,depth_god_label_win_diff_T4x2_fp16,model_final,max,26807.6,0.091092,0.088474,0.820434,0.492204,0.257102,0.566657,0.183656,0.47305,0.473038,0.555485,0.577829,0.554336,0.455005,0.491095,0.60493,0.176332,0.190981,0.920625,0.454751,0.491324,0.605009


In [13]:
# 考察模型的性能差异
groupbys = ['title', 'type', 'threshold']
d = res.reset_index().loc[res['type'] == 'model_final', groupbys + ['samples'] + need_cols].groupby(groupbys).mean()
d.sort_values(by=['a_mean_class_f1', 'a_mean_class_pre'], ascending=False).style.background_gradient(cmap=plt.cm.OrRd, subset=_head_need_cols, vmin=0, vmax=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,samples,class_0_pct,class_1_pct,class_2_pct,a_mean_class_f1,mean_class_f1,a_mean_class_pre,mean_class_pre,a_mean_class_acc,mean_class_acc,a_class_pre_0,a_class_pre_1,a_class_pre_2,a_class_acc_0,a_class_acc_1,a_class_acc_2,class_pre_0,class_pre_1,class_pre_2,class_acc_0,class_acc_1,class_acc_2
title,type,threshold,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
depth_god_label_win_diff_T4x2_fp16,model_final,max,26807.6,0.091092,0.088474,0.820434,0.492204,0.257102,0.566657,0.183656,0.47305,0.473038,0.555485,0.577829,0.554336,0.455005,0.491095,0.60493,0.176332,0.190981,0.920625,0.454751,0.491324,0.605009


In [14]:
# 考察模型 top 的性能差异
groupbys = ['title', 'predict_n', 'top']
# 同步top最小的标的列表
min_codes = res.loc[res['top'] == res['top'].min(), 'code'].unique()
print(min_codes)
d = res.reset_index().loc[:, groupbys + ['samples'] + need_cols].groupby(groupbys).mean()
d.sort_values(by=['title', 'predict_n', 'a_mean_class_f1', 'a_mean_class_pre'], ascending=False).style.background_gradient(cmap=plt.cm.OrRd, subset=_head_need_cols, vmin=0, vmax=1)

[159941 513050 513180 518880]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,samples,class_0_pct,class_1_pct,class_2_pct,a_mean_class_f1,mean_class_f1,a_mean_class_pre,mean_class_pre,a_mean_class_acc,mean_class_acc,a_class_pre_0,a_class_pre_1,a_class_pre_2,a_class_acc_0,a_class_acc_1,a_class_acc_2,class_pre_0,class_pre_1,class_pre_2,class_acc_0,class_acc_1,class_acc_2
title,predict_n,top,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
depth_god_label_win_diff_T4x2_fp16,100,5,26810.545455,0.133778,0.128247,0.737975,0.376213,0.247747,0.5242,0.267966,0.351793,0.351717,0.480451,0.567948,0.492183,0.295466,0.40812,0.707327,0.253277,0.282655,0.832228,0.295301,0.408132,0.707232
depth_god_label_win_diff_T4x2_fp16,60,5,26794.6,0.103271,0.101541,0.795188,0.425393,0.231905,0.549344,0.176455,0.397496,0.397303,0.513377,0.585311,0.483075,0.32153,0.473463,0.667647,0.155575,0.197335,0.866585,0.32125,0.473357,0.667625
depth_god_label_win_diff_T4x2_fp16,30,5,26753.333333,0.079447,0.086029,0.834524,0.375224,0.155269,0.507703,0.157029,0.379872,0.379719,0.415777,0.599629,0.491468,0.387077,0.372666,0.647662,0.106027,0.208032,0.88965,0.38667,0.372768,0.646861


In [15]:
# 考察模型 predict_n 的性能差异
groupbys = ['top', 'predict_n']
d = res.reset_index().loc[:, groupbys + ['samples'] + need_cols].groupby(groupbys).mean()
d.sort_values(by=['top', 'a_mean_class_f1', 'a_mean_class_pre'], ascending=False).style.background_gradient(cmap=plt.cm.OrRd, subset=_head_need_cols, vmin=0, vmax=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,samples,class_0_pct,class_1_pct,class_2_pct,a_mean_class_f1,mean_class_f1,a_mean_class_pre,mean_class_pre,a_mean_class_acc,mean_class_acc,a_class_pre_0,a_class_pre_1,a_class_pre_2,a_class_acc_0,a_class_acc_1,a_class_acc_2,class_pre_0,class_pre_1,class_pre_2,class_acc_0,class_acc_1,class_acc_2
top,predict_n,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
5,60,26794.6,0.103271,0.101541,0.795188,0.425393,0.231905,0.549344,0.176455,0.397496,0.397303,0.513377,0.585311,0.483075,0.32153,0.473463,0.667647,0.155575,0.197335,0.866585,0.32125,0.473357,0.667625
5,100,26810.545455,0.133778,0.128247,0.737975,0.376213,0.247747,0.5242,0.267966,0.351793,0.351717,0.480451,0.567948,0.492183,0.295466,0.40812,0.707327,0.253277,0.282655,0.832228,0.295301,0.408132,0.707232
5,30,26753.333333,0.079447,0.086029,0.834524,0.375224,0.155269,0.507703,0.157029,0.379872,0.379719,0.415777,0.599629,0.491468,0.387077,0.372666,0.647662,0.106027,0.208032,0.88965,0.38667,0.372768,0.646861


In [16]:
# 考察模型正对单独标的的性能差异
groupbys = ['title', 'type', 'threshold', 'code']
groupbys = ['top', 'predict_n'] + groupbys
d = res.reset_index().loc[res['code']==513050, groupbys + ['samples'] + need_cols].groupby(groupbys).mean()
d.sort_values(by=['top', 'predict_n', 'a_mean_class_f1', 'a_mean_class_pre'], ascending=False).style.background_gradient(cmap=plt.cm.OrRd, subset=_head_need_cols, vmin=0, vmax=1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,samples,class_0_pct,class_1_pct,class_2_pct,a_mean_class_f1,mean_class_f1,a_mean_class_pre,mean_class_pre,a_mean_class_acc,mean_class_acc,a_class_pre_0,a_class_pre_1,a_class_pre_2,a_class_acc_0,a_class_acc_1,a_class_acc_2,class_pre_0,class_pre_1,class_pre_2,class_acc_0,class_acc_1,class_acc_2
top,predict_n,title,type,threshold,code,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
5,100,depth_god_label_win_diff_T4x2_fp16,model_final,max,513050,26840.0,0.115574,0.107303,0.777124,0.476406,0.300785,0.484638,0.221497,0.468568,0.468644,0.474068,0.495209,0.575833,0.467691,0.469444,0.613385,0.218097,0.224898,0.904554,0.467843,0.469444,0.614536
5,100,depth_god_label_win_diff_T4x2_fp16,model_best,max,513050,26840.0,0.115574,0.107303,0.777124,0.469828,0.298206,0.483965,0.22171,0.456797,0.456365,0.474497,0.493434,0.564926,0.449444,0.464149,0.627587,0.219437,0.223984,0.899983,0.448582,0.464149,0.625899
5,60,depth_god_label_win_diff_T4x2_fp16,model_final,max,513050,26840.0,0.067027,0.056073,0.8769,0.477774,0.218255,0.49087,0.142462,0.466561,0.467116,0.472897,0.508842,0.590342,0.480963,0.452159,0.645797,0.151317,0.133608,0.953331,0.482073,0.452159,0.648528
5,60,depth_god_label_win_diff_T4x2_fp16,model_best,max,513050,26840.0,0.067027,0.056073,0.8769,0.444008,0.227598,0.518192,0.162259,0.389992,0.390335,0.501237,0.535147,0.503777,0.376329,0.403654,0.748322,0.175962,0.148556,0.934446,0.377015,0.403654,0.746909
5,30,depth_god_label_win_diff_T4x2_fp16,model_final,max,513050,26840.0,0.022392,0.019449,0.958159,0.556424,0.143012,0.61013,0.083084,0.513721,0.513659,0.584597,0.635664,0.584749,0.537979,0.489464,0.7659,0.085963,0.080205,0.984596,0.537854,0.489464,0.761675
5,30,depth_god_label_win_diff_T4x2_fp16,model_best,max,513050,26840.0,0.022392,0.019449,0.958159,0.455276,0.153831,0.615235,0.098312,0.363961,0.362802,0.59545,0.63502,0.476148,0.369205,0.358716,0.860489,0.100074,0.09655,0.976272,0.366889,0.358716,0.8588


In [17]:
before = 0.491281
after = 0.486124
pct = (after / before - 1) * 100
print(f'性能提升: {pct:.2f}%' if pct > 0 else f'性能下降: {pct:.2f}%')

性能下降: -1.05%
