In [2]:
import os
import pandas as pd
import glob
import re
pd.set_option('display.max_rows', None)
LOG_PATTERN = re.compile(
    r'(?P<dataset>\w+)_'          # 数据集 (e.g., beauty)
    r'(?P<method>\w+)_'           # 方法 (e.g., me, base)
    r'scala(?P<scale>\d+)_'       # 尺度 (e.g., 1, 0)
    r'seed(?P<seed>\d+)_'         # 种子 (e.g., 20)
    r'lr(?P<lr>[\d\.]+)'          # 学习率 (e.g., 0.01)
    r'(?:_decor(?P<decor>[\d\.]+))?' # [可选] decor参数
    r'(?:_mask(?P<mask>\d+))?'      # [新增] 可选的mask参数
    r'\.log'
)
# group_cols = ['Dataset', 'Method', 'Scale', 'Lr']


In [3]:
def analyze_experiment_logs(model_name, search_directory, log_pattern_str):
    """
    在指定的 search_directory 目录（及其所有子目录）中查找并分析实验日志。

    :param model_name: str, 模型名称，用于在结果DataFrame中标记模型。
    :param search_directory: str, 要递归搜索.log文件的根目录。
    :param log_pattern_str: str, 解析日志文件名的正则表达式。
    :return: pd.DataFrame, 包含所有解析结果的排序后的DataFrame。
    """
    
    # --- 1. 准备工作 ---
    # 我们不再拼接路径，直接使用传入的 search_directory
    log_pattern = re.compile(log_pattern_str)
    all_results_data = []

    # --- 2. 查找并遍历所有日志文件 ---
    # search_path 现在直接基于 search_directory 构建
    search_path = os.path.join(search_directory, '**', '*.log')
    log_files = glob.glob(search_path, recursive=True)
    
    # 打印信息也更新为使用 search_directory
    print(f"在 '{search_directory}' 目录下共找到 {len(log_files)} 个 .log 文件。开始解析...")

    # --- 3. 读取文件并提取指标 (这部分逻辑不变) ---
    for log_file_path in log_files:
        filename = os.path.basename(log_file_path)
        match = log_pattern.match(filename)
        
        if not match:
            continue
            
        params = match.groupdict()
        
        try:
            with open(log_file_path, 'r') as file:
                lines = file.readlines()
                if not lines:
                    continue
                
                last_line = lines[-1].strip()
                if 'AUC:' in last_line and 'logloss:' in last_line:
                    parts = last_line.split()
                    auc = float(parts[parts.index('AUC:') + 1])
                    logloss = float(parts[parts.index('logloss:') + 1])
                    
                    result_entry = {
                        'Model': model_name,
                        'AUC': auc,
                        'Logloss': logloss
                    }
                    for key, value in params.items():
                        if value.isdigit():
                            result_entry[key.capitalize()] = int(value)
                        elif '.' in value and value.replace('.', '', 1).isdigit():
                            result_entry[key.capitalize()] = float(value)
                        else:
                            result_entry[key.capitalize()] = value
                            
                    all_results_data.append(result_entry)

        except (IOError, IndexError, ValueError) as e:
            print(f"  - 错误: 处理文件 '{filename}' 时出错: {e}")

    if not all_results_data:
        print("\n未能解析到任何有效数据，请检查路径和文件格式。")
        return pd.DataFrame()

    raw_df = pd.DataFrame(all_results_data)
    
    # 动态获取排序列
    sort_cols = [key.capitalize() for key in log_pattern.groupindex.keys()]
    # 确保排序列存在于DataFrame中
    existing_sort_cols = [col for col in sort_cols if col in raw_df.columns]
    
    if not existing_sort_cols:
         print("\n警告：根据正则表达式定义的列在数据中均不存在，无法排序。")
         return raw_df

    df_sorted = raw_df.sort_values(by=existing_sort_cols).reset_index(drop=True)
    
    return df_sorted

# 修复前结果

### DCNv2moc实验结果

In [None]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1019_experiments',
    log_pattern_str=LOG_PATTERN
)

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1019_experiments' 目录下共找到 243 个 .log 文件。开始解析...


In [31]:
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results

Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
1,DCNv2,0.6768,0.5032,beauty,me,1,20,0.005
4,DCNv2,0.6754,0.5038,beauty,me,1,201,0.005
7,DCNv2,0.6771,0.5036,beauty,me,1,1027,0.005
10,DCNv2,0.6745,0.5045,beauty,me,3,20,0.005
13,DCNv2,0.675,0.5045,beauty,me,3,201,0.005
16,DCNv2,0.676,0.5039,beauty,me,3,1027,0.005
19,DCNv2,0.6642,0.6523,beauty,me,7,20,0.005
22,DCNv2,0.658,0.6399,beauty,me,7,201,0.005
25,DCNv2,0.6726,0.5055,beauty,me,7,1027,0.005
28,DCNv2,0.6771,0.5033,beauty,moc,1,20,0.005


In [32]:
group_cols = ['Model', 'Dataset', 'Method', 'Scale', 'Lr']
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,me,1,0.005,0.676433,0.000907,0.503533,0.000306,3
1,DCNv2,beauty,me,3,0.005,0.675167,0.000764,0.5043,0.000346,3
2,DCNv2,beauty,me,7,0.005,0.664933,0.007328,0.599233,0.081412,3
3,DCNv2,beauty,moc,1,0.005,0.676633,0.000503,0.5039,0.0006,3
4,DCNv2,beauty,moc,3,0.005,0.6743,0.000557,0.5047,0.000436,3
5,DCNv2,beauty,moc,7,0.005,0.670067,0.001137,0.506933,0.000611,3
6,DCNv2,beauty,rq,1,0.005,0.676767,0.000208,0.5038,0.0001,3
7,DCNv2,beauty,rq,3,0.005,0.6733,0.000954,0.505833,0.000569,3
8,DCNv2,beauty,rq,7,0.005,0.6675,0.000458,0.508833,5.8e-05,3
9,DCNv2,sports,me,1,0.005,0.703533,0.000814,0.4605,0.000173,3


### DCNv2 base without dropout and reg = 0.01

In [5]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/base_1104_experiments',
    log_pattern_str=LOG_PATTERN
)
raw_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/base_1104_experiments' 目录下共找到 9 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DCNv2,0.6261,0.5215,beauty,base,0,20,0.005
1,DCNv2,0.6205,0.523,beauty,base,0,201,0.005
2,DCNv2,0.6262,0.5217,beauty,base,0,1027,0.005
3,DCNv2,0.6425,0.4832,sports,base,0,20,0.005
4,DCNv2,0.6375,0.485,sports,base,0,201,0.005
5,DCNv2,0.6375,0.4847,sports,base,0,1027,0.005
6,DCNv2,0.6998,0.4708,toys,base,0,20,0.005
7,DCNv2,0.6947,0.473,toys,base,0,201,0.005
8,DCNv2,0.6938,0.4748,toys,base,0,1027,0.005


In [6]:
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results
group_cols = ['Model', 'Dataset', 'Method', 'Scale', 'Lr']
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,base,0,0.005,0.624267,0.003262,0.522067,0.000814,3
1,DCNv2,sports,base,0,0.005,0.639167,0.002887,0.4843,0.000964,3
2,DCNv2,toys,base,0,0.005,0.6961,0.003236,0.472867,0.002003,3


### DCNv2 base without dropout and reg = 0

In [7]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/base_1104_experiments_woreg',
    log_pattern_str=LOG_PATTERN
)
raw_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/base_1104_experiments_woreg' 目录下共找到 9 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DCNv2,0.6708,0.5062,beauty,base,0,20,0.005
1,DCNv2,0.6706,0.5058,beauty,base,0,201,0.005
2,DCNv2,0.6722,0.5056,beauty,base,0,1027,0.005
3,DCNv2,0.6937,0.4644,sports,base,0,20,0.005
4,DCNv2,0.693,0.4661,sports,base,0,201,0.005
5,DCNv2,0.6935,0.4658,sports,base,0,1027,0.005
6,DCNv2,0.7448,0.4476,toys,base,0,20,0.005
7,DCNv2,0.7427,0.4496,toys,base,0,201,0.005
8,DCNv2,0.7438,0.4481,toys,base,0,1027,0.005


In [8]:
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results
group_cols = ['Model', 'Dataset', 'Method', 'Scale', 'Lr']
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,base,0,0.005,0.6712,0.000872,0.505867,0.000306,3
1,DCNv2,sports,base,0,0.005,0.6934,0.000361,0.465433,0.000907,3
2,DCNv2,toys,base,0,0.005,0.743767,0.00105,0.448433,0.001041,3


### DCNv2 moc without dropout and reg = 0.001

In [16]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1104_experiments_wreg001',
    log_pattern_str=LOG_PATTERN
)
raw_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1104_experiments_wreg001' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DCNv2,0.6636,0.5097,beauty,me,1,20,0.001
1,DCNv2,0.6599,0.5106,beauty,me,1,20,0.005
2,DCNv2,0.6636,0.5101,beauty,me,1,201,0.001
3,DCNv2,0.658,0.5111,beauty,me,1,201,0.005
4,DCNv2,0.6614,0.5206,beauty,me,1,1027,0.001
5,DCNv2,0.6605,0.5102,beauty,me,1,1027,0.005
6,DCNv2,0.6505,0.5235,beauty,me,3,20,0.001
7,DCNv2,0.6593,0.5107,beauty,me,3,20,0.005
8,DCNv2,0.656,0.5187,beauty,me,3,201,0.001
9,DCNv2,0.6609,0.511,beauty,me,3,201,0.005


In [17]:
filtered_results = raw_results[raw_results['Lr'] == 0.001]
filtered_results
group_cols = ['Model', 'Dataset', 'Method', 'Scale', 'Lr']
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,me,1,0.001,0.662867,0.00127,0.513467,0.006181,3
1,DCNv2,beauty,me,3,0.001,0.653467,0.002775,0.5208,0.002456,3
2,DCNv2,beauty,me,7,0.001,0.647733,0.005426,0.576467,0.106811,3
3,DCNv2,beauty,moc,1,0.001,0.667833,0.000702,0.5089,0.000872,3
4,DCNv2,beauty,moc,3,0.001,0.6607,0.002381,0.512333,0.00387,3
5,DCNv2,beauty,moc,7,0.001,0.657633,0.000929,0.513433,0.001834,3
6,DCNv2,beauty,rq,1,0.001,0.666367,0.002259,0.510067,0.001589,3
7,DCNv2,beauty,rq,3,0.001,0.663667,0.00085,0.512167,0.00165,3
8,DCNv2,beauty,rq,7,0.001,0.664933,0.001387,0.513667,0.001021,3
9,DCNv2,sports,me,1,0.001,0.691567,0.001767,0.4666,0.001229,3


### DCNv2 base实验结果

In [35]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/base_1018_experiments',
    log_pattern_str=LOG_PATTERN
)
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/base_1018_experiments' 目录下共找到 27 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
1,DCNv2,0.6701,0.5069,beauty,base,0,20,0.005
4,DCNv2,0.6711,0.5058,beauty,base,0,201,0.005
7,DCNv2,0.6702,0.5077,beauty,base,0,1027,0.005
10,DCNv2,0.6952,0.4648,sports,base,0,20,0.005
13,DCNv2,0.6942,0.4653,sports,base,0,201,0.005
16,DCNv2,0.6942,0.4644,sports,base,0,1027,0.005
19,DCNv2,0.7417,0.4521,toys,base,0,20,0.005
22,DCNv2,0.7432,0.4481,toys,base,0,201,0.005
25,DCNv2,0.7448,0.448,toys,base,0,1027,0.005


### DeepFM moc实验结果

##### 普通融合版本，在dnn处融合

In [11]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1025_experiments',
    log_pattern_str=LOG_PATTERN
)
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1025_experiments' 目录下共找到 243 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
1,DeepFM,0.6785,0.5043,beauty,me,1,20,0.005
4,DeepFM,0.68,0.503,beauty,me,1,201,0.005
7,DeepFM,0.6781,0.506,beauty,me,1,1027,0.005
10,DeepFM,0.6788,0.5035,beauty,me,3,20,0.005
13,DeepFM,0.6798,0.5036,beauty,me,3,201,0.005
16,DeepFM,0.6773,0.5073,beauty,me,3,1027,0.005
19,DeepFM,0.6785,0.5054,beauty,me,7,20,0.005
22,DeepFM,0.6796,0.5029,beauty,me,7,201,0.005
25,DeepFM,0.6775,0.5091,beauty,me,7,1027,0.005
28,DeepFM,0.6785,0.5041,beauty,moc,1,20,0.005


In [14]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,beauty,me,1,0.005,0.678867,0.001002,0.504433,0.001504,3
1,beauty,me,3,0.005,0.678633,0.001258,0.5048,0.002166,3
2,beauty,me,7,0.005,0.678533,0.00105,0.5058,0.003119,3
3,beauty,moc,1,0.005,0.678733,0.001266,0.505,0.002571,3
4,beauty,moc,3,0.005,0.6785,0.001253,0.505433,0.002937,3
5,beauty,moc,7,0.005,0.6785,0.001082,0.505633,0.002974,3
6,beauty,rq,1,0.005,0.678767,0.001124,0.504633,0.001823,3
7,beauty,rq,3,0.005,0.6782,0.001249,0.505167,0.002458,3
8,beauty,rq,7,0.005,0.677833,0.001106,0.506533,0.00305,3
9,sports,me,1,0.005,0.706333,0.000777,0.459767,0.000321,3


##### 在fm处融合

In [26]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1025_experiments_earlyin',
    log_pattern_str=LOG_PATTERN
)
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1025_experiments_earlyin' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
1,DeepFM,0.6451,0.521,beauty,me,1,20,0.005
3,DeepFM,0.6552,0.5502,beauty,me,1,201,0.005
5,DeepFM,0.6486,0.5303,beauty,me,1,1027,0.005
7,DeepFM,0.6553,0.5469,beauty,me,3,20,0.005
9,DeepFM,0.6561,0.5912,beauty,me,3,201,0.005
11,DeepFM,0.6485,0.5454,beauty,me,3,1027,0.005
13,DeepFM,0.6606,0.5218,beauty,me,7,20,0.005
15,DeepFM,0.6548,0.5906,beauty,me,7,201,0.005
17,DeepFM,0.646,0.586,beauty,me,7,1027,0.005
19,DeepFM,0.6349,0.5257,beauty,moc,1,20,0.005


In [27]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,beauty,me,1,0.005,0.649633,0.005129,0.533833,0.014917,3
1,beauty,me,3,0.005,0.6533,0.004176,0.561167,0.02602,3
2,beauty,me,7,0.005,0.6538,0.007351,0.566133,0.038463,3
3,beauty,moc,1,0.005,0.645567,0.011899,0.541833,0.021774,3
4,beauty,moc,3,0.005,0.6548,0.006594,0.538633,0.034791,3
5,beauty,moc,7,0.005,0.655567,0.012215,0.631267,0.076033,3
6,beauty,rq,1,0.005,0.653867,0.008451,0.6092,0.125746,3
7,beauty,rq,3,0.005,0.648333,0.01133,0.571333,0.018194,3
8,beauty,rq,7,0.005,0.643467,0.00814,0.607,0.068784,3
9,sports,me,1,0.005,0.672833,0.007273,0.518233,0.039148,3


#### DeepFM的base实验结果

In [28]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/base_1025_experiments',
    log_pattern_str=LOG_PATTERN
)
filtered_results = raw_results[raw_results['Lr'] == 0.001]
filtered_results
# raw_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/base_1025_experiments' 目录下共找到 27 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DeepFM,0.6521,0.701,beauty,base,0,20,0.001
3,DeepFM,0.6517,0.7096,beauty,base,0,201,0.001
6,DeepFM,0.6484,0.6202,beauty,base,0,1027,0.001
9,DeepFM,0.6651,0.7461,sports,base,0,20,0.001
12,DeepFM,0.6674,0.7016,sports,base,0,201,0.001
15,DeepFM,0.6742,0.6069,sports,base,0,1027,0.001
18,DeepFM,0.7169,0.6437,toys,base,0,20,0.001
21,DeepFM,0.7241,0.6139,toys,base,0,201,0.001
24,DeepFM,0.7203,0.6218,toys,base,0,1027,0.001


#### deepfm base 新版，融合修正

In [29]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/base_1025_newfm_experiments',
    log_pattern_str=LOG_PATTERN
)
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/base_1025_newfm_experiments' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
1,DeepFM,0.6501,0.5577,beauty,base,0,20,0.005
3,DeepFM,0.6561,0.5341,beauty,base,0,201,0.005
5,DeepFM,0.6303,0.7682,beauty,base,0,1027,0.005
7,DeepFM,0.684,0.5675,sports,base,0,20,0.005
9,DeepFM,0.6693,0.4935,sports,base,0,201,0.005
11,DeepFM,0.6625,0.5197,sports,base,0,1027,0.005
13,DeepFM,0.7245,0.4993,toys,base,0,20,0.005
15,DeepFM,0.7185,0.5165,toys,base,0,201,0.005
17,DeepFM,0.723,0.5527,toys,base,0,1027,0.005


In [30]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,beauty,base,0,0.005,0.6455,0.013501,0.62,0.128886,3
1,sports,base,0,0.005,0.671933,0.010989,0.5269,0.037522,3
2,toys,base,0,0.005,0.722,0.003122,0.522833,0.027258,3


### DeepFM base reg=0.01

In [4]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/base_1104_experiments',
    log_pattern_str=LOG_PATTERN
)
filtered_results = raw_results[raw_results['Lr'] == 0.005]
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/base_1104_experiments' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
1,DeepFM,0.6717,0.5054,beauty,base,0,20,0.005
3,DeepFM,0.6586,0.512,beauty,base,0,201,0.005
5,DeepFM,0.6642,0.509,beauty,base,0,1027,0.005
7,DeepFM,0.6957,0.4636,sports,base,0,20,0.005
9,DeepFM,0.6871,0.4673,sports,base,0,201,0.005
11,DeepFM,0.6936,0.4646,sports,base,0,1027,0.005
13,DeepFM,0.7424,0.4491,toys,base,0,20,0.005
15,DeepFM,0.7417,0.4489,toys,base,0,201,0.005
17,DeepFM,0.7375,0.4516,toys,base,0,1027,0.005


In [5]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Dataset,Method,Scale,Lr,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,beauty,base,0,0.005,0.664833,0.006573,0.5088,0.003305,3
1,sports,base,0,0.005,0.692133,0.004484,0.465167,0.001914,3
2,toys,base,0,0.005,0.740533,0.00265,0.449867,0.001504,3


### DeepFM Moc reg= 0.01

In [14]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1104_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1104_experiments' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DeepFM,0.6716,0.5603,beauty,me,1,20,0.001
1,DeepFM,0.6738,0.533,beauty,me,1,201,0.001
2,DeepFM,0.6756,0.5193,beauty,me,1,1027,0.001
3,DeepFM,0.6722,0.5093,beauty,me,3,20,0.001
4,DeepFM,0.6665,0.556,beauty,me,3,201,0.001
5,DeepFM,0.6741,0.5118,beauty,me,3,1027,0.001
6,DeepFM,0.6712,0.5059,beauty,me,7,20,0.005
7,DeepFM,0.669,0.5078,beauty,me,7,201,0.005
8,DeepFM,0.6704,0.5057,beauty,me,7,1027,0.005
9,DeepFM,0.6736,0.5119,beauty,moc,1,20,0.001


In [15]:

summary_df = filtered_results.groupby(['Model', 'Dataset', 'Method', 'Scale']).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepFM,beauty,me,1,0.673667,0.002003,0.537533,0.020873,3
1,DeepFM,beauty,me,3,0.670933,0.003955,0.5257,0.02627,3
2,DeepFM,beauty,me,7,0.6702,0.001114,0.506467,0.001159,3
3,DeepFM,beauty,moc,1,0.673533,0.000503,0.517167,0.012279,3
4,DeepFM,beauty,moc,3,0.664367,0.001361,0.5088,0.0003,3
5,DeepFM,beauty,moc,7,0.663833,0.00106,0.509933,0.000833,3
6,DeepFM,beauty,rq,1,0.670967,0.001677,0.506067,0.000751,3
7,DeepFM,beauty,rq,3,0.664467,0.002371,0.5096,0.000781,3
8,DeepFM,beauty,rq,7,0.664667,0.001305,0.511633,0.001901,3
9,DeepFM,sports,me,1,0.694267,0.006178,0.476167,0.017304,3


## DeepIM dropout=0.1


### base reg=0

In [18]:
raw_results = analyze_experiment_logs(
    model_name='DeepIM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/base_1104_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/base_1104_experiments' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DeepIM,0.6682,0.5071,beauty,base,0,20,0.005
1,DeepIM,0.6686,0.5068,beauty,base,0,201,0.005
2,DeepIM,0.6693,0.5069,beauty,base,0,1027,0.005
3,DeepIM,0.691,0.4659,sports,base,0,20,0.001
4,DeepIM,0.6904,0.467,sports,base,0,201,0.001
5,DeepIM,0.6938,0.4646,sports,base,0,1027,0.001
6,DeepIM,0.7431,0.4486,toys,base,0,20,0.005
7,DeepIM,0.7414,0.4492,toys,base,0,201,0.005
8,DeepIM,0.7394,0.4505,toys,base,0,1027,0.005


In [19]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepIM,beauty,base,0,0.6687,0.000557,0.506933,0.000153,3
1,DeepIM,sports,base,0,0.691733,0.001815,0.465833,0.001201,3
2,DeepIM,toys,base,0,0.7413,0.001852,0.449433,0.000971,3


### base reg = 0.001

In [21]:
raw_results = analyze_experiment_logs(
    model_name='DeepIM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/base_1104_wreg001_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/base_1104_wreg001_experiments' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DeepIM,0.6659,0.508,beauty,base,0,20,0.001
1,DeepIM,0.6675,0.5075,beauty,base,0,201,0.001
2,DeepIM,0.665,0.5097,beauty,base,0,1027,0.001
3,DeepIM,0.6884,0.467,sports,base,0,20,0.001
4,DeepIM,0.6891,0.4675,sports,base,0,201,0.001
5,DeepIM,0.6921,0.4655,sports,base,0,1027,0.001
6,DeepIM,0.7402,0.4516,toys,base,0,20,0.001
7,DeepIM,0.7407,0.4499,toys,base,0,201,0.001
8,DeepIM,0.7397,0.451,toys,base,0,1027,0.001


In [22]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepIM,beauty,base,0,0.666133,0.001266,0.5084,0.001153,3
1,DeepIM,sports,base,0,0.689867,0.001966,0.466667,0.001041,3
2,DeepIM,toys,base,0,0.7402,0.0005,0.450833,0.000862,3


### moc reg=0.001

In [8]:
raw_results = analyze_experiment_logs(
    model_name='DeepIM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/moc_1104_wreg001_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/moc_1104_wreg001_experiments' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DeepIM,0.6696,0.6526,beauty,me,1,20,0.001
1,DeepIM,0.6683,0.5073,beauty,me,1,201,0.001
2,DeepIM,0.6683,0.5074,beauty,me,1,1027,0.001
3,DeepIM,0.6678,0.5078,beauty,me,3,20,0.001
4,DeepIM,0.6675,0.5085,beauty,me,3,201,0.001
5,DeepIM,0.667,0.5083,beauty,me,3,1027,0.001
6,DeepIM,0.671,0.5586,beauty,me,7,20,0.001
7,DeepIM,0.6685,0.5077,beauty,me,7,201,0.001
8,DeepIM,0.6704,0.5068,beauty,me,7,1027,0.001
9,DeepIM,0.6685,0.5263,beauty,moc,1,20,0.005


In [9]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepIM,beauty,me,1,0.668733,0.000751,0.555767,0.08386,3
1,DeepIM,beauty,me,3,0.667433,0.000404,0.5082,0.000361,3
2,DeepIM,beauty,me,7,0.669967,0.001305,0.524367,0.02965,3
3,DeepIM,beauty,moc,1,0.670967,0.002173,0.5183,0.006963,3
4,DeepIM,beauty,moc,3,0.667133,0.001007,0.5447,0.06305,3
5,DeepIM,beauty,moc,7,0.665567,0.002845,0.537333,0.024038,3
6,DeepIM,beauty,rq,1,0.6682,0.000819,0.5655,0.063333,3
7,DeepIM,beauty,rq,3,0.666433,0.001301,0.5264,0.031177,3
8,DeepIM,beauty,rq,7,0.6692,0.00052,0.523533,0.003972,3
9,DeepIM,sports,me,1,0.68995,0.000495,0.46635,0.000212,2


### moc reg=0

In [4]:
raw_results = analyze_experiment_logs(
    model_name='DeepIM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/moc_1104_woreg_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/moc_1104_woreg_experiments' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,DeepIM,0.6737,0.5049,beauty,me,1,20,0.005
1,DeepIM,0.6774,0.5033,beauty,me,1,201,0.005
2,DeepIM,0.677,0.5037,beauty,me,1,1027,0.005
3,DeepIM,0.6723,0.5068,beauty,me,3,20,0.005
4,DeepIM,0.6712,0.5067,beauty,me,3,201,0.005
5,DeepIM,0.6731,0.5057,beauty,me,3,1027,0.005
6,DeepIM,0.6608,0.5143,beauty,me,7,20,0.005
7,DeepIM,0.6595,0.5143,beauty,me,7,201,0.005
8,DeepIM,0.6578,0.5267,beauty,me,7,1027,0.005
9,DeepIM,0.6745,0.505,beauty,moc,1,20,0.005


In [5]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepIM,beauty,me,1,0.676033,0.002031,0.503967,0.000833,3
1,DeepIM,beauty,me,3,0.6722,0.000954,0.5064,0.000608,3
2,DeepIM,beauty,me,7,0.659367,0.001504,0.518433,0.007159,3
3,DeepIM,beauty,moc,1,0.6757,0.0012,0.504067,0.000862,3
4,DeepIM,beauty,moc,3,0.6713,0.000557,0.506267,0.000416,3
5,DeepIM,beauty,moc,7,0.664133,0.000723,0.510467,0.000569,3
6,DeepIM,beauty,rq,1,0.6745,0.002252,0.5049,0.001217,3
7,DeepIM,beauty,rq,3,0.672567,0.000802,0.505933,0.000289,3
8,DeepIM,beauty,rq,7,0.664233,0.000503,0.511067,0.000833,3
9,DeepIM,sports,me,1,0.7026,0.001453,0.461033,0.000702,3


## AutoInt

### base reg=0

In [12]:
raw_results = analyze_experiment_logs(
    model_name='AutoInt',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/AutoInt/base_1104_woreg_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/AutoInt/base_1104_woreg_experiments' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,AutoInt,0.6674,0.5076,beauty,base,0,20,0.005
1,AutoInt,0.6662,0.5085,beauty,base,0,201,0.005
2,AutoInt,0.6683,0.5074,beauty,base,0,1027,0.005
3,AutoInt,0.6919,0.4653,sports,base,0,20,0.005
4,AutoInt,0.6953,0.464,sports,base,0,201,0.005
5,AutoInt,0.6925,0.466,sports,base,0,1027,0.005
6,AutoInt,0.7434,0.4486,toys,base,0,20,0.001
7,AutoInt,0.7424,0.4491,toys,base,0,201,0.001
8,AutoInt,0.7398,0.4506,toys,base,0,1027,0.001


In [13]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,AutoInt,beauty,base,0,0.6673,0.001054,0.507833,0.000586,3
1,AutoInt,sports,base,0,0.693233,0.001815,0.4651,0.001015,3
2,AutoInt,toys,base,0,0.741867,0.001858,0.449433,0.001041,3


### Moc reg=0

In [16]:
raw_results = analyze_experiment_logs(
    model_name='AutoInt',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/AutoInt/moc_1104_woreg_experiments',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
filtered_results

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/AutoInt/moc_1104_woreg_experiments' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr
0,AutoInt,0.676,0.5043,beauty,me,1,20,0.005
1,AutoInt,0.6749,0.505,beauty,me,1,201,0.005
2,AutoInt,0.6745,0.5044,beauty,me,1,1027,0.005
3,AutoInt,0.6703,0.5064,beauty,me,3,20,0.005
4,AutoInt,0.6707,0.5069,beauty,me,3,201,0.005
5,AutoInt,0.6701,0.5065,beauty,me,3,1027,0.005
6,AutoInt,0.6681,0.507,beauty,me,7,20,0.005
7,AutoInt,0.6697,0.5063,beauty,me,7,201,0.005
8,AutoInt,0.6677,0.5072,beauty,me,7,1027,0.005
9,AutoInt,0.6741,0.5054,beauty,moc,1,20,0.005


In [17]:
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,AutoInt,beauty,me,1,0.675133,0.000777,0.504567,0.000379,3
1,AutoInt,beauty,me,3,0.670367,0.000306,0.5066,0.000265,3
2,AutoInt,beauty,me,7,0.6685,0.001058,0.506833,0.000473,3
3,AutoInt,beauty,moc,1,0.674433,0.00085,0.5049,0.000458,3
4,AutoInt,beauty,moc,3,0.668567,0.001909,0.5075,0.000819,3
5,AutoInt,beauty,moc,7,0.6665,0.001044,0.508433,0.000929,3
6,AutoInt,beauty,rq,1,0.674967,0.000351,0.504967,0.000321,3
7,AutoInt,beauty,rq,3,0.667233,0.00095,0.509,0.001015,3
8,AutoInt,beauty,rq,7,0.6668,0.001323,0.5091,0.000608,3
9,AutoInt,sports,me,1,0.701233,0.000208,0.4624,0.0007,3


# 修复后结果

### DCNv2 moc 无reg， dropout=0.2 

In [6]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1107',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1107' 目录下共找到 81 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,me,1,0.671867,0.001168,0.505767,0.000404,3
1,DCNv2,beauty,me,3,0.6718,0.000656,0.505833,0.000513,3
2,DCNv2,beauty,me,7,0.671867,0.000252,0.505667,5.8e-05,3
3,DCNv2,beauty,moc,1,0.6713,0.001905,0.506,0.000624,3
4,DCNv2,beauty,moc,3,0.6719,0.001229,0.506033,0.000416,3
5,DCNv2,beauty,moc,7,0.673933,0.000907,0.504967,0.000351,3
6,DCNv2,beauty,rq,1,0.6718,0.000854,0.505867,0.000252,3
7,DCNv2,beauty,rq,3,0.673633,0.000902,0.5053,0.000781,3
8,DCNv2,beauty,rq,7,0.6741,0.000889,0.5049,0.0006,3
9,DCNv2,sports,me,1,0.695433,0.001779,0.464467,0.000321,3


### DCNv2 moc 无reg， dropout=0.2 加mix=32

In [8]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1107_mix32',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1107_mix32' 目录下共找到 81 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,me,1,0.670333,0.00095,0.507733,0.001531,3
1,DCNv2,beauty,me,3,0.6701,0.0,0.506533,0.000416,3
2,DCNv2,beauty,me,7,0.669767,0.001159,0.506733,0.000208,3
3,DCNv2,beauty,moc,1,0.6707,0.001778,0.507867,0.001589,3
4,DCNv2,beauty,moc,3,0.669833,0.000971,0.507133,0.000289,3
5,DCNv2,beauty,moc,7,0.6697,0.000436,0.506733,5.8e-05,3
6,DCNv2,beauty,rq,1,0.6708,0.001375,0.506933,0.00095,3
7,DCNv2,beauty,rq,3,0.671333,0.000404,0.506367,0.000451,3
8,DCNv2,beauty,rq,7,0.670967,0.000757,0.5068,0.000755,3
9,DCNv2,sports,me,1,0.695667,0.000231,0.464367,0.001172,3


### DeepFM 无reg

In [15]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1107_woreg',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1107_woreg' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepFM,beauty,me,1,0.672467,0.000751,0.505533,0.000252,3
1,DeepFM,beauty,me,3,0.6714,0.000656,0.5061,0.000458,3
2,DeepFM,beauty,me,7,0.671833,0.001002,0.506367,0.000153,3
3,DeepFM,beauty,moc,1,0.672533,0.00171,0.5059,0.000721,3
4,DeepFM,beauty,moc,3,0.672167,0.000252,0.5059,0.0001,3
5,DeepFM,beauty,moc,7,0.6715,0.001375,0.5063,0.000529,3
6,DeepFM,beauty,rq,1,0.672433,0.001106,0.505767,0.000321,3
7,DeepFM,beauty,rq,3,0.670967,0.001419,0.506333,0.00085,3
8,DeepFM,beauty,rq,7,0.672833,0.001721,0.5065,0.000889,3
9,DeepFM,sports,me,1,0.696267,0.001102,0.4641,0.0005,3


### DeepFM reg0.01

In [16]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1107_wreg01',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1107_wreg01' 目录下共找到 81 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepFM,beauty,me,1,0.6663,0.004257,0.508767,0.002318,3
1,DeepFM,beauty,me,3,0.670733,0.000569,0.5062,0.000265,3
2,DeepFM,beauty,me,7,0.6699,0.002696,0.506467,0.001102,3
3,DeepFM,beauty,moc,1,0.667367,0.002371,0.5081,0.0007,3
4,DeepFM,beauty,moc,3,0.670833,0.000208,0.506267,0.000115,3
5,DeepFM,beauty,moc,7,0.670633,0.001242,0.506833,0.001286,3
6,DeepFM,beauty,rq,1,0.666567,0.002444,0.507633,0.001069,3
7,DeepFM,beauty,rq,3,0.6681,0.00327,0.5072,0.001044,3
8,DeepFM,beauty,rq,7,0.667133,0.001387,0.508,0.000265,3
9,DeepFM,sports,me,1,0.688867,0.002136,0.466833,0.000551,3


### DeepFM reg0.001

In [20]:
raw_results = analyze_experiment_logs(
    model_name='DeepFM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1107_wreg001',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepFM/DeepFM_torch/moc_1107_wreg001' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepFM,beauty,me,1,0.6639,0.006122,0.534367,0.008675,3
1,DeepFM,beauty,me,3,0.663433,0.007508,0.529733,0.010027,3
2,DeepFM,beauty,me,7,0.666367,0.000757,0.508933,0.000493,3
3,DeepFM,beauty,moc,1,0.662967,0.001405,0.510233,0.00162,3
4,DeepFM,beauty,moc,3,0.661367,0.001893,0.510333,0.000757,3
5,DeepFM,beauty,moc,7,0.663933,0.001266,0.5108,0.000964,3
6,DeepFM,beauty,rq,1,0.662667,0.001943,0.5099,0.001136,3
7,DeepFM,beauty,rq,3,0.662367,0.001097,0.510533,0.000839,3
8,DeepFM,beauty,rq,7,0.661733,0.00085,0.566133,0.091435,3
9,DeepFM,sports,me,1,0.689167,0.001258,0.4684,0.0008,3


### DeepIM moc noreg

In [21]:
raw_results = analyze_experiment_logs(
    model_name='DeepIM',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/moc_1107_woreg',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DeepIM/moc_1107_woreg' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DeepIM,beauty,me,1,0.668467,0.00135,0.5074,0.000624,3
1,DeepIM,beauty,me,3,0.669033,0.000503,0.5071,0.000265,3
2,DeepIM,beauty,me,7,0.670067,0.00106,0.506633,0.000404,3
3,DeepIM,beauty,moc,1,0.669367,0.00155,0.5069,0.00052,3
4,DeepIM,beauty,moc,3,0.669533,0.001172,0.506933,0.000473,3
5,DeepIM,beauty,moc,7,0.669333,0.000404,0.5067,0.000265,3
6,DeepIM,beauty,rq,1,0.668,0.002193,0.5075,0.000721,3
7,DeepIM,beauty,rq,3,0.668267,0.000723,0.5074,0.0001,3
8,DeepIM,beauty,rq,7,0.669933,0.000651,0.506533,0.000416,3
9,DeepIM,sports,me,1,0.693767,0.00105,0.465033,0.000473,3


### AutoInt moc 

In [5]:
raw_results = analyze_experiment_logs(
    model_name='AutoInt',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/AutoInt/moc_1107_woreg',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/AutoInt/moc_1107_woreg' 目录下共找到 162 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,AutoInt,beauty,me,1,0.6691,0.001082,0.507267,0.000961,3
1,AutoInt,beauty,me,3,0.667367,0.002532,0.5084,0.001212,3
2,AutoInt,beauty,me,7,0.669567,0.000764,0.506767,0.000306,3
3,AutoInt,beauty,moc,1,0.668,0.002081,0.507367,0.001021,3
4,AutoInt,beauty,moc,3,0.668633,0.001305,0.5078,0.000985,3
5,AutoInt,beauty,moc,7,0.669533,0.000252,0.506833,0.000351,3
6,AutoInt,beauty,rq,1,0.668867,0.001097,0.507133,0.000404,3
7,AutoInt,beauty,rq,3,0.668033,0.00106,0.508267,0.000503,3
8,AutoInt,beauty,rq,7,0.669,0.0014,0.507433,0.00095,3
9,AutoInt,sports,me,1,0.6936,0.000954,0.4649,0.000173,3


# mask 测试

In [7]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask1',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask1' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.744833,0.00138,0.447467,0.000757,3
1,DCNv2,toys,moc,3,0.745367,0.000569,0.4473,0.000361,3
2,DCNv2,toys,moc,7,0.745767,0.001007,0.4471,0.000458,3


In [8]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask2',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask2' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.7446,0.0006,0.449333,0.000814,3
1,DCNv2,toys,moc,3,0.7456,0.000557,0.447167,0.000351,3
2,DCNv2,toys,moc,7,0.746167,0.000404,0.447033,5.8e-05,3


In [9]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask3',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask3' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.744567,0.001172,0.447733,0.000569,3
1,DCNv2,toys,moc,3,0.745633,0.000351,0.4481,0.000346,3
2,DCNv2,toys,moc,7,0.7464,0.000794,0.4468,0.000458,3


In [10]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask4',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask4' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.744567,0.000473,0.447667,0.000451,3
1,DCNv2,toys,moc,3,0.745267,0.000643,0.447167,5.8e-05,3
2,DCNv2,toys,moc,7,0.746333,0.000493,0.446933,0.000231,3


In [11]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask5',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask5' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.7445,0.000436,0.449433,0.000153,3
1,DCNv2,toys,moc,3,0.745067,0.000321,0.447467,0.000306,3
2,DCNv2,toys,moc,7,0.7462,0.000265,0.446733,0.000351,3


### diff mask

In [5]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask012',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask012' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745567,0.000351,0.447133,0.000252,3


In [6]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask123',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask123' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745867,0.001069,0.447133,0.000379,3


In [7]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask137',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask137' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745367,0.000586,0.4471,0.000346,3


In [8]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0123456',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0123456' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.746633,5.8e-05,0.446567,0.000153,3


In [9]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0011223',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0011223' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.7464,0.0006,0.447,0.000265,3


In [10]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0112233',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0112233' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.7466,0.000608,0.4468,0.0001,3


In [11]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0122333',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0122333' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.746133,0.000586,0.4471,0.000265,3


In [12]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0001123',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_mask0001123' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.746133,0.000981,0.446933,0.000513,3


### drop

In [4]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop1',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop1' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745567,0.000451,0.447267,5.8e-05,3
1,DCNv2,toys,moc,7,0.746333,0.000611,0.4469,0.0002,3


In [5]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop2',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop2' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745233,0.000351,0.4473,0.0001,3
1,DCNv2,toys,moc,7,0.746467,0.000321,0.4468,0.0003,3


In [7]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop3',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop3' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745667,0.000513,0.447133,0.000252,3
1,DCNv2,toys,moc,7,0.745733,0.000681,0.447167,0.000351,3


In [8]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop012',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop012' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.744933,0.000643,0.4475,0.0001,3


In [9]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop123',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop123' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.745333,0.000503,0.4473,0.000265,3


In [10]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop137',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop137' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,0.7456,0.000624,0.4472,0.0002,3


In [11]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop0123456',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop0123456' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.746633,0.001021,0.446667,0.000321,3


In [12]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop0001123',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop0001123' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.746333,0.000802,0.446733,0.000513,3


In [13]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop0112233',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1108_drop0112233' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,7,0.746167,0.000681,0.446967,0.000351,3


### mean版本结果

In [15]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_1',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_1' 目录下共找到 36 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,me,1,0.744433,0.000451,0.447767,0.000351,3
1,DCNv2,toys,me,3,0.7449,0.000954,0.447967,0.00145,3
2,DCNv2,toys,me,7,0.745167,0.000153,0.4477,0.0003,3
3,DCNv2,toys,rq,1,0.744533,0.00106,0.449067,0.001286,3
4,DCNv2,toys,rq,3,0.744767,0.000643,0.449267,0.000681,3
5,DCNv2,toys,rq,7,0.745067,0.000379,0.448067,0.000814,3


In [17]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_2',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_2' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,me,10,0.744967,0.000321,0.4477,0.000608,3
1,DCNv2,toys,me,14,0.744667,0.000208,0.4479,0.000361,3


In [18]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_3',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_3' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,rq,10,0.744367,0.000379,0.448,0.000265,3
1,DCNv2,toys,rq,14,0.744333,0.000306,0.448,0.000346,3


In [19]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask1',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask1' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,10,0.744467,0.000208,0.448133,0.000764,3
1,DCNv2,toys,moc,14,0.744367,0.000551,0.448367,0.000757,3


In [20]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask2',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask2' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,10,0.744833,0.000379,0.449333,0.000862,3
1,DCNv2,toys,moc,14,0.7446,0.001058,0.448067,0.000814,3


In [21]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask3',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask3' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,10,0.744867,0.000252,0.448067,0.000808,3
1,DCNv2,toys,moc,14,0.744467,0.000709,0.4481,0.000819,3


In [22]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask0011223344',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask0011223344' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,10,0.745333,0.000252,0.447533,0.000666,3


In [None]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask0112234567',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask0112234567' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,10,0.7446,0.0003,0.448233,0.000404,3


In [26]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask1122334455',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask1122334455' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,10,0.744667,0.000666,0.447933,0.000586,3


In [27]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask00112233445566',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask00112233445566' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,14,0.744867,0.000379,0.4477,0.000436,3


In [28]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask11223344556677',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_4_mask11223344556677' 目录下共找到 6 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,14,0.744867,0.000643,0.448,0.0007,3


In [30]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_5_mask1',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_5_mask1' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.744833,0.00138,0.447467,0.000757,3
1,DCNv2,toys,moc,3,0.744733,0.000929,0.449167,0.000351,3
2,DCNv2,toys,moc,7,0.745267,0.000635,0.449433,0.000666,3


In [31]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_5_mask2',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_5_mask2' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.7446,0.0006,0.449333,0.000814,3
1,DCNv2,toys,moc,3,0.744433,0.00155,0.447967,0.00135,3
2,DCNv2,toys,moc,7,0.7452,0.000458,0.4478,0.0007,3


In [32]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_5_mask3',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1109_5_mask3' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,1,0.744567,0.001172,0.447733,0.000569,3
1,DCNv2,toys,moc,3,0.744867,0.00194,0.447733,0.001422,3
2,DCNv2,toys,moc,7,0.744867,0.000231,0.4479,0.0002,3


# decor

In [None]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1110_decor',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1110_decor' 目录下共找到 60 个 .log 文件。开始解析...


Unnamed: 0,Model,AUC,Logloss,Dataset,Method,Scale,Seed,Lr,Decor
0,DCNv2,0.7462,0.4478,toys,moc,3,20,0.001,0.01
1,DCNv2,0.7453,0.4488,toys,moc,3,20,0.001,1.0
2,DCNv2,0.7454,0.4483,toys,moc,3,20,0.001,10.0
3,DCNv2,0.7451,0.4494,toys,moc,3,20,0.001,50.0
4,DCNv2,0.7454,0.4491,toys,moc,3,20,0.001,100.0
5,DCNv2,0.7462,0.4467,toys,moc,3,20,0.005,0.01
6,DCNv2,0.745,0.4479,toys,moc,3,20,0.005,1.0
7,DCNv2,0.7456,0.4476,toys,moc,3,20,0.005,10.0
8,DCNv2,0.7456,0.4474,toys,moc,3,20,0.005,50.0
9,DCNv2,0.7463,0.447,toys,moc,3,20,0.005,100.0


# decor mask

In [16]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1110_decor_mask',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1110_decor_mask' 目录下共找到 48 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,10.0,3,0.745467,0.000666,0.447067,0.000153,3
1,DCNv2,toys,moc,3,10.0,4,0.746067,0.000306,0.4469,0.0002,3
2,DCNv2,toys,moc,3,100.0,3,0.7458,0.0003,0.4471,0.000458,3
3,DCNv2,toys,moc,3,100.0,4,0.745467,0.000833,0.447067,0.000252,3
4,DCNv2,toys,moc,7,10.0,3,0.746,0.00052,0.447033,0.000289,3
5,DCNv2,toys,moc,7,10.0,4,0.746267,0.000586,0.446833,0.000611,3
6,DCNv2,toys,moc,7,100.0,3,0.7466,0.000656,0.446767,0.000208,3
7,DCNv2,toys,moc,7,100.0,4,0.747033,0.000643,0.446533,0.000153,3


In [17]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1110_decor_mask1',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1110_decor_mask1' 目录下共找到 24 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,10.0,2,0.745033,0.000702,0.448867,0.000764,3
1,DCNv2,toys,moc,7,10.0,1,0.746333,0.00105,0.446667,0.000404,3


# 只保留三个特征，看是否会有更好的效果

In [19]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/newbase_1111',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/newbase_1111' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,beauty,base,0,0,0,0.665167,0.000379,0.508267,0.000289,3
1,DCNv2,sports,base,0,0,0,0.6905,0.000693,0.466133,0.000503,3
2,DCNv2,toys,base,0,0,0,0.736733,0.000757,0.451833,0.000503,3


In [20]:
raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_3fea_rq',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_3fea_rq' 目录下共找到 18 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,rq,1,0,0,0.738167,0.00125,0.450967,0.000569,3
1,DCNv2,toys,rq,3,0,0,0.738433,0.000929,0.451567,0.001582,3
2,DCNv2,toys,rq,7,0,0,0.739667,0.001222,0.450233,0.000961,3


In [21]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_3fea',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_3fea' 目录下共找到 48 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,10.0,3,0.7381,0.000458,0.451567,0.001159,3
1,DCNv2,toys,moc,3,10.0,4,0.738667,0.000929,0.4511,0.001249,3
2,DCNv2,toys,moc,3,100.0,3,0.7381,0.001493,0.4516,0.001375,3
3,DCNv2,toys,moc,3,100.0,4,0.738433,0.001069,0.451267,0.001436,3
4,DCNv2,toys,moc,7,10.0,3,0.739333,0.000231,0.450733,0.000153,3
5,DCNv2,toys,moc,7,10.0,4,0.7394,0.000361,0.450567,0.000115,3
6,DCNv2,toys,moc,7,100.0,3,0.739367,0.000551,0.4507,0.000265,3
7,DCNv2,toys,moc,7,100.0,4,0.739433,0.000611,0.4506,0.0007,3


# 删掉id特征

In [24]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_noid',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_noid' 目录下共找到 48 个 .log 文件。开始解析...

未能解析到任何有效数据，请检查路径和文件格式。


KeyError: 'Model'

In [23]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_noid',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1111_noid' 目录下共找到 48 个 .log 文件。开始解析...

未能解析到任何有效数据，请检查路径和文件格式。


KeyError: 'Model'

# topk

In [5]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_topk_mask_decor',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_topk_mask_decor' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,100.0,4,0.745667,0.000379,0.447333,5.8e-05,3
1,DCNv2,toys,moc,7,100.0,4,0.747067,0.000416,0.4462,0.0002,3


In [7]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top5_mask_decor',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top5_mask_decor' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,100.0,4,0.745833,0.000289,0.446967,0.000321,3
1,DCNv2,toys,moc,7,100.0,4,0.746867,0.000929,0.446367,0.000379,3


In [8]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top10_mask_decor',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top10_mask_decor' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,100.0,4,0.745533,0.000379,0.447233,0.000551,3
1,DCNv2,toys,moc,7,100.0,4,0.746267,0.000586,0.446733,0.000586,3


# concat

In [10]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top10_mask_decor_concat',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top10_mask_decor_concat' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,100.0,4,0.7459,0.000361,0.4467,0.0004,3
1,DCNv2,toys,moc,7,100.0,4,0.7466,0.000557,0.447167,0.000551,3


In [12]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top5_mask_decor_concat',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top5_mask_decor_concat' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,100.0,4,0.7456,0.000954,0.4468,0.0007,3
1,DCNv2,toys,moc,7,100.0,4,0.747333,0.000231,0.446833,0.000473,3


In [None]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top3_mask_decor_concat',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df

在 '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1114_top3_mask_decor_concat' 目录下共找到 12 个 .log 文件。开始解析...


Unnamed: 0,Model,Dataset,Method,Scale,Decor,Mask,Mean_AUC,Std_AUC,Mean_Logloss,Std_Logloss,Run_Count
0,DCNv2,toys,moc,3,100.0,4,0.7457,0.000656,0.447033,0.000611,3
1,DCNv2,toys,moc,7,100.0,4,0.747433,0.000208,0.4467,0.000173,3


# 对比学习1，仅使用正样本

In [None]:

raw_results = analyze_experiment_logs(
    model_name='DCNv2',
    search_directory = '/data2/wangzhongren/taolin_project/FuxiCTR/model_zoo/DCNv2/moc_1115_top3_contrast',
    log_pattern_str=LOG_PATTERN
)
group_cols = ['Model', 'Dataset', 'Method', 'Scale','Decor','Mask']
idx = raw_results.groupby(group_cols + ['Lr'])['AUC'].mean().groupby(group_cols).idxmax()
best_lr_combinations = pd.DataFrame(idx.tolist(), columns=group_cols + ['Lr'])
filtered_results = pd.merge(raw_results, best_lr_combinations, on=group_cols + ['Lr'])
summary_df = filtered_results.groupby(group_cols).agg(
    Mean_AUC=('AUC', 'mean'),
    Std_AUC=('AUC', 'std'),
    Mean_Logloss=('Logloss', 'mean'),
    Std_Logloss=('Logloss', 'std'),
    Run_Count=('Seed', 'count')  # 统计每个组有多少个不同的种子运行
).reset_index()
summary_df