In [1]:
# Move three directory levels up so the relative paths used in later cells resolve.
# NOTE(review): a relative `%cd` is not idempotent; re-running this cell walks further up the tree.
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd



def open_text(filename, encoding="utf-8"):
    """Read a text file and return its entire contents as a single string.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale settings.

    Returns:
        The full file contents as ``str``.
    """
    with open(filename, encoding=encoding) as f:
        return f.read()


def parse_file_simple(log_data):
    """Parse a single-run training log into validation / test metric records.

    The log is scanned line by line. A line containing
    ``Epoch <n>/<total>, Loss: <value>`` updates the "current" epoch and loss.
    A line containing ``Validation Metrics: {...}`` or ``Test Metrics: {...}``
    is recorded together with the most recently seen epoch and loss.

    Args:
        log_data: Full text of the log file.

    Returns:
        A ``defaultdict``. When at least one metrics line was found it has the
        single key ``'simple'`` mapping to
        ``{"validation_metrics": [...], "test_metrics": [...]}``; every list
        entry is ``{"epoch": ..., "loss": ..., **metrics}``. ``epoch`` and
        ``loss`` are ``None`` for metrics logged before the first epoch line.
    """
    # Loss may be printed as a plain decimal, in scientific notation, or as nan/inf.
    loss_number = r"[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf)"
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: (" + loss_number + r")")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})

    # Initialised up front so a metrics line that precedes every epoch line
    # is recorded with an unknown epoch instead of raising NameError.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # NOTE(review): eval executes whatever text the log contains; only run on trusted logs.
            # ast.literal_eval would be safer, but only if the logged dict holds plain literals
            # (no wrappers such as np.float64(...)); confirm the log format before switching.
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results['simple'][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def parse_file_llm(log_data):
    """Parse a log that may contain several runs, keyed by their config path.

    The log is scanned line by line:

    * a line matching ``experiments<...>.yaml`` switches the "current" config
      (the captured text after ``experiments`` is used as the result key);
    * a line matching ``Epoch <n>/<total>, Loss: <value>`` updates the current
      epoch and loss;
    * a ``Validation Metrics: {...}`` / ``Test Metrics: {...}`` line is stored
      under the current config together with the current epoch and loss.
      Metrics lines seen before any config line are ignored.

    Args:
        log_data: Full text of the log file.

    Returns:
        A ``defaultdict`` mapping each config key to
        ``{"validation_metrics": [...], "test_metrics": [...]}``; every list
        entry is ``{"epoch": ..., "loss": ..., **metrics}``. ``epoch`` and
        ``loss`` are ``None`` when no epoch line has been seen yet for that config.
    """
    # Loss may be printed as a plain decimal, in scientific notation, or as nan/inf.
    loss_number = r"[-+]?(?:(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?|nan|inf)"
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: (" + loss_number + r")")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            new_config = config_match.group(1)
            if new_config != current_config:
                current_config = new_config
                # A different run starts here: forget the previous run's epoch/loss so
                # they cannot be attributed to this config's metrics.
                epoch_num = None
                loss = None

        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # NOTE(review): eval executes whatever text the log contains; only run on trusted logs.
            # ast.literal_eval would be safer, but only if the logged dict holds plain literals
            # (no wrappers such as np.float64(...)); confirm the log format before switching.
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results[current_config][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def find_max(results, config, metric='NDCG@10'):
    """Pick the best validation epoch for ``config`` and the matching test entry.

    The best validation entry is the one with the highest ``metric`` value
    (the earliest entry wins on ties). The test entry is the one logged at the
    same epoch; if no test entry has that epoch, the last test entry is used.

    Args:
        results: Mapping ``config -> {"validation_metrics": [...], "test_metrics": [...]}``
            where every list entry is a dict with at least ``'epoch'``,
            ``'NDCG@10'``, ``'Recall@10'`` and ``metric``.
        config: Key of the run to summarise.
        metric: Validation metric used to choose the best epoch.

    Returns:
        ``{"test": [entry], "validation": [entry]}`` where each entry holds
        ``'NDCG@10'`` and ``'Recall@10'`` rounded to 5 decimals plus ``'epoch'``.

    Raises:
        ValueError: If the run has no validation metrics or no test metrics.
    """
    def get_dct_epoch(lst, epoch):
        for x in lst:
            if x['epoch'] == epoch:
                return x
        # Best-effort fallback: no test entry for the requested epoch, use the last one.
        return lst[-1]

    def postprocess(lst):
        return [
            {
                'NDCG@10': round(dct['NDCG@10'], 5),
                'Recall@10': round(dct['Recall@10'], 5),
                'epoch': dct['epoch'],
            }
            for dct in lst
        ]

    validation_runs = results[config]['validation_metrics']
    test_runs = results[config]['test_metrics']
    if not validation_runs:
        raise ValueError(f"No validation metrics recorded for config {config!r}")
    if not test_runs:
        raise ValueError(f"No test metrics recorded for config {config!r}")

    # max() keeps the first maximal element, matching a strict ">" scan.
    best_val_dct = max(validation_runs, key=lambda dct: dct[metric])
    best_val_epoch = best_val_dct['epoch']

    return {
        'test': postprocess([get_dct_epoch(test_runs, best_val_epoch)]),
        'validation': postprocess([best_val_dct]),
    }


def create_dataframe(exp_data_with_max_val, model_prefix="sasrec-"):
    """Build one DataFrame row per experiment from config file names and best metrics.

    Hyper-parameters are decoded from the config file name, expected to look like
    ``[<model_prefix>]<hidden>-<blocks>-<heads>-<dropout>-<lr>[-<seed>].yaml``.
    When the name has no seed field the seed is reported as 42.

    Args:
        exp_data_with_max_val: Mapping ``config_path -> {"validation": [dict], "test": [dict]}``.
            Missing sections or keys produce ``None`` in the corresponding columns.
        model_prefix: Optional leading part of the file name to drop before
            splitting on ``"-"``. It is removed only when the name really starts with it.

    Returns:
        ``pandas.DataFrame`` with columns ``config_file``, the five
        hyper-parameters, ``seed``, and ``val_*`` / ``test_*`` metric columns.

    Raises:
        ValueError / IndexError: If a file name does not follow the expected layout.
    """
    data = []

    for config_path, metrics in exp_data_with_max_val.items():
        config_file = config_path.split("/")[-1]  # Keep only the file name
        stem = config_file.replace(".yaml", "")
        # str.lstrip(chars) strips any leading characters from the set `chars`, not a
        # prefix, so the model prefix is removed explicitly.
        if model_prefix and stem.startswith(model_prefix):
            stem = stem[len(model_prefix):]
        config_name = stem.split("-")

        hidden_units = int(config_name[0])
        num_blocks = int(config_name[1])
        num_heads = int(config_name[2])
        dropout_rate = float(config_name[3])
        learning_rate = float(config_name[4])
        seed = int(config_name[5]) if len(config_name) == 6 else 42

        val_metrics = metrics.get("validation", [{}])[0]
        test_metrics = metrics.get("test", [{}])[0]

        data.append([
            config_file, hidden_units, num_blocks, num_heads, dropout_rate, learning_rate, seed,
            val_metrics.get("NDCG@10", None), val_metrics.get("Recall@10", None), val_metrics.get("epoch", None),
            test_metrics.get("NDCG@10", None), test_metrics.get("Recall@10", None), test_metrics.get("epoch", None)
        ])

    columns = [
        "config_file", "hidden_units", "num_blocks", "num_heads", "dropout_rate", "learning_rate", "seed",
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
    ]

    df = pd.DataFrame(data, columns=columns)
    return df


def apply_seed_info(df):
    """Annotate each run with its hyper-parameter tuple and the seeds sharing it.

    Two columns are added to ``df`` in place:

    * ``params``: tuple ``(hidden_units, num_blocks, num_heads, dropout_rate, learning_rate)``;
    * ``all_seeds``: list of every ``seed`` value found among rows with the same
      ``params`` (rows with equal ``params`` share the same list object).

    Args:
        df: DataFrame with the five hyper-parameter columns and a ``seed`` column.

    Returns:
        The same ``df`` object, with the two columns added.
    """
    hyper_columns = ('hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate')

    def params_of(row):
        return tuple(row[column] for column in hyper_columns)

    df['params'] = df.apply(params_of, axis=1)

    seeds_by_params = {}
    for key, seed_value in zip(df['params'], df['seed']):
        seeds_by_params.setdefault(key, []).append(seed_value)

    def seeds_of(row):
        return seeds_by_params[row['params']]

    df['all_seeds'] = df.apply(seeds_of, axis=1)
    return df

In [3]:
# Identifiers of the experiment whose logs are analysed in this notebook.
MODEL_NAME = 'sasrec'
DATASET = 'beauty'
EXPERIMENT_NAME = 'baseline'
SPLIT_NAME = 'general'
MODE = 'BASELINE'

# Folder with this experiment's logs, relative to the current working directory.
local_directory = "/".join(["experiments-2_0/results", MODEL_NAME, DATASET, EXPERIMENT_NAME])

# Sub-folders of `local_directory` that hold the log files.
seed_folders = ['single_seed', 'other_seed']

In [4]:
import os


# Parse every log file found in the seed folders and merge the per-config results.
results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    # os.listdir returns entries in arbitrary order; sorting makes the run reproducible,
    # including which file wins below when two logs contain the same config.
    for file in sorted(os.listdir(cur_folder)):
        log_path = os.path.join(cur_folder, file)
        # Skip sub-directories and hidden files (e.g. .DS_Store); they are not logs.
        if file.startswith('.') or not os.path.isfile(log_path):
            continue
        log_data = open_text(log_path)
        cur_results = parse_file_llm(log_data)
        # NOTE: update() replaces the whole entry of a config that was already parsed from
        # an earlier file; the entries are not merged.
        results.update(cur_results)


# For every config keep only the best validation epoch and its matching test metrics.
exp_data_with_max_val = {}
for config in results:
    exp_data_with_max_val[config] = find_max(results, config, metric='NDCG@10')
exp_data_with_max_val.keys()

dict_keys(['/configs/new_exps_2025/beauty/sasrec-64-4-2-0.3-0.0005.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-2-0.3-0.001.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-2-0.3-0.005.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-2-0.3-0.01.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.1-0.0001.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.1-0.0005.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.1-0.001.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.1-0.005.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.1-0.01.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.2-0.0001.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.2-0.0005.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.2-0.001.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.2-0.005.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.2-0.01.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.3-0.0001.yaml', '/configs/new_exps_2025/beauty/sasrec-64-4-4-0.3-0.0005.

In [5]:
# One row per config: hyper-parameters decoded from the file name plus best validation / test metrics.
df = create_dataframe(exp_data_with_max_val)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,sasrec-64-4-2-0.3-0.0005.yaml,64,4,2,0.3,0.0005,42,0.02436,0.04742,19,0.01025,0.02045,19
1,sasrec-64-4-2-0.3-0.001.yaml,64,4,2,0.3,0.0010,42,0.02357,0.04757,13,0.01073,0.02289,13
2,sasrec-64-4-2-0.3-0.005.yaml,64,4,2,0.3,0.0050,42,0.00410,0.00847,117,0.00223,0.00422,117
3,sasrec-64-4-2-0.3-0.01.yaml,64,4,2,0.3,0.0100,42,0.00413,0.00862,109,0.00244,0.00422,109
4,sasrec-64-4-4-0.1-0.0001.yaml,64,4,4,0.1,0.0001,42,0.02180,0.04177,82,0.00958,0.01855,82
...,...,...,...,...,...,...,...,...,...,...,...,...,...
550,64-2-4-0.2-0.001-1.yaml,64,2,4,0.2,0.0010,1,0.02244,0.04460,12,0.01053,0.02176,12
551,64-2-4-0.2-0.001-256.yaml,64,2,4,0.2,0.0010,256,0.02208,0.04475,10,0.01013,0.02098,10
552,64-2-4-0.3-0.0005-1.yaml,64,2,4,0.3,0.0005,1,0.02226,0.04475,20,0.01011,0.02033,20
553,64-2-4-0.3-0.0005-256.yaml,64,2,4,0.3,0.0005,256,0.02275,0.04385,22,0.00951,0.01962,22


In [None]:
# Scratch cell: re-displays `df`, which the previous cell already shows.
df

In [6]:
# Attach the hyper-parameter tuple and seed list to every run, then rank runs by test NDCG@10 (best first).
df = apply_seed_info(df).sort_values(by='test_NDCG@10', ascending=False)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
83,sasrec-256-2-2-0.3-0.001.yaml,256,2,2,0.3,0.0010,42,0.02298,0.04608,6,0.01133,0.02360,6,"(256, 2, 2, 0.3, 0.001)","[42, 1]"
272,sasrec-64-1-8-0.2-0.01.yaml,64,1,8,0.2,0.0100,42,0.02133,0.04147,5,0.01111,0.02229,5,"(64, 1, 8, 0.2, 0.01)",[42]
468,128-4-2-0.3-0.0005-1.yaml,128,4,2,0.3,0.0005,1,0.02390,0.04816,13,0.01100,0.02312,13,"(128, 4, 2, 0.3, 0.0005)","[42, 1, 256]"
82,sasrec-256-2-2-0.3-0.0005.yaml,256,2,2,0.3,0.0005,42,0.02413,0.04683,12,0.01098,0.02366,12,"(256, 2, 2, 0.3, 0.0005)","[42, 1, 256]"
181,sasrec-256-3-2-0.2-0.0005.yaml,256,3,2,0.2,0.0005,42,0.02423,0.04846,11,0.01094,0.02312,11,"(256, 3, 2, 0.2, 0.0005)",[42]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,sasrec-128-4-2-0.2-0.01.yaml,128,4,2,0.2,0.0100,42,0.00468,0.01011,123,0.00178,0.00410,123,"(128, 4, 2, 0.2, 0.01)",[42]
430,sasrec-128-4-8-0.3-0.005.yaml,128,4,8,0.3,0.0050,42,0.00447,0.00907,5,0.00176,0.00357,5,"(128, 4, 8, 0.3, 0.005)",[42]
12,sasrec-64-4-4-0.2-0.005.yaml,64,4,4,0.2,0.0050,42,0.00426,0.00907,46,0.00164,0.00315,46,"(64, 4, 4, 0.2, 0.005)",[42]
7,sasrec-64-4-4-0.1-0.005.yaml,64,4,4,0.1,0.0050,42,0.00453,0.00996,107,0.00160,0.00374,107,"(64, 4, 4, 0.1, 0.005)",[42]


In [9]:
# Keep only hyper-parameter settings that were run with three seeds, then show their seed-1 runs.
has_three_seeds = df['all_seeds'].apply(len) == 3
df_all = df[has_three_seeds]
is_seed_one = df_all['seed'] == 1
df_all[is_seed_one]

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
468,128-4-2-0.3-0.0005-1.yaml,128,4,2,0.3,0.0005,1,0.0239,0.04816,13,0.011,0.02312,13,"(128, 4, 2, 0.3, 0.0005)","[42, 1, 256]"
498,64-4-2-0.3-0.001-1.yaml,64,4,2,0.3,0.001,1,0.02429,0.04697,12,0.01088,0.02205,12,"(64, 4, 2, 0.3, 0.001)","[42, 1, 256]"
539,128-2-4-0.3-0.0005-1.yaml,128,2,4,0.3,0.0005,1,0.02328,0.04638,21,0.01084,0.02289,21,"(128, 2, 4, 0.3, 0.0005)","[42, 1, 256]"
490,64-4-4-0.3-0.001-1.yaml,64,4,4,0.3,0.001,1,0.02339,0.04534,12,0.01075,0.02217,12,"(64, 4, 4, 0.3, 0.001)","[42, 1, 256]"
543,128-4-2-0.2-0.0005-1.yaml,128,4,2,0.2,0.0005,1,0.02349,0.04623,11,0.01074,0.02205,11,"(128, 4, 2, 0.2, 0.0005)","[42, 1, 256]"
541,128-2-4-0.3-0.001-1.yaml,128,2,4,0.3,0.001,1,0.02264,0.04653,8,0.01073,0.02235,8,"(128, 2, 4, 0.3, 0.001)","[42, 1, 256]"
519,128-2-2-0.2-0.0005-1.yaml,128,2,2,0.2,0.0005,1,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)","[42, 1, 256]"
550,64-2-4-0.2-0.001-1.yaml,64,2,4,0.2,0.001,1,0.02244,0.0446,12,0.01053,0.02176,12,"(64, 2, 4, 0.2, 0.001)","[42, 1, 256]"
472,128-4-4-0.2-0.0005-1.yaml,128,4,4,0.2,0.0005,1,0.02398,0.04712,10,0.0105,0.0211,10,"(128, 4, 4, 0.2, 0.0005)","[42, 1, 256]"
488,64-4-4-0.3-0.0005-1.yaml,64,4,4,0.3,0.0005,1,0.02398,0.04608,18,0.01047,0.02003,18,"(64, 4, 4, 0.3, 0.0005)","[42, 1, 256]"


In [11]:
def get_stats_seeds(df_all):
    """Aggregate runs that share ``params`` into per-setting mean and std.

    ``config_file``, ``all_seeds`` and ``seed`` are dropped, the remaining
    columns are averaged per ``params`` group, and a ``<metric>__std`` column
    (pandas' default sample standard deviation) is appended for each of
    ``val_NDCG@10``, ``val_Recall@10``, ``val_epoch``, ``test_NDCG@10`` and
    ``test_Recall@10``.

    Args:
        df_all: Per-run DataFrame containing ``params``, the dropped columns
            above and the metric columns.

    Returns:
        DataFrame indexed by ``params`` with the group means followed by the
        ``__std`` columns.
    """
    by_params = df_all.drop(columns=['config_file', 'all_seeds', 'seed']).groupby('params')
    group_mean = by_params.mean()
    group_std = by_params.std()

    std_columns = {
        name + '__std': group_std[name]
        for name in ("val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10")
    }
    return group_mean.assign(**std_columns)


def reorder_cols(df):
    """Return ``df`` restricted to the report columns, in presentation order.

    Order: best-epoch mean/std, the five hyper-parameters, then each metric
    immediately followed by its ``__std`` column. Columns not listed (for
    example ``test_epoch``) are dropped; a missing listed column raises ``KeyError``.
    """
    hyper_columns = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']
    metric_columns = ['val_NDCG@10', 'val_Recall@10', 'test_NDCG@10', 'test_Recall@10']

    order = ['val_epoch', 'val_epoch__std', *hyper_columns]
    for metric_name in metric_columns:
        order += [metric_name, metric_name + '__std']
    return df[order]


# Mean / std over seeds for every hyper-parameter setting that has three seeds, best test NDCG@10 first.
df_all = df[df['all_seeds'].apply(len) == 3]
df_final = reorder_cols(get_stats_seeds(df_all)).sort_values(by='test_NDCG@10', ascending=False)
df_final

Unnamed: 0_level_0,val_epoch,val_epoch__std,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"(64, 4, 2, 0.3, 0.001)",11.666667,1.527525,64.0,4.0,2.0,0.3,0.001,0.023827,0.000402,0.04717,0.000346,0.01067,0.000246,0.021717,0.001371
"(256, 2, 2, 0.2, 0.0005)",10.0,1.0,256.0,2.0,2.0,0.2,0.0005,0.023767,0.000208,0.046777,0.000522,0.01061,0.000167,0.022807,0.00061
"(128, 2, 4, 0.3, 0.0005)",16.333333,5.033223,128.0,2.0,4.0,0.3,0.0005,0.023517,0.000335,0.04762,0.001193,0.010593,0.00024,0.022173,0.000902
"(128, 4, 2, 0.3, 0.0005)",12.333333,0.57735,128.0,4.0,2.0,0.3,0.0005,0.024153,0.000405,0.048163,0.000895,0.01055,0.000404,0.021657,0.001334
"(64, 4, 4, 0.3, 0.001)",11.666667,1.527525,64.0,4.0,4.0,0.3,0.001,0.023287,0.000503,0.046083,0.001419,0.010547,0.000178,0.0214,0.000742
"(256, 2, 4, 0.2, 0.0005)",9.0,1.0,256.0,2.0,4.0,0.2,0.0005,0.02379,0.000384,0.04772,0.001181,0.01045,0.000118,0.02223,6e-05
"(256, 2, 2, 0.3, 0.0005)",10.0,2.0,256.0,2.0,2.0,0.3,0.0005,0.02375,0.000333,0.047473,0.000601,0.01043,0.000632,0.022293,0.001576
"(128, 2, 2, 0.3, 0.0005)",16.666667,4.041452,128.0,2.0,2.0,0.3,0.0005,0.023597,0.000153,0.047273,0.001293,0.01037,0.00026,0.021917,0.00071
"(64, 4, 4, 0.3, 0.0005)",20.666667,3.785939,64.0,4.0,4.0,0.3,0.0005,0.02376,0.000192,0.045933,0.000254,0.010333,0.000187,0.020367,0.000532
"(128, 2, 2, 0.2, 0.0005)",12.0,1.732051,128.0,2.0,2.0,0.2,0.0005,0.023473,0.000574,0.046727,0.000671,0.01033,0.000344,0.021263,0.00086


In [12]:
# The file name is built from the config constants, so changing DATASET / EXPERIMENT_NAME
# cannot overwrite another experiment's CSV. index=False drops the `params` index.
df_final.to_csv(f'{DATASET}_{EXPERIMENT_NAME}_runs.csv', index=False)

In [13]:
# The file name is built from the config constants, so changing DATASET / EXPERIMENT_NAME
# cannot overwrite another experiment's CSV.
df.to_csv(f'{DATASET}_{EXPERIMENT_NAME}_all_runs.csv', index=False)

In [56]:
# `grouped_df` is only a local variable inside get_stats_seeds, so it is rebuilt here;
# otherwise this cell raises NameError on a fresh kernel.
grouped_df = df_all.drop(['config_file', 'all_seeds', 'seed'], axis=1).groupby('params')
grouped_df.get_group((128, 2, 2, 0.2, 0.0005))

Unnamed: 0,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params
519,128,2,2,0.2,0.0005,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)"
122,128,2,2,0.2,0.0005,0.0238,0.04668,14,0.01046,0.02176,14,"(128, 2, 2, 0.2, 0.0005)"
520,128,2,2,0.2,0.0005,0.02281,0.04608,11,0.00994,0.02027,11,"(128, 2, 2, 0.2, 0.0005)"


In [None]:
# Scratch cell: the dangling `df_all.` was a syntax error; show a preview instead.
df_all.head()