In [1]:
# Move the working directory three levels up. The path is relative, so
# re-running this cell moves up again -- run it once per kernel session.
# NOTE(review): presumably this lands in the repository root so the relative
# paths used in later cells resolve -- confirm.
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [11]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd



def open_text(filename, encoding="utf-8"):
    """Return the entire contents of a text file as a single string.

    Args:
        filename: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            the result does not depend on the platform's locale encoding.

    Returns:
        The decoded file contents.
    """
    with open(filename, encoding=encoding) as f:
        return f.read()


def parse_file_simple(log_data):
    """Collect validation/test metrics from a single-run training log.

    Lines matching ``Epoch <n>/<total>, Loss: <x>`` update the current epoch
    and loss. Every ``Validation Metrics: {...}`` / ``Test Metrics: {...}``
    line is recorded together with the most recently seen epoch and loss.

    Args:
        log_data: Full text of the log.

    Returns:
        A ``defaultdict`` that holds the single key ``'simple'`` once at least
        one metrics line was found. Its value is
        ``{"validation_metrics": [...], "test_metrics": [...]}``; each list
        item is a dict with ``"epoch"``, ``"loss"`` and the keys parsed from
        the log line. ``"epoch"`` and ``"loss"`` are ``None`` for metrics that
        were logged before any epoch line.
    """
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})

    # Explicit "unknown" values: a metrics line that appears before the first
    # epoch line must not fail on unbound locals.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # SECURITY NOTE(review): eval() executes whatever text follows
            # "Metrics:" in the log. Only feed this function logs you trust, or
            # switch to ast.literal_eval if the logged dicts contain plain
            # literals only.
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results['simple'][f"{metric_type}_metrics"].append(
                {"epoch": epoch_num, "loss": loss, **metrics}
            )
    return results


def parse_file_llm(log_data):
    """Collect validation/test metrics per config from a multi-run log.

    A line containing ``experiments<...>.yaml`` selects the current config;
    the text captured after ``experiments`` (up to and including ``.yaml``)
    is used as the result key. ``Epoch <n>/<total>, Loss: <x>`` lines update
    the current epoch and loss, and each ``Validation Metrics: {...}`` /
    ``Test Metrics: {...}`` line is stored under the current config together
    with the most recently seen epoch and loss. Metrics lines that appear
    before any config line are ignored.

    Args:
        log_data: Full text of the log.

    Returns:
        A ``defaultdict`` mapping config key ->
        ``{"validation_metrics": [...], "test_metrics": [...]}``. Each list
        item is a dict with ``"epoch"``, ``"loss"`` and the keys parsed from
        the log line. ``"epoch"`` and ``"loss"`` are ``None`` when no epoch
        line has been seen yet for the current config.
    """
    # Regex patterns
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None
    # Explicit "unknown" values so a metrics line seen before the first epoch
    # line does not fail on unbound locals.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            new_config = config_match.group(1)
            if new_config != current_config:
                current_config = new_config
                # A different run starts here: do not let the previous run's
                # epoch/loss leak into this config's first metrics.
                epoch_num = None
                loss = None

        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # SECURITY NOTE(review): eval() executes whatever text follows
            # "Metrics:" in the log. Only feed this function logs you trust, or
            # switch to ast.literal_eval if the logged dicts contain plain
            # literals only.
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results[current_config][f"{metric_type}_metrics"].append(
                {"epoch": epoch_num, "loss": loss, **metrics}
            )
    return results


def find_max(results, config, metric='NDCG@10'):
    """Pick the best validation entry of a config and the matching test entry.

    The best validation entry is the one with the highest ``metric``; ties
    keep the earliest entry. The test entry is the one whose ``'epoch'``
    equals the best validation epoch; if no test entry has that epoch, the
    last test entry is used instead.

    Args:
        results: Mapping config -> ``{"validation_metrics": [...],
            "test_metrics": [...]}`` where each list item is a dict holding at
            least ``'epoch'``, ``'NDCG@10'`` and ``'Recall@10'``.
        config: Key into ``results``.
        metric: Key of the validation value to maximise.

    Returns:
        ``{'test': [...], 'validation': [...]}``. Each list holds at most one
        dict with ``'NDCG@10'`` and ``'Recall@10'`` rounded to 5 decimals and
        the entry's ``'epoch'``. Both lists are empty when the config has no
        validation entries; ``'test'`` is empty when it has no test entries.
    """
    def get_dct_epoch(lst, epoch):
        for x in lst:
            if x['epoch'] == epoch:
                return x
        # Best effort: no test entry for that epoch, fall back to the last one.
        return lst[-1]

    def postprocess(lst):
        return [
            {
                'NDCG@10': round(dct['NDCG@10'], 5),
                'Recall@10': round(dct['Recall@10'], 5),
                'epoch': dct['epoch'],
            }
            for dct in lst
        ]

    validation_entries = results[config]['validation_metrics']
    test_entries = results[config]['test_metrics']

    # Without validation entries there is no best epoch to select.
    if not validation_entries:
        return {'test': [], 'validation': []}

    best_val_dct = None
    for dct in validation_entries:
        # Strict ">" keeps the earliest entry on ties.
        if best_val_dct is None or dct[metric] > best_val_dct[metric]:
            best_val_dct = dct

    best_val_epoch = best_val_dct['epoch']

    # A run may have validation metrics but no test metrics yet.
    if test_entries:
        selected_test = [get_dct_epoch(test_entries, best_val_epoch)]
    else:
        selected_test = []

    return {
        'test': postprocess(selected_test),
        'validation': postprocess([best_val_dct]),
    }


def create_dataframe(exp_data_with_max_val):
    """Build one DataFrame row per config from the selected best metrics.

    The hyperparameters are parsed from the config file name, which is
    expected to look like
    ``[sasrec-]<hidden_units>-<num_blocks>-<num_heads>-<dropout_rate>-<learning_rate>[-<seed>].yaml``.
    The seed defaults to 42 when the name does not consist of exactly six
    ``-``-separated fields.

    Args:
        exp_data_with_max_val: Mapping config path ->
            ``{"validation": [dict], "test": [dict]}`` where each dict may
            hold ``"NDCG@10"``, ``"Recall@10"`` and ``"epoch"``. A missing or
            empty list yields ``None`` in the corresponding columns.

    Returns:
        A DataFrame with the file name, the parsed hyperparameters, the seed,
        and the ``val_*`` / ``test_*`` metric columns.
    """
    prefix = 'sasrec-'
    data = []

    for config_path, metrics in exp_data_with_max_val.items():
        config_file = config_path.split("/")[-1]  # Get only the filename
        stem = config_file.replace(".yaml", "")
        # Remove the model prefix as a whole. str.lstrip would treat the
        # argument as a set of characters rather than a prefix.
        if stem.startswith(prefix):
            stem = stem[len(prefix):]
        config_name = stem.split("-")

        hidden_units = int(config_name[0])
        num_blocks = int(config_name[1])
        num_heads = int(config_name[2])
        dropout_rate = float(config_name[3])
        learning_rate = float(config_name[4])
        seed = int(config_name[5]) if len(config_name) == 6 else 42

        # "or [{}]" also covers a key that is present but holds an empty list.
        val_metrics = (metrics.get("validation") or [{}])[0]
        test_metrics = (metrics.get("test") or [{}])[0]

        data.append([
            config_file, hidden_units, num_blocks, num_heads, dropout_rate, learning_rate, seed,
            val_metrics.get("NDCG@10", None), val_metrics.get("Recall@10", None), val_metrics.get("epoch", None),
            test_metrics.get("NDCG@10", None), test_metrics.get("Recall@10", None), test_metrics.get("epoch", None)
        ])

    columns = [
        "config_file", "hidden_units", "num_blocks", "num_heads", "dropout_rate", "learning_rate", "seed",
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
    ]

    df = pd.DataFrame(data, columns=columns)
    return df


# Names of the hyperparameter columns (the same five columns that make up the
# `params` tuple built in apply_seed_info).
PARAMS = [
    'hidden_units',
    'num_blocks',
    'num_heads',
    'dropout_rate',
    'learning_rate',
]



def apply_seed_info(df):
    """Annotate every run with its hyperparameter tuple and sibling seeds.

    Two columns are added to ``df`` in place:

    * ``params`` -- the tuple ``(hidden_units, num_blocks, num_heads,
      dropout_rate, learning_rate)`` of the row.
    * ``all_seeds`` -- list of the ``seed`` values of all rows with the same
      ``params``, in row order. Rows with equal ``params`` reference the same
      list object.

    Args:
        df: Frame with the five hyperparameter columns and a ``seed`` column.

    Returns:
        The same ``df`` object, with the two columns added.
    """
    param_cols = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']

    # Build the tuples straight from the columns instead of a row-wise
    # DataFrame.apply; this also works for a frame without rows.
    params = list(zip(*(df[col] for col in param_cols)))
    df['params'] = params

    params2seeds = {}
    for key, seed in zip(params, df['seed']):
        params2seeds.setdefault(key, []).append(seed)

    df['all_seeds'] = [params2seeds[key] for key in params]
    return df

In [12]:
# Experiment selection. MODEL_NAME, DATASET and EXPERIMENT_NAME are
# interpolated into `local_directory` below.
MODEL_NAME = 'sasrec'
DATASET = 'kion_en'
EXPERIMENT_NAME = 'baseline'
# NOTE(review): SPLIT_NAME and MODE are not referenced anywhere else in the
# visible notebook -- confirm whether they are still needed.
SPLIT_NAME = 'general'
MODE = 'BASELINE'

# Directory holding the result logs, relative to the current working directory.
local_directory = f"experiments-2_0/results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}" 

# Sub-folders of `local_directory` whose files are read by the loading cell.
seed_folders = ['single_seed']

In [13]:
import os


# Parse every log file found under each seed folder into one results mapping
# (config key -> validation/test metric lists).
results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # `results` (and of the DataFrame built from it) reproducible.
    for file in sorted(os.listdir(cur_folder)):
        log_path = os.path.join(cur_folder, file)
        # Skip sub-directories and hidden files (e.g. macOS .DS_Store): they
        # are not logs and would break or pollute the parsing.
        if file.startswith('.') or not os.path.isfile(log_path):
            continue
        log_data = open_text(log_path)
        cur_results = parse_file_llm(log_data)
        # NOTE: update() replaces the whole entry when the same config key
        # shows up in more than one file; the file read last wins.
        results.update(cur_results)


# For every config keep only the best validation epoch and its test entry.
exp_data_with_max_val = {}
for config in results:
    exp_data_with_max_val[config] = find_max(results, config, metric='NDCG@10')
exp_data_with_max_val.keys()

dict_keys(['-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.2-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.3-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.2-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.3-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_se

In [14]:
# One row per config: hyperparameters parsed from the file name plus the
# best-validation metrics and the test metrics selected by find_max.
df = create_dataframe(exp_data_with_max_val)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,256-2-8-0.2-0.0005-42.yaml,256,2,8,0.2,0.00050,42,0.09434,0.17987,3,0.05650,0.11146,3
1,256-2-8-0.2-0.00075-42.yaml,256,2,8,0.2,0.00075,42,0.09486,0.17959,3,0.05765,0.11144,3
2,256-2-8-0.2-0.001-42.yaml,256,2,8,0.2,0.00100,42,0.09681,0.17883,8,0.05492,0.10617,8
3,256-2-8-0.3-0.0005-42.yaml,256,2,8,0.3,0.00050,42,0.09376,0.18161,6,0.05549,0.11311,6
4,256-2-8-0.3-0.00075-42.yaml,256,2,8,0.3,0.00075,42,0.09493,0.17899,3,0.05730,0.11092,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,64-4-4-0.2-0.00075-42.yaml,64,4,4,0.2,0.00075,42,0.09886,0.18345,8,0.05949,0.11464,8
104,64-4-4-0.2-0.001-42.yaml,64,4,4,0.2,0.00100,42,0.09961,0.18498,6,0.05876,0.11414,6
105,64-4-4-0.3-0.0005-42.yaml,64,4,4,0.3,0.00050,42,0.09930,0.18394,12,0.05952,0.11433,12
106,64-4-4-0.3-0.00075-42.yaml,64,4,4,0.3,0.00075,42,0.09876,0.18297,12,0.05878,0.11349,12


In [15]:
# Display the per-config results table again (same frame as in the cell above).
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,256-2-8-0.2-0.0005-42.yaml,256,2,8,0.2,0.00050,42,0.09434,0.17987,3,0.05650,0.11146,3
1,256-2-8-0.2-0.00075-42.yaml,256,2,8,0.2,0.00075,42,0.09486,0.17959,3,0.05765,0.11144,3
2,256-2-8-0.2-0.001-42.yaml,256,2,8,0.2,0.00100,42,0.09681,0.17883,8,0.05492,0.10617,8
3,256-2-8-0.3-0.0005-42.yaml,256,2,8,0.3,0.00050,42,0.09376,0.18161,6,0.05549,0.11311,6
4,256-2-8-0.3-0.00075-42.yaml,256,2,8,0.3,0.00075,42,0.09493,0.17899,3,0.05730,0.11092,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
103,64-4-4-0.2-0.00075-42.yaml,64,4,4,0.2,0.00075,42,0.09886,0.18345,8,0.05949,0.11464,8
104,64-4-4-0.2-0.001-42.yaml,64,4,4,0.2,0.00100,42,0.09961,0.18498,6,0.05876,0.11414,6
105,64-4-4-0.3-0.0005-42.yaml,64,4,4,0.3,0.00050,42,0.09930,0.18394,12,0.05952,0.11433,12
106,64-4-4-0.3-0.00075-42.yaml,64,4,4,0.3,0.00075,42,0.09876,0.18297,12,0.05878,0.11349,12


In [16]:
# Attach the `params` / `all_seeds` columns, then rank the runs by test
# NDCG@10 (best first). The sorted result is rebound to `df` instead of
# sorting with inplace=True, so the statement reads as one pipeline.
df = apply_seed_info(df).sort_values(by='test_NDCG@10', ascending=False)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
105,64-4-4-0.3-0.0005-42.yaml,64,4,4,0.3,0.00050,42,0.09930,0.18394,12,0.05952,0.11433,12,"(64, 4, 4, 0.3, 0.0005)",[42]
103,64-4-4-0.2-0.00075-42.yaml,64,4,4,0.2,0.00075,42,0.09886,0.18345,8,0.05949,0.11464,8,"(64, 4, 4, 0.2, 0.00075)",[42]
16,64-4-2-0.3-0.00075-42.yaml,64,4,2,0.3,0.00075,42,0.09864,0.18324,10,0.05924,0.11514,10,"(64, 4, 2, 0.3, 0.00075)",[42]
62,64-2-2-0.2-0.001-42.yaml,64,2,2,0.2,0.00100,42,0.09738,0.18210,5,0.05918,0.11357,5,"(64, 2, 2, 0.2, 0.001)",[42]
10,64-2-4-0.3-0.00075-42.yaml,64,2,4,0.3,0.00075,42,0.09768,0.18193,8,0.05907,0.11284,8,"(64, 2, 4, 0.3, 0.00075)",[42]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,256-4-2-0.2-0.001-42.yaml,256,4,2,0.2,0.00100,42,0.08238,0.14957,17,0.04548,0.08323,17,"(256, 4, 2, 0.2, 0.001)",[42]
28,256-4-4-0.3-0.00075-42.yaml,256,4,4,0.3,0.00075,42,0.08236,0.14957,16,0.04547,0.08323,16,"(256, 4, 4, 0.3, 0.00075)",[42]
49,256-4-8-0.2-0.00075-42.yaml,256,4,8,0.2,0.00075,42,0.08182,0.14930,17,0.04529,0.08379,17,"(256, 4, 8, 0.2, 0.00075)",[42]
52,256-4-8-0.3-0.00075-42.yaml,256,4,8,0.3,0.00075,42,0.08163,0.15022,17,0.04487,0.08377,17,"(256, 4, 8, 0.3, 0.00075)",[42]


In [9]:
# Keep the runs whose `all_seeds` list has exactly three entries, then show
# those of them that were run with seed 1.
df_all = df.loc[df['all_seeds'].map(len) == 3]
df_all.loc[df_all['seed'] == 1]

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
468,128-4-2-0.3-0.0005-1.yaml,128,4,2,0.3,0.0005,1,0.0239,0.04816,13,0.011,0.02312,13,"(128, 4, 2, 0.3, 0.0005)","[42, 1, 256]"
498,64-4-2-0.3-0.001-1.yaml,64,4,2,0.3,0.001,1,0.02429,0.04697,12,0.01088,0.02205,12,"(64, 4, 2, 0.3, 0.001)","[42, 1, 256]"
539,128-2-4-0.3-0.0005-1.yaml,128,2,4,0.3,0.0005,1,0.02328,0.04638,21,0.01084,0.02289,21,"(128, 2, 4, 0.3, 0.0005)","[42, 1, 256]"
490,64-4-4-0.3-0.001-1.yaml,64,4,4,0.3,0.001,1,0.02339,0.04534,12,0.01075,0.02217,12,"(64, 4, 4, 0.3, 0.001)","[42, 1, 256]"
543,128-4-2-0.2-0.0005-1.yaml,128,4,2,0.2,0.0005,1,0.02349,0.04623,11,0.01074,0.02205,11,"(128, 4, 2, 0.2, 0.0005)","[42, 1, 256]"
541,128-2-4-0.3-0.001-1.yaml,128,2,4,0.3,0.001,1,0.02264,0.04653,8,0.01073,0.02235,8,"(128, 2, 4, 0.3, 0.001)","[42, 1, 256]"
519,128-2-2-0.2-0.0005-1.yaml,128,2,2,0.2,0.0005,1,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)","[42, 1, 256]"
550,64-2-4-0.2-0.001-1.yaml,64,2,4,0.2,0.001,1,0.02244,0.0446,12,0.01053,0.02176,12,"(64, 2, 4, 0.2, 0.001)","[42, 1, 256]"
472,128-4-4-0.2-0.0005-1.yaml,128,4,4,0.2,0.0005,1,0.02398,0.04712,10,0.0105,0.0211,10,"(128, 4, 4, 0.2, 0.0005)","[42, 1, 256]"
488,64-4-4-0.3-0.0005-1.yaml,64,4,4,0.3,0.0005,1,0.02398,0.04608,18,0.01047,0.02003,18,"(64, 4, 4, 0.3, 0.0005)","[42, 1, 256]"


In [10]:
def get_stats_seeds(df_all):
    """Aggregate runs over seeds for every `params` group.

    The columns ``config_file``, ``all_seeds`` and ``seed`` are dropped, the
    remaining columns are grouped by ``params`` and averaged. For the
    validation metrics, the validation epoch and the test metrics, the group
    standard deviation is appended as ``<column>__std``.

    Returns:
        A frame indexed by ``params`` holding the group means followed by the
        ``__std`` columns.
    """
    std_columns = ("val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10")

    per_params = df_all.drop(columns=['config_file', 'all_seeds', 'seed']).groupby('params')
    means = per_params.agg('mean')
    stds = per_params.agg('std')

    # Append the std columns after the mean columns, in the order listed above.
    return means.assign(**{name + '__std': stds[name] for name in std_columns})


def reorder_cols(df):
    """Return `df` restricted to the report columns, in report order.

    Order: validation epoch (mean, std), the five hyperparameters, then every
    metric followed directly by its ``__std`` column.
    """
    hyperparams = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']
    metrics = ['val_NDCG@10', 'val_Recall@10', 'test_NDCG@10', 'test_Recall@10']

    order = ['val_epoch', 'val_epoch__std'] + hyperparams
    for metric in metrics:
        order += [metric, metric + '__std']
    return df[order]


# Keep the runs whose `all_seeds` list has exactly three entries, aggregate
# mean/std over seeds per `params`, and rank by mean test NDCG@10.
# NOTE(review): the seed count 3 is hard-coded. With
# seed_folders = ['single_seed'] the lists shown above hold a single seed, so
# this filter appears to select nothing (the saved output is empty) -- confirm
# the intended seed count.
df_all = df[df['all_seeds'].apply(len) == 3]
# Chained instead of sort_values(inplace=True): no in-place mutation of a frame
# that was derived by column selection.
df_final = reorder_cols(get_stats_seeds(df_all)).sort_values(by='test_NDCG@10', ascending=False)
df_final

Unnamed: 0_level_0,val_epoch,val_epoch__std,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1


In [12]:
# Save the per-`params` mean/std table. index=False leaves the `params` index
# out of the file.
# NOTE(review): the file name says "beauty" while DATASET in the config cell is
# 'kion_en' -- confirm this is the intended output file.
df_final.to_csv('beauty_baseline_runs.csv', index=False)

In [18]:
# df.to_csv('kion_baseline_all_runs.csv', index=False)
# Export the hyperparameter columns of every run as a JSON list of records.
# orient="records" never writes the index, so no `index` argument is needed;
# older pandas releases reject index=False for this orient with a ValueError.
df[PARAMS].to_json('kion_baseline_all_runs.json', orient="records")


In [56]:
# Inspect the individual seed runs behind one hyperparameter combination.
# `grouped_df` otherwise exists only as a local inside get_stats_seeds, so the
# same grouping is rebuilt here to keep the cell runnable on a fresh kernel.
grouped_df = df_all.drop(['config_file', 'all_seeds', 'seed'], axis=1).groupby('params')
grouped_df.get_group((128, 2, 2, 0.2, 0.0005))

Unnamed: 0,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params
519,128,2,2,0.2,0.0005,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)"
122,128,2,2,0.2,0.0005,0.0238,0.04668,14,0.01046,0.02176,14,"(128, 2, 2, 0.2, 0.0005)"
520,128,2,2,0.2,0.0005,0.02281,0.04608,11,0.00994,0.02027,11,"(128, 2, 2, 0.2, 0.0005)"


In [None]:
df_all.