In [1]:
# Move three directory levels up so the relative paths used in later cells resolve.
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd



def open_text(filename):
    """Return the entire contents of ``filename`` as one string.

    The file is opened in text mode with the default encoding and is closed
    before the function returns.
    """
    with open(filename) as handle:
        contents = handle.read()
    return contents


def parse_file_simple(log_data):
    """Parse a single-run training log into validation/test metric histories.

    Lines containing ``Epoch <n>/<total>, Loss: <value>`` update the current
    epoch and loss. Lines containing ``Validation Metrics: {...}`` or
    ``Test Metrics: {...}`` are recorded together with the most recently seen
    epoch and loss.

    Args:
        log_data: Full text of the log.

    Returns:
        A ``defaultdict`` that holds at most one key, ``'simple'``, mapping to
        ``{"validation_metrics": [...], "test_metrics": [...]}``. Every list
        entry is ``{"epoch": ..., "loss": ..., **metrics}``. ``epoch`` and
        ``loss`` are ``None`` for metrics logged before the first epoch line.
    """
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})

    # Initialise the running state so that a metrics line appearing before any
    # epoch line is recorded with None instead of raising UnboundLocalError.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # SECURITY: eval executes arbitrary expressions found in the log text.
            # Only parse logs from trusted runs; ast.literal_eval would be the safer
            # choice if the logged dicts contain plain literals only.
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results['simple'][f"{metric_type}_metrics"].append(
                {"epoch": epoch_num, "loss": loss, **metrics}
            )
    return results


def parse_file_llm(log_data):
    """Parse a log that may cover several configs into per-config metric histories.

    A line containing ``experiments<...>.yaml`` switches the current config;
    the key is the text that follows the word ``experiments`` up to and
    including ``.yaml``. Lines containing ``Epoch <n>/<total>, Loss: <value>``
    update the current epoch and loss. ``Validation Metrics: {...}`` and
    ``Test Metrics: {...}`` lines are stored under the current config; metrics
    seen before any config line are ignored.

    Args:
        log_data: Full text of the log.

    Returns:
        A ``defaultdict`` mapping each config key to
        ``{"validation_metrics": [...], "test_metrics": [...]}``. Every list
        entry is ``{"epoch": ..., "loss": ..., **metrics}``. ``epoch`` and
        ``loss`` are ``None`` for metrics logged before the first epoch line.
    """
    # Regex patterns
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None

    # Initialise the running state so that a metrics line appearing before any
    # epoch line is recorded with None instead of raising UnboundLocalError.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            current_config = config_match.group(1)

        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # SECURITY: eval executes arbitrary expressions found in the log text.
            # Only parse logs from trusted runs; ast.literal_eval would be the safer
            # choice if the logged dicts contain plain literals only.
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results[current_config][f"{metric_type}_metrics"].append(
                {"epoch": epoch_num, "loss": loss, **metrics}
            )
    return results


def find_max(results, config, metric='NDCG@10'):
    """Select the best validation entry for ``config`` and the matching test entry.

    The best validation entry is the one with the highest ``metric``; on ties
    the earliest entry wins. The test entry is the first one logged at the same
    epoch, falling back to the last test entry when no epoch matches.

    Args:
        results: Mapping of config -> ``{"validation_metrics": [...], "test_metrics": [...]}``.
        config: Key into ``results``.
        metric: Name of the validation metric used for the selection.

    Returns:
        ``{"test": [entry], "validation": [entry]}`` where every entry holds
        ``NDCG@10`` and ``Recall@10`` rounded to 5 decimals plus ``epoch``.
    """
    def get_dct_epoch(lst, epoch):
        # First entry logged at `epoch`; the last entry is the fallback.
        return next((entry for entry in lst if entry['epoch'] == epoch), lst[-1])

    def postprocess(lst):
        # Keep only the reported fields, with metrics rounded to 5 decimals.
        return [
            {
                'NDCG@10': round(entry['NDCG@10'], 5),
                'Recall@10': round(entry['Recall@10'], 5),
                'epoch': entry['epoch'],
            }
            for entry in lst
        ]

    # max() keeps the first of several equal maxima, like a strict ">" scan.
    best_val_dct = max(
        results[config]['validation_metrics'],
        key=lambda entry: entry[metric],
        default=None,
    )
    best_val_epoch = best_val_dct['epoch']

    matching_test = get_dct_epoch(results[config]['test_metrics'], best_val_epoch)
    return {
        'test': postprocess([matching_test]),
        'validation': postprocess([best_val_dct]),
    }


def create_dataframe(exp_data_with_max_val):
    """Build one table row per config from the best-epoch metrics.

    The hyperparameters are decoded from the config file name, which is
    expected to look like
    ``<hidden>-<blocks>-<heads>-<dropout>-<lr>[-<seed>].yaml``. The seed
    defaults to 42 unless the name has exactly six dash-separated fields.

    Args:
        exp_data_with_max_val: Mapping of config path ->
            ``{"validation": [dict], "test": [dict]}``.

    Returns:
        A ``pandas.DataFrame`` with the config file name, the decoded
        hyperparameters, and the validation/test NDCG@10, Recall@10 and epoch
        (``None`` where a value is missing).
    """
    columns = [
        "config_file", "hidden_units", "num_blocks", "num_heads", "dropout_rate", "learning_rate", "seed",
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
        ]

    rows = []
    for config_path, metrics in exp_data_with_max_val.items():
        config_file = config_path.split("/")[-1]  # file name without directories
        # NOTE: lstrip removes any leading characters from the given set (not a
        # literal prefix); this is harmless while the first field is numeric.
        tokens = config_file.replace(".yaml", "").lstrip('sasrec-').split("-")

        row = [
            config_file,
            int(tokens[0]),    # hidden_units
            int(tokens[1]),    # num_blocks
            int(tokens[2]),    # num_heads
            float(tokens[3]),  # dropout_rate
            float(tokens[4]),  # learning_rate
            int(tokens[5]) if len(tokens) == 6 else 42,  # seed
        ]

        # Validation columns first, then test columns, matching `columns`.
        for split in ("validation", "test"):
            best = metrics.get(split, [{}])[0]
            row.extend(best.get(field, None) for field in ("NDCG@10", "Recall@10", "epoch"))

        rows.append(row)

    return pd.DataFrame(rows, columns=columns)


# Names of the hyperparameter columns; used later to select those columns from the runs table.
PARAMS = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']


def apply_seed_info(df):
    """Add ``params`` and ``all_seeds`` columns to ``df`` and return it.

    ``params`` is the tuple ``(hidden_units, num_blocks, num_heads,
    dropout_rate, learning_rate)`` of each row. ``all_seeds`` is the list of
    every ``seed`` value found in rows that share the same ``params``.

    The frame is modified in place; the returned object is the same ``df``.
    """
    def params_of(row):
        # Hyperparameter tuple that identifies a configuration across seeds.
        return (
            row['hidden_units'],
            row['num_blocks'],
            row['num_heads'],
            row['dropout_rate'],
            row['learning_rate'],
        )

    df['params'] = df.apply(params_of, axis=1)

    # Collect the seeds seen for every parameter tuple, in row order.
    params2seeds = {}
    for params, seed in zip(df['params'], df['seed']):
        params2seeds.setdefault(params, []).append(seed)

    df['all_seeds'] = df.apply(lambda row: params2seeds[row['params']], axis=1)
    return df

In [3]:
# Experiment selection: the first three names are combined into the results directory below.
MODEL_NAME = 'sasrec'
DATASET = 'ml20m'
EXPERIMENT_NAME = 'baseline'
# NOTE(review): SPLIT_NAME and MODE are not referenced by any cell visible here — confirm they are still needed.
SPLIT_NAME = 'general'
MODE = 'BASELINE'

# Directory, relative to the current working directory, that holds the run logs.
local_directory = f"experiments-2_0/results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}" 

# Sub-folders of local_directory that are scanned for log files.
seed_folders = ['single_seed', 'other_seed']

In [4]:
import os


# Collect per-config metric histories from every log file in every seed folder.
results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    # os.listdir returns entries in arbitrary order; sorting keeps the row order
    # of the resulting table (and which file wins when two logs mention the same
    # config) reproducible across runs and machines.
    for file in sorted(os.listdir(cur_folder)):
        log_path = os.path.join(cur_folder, file)
        # Skip sub-directories and hidden files (e.g. macOS .DS_Store): they are
        # not training logs and would otherwise be passed to open_text.
        if file.startswith('.') or not os.path.isfile(log_path):
            continue
        log_data = open_text(log_path)
        cur_results = parse_file_llm(log_data)
        # A later file overwrites an earlier entry that has the same config key.
        results.update(cur_results)


# For every config keep the best-validation entry and the test entry at that epoch.
exp_data_with_max_val = {
    config: find_max(results, config, metric='NDCG@10')
    for config in results
}
exp_data_with_max_val.keys()

dict_keys(['-2_0/configs/sasrec/ml20m/baseline/single_seed/64-4-8-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/64-4-8-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/64-4-8-0.2-0.001-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/64-4-8-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/64-4-8-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/64-4-8-0.3-0.001-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/128-2-8-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/128-2-8-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/128-2-8-0.2-0.001-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/128-2-8-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/128-2-8-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/128-2-8-0.3-0.001-42.yaml', '-2_0/configs/sasrec/ml20m/baseline/single_seed/256-2-8-0.2-0.0005-42.y

In [5]:
# Flatten the per-config best metrics into a table with one row per config file.
df = create_dataframe(exp_data_with_max_val)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,64-4-8-0.2-0.0005-42.yaml,64,4,8,0.2,0.00050,42,0.12264,0.22098,90,0.03476,0.07085,90
1,64-4-8-0.2-0.00075-42.yaml,64,4,8,0.2,0.00075,42,0.12395,0.22210,99,0.03280,0.06528,99
2,64-4-8-0.2-0.001-42.yaml,64,4,8,0.2,0.00100,42,0.12403,0.22308,100,0.03472,0.06998,100
3,64-4-8-0.3-0.0005-42.yaml,64,4,8,0.3,0.00050,42,0.11424,0.20989,95,0.03515,0.07304,95
4,64-4-8-0.3-0.00075-42.yaml,64,4,8,0.3,0.00075,42,0.11606,0.21189,96,0.03411,0.07048,96
...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,256-2-8-0.3-0.001-1.yaml,256,2,8,0.3,0.00100,1,0.16679,0.26997,100,0.04566,0.07874,100
179,256-2-8-0.3-0.001-256.yaml,256,2,8,0.3,0.00100,256,0.16488,0.26943,100,0.04772,0.08303,100
180,256-4-2-0.2-0.001-1.yaml,256,4,2,0.2,0.00100,1,0.17783,0.28419,97,0.04192,0.07380,97
181,256-4-2-0.2-0.001-256.yaml,256,4,2,0.2,0.00100,256,0.17802,0.28356,100,0.04129,0.07223,100


In [6]:
# Display the per-config table again (no transformation happens in this cell).
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,64-4-8-0.2-0.0005-42.yaml,64,4,8,0.2,0.00050,42,0.12264,0.22098,90,0.03476,0.07085,90
1,64-4-8-0.2-0.00075-42.yaml,64,4,8,0.2,0.00075,42,0.12395,0.22210,99,0.03280,0.06528,99
2,64-4-8-0.2-0.001-42.yaml,64,4,8,0.2,0.00100,42,0.12403,0.22308,100,0.03472,0.06998,100
3,64-4-8-0.3-0.0005-42.yaml,64,4,8,0.3,0.00050,42,0.11424,0.20989,95,0.03515,0.07304,95
4,64-4-8-0.3-0.00075-42.yaml,64,4,8,0.3,0.00075,42,0.11606,0.21189,96,0.03411,0.07048,96
...,...,...,...,...,...,...,...,...,...,...,...,...,...
178,256-2-8-0.3-0.001-1.yaml,256,2,8,0.3,0.00100,1,0.16679,0.26997,100,0.04566,0.07874,100
179,256-2-8-0.3-0.001-256.yaml,256,2,8,0.3,0.00100,256,0.16488,0.26943,100,0.04772,0.08303,100
180,256-4-2-0.2-0.001-1.yaml,256,4,2,0.2,0.00100,1,0.17783,0.28419,97,0.04192,0.07380,97
181,256-4-2-0.2-0.001-256.yaml,256,4,2,0.2,0.00100,256,0.17802,0.28356,100,0.04129,0.07223,100


In [7]:
# Attach the parameter tuple and seed list to every run, then rank the runs by
# test NDCG@10 (best first).
df = apply_seed_info(df).sort_values(by='test_NDCG@10', ascending=False)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
81,256-4-4-0.2-0.00075-42.yaml,256,4,4,0.2,0.00075,42,0.16923,0.27246,99,0.05034,0.08566,99,"(256, 4, 4, 0.2, 0.00075)","[42, 1, 256]"
179,256-2-8-0.3-0.001-256.yaml,256,2,8,0.3,0.00100,256,0.16488,0.26943,100,0.04772,0.08303,100,"(256, 2, 8, 0.3, 0.001)","[42, 1, 256]"
132,256-2-2-0.3-0.001-256.yaml,256,2,2,0.3,0.00100,256,0.16216,0.26261,98,0.04651,0.08031,98,"(256, 2, 2, 0.3, 0.001)","[42, 1, 256]"
42,256-4-8-0.2-0.00075-42.yaml,256,4,8,0.2,0.00075,42,0.17929,0.28490,91,0.04604,0.07896,91,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"
112,256-2-4-0.3-0.001-256.yaml,256,2,4,0.3,0.00100,256,0.16336,0.26774,98,0.04599,0.07880,98,"(256, 2, 4, 0.3, 0.001)","[42, 1, 256]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18,64-2-4-0.2-0.0005-42.yaml,64,2,4,0.2,0.00050,42,0.11549,0.21238,96,0.02897,0.06034,96,"(64, 2, 4, 0.2, 0.0005)",[42]
43,256-4-8-0.2-0.001-42.yaml,256,4,8,0.2,0.00100,42,0.03224,0.06441,92,0.01928,0.04053,92,"(256, 4, 8, 0.2, 0.001)",[42]
84,256-4-4-0.3-0.00075-42.yaml,256,4,4,0.3,0.00075,42,0.02161,0.04493,37,0.01860,0.03975,37,"(256, 4, 4, 0.3, 0.00075)",[42]
82,256-4-4-0.2-0.001-42.yaml,256,4,4,0.2,0.00100,42,0.03217,0.06498,68,0.01849,0.03943,68,"(256, 4, 4, 0.2, 0.001)",[42]


In [9]:
# Keep only the runs whose hyperparameter combination was trained with exactly three seeds.
df_all = df.loc[df['all_seeds'].map(len) == 3]

In [10]:
def get_stats_seeds(df_all):
    """Aggregate runs over seeds: per-``params`` mean plus std of the metric columns.

    ``config_file``, ``all_seeds`` and ``seed`` are dropped, the remaining
    columns are grouped by ``params`` and averaged. For each metric column a
    ``<column>__std`` column holding the group's standard deviation is appended.

    Returns:
        A DataFrame indexed by ``params`` with the mean columns followed by
        the ``__std`` columns.
    """
    per_config = df_all.drop(columns=['config_file', 'all_seeds', 'seed']).groupby('params')

    means = per_config.mean()
    stds = per_config.std()

    # Append the spread of each reported metric next to the means.
    for name in ("val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10"):
        means[f'{name}__std'] = stds[name]
    return means


def reorder_cols(df):
    """Return ``df`` restricted to the report columns, in presentation order.

    Order: validation epoch (mean, std), the hyperparameters, then every
    metric followed by its ``__std`` column. Columns not listed are dropped.
    """
    epoch_cols = ['val_epoch', 'val_epoch__std']
    hyperparam_cols = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']

    # Each metric is immediately followed by its standard deviation.
    metric_cols = []
    for metric in ('val_NDCG@10', 'val_Recall@10', 'test_NDCG@10', 'test_Recall@10'):
        metric_cols += [metric, metric + '__std']

    return df[epoch_cols + hyperparam_cols + metric_cols]


# Aggregate the configurations that have three seeds and rank them by mean test NDCG@10 (best first).
df_all = df.loc[df['all_seeds'].map(len) == 3]
df_final = reorder_cols(get_stats_seeds(df_all)).sort_values(by='test_NDCG@10', ascending=False)
df_final

Unnamed: 0_level_0,val_epoch,val_epoch__std,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"(256, 2, 8, 0.3, 0.001)",99.666667,0.57735,256.0,2.0,8.0,0.3,0.001,0.165983,0.000989,0.269477,0.000472,0.04527,0.002666,0.078783,0.004225
"(256, 4, 4, 0.2, 0.00075)",98.0,1.732051,256.0,4.0,4.0,0.2,0.00075,0.1758,0.005723,0.280963,0.007423,0.044903,0.005376,0.077917,0.007671
"(256, 2, 2, 0.3, 0.001)",97.333333,2.081666,256.0,2.0,2.0,0.3,0.001,0.16156,0.0006,0.261987,0.000556,0.044803,0.001746,0.077353,0.002733
"(256, 2, 4, 0.3, 0.001)",82.666667,28.307832,256.0,2.0,4.0,0.3,0.001,0.16231,0.003034,0.266103,0.003753,0.044443,0.00155,0.077093,0.00156
"(256, 2, 8, 0.3, 0.00075)",91.333333,8.020806,256.0,2.0,8.0,0.3,0.00075,0.16448,0.000923,0.268247,0.001497,0.043863,0.000569,0.076217,0.001505
"(256, 4, 8, 0.3, 0.0005)",98.333333,1.527525,256.0,4.0,8.0,0.3,0.0005,0.173447,0.001276,0.27986,0.0022,0.043347,0.000646,0.075737,0.001466
"(256, 2, 8, 0.2, 0.001)",97.0,2.645751,256.0,2.0,8.0,0.2,0.001,0.171103,0.000523,0.27447,0.001021,0.042693,0.001883,0.074723,0.003499
"(256, 2, 4, 0.3, 0.00075)",95.0,6.928203,256.0,2.0,4.0,0.3,0.00075,0.162493,0.001034,0.265747,0.002548,0.04219,0.001816,0.07438,0.002885
"(256, 4, 8, 0.2, 0.00075)",93.0,2.645751,256.0,4.0,8.0,0.2,0.00075,0.17841,0.001195,0.284677,0.000195,0.042037,0.003559,0.073733,0.004567
"(256, 2, 8, 0.3, 0.0005)",96.666667,2.081666,256.0,2.0,8.0,0.3,0.0005,0.162797,0.000404,0.266773,0.001097,0.04115,0.00039,0.072263,0.000906


In [12]:
# Derive the export name from the configured dataset/experiment so the file name
# always matches the data that was loaded (a hand-typed dataset name can go stale
# when this notebook is reused for another dataset).
# index=False drops the `params` tuple index; the individual hyperparameter
# columns are still written.
df_final.to_csv(f'{DATASET}_{EXPERIMENT_NAME}_runs.csv', index=False)

In [8]:
# df.to_csv('kion_baseline_all_runs.csv', index=False)
# Export only the hyperparameter columns (PARAMS) of every run as a list of JSON records.
# NOTE(review): pandas releases before 2.1 are believed to reject index=False together with
# orient="records" — confirm against the pandas version this notebook is run with.
df[PARAMS].to_json('ml20m_baseline_all_runs.json', index=False, orient="records")


In [56]:
# NOTE(review): `grouped_df` is not defined at notebook scope in the visible cells (it only
# exists as a local variable inside get_stats_seeds), so this cell presumably relies on
# stale kernel state — confirm, or rebuild the groupby here before using it.
grouped_df.get_group((128, 2, 2, 0.2, 0.0005))

Unnamed: 0,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params
519,128,2,2,0.2,0.0005,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)"
122,128,2,2,0.2,0.0005,0.0238,0.04668,14,0.01046,0.02176,14,"(128, 2, 2, 0.2, 0.0005)"
520,128,2,2,0.2,0.0005,0.02281,0.04608,11,0.00994,0.02027,11,"(128, 2, 2, 0.2, 0.0005)"


In [None]:
df_all.