In [1]:
# Change the working directory three levels up (presumably the repository root,
# against which the relative paths used further down are resolved — confirm).
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd



def open_text(filename):
    """Return the entire content of the text file at ``filename`` as one string.

    The file is opened in text mode with the platform default encoding and is
    closed before the function returns.
    """
    with open(filename) as handle:
        contents = handle.read()
    return contents


def parse_file_simple(log_data):
    """Parse a training log that contains a single run.

    Lines matching ``Epoch <n>/<total>, Loss: <value>`` update the current
    epoch number and loss. Every ``Validation Metrics: {...}`` or
    ``Test Metrics: {...}`` line is stored together with the most recently
    seen epoch and loss.

    Args:
        log_data: Full text of the log file.

    Returns:
        A ``defaultdict`` whose only populated key is ``'simple'``. Its value
        is ``{"validation_metrics": [...], "test_metrics": [...]}``, where each
        list item is ``{"epoch": ..., "loss": ..., **metrics}``. ``epoch`` and
        ``loss`` are ``None`` for metrics logged before the first epoch line.
    """
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})

    # Initialised up front: a metrics line that precedes the first epoch line
    # used to fail with UnboundLocalError; it is now recorded with None values.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # NOTE(review): eval() executes whatever text the log contains.
            # Only feed trusted, self-produced logs; consider
            # ast.literal_eval if the dicts hold plain literals only.
            metrics = eval(metrics_match.group(2))
            results['simple'][f"{metric_type}_metrics"].append(
                {"epoch": epoch_num, "loss": loss, **metrics}
            )
    return results


def parse_file_llm(log_data):
    """Parse a training log that may contain several runs, keyed by config.

    A line containing ``experiments<...>.yaml`` switches the current config;
    the key is the text after ``experiments`` up to and including ``.yaml``.
    Lines matching ``Epoch <n>/<total>, Loss: <value>`` update the current
    epoch number and loss. ``Validation Metrics: {...}`` / ``Test Metrics:
    {...}`` lines are stored under the current config; they are ignored while
    no config has been seen yet.

    Args:
        log_data: Full text of the log file.

    Returns:
        A ``defaultdict`` mapping each config key to
        ``{"validation_metrics": [...], "test_metrics": [...]}``, where each
        list item is ``{"epoch": ..., "loss": ..., **metrics}``. ``epoch`` and
        ``loss`` are ``None`` for metrics logged before the first epoch line.
    """
    # Regex patterns
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")

    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None

    # Initialised up front: a metrics line that precedes the first epoch line
    # used to fail with UnboundLocalError; it is now recorded with None values.
    epoch_num = None
    loss = None

    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            current_config = config_match.group(1)

        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))

        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            # NOTE(review): eval() executes whatever text the log contains.
            # Only feed trusted, self-produced logs; consider
            # ast.literal_eval if the dicts hold plain literals only.
            metrics = eval(metrics_match.group(2))
            results[current_config][f"{metric_type}_metrics"].append(
                {"epoch": epoch_num, "loss": loss, **metrics}
            )
    return results


def find_max(results, config, metric='NDCG@10'):
    """Pick the best validation epoch for ``config`` and the matching test entry.

    The best validation entry is the one with the highest ``metric``; on ties
    the earliest entry wins. The test entry is the one logged at the same
    epoch, or the last test entry when no epoch matches.

    Args:
        results: Mapping ``config -> {"validation_metrics": [...],
            "test_metrics": [...]}`` as produced by the log parsers.
        config: Key of the run to summarise.
        metric: Validation metric used to select the best epoch.

    Returns:
        ``{"test": [entry], "validation": [entry]}`` where each entry holds
        ``NDCG@10`` and ``Recall@10`` rounded to 5 decimals plus ``epoch``.

    Raises:
        ValueError: If the run has no validation or no test metrics.
    """
    def get_dct_epoch(lst, epoch):
        for x in lst:
            if x['epoch'] == epoch:
                return x
        # Best-effort fallback kept from the original implementation: when no
        # test entry was logged at the requested epoch, use the last one.
        return lst[-1]

    def postprocess(lst):
        return [
            {
                'NDCG@10': round(dct['NDCG@10'], 5),
                'Recall@10': round(dct['Recall@10'], 5),
                'epoch': dct['epoch'],
            }
            for dct in lst
        ]

    validation_metrics = results[config]['validation_metrics']
    test_metrics = results[config]['test_metrics']

    # Fail with a clear message instead of an opaque TypeError / IndexError.
    if not validation_metrics:
        raise ValueError(f"No validation metrics found for config {config!r}")
    if not test_metrics:
        raise ValueError(f"No test metrics found for config {config!r}")

    # max() keeps the first maximal element, matching the strict '>' update
    # rule of the original loop.
    best_val_dct = max(validation_metrics, key=lambda dct: dct[metric])
    best_val_epoch = best_val_dct['epoch']

    return {
        'test': postprocess([get_dct_epoch(test_metrics, best_val_epoch)]),
        'validation': postprocess([best_val_dct]),
    }


def create_dataframe(exp_data_with_max_val):
    """Build one DataFrame row per config from the best-epoch summaries.

    Hyperparameters are decoded from the config file name, which is expected
    to look like ``[sasrec-]<hidden>-<blocks>-<heads>-<dropout>-<lr>[-<seed>].yaml``.
    When the name has no seed component (i.e. it does not split into exactly
    six parts), the seed is set to 42.

    Args:
        exp_data_with_max_val: Mapping ``config_path -> {"validation": [entry],
            "test": [entry]}`` where each entry may hold ``NDCG@10``,
            ``Recall@10`` and ``epoch``.

    Returns:
        A DataFrame with the config file name, the decoded hyperparameters and
        the validation / test metrics. Metrics that are missing (absent key or
        empty list) are stored as missing values.
    """
    columns = [
        "config_file", "hidden_units", "num_blocks", "num_heads", "dropout_rate", "learning_rate", "seed",
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
    ]
    model_prefix = "sasrec-"

    data = []
    for config_path, metrics in exp_data_with_max_val.items():
        config_file = config_path.split("/")[-1]  # Get only the filename
        stem = config_file.replace(".yaml", "")
        # str.lstrip('sasrec-') removes any leading run of the characters
        # {s, a, r, e, c, -}, not the literal prefix, so strip it explicitly.
        if stem.startswith(model_prefix):
            stem = stem[len(model_prefix):]
        config_name = stem.split("-")

        hidden_units = int(config_name[0])
        num_blocks = int(config_name[1])
        num_heads = int(config_name[2])
        dropout_rate = float(config_name[3])
        learning_rate = float(config_name[4])
        seed = int(config_name[5]) if len(config_name) == 6 else 42

        # `or [{}]` also covers a key that is present but holds an empty list,
        # which previously raised IndexError on `[0]`.
        val_metrics = (metrics.get("validation") or [{}])[0]
        test_metrics = (metrics.get("test") or [{}])[0]

        data.append([
            config_file, hidden_units, num_blocks, num_heads, dropout_rate, learning_rate, seed,
            val_metrics.get("NDCG@10", None), val_metrics.get("Recall@10", None), val_metrics.get("epoch", None),
            test_metrics.get("NDCG@10", None), test_metrics.get("Recall@10", None), test_metrics.get("epoch", None)
        ])

    return pd.DataFrame(data, columns=columns)


# Names of the hyperparameter columns, in the same order in which
# create_dataframe lays them out.
PARAMS = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']


def apply_seed_info(df):
    """Annotate every run with its hyperparameter tuple and that tuple's seeds.

    Adds two columns to ``df`` in place and returns the same object:

    * ``params``: tuple ``(hidden_units, num_blocks, num_heads, dropout_rate,
      learning_rate)`` of the row.
    * ``all_seeds``: list of the ``seed`` values of all rows sharing that
      tuple, in row order.
    """
    param_columns = ('hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate')
    df['params'] = df.apply(
        lambda row: tuple(row[name] for name in param_columns),
        axis=1,
    )

    # Collect the seeds observed for each hyperparameter combination.
    seeds_by_params = {}
    for key, seed_value in zip(df['params'], df['seed']):
        seeds_by_params.setdefault(key, []).append(seed_value)

    df['all_seeds'] = df.apply(lambda row: seeds_by_params[row['params']], axis=1)
    return df

In [3]:
# Identifiers of the experiment whose logs are analysed below.
MODEL_NAME = 'sasrec'
DATASET = 'amazon_m2'
EXPERIMENT_NAME = 'baseline'
SPLIT_NAME = 'general'  # NOTE(review): not referenced in the visible cells
MODE = 'BASELINE'  # NOTE(review): not referenced in the visible cells

# Folder holding the result logs, relative to the current working directory.
local_directory = f"experiments-2_0/results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}" 

# Sub-folders of local_directory that are scanned for log files.
seed_folders = ['single_seed', 'other_seed']

In [4]:
import os


# Parse every log file found in the seed folders into one mapping
# config -> {"validation_metrics": [...], "test_metrics": [...]}.
# A config that appears in several files keeps only the last parsed entry,
# because dict.update replaces the whole value.
results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    for file in os.listdir(cur_folder):
        log_path = os.path.join(cur_folder, file)
        # os.listdir also returns hidden entries (e.g. .DS_Store) and
        # sub-directories (e.g. .ipynb_checkpoints), which are not logs.
        if file.startswith('.') or not os.path.isfile(log_path):
            continue
        log_data = open_text(log_path)
        cur_results = parse_file_llm(log_data)
        results.update(cur_results)


# For each config keep the best validation epoch (by NDCG@10) and the test
# metrics from the same epoch.
exp_data_with_max_val = {}
for config in results:
    exp_data_with_max_val[config] = find_max(results, config, metric='NDCG@10')
exp_data_with_max_val.keys()

dict_keys(['-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-4-8-0.3-0.001-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-2-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-2-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-2-0.2-0.001-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-2-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-2-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-2-0.3-0.001-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-4-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-4-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-4-0.2-0.001-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-2-4-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/amazon_m2/baseline/single_seed/64-4-2-0.2-0.001-42.yaml', '-2_0/configs/sasrec/amazon_m2/

In [5]:
# One row per config: decoded hyperparameters plus the best-validation and
# matching test metrics.
df = create_dataframe(exp_data_with_max_val)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,64-4-8-0.3-0.001-42.yaml,64,4,8,0.3,0.001,42,0.24795,0.37936,14,0.36083,0.53687,14
1,64-2-2-0.2-0.0005-42.yaml,64,2,2,0.2,0.0005,42,0.25735,0.39618,11,0.36569,0.54299,11
2,64-2-2-0.2-0.00075-42.yaml,64,2,2,0.2,0.00075,42,0.25095,0.38951,8,0.36619,0.54232,8
3,64-2-2-0.2-0.001-42.yaml,64,2,2,0.2,0.001,42,0.25043,0.37872,11,0.35447,0.52463,11
4,64-2-2-0.3-0.0005-42.yaml,64,2,2,0.3,0.0005,42,0.25627,0.39803,15,0.3763,0.55115,15
5,64-2-2-0.3-0.00075-42.yaml,64,2,2,0.3,0.00075,42,0.25455,0.38976,18,0.36998,0.5429,18
6,64-2-2-0.3-0.001-42.yaml,64,2,2,0.3,0.001,42,0.25263,0.38123,21,0.36267,0.53054,21
7,64-2-4-0.2-0.0005-42.yaml,64,2,4,0.2,0.0005,42,0.25206,0.38185,12,0.35515,0.52348,12
8,64-2-4-0.2-0.00075-42.yaml,64,2,4,0.2,0.00075,42,0.25639,0.38578,13,0.36593,0.53706,13
9,64-2-4-0.2-0.001-42.yaml,64,2,4,0.2,0.001,42,0.2509,0.38696,9,0.36802,0.54327,9


In [6]:
# Display the per-run DataFrame again (same content as the cell above).
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,64-4-8-0.3-0.001-42.yaml,64,4,8,0.3,0.001,42,0.24795,0.37936,14,0.36083,0.53687,14
1,64-2-2-0.2-0.0005-42.yaml,64,2,2,0.2,0.0005,42,0.25735,0.39618,11,0.36569,0.54299,11
2,64-2-2-0.2-0.00075-42.yaml,64,2,2,0.2,0.00075,42,0.25095,0.38951,8,0.36619,0.54232,8
3,64-2-2-0.2-0.001-42.yaml,64,2,2,0.2,0.001,42,0.25043,0.37872,11,0.35447,0.52463,11
4,64-2-2-0.3-0.0005-42.yaml,64,2,2,0.3,0.0005,42,0.25627,0.39803,15,0.3763,0.55115,15
5,64-2-2-0.3-0.00075-42.yaml,64,2,2,0.3,0.00075,42,0.25455,0.38976,18,0.36998,0.5429,18
6,64-2-2-0.3-0.001-42.yaml,64,2,2,0.3,0.001,42,0.25263,0.38123,21,0.36267,0.53054,21
7,64-2-4-0.2-0.0005-42.yaml,64,2,4,0.2,0.0005,42,0.25206,0.38185,12,0.35515,0.52348,12
8,64-2-4-0.2-0.00075-42.yaml,64,2,4,0.2,0.00075,42,0.25639,0.38578,13,0.36593,0.53706,13
9,64-2-4-0.2-0.001-42.yaml,64,2,4,0.2,0.001,42,0.2509,0.38696,9,0.36802,0.54327,9


In [7]:
# Add the 'params' / 'all_seeds' columns and rank the runs by validation
# NDCG@10. The sort is chained instead of using inplace=True so the cell
# rebinds `df` in a single, explicit step.
df = apply_seed_info(df).sort_values(by='val_NDCG@10', ascending=False)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
36,128-4-8-0.2-0.0005-42.yaml,128,4,8,0.2,0.0005,42,0.26725,0.40285,7,0.36764,0.53922,7,"(128, 4, 8, 0.2, 0.0005)","[42, 1, 256]"
28,64-2-8-0.2-0.0005-42.yaml,64,2,8,0.2,0.0005,42,0.26468,0.40849,11,0.37045,0.54875,11,"(64, 2, 8, 0.2, 0.0005)","[42, 1, 256]"
15,64-4-4-0.2-0.0005-42.yaml,64,4,4,0.2,0.0005,42,0.26021,0.40112,11,0.36381,0.53675,11,"(64, 4, 4, 0.2, 0.0005)","[42, 1, 256]"
44,128-4-8-0.2-0.0005-256.yaml,128,4,8,0.2,0.0005,256,0.25924,0.39383,6,0.36594,0.54083,6,"(128, 4, 8, 0.2, 0.0005)","[42, 1, 256]"
43,128-4-8-0.2-0.0005-1.yaml,128,4,8,0.2,0.0005,1,0.25863,0.38731,7,0.35942,0.53489,7,"(128, 4, 8, 0.2, 0.0005)","[42, 1, 256]"
29,64-2-8-0.2-0.00075-42.yaml,64,2,8,0.2,0.00075,42,0.25814,0.40103,9,0.36626,0.54388,9,"(64, 2, 8, 0.2, 0.00075)","[42, 1, 256]"
21,64-4-4-0.2-0.00075-42.yaml,64,4,4,0.2,0.00075,42,0.25788,0.39469,10,0.36315,0.53806,10,"(64, 4, 4, 0.2, 0.00075)","[42, 1, 256]"
1,64-2-2-0.2-0.0005-42.yaml,64,2,2,0.2,0.0005,42,0.25735,0.39618,11,0.36569,0.54299,11,"(64, 2, 2, 0.2, 0.0005)",[42]
26,64-2-4-0.3-0.00075-42.yaml,64,2,4,0.3,0.00075,42,0.25674,0.39244,19,0.36986,0.54561,19,"(64, 2, 4, 0.3, 0.00075)",[42]
8,64-2-4-0.2-0.00075-42.yaml,64,2,4,0.2,0.00075,42,0.25639,0.38578,13,0.36593,0.53706,13,"(64, 2, 4, 0.2, 0.00075)",[42]


Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
176,256-4-4-0.2-0.00075-1.yaml,256,4,4,0.2,0.00075,1,0.1797,0.28428,99,0.04478,0.07777,99,"(256, 4, 4, 0.2, 0.00075)","[42, 1, 256]"


In [17]:
# Keep only the runs whose hyperparameter combination has exactly three seeds
# listed in 'all_seeds'.
df_all = df[df['all_seeds'].apply(len) == 3]

In [8]:
def get_stats_seeds(df_all):
    """Aggregate runs over seeds: per-``params`` mean plus std of the metrics.

    The ``config_file``, ``all_seeds`` and ``seed`` columns are dropped, the
    remaining columns are grouped by ``params`` and averaged. For every metric
    column a ``<column>__std`` column with the group's standard deviation is
    appended to the frame of means.

    Returns:
        DataFrame indexed by ``params`` holding the group means and the added
        ``__std`` columns.
    """
    std_columns = ("val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10")

    numeric_part = df_all.drop(['config_file', 'all_seeds', 'seed'], axis=1)
    by_params = numeric_part.groupby('params')

    stats = by_params.mean()
    spread = by_params.std()
    for name in std_columns:
        stats[f"{name}__std"] = spread[name]
    return stats


def reorder_cols(df):
    """Return ``df`` restricted to the report columns, in presentation order.

    The order is: validation epoch (mean, std), the five hyperparameters, then
    each metric immediately followed by its ``__std`` column.
    """
    hyperparameters = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']
    metrics = ['val_NDCG@10', 'val_Recall@10', 'test_NDCG@10', 'test_Recall@10']

    order = ['val_epoch', 'val_epoch__std']
    order.extend(hyperparameters)
    for metric in metrics:
        order.append(metric)
        order.append(metric + '__std')
    return df[order]


# Keep only hyperparameter combinations that were run with exactly three
# seeds, aggregate them (mean / std over seeds) and rank by mean validation
# NDCG@10. The sort is chained rather than done in place: reorder_cols returns
# a column subset, and sorting that in place can raise SettingWithCopyWarning.
df_all = df[df['all_seeds'].apply(len) == 3]
df_final = reorder_cols(get_stats_seeds(df_all)).sort_values(by='val_NDCG@10', ascending=False)
df_final


# 0.045270
# 0.052483

Unnamed: 0_level_0,val_epoch,val_epoch__std,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"(128, 4, 8, 0.2, 0.0005)",6.666667,0.57735,128.0,4.0,8.0,0.2,0.0005,0.261707,0.00481,0.394663,0.007803,0.364333,0.004339,0.538313,0.003072
"(64, 2, 8, 0.2, 0.0005)",10.666667,2.516611,64.0,2.0,8.0,0.2,0.0005,0.257037,0.006619,0.395397,0.012792,0.364653,0.007146,0.53734,0.012324
"(64, 4, 4, 0.2, 0.0005)",10.0,1.0,64.0,4.0,4.0,0.2,0.0005,0.25412,0.005379,0.393313,0.006761,0.363383,0.001448,0.537797,0.002753
"(64, 4, 4, 0.2, 0.00075)",10.333333,1.527525,64.0,4.0,4.0,0.2,0.00075,0.253907,0.003664,0.38778,0.00736,0.36368,0.002291,0.539217,0.002578
"(64, 2, 8, 0.2, 0.00075)",10.666667,2.081666,64.0,2.0,8.0,0.2,0.00075,0.25283,0.004618,0.385467,0.013657,0.358063,0.007106,0.528643,0.013201


In [9]:
# Export the PARAMS[1:] columns (all hyperparameters except hidden_units) of
# every df_final row but (128, 4, 8, 0.2, 0.0005) as JSON records.
# NOTE(review): the reason for excluding that config is not stated in this
# notebook, and the 'RESULTS_LATE' directory presumably has to exist already —
# confirm.
df_final.drop(index=(128, 4, 8, 0.2, 0.0005))[PARAMS[1:]].to_json('RESULTS_LATE/m2_base.json', index=False, orient="records")

In [9]:
# Write the aggregated table without the (128, 4, 8, 0.2, 0.0005) row to CSV;
# the 'params' index is written too because index=False is not passed.
# NOTE(review): the reason for excluding that config is not stated in this
# notebook — confirm.
df_final.drop(index=(128, 4, 8, 0.2, 0.0005)).to_csv('m2_baseline_final.csv')

In [29]:
# Show the individual per-seed runs of one hyperparameter combination.
df[df['params'] == (256, 4, 8, 0.2, 0.00075)]

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
42,256-4-8-0.2-0.00075-42.yaml,256,4,8,0.2,0.00075,42,0.17929,0.2849,91,0.04604,0.07896,91,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"
126,256-4-8-0.2-0.00075-256.yaml,256,4,8,0.2,0.00075,256,0.17889,0.28459,92,0.04084,0.07173,92,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"
125,256-4-8-0.2-0.00075-1.yaml,256,4,8,0.2,0.00075,1,0.17705,0.28454,96,0.03923,0.07051,96,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"


In [25]:
# List the columns of the aggregated frame.
df_final.columns 

Index(['val_epoch', 'val_epoch__std', 'hidden_units', 'num_blocks',
       'num_heads', 'dropout_rate', 'learning_rate', 'val_NDCG@10',
       'val_NDCG@10__std', 'val_Recall@10', 'val_Recall@10__std',
       'test_NDCG@10', 'test_NDCG@10__std', 'test_Recall@10',
       'test_Recall@10__std'],
      dtype='object')

In [18]:
# NOTE(review): the file name says 'ml20m' while DATASET in the config cell is
# 'amazon_m2' — looks like a leftover from another dataset; confirm before
# running. index=False omits the 'params' index; the hyperparameters are still
# written as regular columns.
df_final.to_csv('ml20m_baseline_3seed_runs_NEW.csv', index=False)

In [19]:
# NOTE(review): the file name says 'ml20m' while DATASET in the config cell is
# 'amazon_m2' — looks like a leftover from another dataset; confirm before
# running.
df.to_csv('ml20m_baseline_all_runs_NEW.csv', index=False)

In [10]:
# df.to_csv('kion_baseline_all_runs.csv', index=False)
# Export only the hyperparameter columns (PARAMS) of every run as JSON records.
df[PARAMS].to_json('m2_baseline_all_runs.json', index=False, orient="records")


In [56]:
# NOTE(review): `grouped_df` is not defined at notebook level in the visible
# cells (it only exists as a local inside get_stats_seeds), so this cell
# presumably relies on stale kernel state and would fail on a fresh
# Restart & Run All — confirm.
grouped_df.get_group((128, 2, 2, 0.2, 0.0005))

Unnamed: 0,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params
519,128,2,2,0.2,0.0005,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)"
122,128,2,2,0.2,0.0005,0.0238,0.04668,14,0.01046,0.02176,14,"(128, 2, 2, 0.2, 0.0005)"
520,128,2,2,0.2,0.0005,0.02281,0.04608,11,0.00994,0.02027,11,"(128, 2, 2, 0.2, 0.0005)"


In [15]:
# Correlation matrix between the test and validation NDCG@10 columns of
# df_final, restricted to its first 15 rows.
# NOTE(review): the cut-off of 15 is not explained in this notebook — confirm.
np.corrcoef(df_final['test_NDCG@10'].values[:15], df_final['val_NDCG@10'].values[:15])

array([[1.        , 0.27889656],
       [0.27889656, 1.        ]])