In [1]:
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd



def open_text(filename): 
    with open(filename) as f:
        return f.read()


def parse_file_simple(log_data):
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")
    
    # Parse the log
    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    
    for line in log_data.splitlines():        
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))
        
        metrics_match = metrics_pattern.search(line)
        if metrics_match:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results['simple'][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def parse_file_llm(log_data):
    # Regex patterns
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")
    
    # Parse the log
    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None
        
    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            current_config = config_match.group(1)
        
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))
        
        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results[current_config][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def find_max(results, config, metric='NDCG@10'):
    def get_dct_epoch(lst, epoch):
        for x in lst:
            if x['epoch'] == epoch:
                return x
        return lst[-1]
        # raise Exception()
    
    def postprocess(lst):
        res = []
        for dct in lst:
            res.append({
                'NDCG@10': round(dct['NDCG@10'], 5),
                'Recall@10': round(dct['Recall@10'], 5),
                'epoch': dct['epoch']
            })
        return res
    
    best_val_dct = None
    for dct in results[config]['validation_metrics']:
        if best_val_dct is None:
            best_val_dct = dct
        elif dct[metric] > best_val_dct[metric]:
            best_val_dct = dct
    
    best_val_epoch = best_val_dct['epoch']
    
    return {
        'test': postprocess([get_dct_epoch(results[config]['test_metrics'], best_val_epoch)]),
        'validation': postprocess([best_val_dct]),
    }


def create_dataframe(exp_data_with_max_val):
    # List to store extracted experiment data
    data = []
    
    # Parse experiment results
    for config_path, metrics in exp_data_with_max_val.items():
        # Extract config details from the filename
        config_file = config_path.split("/")[-1]  # Get only the filename
        config_name = config_file.replace(".yaml", "").lstrip('sasrec-').split("-")
        
        hidden_units = int(config_name[0])
        num_blocks = int(config_name[1])
        num_heads = int(config_name[2])
        dropout_rate = float(config_name[3])
        learning_rate = float(config_name[4])
        seed = int(config_name[5]) if len(config_name) == 6 else 42
        
        # Extract validation and test metrics
        val_metrics = metrics.get("validation", [{}])[0]
        test_metrics = metrics.get("test", [{}])[0]
    
        # Append extracted values to data list
        data.append([
            config_file, hidden_units, num_blocks, num_heads, dropout_rate, learning_rate, seed,
            val_metrics.get("NDCG@10", None), val_metrics.get("Recall@10", None), val_metrics.get("epoch", None),
            test_metrics.get("NDCG@10", None), test_metrics.get("Recall@10", None), test_metrics.get("epoch", None)
        ])
    
    # Create DataFrame
    columns = [
        "config_file", "hidden_units", "num_blocks", "num_heads", "dropout_rate", "learning_rate", "seed",
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
        ]
    
    df = pd.DataFrame(data, columns=columns)
    return df


PARAMS = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']



def apply_seed_info(df):
    df['params'] = df.apply(lambda row: (row['hidden_units'],
                                         row['num_blocks'], 
                                         row['num_heads'],
                                         row['dropout_rate'], 
                                         row['learning_rate']), axis=1)
    params2seeds = {}
    for params, seed in zip(df['params'], df['seed']):
        if params not in params2seeds:
            params2seeds[params] = []
        params2seeds[params].append(seed)
    df['all_seeds'] = df.apply(lambda row: (params2seeds[row['params']]), axis=1)
    return df

In [3]:
MODEL_NAME = 'sasrec'
DATASET = 'kion_en'
EXPERIMENT_NAME = 'baseline'
SPLIT_NAME = 'general'
MODE = 'BASELINE'

local_directory = f"experiments-2_0/results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}" 

seed_folders = ['single_seed', 'other_seed']

In [4]:
import os


results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    for file in os.listdir(cur_folder):
        log_data = open_text(f'{cur_folder}/{file}')
        cur_results = parse_file_llm(log_data)
        # if cur_results['simple']['validation_metrics'] == []:
        #     continue
        results.update(cur_results)
    
    
exp_data_with_max_val = {}
for config in results:
    exp_data_with_max_val[config] = find_max(results, config, metric='NDCG@10')
exp_data_with_max_val.keys()

dict_keys(['-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.2-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/256-2-8-0.3-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.2-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.2-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.2-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.3-0.0005-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.3-0.00075-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_seed/64-2-4-0.3-0.001-42.yaml', '-2_0/configs/sasrec/kion_en/baseline/single_se

In [5]:
df = create_dataframe(exp_data_with_max_val)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,256-2-8-0.2-0.0005-42.yaml,256,2,8,0.2,0.00050,42,0.09434,0.17987,3,0.05650,0.11146,3
1,256-2-8-0.2-0.00075-42.yaml,256,2,8,0.2,0.00075,42,0.09486,0.17959,3,0.05765,0.11144,3
2,256-2-8-0.2-0.001-42.yaml,256,2,8,0.2,0.00100,42,0.09681,0.17883,8,0.05492,0.10617,8
3,256-2-8-0.3-0.0005-42.yaml,256,2,8,0.3,0.00050,42,0.09376,0.18161,6,0.05549,0.11311,6
4,256-2-8-0.3-0.00075-42.yaml,256,2,8,0.3,0.00075,42,0.09493,0.17899,3,0.05730,0.11092,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
183,128-4-2-0.3-0.0005-1.yaml,128,4,2,0.3,0.00050,1,0.09719,0.18248,7,0.05843,0.11523,7
184,128-4-2-0.3-0.0005-256.yaml,128,4,2,0.3,0.00050,256,0.09709,0.18324,7,0.05668,0.11234,7
185,128-4-4-0.2-0.0005-1.yaml,128,4,4,0.2,0.00050,1,0.09705,0.18378,8,0.05785,0.11378,8
186,128-4-4-0.2-0.0005-256.yaml,128,4,4,0.2,0.00050,256,0.09750,0.18280,6,0.05848,0.11337,6


In [6]:
df['seed'].value_counts()

seed
42     108
1       40
256     40
Name: count, dtype: int64

In [7]:
df = apply_seed_info(df)
df.sort_values(by='val_NDCG@10', ascending=False, inplace=True)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
159,64-4-8-0.3-0.00075-1.yaml,64,4,8,0.3,0.00075,1,0.10010,0.18427,8,0.05906,0.11345,8,"(64, 4, 8, 0.3, 0.00075)","[42, 1, 256]"
161,64-4-8-0.3-0.001-1.yaml,64,4,8,0.3,0.00100,1,0.09985,0.18433,8,0.05940,0.11466,8,"(64, 4, 8, 0.3, 0.001)","[42, 1, 256]"
124,64-4-4-0.3-0.001-1.yaml,64,4,4,0.3,0.00100,1,0.09981,0.18313,8,0.05946,0.11437,8,"(64, 4, 4, 0.3, 0.001)","[42, 1, 256]"
104,64-4-4-0.2-0.001-42.yaml,64,4,4,0.2,0.00100,42,0.09961,0.18498,6,0.05876,0.11414,6,"(64, 4, 4, 0.2, 0.001)","[42, 1, 256]"
15,64-4-2-0.3-0.0005-42.yaml,64,4,2,0.3,0.00050,42,0.09958,0.18487,15,0.05844,0.11383,15,"(64, 4, 2, 0.3, 0.0005)","[42, 1, 256]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
68,256-4-2-0.2-0.001-42.yaml,256,4,2,0.2,0.00100,42,0.08238,0.14957,17,0.04548,0.08323,17,"(256, 4, 2, 0.2, 0.001)",[42]
28,256-4-4-0.3-0.00075-42.yaml,256,4,4,0.3,0.00075,42,0.08236,0.14957,16,0.04547,0.08323,16,"(256, 4, 4, 0.3, 0.00075)",[42]
49,256-4-8-0.2-0.00075-42.yaml,256,4,8,0.2,0.00075,42,0.08182,0.14930,17,0.04529,0.08379,17,"(256, 4, 8, 0.2, 0.00075)",[42]
52,256-4-8-0.3-0.00075-42.yaml,256,4,8,0.3,0.00075,42,0.08163,0.15022,17,0.04487,0.08377,17,"(256, 4, 8, 0.3, 0.00075)",[42]


In [8]:
df_all = df[df['all_seeds'].apply(len) == 3]
# df_all[df_all['seed'] == 3]

In [8]:
def get_stats_seeds(df_all):
    metric_cols = ["val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10"]
        
    grouped_df = df_all.drop(['config_file', 'all_seeds', 'seed'], axis=1).groupby('params')
    df_mean = grouped_df.agg('mean')
    df_std = grouped_df.agg('std')
    for col in metric_cols:
        df_mean[col + '__std'] = df_std[col]
    return df_mean


def reorder_cols(df):
    order = ['val_epoch', 'val_epoch__std', 'hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate', 'val_NDCG@10', 'val_NDCG@10__std', 'val_Recall@10', 'val_Recall@10__std', 'test_NDCG@10', 'test_NDCG@10__std', 'test_Recall@10', 'test_Recall@10__std']
    return df[order]


df_all = df[df['all_seeds'].apply(len) == 3]
df_final = get_stats_seeds(df_all)
df_final = reorder_cols(df_final)
df_final.sort_values(by='val_NDCG@10', ascending=False, inplace=True)
df_final

Unnamed: 0_level_0,val_epoch,val_epoch__std,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"(64, 4, 4, 0.3, 0.0005)",13.333333,3.21455,64.0,4.0,4.0,0.3,0.0005,0.09936,7.2e-05,0.184087,0.000721,0.058487,0.000906,0.113477,0.000904
"(64, 4, 4, 0.3, 0.001)",8.333333,1.527525,64.0,4.0,4.0,0.3,0.001,0.099347,0.000404,0.183273,0.000163,0.058657,0.000922,0.11347,0.001103
"(64, 4, 8, 0.3, 0.001)",8.333333,1.527525,64.0,4.0,8.0,0.3,0.001,0.099337,0.000469,0.184183,0.000168,0.058387,0.000912,0.113143,0.00143
"(64, 4, 8, 0.3, 0.00075)",7.666667,0.57735,64.0,4.0,8.0,0.3,0.00075,0.09932,0.000735,0.183927,0.000309,0.059063,6e-06,0.113617,0.000289
"(64, 4, 4, 0.2, 0.001)",7.0,1.0,64.0,4.0,4.0,0.2,0.001,0.099277,0.000297,0.184707,0.00025,0.05901,0.000486,0.114223,0.00018
"(64, 4, 2, 0.3, 0.0005)",12.333333,2.309401,64.0,4.0,2.0,0.3,0.0005,0.09923,0.000416,0.182983,0.001706,0.058367,0.000277,0.113393,0.000647
"(64, 4, 4, 0.3, 0.00075)",9.666667,2.081666,64.0,4.0,4.0,0.3,0.00075,0.099017,0.000445,0.18384,0.001193,0.059023,0.001559,0.113957,0.001708
"(64, 4, 4, 0.2, 0.0005)",9.666667,1.527525,64.0,4.0,4.0,0.2,0.0005,0.098993,0.000457,0.18389,0.000467,0.058587,0.000685,0.11377,0.000589
"(64, 4, 8, 0.3, 0.0005)",13.0,1.732051,64.0,4.0,8.0,0.3,0.0005,0.098973,0.00028,0.18438,0.0012,0.058023,0.000642,0.113043,0.000372
"(64, 4, 8, 0.2, 0.001)",7.333333,1.154701,64.0,4.0,8.0,0.2,0.001,0.098937,0.000762,0.183747,0.001109,0.05878,0.000652,0.113957,0.000724


In [10]:
df_final.to_csv('kion_baseline_runs_NEW.csv', index=False)

In [18]:
# df.to_csv('kion_baseline_all_runs.csv', index=False)
df_final[PARAMS].to_json('kion_baseline_all_runs.json', index=False, orient="records")


In [9]:
df_final[PARAMS[1:]].to_json('RESULTS_LATE/kion_base.json', index=False, orient="records")

In [56]:
grouped_df.get_group((128, 2, 2, 0.2, 0.0005))

Unnamed: 0,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params
519,128,2,2,0.2,0.0005,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)"
122,128,2,2,0.2,0.0005,0.0238,0.04668,14,0.01046,0.02176,14,"(128, 2, 2, 0.2, 0.0005)"
520,128,2,2,0.2,0.0005,0.02281,0.04608,11,0.00994,0.02027,11,"(128, 2, 2, 0.2, 0.0005)"


In [10]:
import numpy as np
np.corrcoef(df_final['test_NDCG@10'].values, df_final['val_NDCG@10'].values)

array([[1.        , 0.88060897],
       [0.88060897, 1.        ]])