In [1]:
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd

def open_text(filename): 
    with open(filename) as f:
        return f.read()


def parse_file_llm(log_data):
    # Regex patterns
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+, Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")
    
    # Parse the log
    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None
        
    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            current_config = config_match.group(1)
        
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))
        
        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results[current_config][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def find_max(results, config, metric='NDCG@10'):
    def get_dct_epoch(lst, epoch):
        for x in lst:
            if x['epoch'] == epoch:
                return x
        return lst[-1]
        # raise Exception()
    
    def postprocess(lst):
        res = []
        for dct in lst:
            res.append({
                'NDCG@10': round(dct['NDCG@10'], 5),
                'Recall@10': round(dct['Recall@10'], 5),
                'epoch': dct['epoch']
            })
        return res
    
    best_val_dct = None
    for dct in results[config]['validation_metrics']:
        if best_val_dct is None:
            best_val_dct = dct
        elif dct[metric] > best_val_dct[metric]:
            best_val_dct = dct
    
    best_val_epoch = best_val_dct['epoch']
    
    return {
        'test': postprocess([get_dct_epoch(results[config]['test_metrics'], best_val_epoch)]),
        'validation': postprocess([best_val_dct]),
    }


PARAMS = ["config_file", "weighting_scheme", "alpha", "fine_tune_epoch", 'reconstruct_loss', "reconstruction_layer", 'weight_scale', 'use_down_scale','use_upscale','multi_profile','multi_profile_aggr_scheme','scale_guide_loss','user_profile_embeddings_files']


def create_dataframe(exp_data_with_max_val):
    # List to store extracted experiment data
    data = []
    
    # Parse experiment results
    for config_path, metrics in exp_data_with_max_val.items():
        # Extract config details from the filename
        config_file = config_path.split("/")[-1]  # Get only the filename
        config_name = config_file.replace(".yaml", "").lstrip('sasrec-').split("-")
        
        
        # {weighting_scheme}-{alpha}-{fine_tune_epoch}-{reconstruct_loss}-{reconstruction_layer}-{weight_scale}-{use_down_scale}-{use_upscale}-{multi_profile}-{multi_profile_aggr_scheme}-{scale_guide_loss}-{user_profile_embeddings_files}-{seed}.yaml
        weighting_scheme = (config_name[0])
        alpha = float(config_name[1])
        fine_tune_epoch = int(config_name[2])
        reconstruct_loss = (config_name[3])
        reconstruction_layer = int(config_name[4])
        weight_scale = float(config_name[5])
        use_down_scale = (config_name[6])
        use_upscale = (config_name[7])
        multi_profile = (config_name[8])
        multi_profile_aggr_scheme = (config_name[9])
        scale_guide_loss = (config_name[10])
        user_profile_embeddings_files = (config_name[11])
        seed = int(config_name[12])
        
        # Extract validation and test metrics
        val_metrics = metrics.get("validation", [{}])[0]
        test_metrics = metrics.get("test", [{}])[0]
    
        # Append extracted values to data list
        data.append([
            config_file, weighting_scheme, alpha, fine_tune_epoch, reconstruct_loss, reconstruction_layer, weight_scale, use_down_scale,use_upscale,multi_profile,multi_profile_aggr_scheme,scale_guide_loss,user_profile_embeddings_files,seed,
            val_metrics.get("NDCG@10", None), val_metrics.get("Recall@10", None), val_metrics.get("epoch", None),
            test_metrics.get("NDCG@10", None), test_metrics.get("Recall@10", None), test_metrics.get("epoch", None)
        ])
    
    # Create DataFrame
    columns = PARAMS + ['seed',
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
        ]
    
    df = pd.DataFrame(data, columns=columns)
    return df


def apply_seed_info(df):
    df['params'] = df.apply(lambda row: tuple([row[param] for param in PARAMS[1:]]), axis=1)
    params2seeds = {}
    for params, seed in zip(df['params'], df['seed']):
        if params not in params2seeds:
            params2seeds[params] = []
        params2seeds[params].append(seed)
    df['all_seeds'] = df.apply(lambda row: (params2seeds[row['params']]), axis=1)
    return df

In [3]:
len('{weighting_scheme}-{alpha}-{fine_tune_epoch}-{reconstruct_loss}-{reconstruction_layer}-{weight_scale}-{use_down_scale}-{use_upscale}-{multi_profile}-{multi_profile_aggr_scheme}-{scale_guide_loss}-{user_profile_embeddings_files}-{seed}.yaml'.split('-'))

13

In [4]:
MODEL_NAME = 'sasrec'
DATASET = 'kion_en'
EXPERIMENT_NAME = 'KION_NORMAL'
SPLIT_NAME = 'general'
MODE = 'LLM'

local_directory = f"experiments-2_0/results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}" 

seed_folders = ['other_seed']

In [5]:
import os


results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    for file in os.listdir(cur_folder):
        log_data = open_text(f'{cur_folder}/{file}')
        cur_results = parse_file_llm(log_data)
        # if cur_results['simple']['validation_metrics'] == []:
        #     continue
        results.update(cur_results)
    
    
exp_data_with_max_val = {}
for config in results:
    exp_data_with_max_val[config] = find_max(results, config, metric='NDCG@10')
exp_data_with_max_val.keys()

dict_keys(['-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-False-True-False-mean-False-gemma_short_large_umap_single-1.yaml', '-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-False-True-False-mean-False-gemma_short_large_umap_single-256.yaml', '-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-False-True-False-mean-False-gemma_short_large_umap_single-42.yaml', '-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-True-False-False-mean-True-gemma_long_large_umap_single-1.yaml', '-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-True-False-False-mean-True-gemma_long_large_umap_single-256.yaml', '-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-True-False-False-mean-True-gemma_long_large_umap_single-42.yaml', '-2_0/configs/sasrec/kion_en/KION_NORMAL/other_seed/exponential-0.5-8-RMSE-2-0.1-True-False-False-mea

In [6]:
df = create_dataframe(exp_data_with_max_val)
df['weighting_scheme'] = df['weighting_scheme'].apply(lambda x: x if x != 'ttention' else 'attention')
df['weighting_scheme'] = df['weighting_scheme'].apply(lambda x: x if x != 'xponential' else 'exponential')
df

Unnamed: 0,config_file,weighting_scheme,alpha,fine_tune_epoch,reconstruct_loss,reconstruction_layer,weight_scale,use_down_scale,use_upscale,multi_profile,multi_profile_aggr_scheme,scale_guide_loss,user_profile_embeddings_files,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,exponential-0.5-8-RMSE-2-0.1-False-True-False-...,exponential,0.5,8,RMSE,2,0.1,False,True,False,mean,False,gemma_short_large_umap_single,1,0.0993,0.18264,16,0.0589,0.11401,16
1,exponential-0.5-8-RMSE-2-0.1-False-True-False-...,exponential,0.5,8,RMSE,2,0.1,False,True,False,mean,False,gemma_short_large_umap_single,256,0.09982,0.18416,19,0.05898,0.11414,19
2,exponential-0.5-8-RMSE-2-0.1-False-True-False-...,exponential,0.5,8,RMSE,2,0.1,False,True,False,mean,False,gemma_short_large_umap_single,42,0.09928,0.18416,18,0.05939,0.11429,18
3,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,mean,True,gemma_long_large_umap_single,1,0.09918,0.18269,17,0.05913,0.11393,17
4,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,mean,True,gemma_long_large_umap_single,256,0.0994,0.18373,19,0.05971,0.11567,19
5,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,mean,True,gemma_long_large_umap_single,42,0.09962,0.18427,20,0.05932,0.11393,20
6,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,mean,True,gemma_short_large_umap_single,1,0.09967,0.18329,17,0.05904,0.11401,17
7,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,mean,True,gemma_short_large_umap_single,256,0.10006,0.18427,19,0.05767,0.11245,19
8,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,mean,True,gemma_short_large_umap_single,42,0.10021,0.1853,14,0.06063,0.11623,14
9,exponential-0.65-6-RMSE-1-0.1-False-True-False...,exponential,0.65,6,RMSE,1,0.1,False,True,False,mean,True,gemma_short_large_umap_single,1,0.09897,0.184,20,0.05874,0.11299,20


In [7]:
df['weighting_scheme'].value_counts()

weighting_scheme
exponential    30
mean           30
Name: count, dtype: int64

In [8]:
df = apply_seed_info(df)
df.sort_values(by='val_NDCG@10', ascending=False, inplace=True)
df

Unnamed: 0,config_file,weighting_scheme,alpha,fine_tune_epoch,reconstruct_loss,reconstruction_layer,weight_scale,use_down_scale,use_upscale,multi_profile,...,user_profile_embeddings_files,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
21,exponential-0.65-8-RMSE-2-0.1-False-True-False...,exponential,0.65,8,RMSE,2,0.1,False,True,False,...,gemma_short_large_umap_single,1,0.10036,0.1853,20,0.05808,0.11169,20,"(exponential, 0.65, 8, RMSE, 2, 0.1, False, Tr...","[1, 256, 42]"
46,mean-0.65-8-RMSE-2-0.1-True-False-False-mean-T...,mean,0.65,8,RMSE,2,0.1,True,False,False,...,gemma_short_large_umap_single,256,0.10024,0.18623,18,0.06011,0.11531,18,"(mean, 0.65, 8, RMSE, 2, 0.1, True, False, Fal...","[1, 256, 42]"
47,mean-0.65-8-RMSE-2-0.1-True-False-False-mean-T...,mean,0.65,8,RMSE,2,0.1,True,False,False,...,gemma_short_large_umap_single,42,0.10022,0.18481,16,0.05982,0.11424,16,"(mean, 0.65, 8, RMSE, 2, 0.1, True, False, Fal...","[1, 256, 42]"
8,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,...,gemma_short_large_umap_single,42,0.10021,0.1853,14,0.06063,0.11623,14,"(exponential, 0.5, 8, RMSE, 2, 0.1, True, Fals...","[1, 256, 42]"
41,mean-0.65-6-MSE-2-0.1-False-True-False-mean-Fa...,mean,0.65,6,MSE,2,0.1,False,True,False,...,gemma_long_large_umap_single,42,0.10009,0.18422,20,0.05994,0.11508,20,"(mean, 0.65, 6, MSE, 2, 0.1, False, True, Fals...","[1, 256, 42]"
7,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.5,8,RMSE,2,0.1,True,False,False,...,gemma_short_large_umap_single,256,0.10006,0.18427,19,0.05767,0.11245,19,"(exponential, 0.5, 8, RMSE, 2, 0.1, True, Fals...","[1, 256, 42]"
37,mean-0.5-8-RMSE-1-0.1-False-True-False-mean-Fa...,mean,0.5,8,RMSE,1,0.1,False,True,False,...,gemma_short_large_umap_single,256,0.10002,0.18367,19,0.05974,0.11475,19,"(mean, 0.5, 8, RMSE, 1, 0.1, False, True, Fals...","[1, 256, 42]"
32,mean-0.8-6-RMSE-1-0.1-False-True-False-mean-Fa...,mean,0.8,6,RMSE,1,0.1,False,True,False,...,gemma_short_large_umap_single,42,0.09992,0.18389,20,0.06029,0.11619,20,"(mean, 0.8, 6, RMSE, 1, 0.1, False, True, Fals...","[1, 256, 42]"
45,mean-0.65-8-RMSE-2-0.1-True-False-False-mean-T...,mean,0.65,8,RMSE,2,0.1,True,False,False,...,gemma_short_large_umap_single,1,0.09992,0.18389,17,0.05907,0.11408,17,"(mean, 0.65, 8, RMSE, 2, 0.1, True, False, Fal...","[1, 256, 42]"
54,mean-0.5-4-RMSE-2-0.1-False-True-False-mean-Fa...,mean,0.5,4,RMSE,2,0.1,False,True,False,...,gemma_long_large_umap_single,1,0.09991,0.18476,17,0.05846,0.11334,17,"(mean, 0.5, 4, RMSE, 2, 0.1, False, True, Fals...","[1, 256, 42]"


In [22]:
df.to_csv('kion_llm_all_runs.csv')

In [9]:
def get_stats_seeds(df_all):
    def aggregate_mean(series):
        first_elem = list(series)[0]
        if isinstance(first_elem, str) or isinstance(first_elem, bool):
            return first_elem
        return series.mean()
        # print(df)
        # all_cols = set(df.columns)
        # df_mean = df.mean(numeric_only=True)
        # rest_cols = all_cols - set(df_mean.columns)
        # for col in rest_cols:
        #     df_mean[col] = list(df[col])[0]
        # return df_mean
    
    def aggregate_std(series):
        # print(series)
        first_elem = list(series)[0]
        if isinstance(first_elem, str) or isinstance(first_elem, bool):
            return first_elem
        if len(series) == 1:
            return 0
        return series.std()
        # all_cols = set(df.columns)
        # df_mean = df.std(numeric_only=True)
        # rest_cols = all_cols - set(df_mean.columns)
        # for col in rest_cols:
        #     df_mean[col] = list(df[col])[0]
        # return df_mean
    
    
    metric_cols = ["val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10"]
        
    grouped_df = df_all.drop(['config_file', 'all_seeds', 'seed'], axis=1).groupby('params')
    df_mean = grouped_df.agg(aggregate_mean)
    df_std = grouped_df.agg(aggregate_std)
    for col in metric_cols:
        df_mean[col + '__std'] = df_std[col]
    return df_mean


def reorder_cols(df):
    order = ['val_epoch', 'val_epoch__std'] + PARAMS[1:] + ['val_NDCG@10', 'val_NDCG@10__std', 'val_Recall@10', 'val_Recall@10__std', 'test_NDCG@10', 'test_NDCG@10__std', 'test_Recall@10', 'test_Recall@10__std']
    return df[order]


df_all = df[df['all_seeds'].apply(len) == 3]
df_final = get_stats_seeds(df_all)
df_final = reorder_cols(df_final)
df_final.sort_values(by='val_NDCG@10', ascending=False, inplace=True)
df_final

Unnamed: 0_level_0,val_epoch,val_epoch__std,weighting_scheme,alpha,fine_tune_epoch,reconstruct_loss,reconstruction_layer,weight_scale,use_down_scale,use_upscale,...,scale_guide_loss,user_profile_embeddings_files,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(mean, 0.65, 8, RMSE, 2, 0.1, True, False, False, mean, True, gemma_short_large_umap_single)",17.0,1.0,mean,0.65,8.0,RMSE,2.0,0.1,True,False,...,True,gemma_short_large_umap_single,0.100127,0.000179,0.184977,0.001179,0.059667,0.000537,0.114543,0.000669
"(exponential, 0.5, 8, RMSE, 2, 0.1, True, False, False, mean, True, gemma_short_large_umap_single)",16.666667,2.516611,exponential,0.5,8.0,RMSE,2.0,0.1,True,False,...,True,gemma_short_large_umap_single,0.09998,0.000279,0.184287,0.001005,0.059113,0.001481,0.11423,0.0019
"(exponential, 0.65, 8, RMSE, 2, 0.1, False, True, False, mean, True, gemma_short_large_umap_single)",18.666667,1.527525,exponential,0.65,8.0,RMSE,2.0,0.1,False,True,...,True,gemma_short_large_umap_single,0.099813,0.000577,0.184343,0.001038,0.05883,0.000722,0.113447,0.001623
"(mean, 0.5, 4, RMSE, 2, 0.1, False, True, False, mean, False, gemma_long_large_umap_single)",16.333333,2.081666,mean,0.5,4.0,RMSE,2.0,0.1,False,True,...,False,gemma_long_large_umap_single,0.099757,0.000166,0.183563,0.001666,0.05895,0.000691,0.114077,0.001165
"(mean, 0.5, 8, RMSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",19.666667,0.57735,mean,0.5,8.0,RMSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.09971,0.000269,0.183923,0.000537,0.059483,0.000645,0.11405,0.001899
"(mean, 0.8, 6, MSE, 2, 0.1, False, True, False, mean, True, gemma_long_large_umap_single)",18.666667,1.527525,mean,0.8,6.0,MSE,2.0,0.1,False,True,...,True,gemma_long_large_umap_single,0.099617,0.000244,0.18302,0.000358,0.05919,0.000699,0.114793,0.001115
"(exponential, 0.5, 8, RMSE, 2, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",17.666667,1.527525,exponential,0.5,8.0,RMSE,2.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.099467,0.000306,0.183653,0.000878,0.05909,0.000263,0.114147,0.00014
"(mean, 0.65, 6, MSE, 2, 0.1, False, True, False, mean, True, gemma_short_large_umap_single)",18.333333,1.527525,mean,0.65,6.0,MSE,2.0,0.1,False,True,...,True,gemma_short_large_umap_single,0.099443,0.000276,0.183477,0.000977,0.059533,0.000792,0.114743,0.001046
"(mean, 0.8, 6, RMSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",19.333333,0.57735,mean,0.8,6.0,RMSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.099443,0.000418,0.183163,0.000629,0.059673,0.000534,0.115163,0.000903
"(exponential, 0.8, 8, MSE, 2, 0.1, False, True, False, mean, True, gemma_long_large_umap_single)",18.333333,1.527525,exponential,0.8,8.0,MSE,2.0,0.1,False,True,...,True,gemma_long_large_umap_single,0.09944,0.000331,0.18385,0.001101,0.059113,0.00061,0.11364,0.000637


In [12]:
df_final.to_csv('kion_llm_all_runs_NEW.csv')

In [13]:
df_final['weighting_scheme'].value_counts()

weighting_scheme
mean           23
exponential    17
Name: count, dtype: int64

In [14]:
df_final[df_final['scale_guide_loss'] == 'False']

Unnamed: 0_level_0,val_epoch,val_epoch__std,weighting_scheme,alpha,fine_tune_epoch,reconstruct_loss,reconstruction_layer,weight_scale,use_down_scale,use_upscale,...,scale_guide_loss,user_profile_embeddings_files,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"(mean, 0.5, 6, MSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",14.0,0.0,mean,0.5,6.0,MSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.099597,0.000808,0.1834,0.000946,0.059867,0.000621,0.1152,0.000375
"(mean, 0.5, 4, RMSE, 2, 0.1, False, True, False, mean, False, gemma_long_large_umap_single)",9.333333,1.527525,mean,0.5,4.0,RMSE,2.0,0.1,False,True,...,False,gemma_long_large_umap_single,0.099523,7.2e-05,0.182587,0.000343,0.059717,0.000998,0.114783,0.001294
"(mean, 0.5, 6, MSE, 1, 0.1, False, True, False, mean, False, gemma_long_large_umap_single)",14.0,0.0,mean,0.5,6.0,MSE,1.0,0.1,False,True,...,False,gemma_long_large_umap_single,0.099293,0.000384,0.18376,0.000537,0.059667,0.000748,0.114967,0.000718
"(exponential, 0.8, 8, RMSE, 2, 0.1, False, True, False, mean, False, gemma_long_large_umap_single)",12.666667,1.527525,exponential,0.8,8.0,RMSE,2.0,0.1,False,True,...,False,gemma_long_large_umap_single,0.099367,0.000376,0.183277,0.000603,0.05964,0.000927,0.114837,0.001643
"(exponential, 0.8, 8, RMSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",13.0,1.732051,exponential,0.8,8.0,RMSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.099387,0.000116,0.183037,0.000872,0.059383,0.000947,0.11402,0.000931
"(exponential, 0.5, 8, RMSE, 2, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",13.666667,0.57735,exponential,0.5,8.0,RMSE,2.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.099817,0.000266,0.183417,0.001399,0.059373,0.001048,0.114633,0.002053
"(exponential, 0.8, 8, MSE, 1, 0.1, False, True, False, mean, False, gemma_long_large_umap_single)",14.666667,1.154701,exponential,0.8,8.0,MSE,1.0,0.1,False,True,...,False,gemma_long_large_umap_single,0.099327,0.000539,0.183563,0.000333,0.059353,0.000945,0.114333,0.000127
"(mean, 0.65, 8, RMSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",14.0,0.0,mean,0.65,8.0,RMSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.099287,0.000652,0.183017,0.001596,0.05933,0.000872,0.11389,0.000656
"(mean, 0.8, 6, RMSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",14.0,0.0,mean,0.8,6.0,RMSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.09929,0.000521,0.183927,0.000652,0.059287,0.001598,0.114723,0.00165
"(exponential, 0.65, 8, MSE, 1, 0.1, False, True, False, mean, False, gemma_short_large_umap_single)",14.0,0.0,exponential,0.65,8.0,MSE,1.0,0.1,False,True,...,False,gemma_short_large_umap_single,0.098963,0.0001,0.183003,0.000907,0.05925,0.00099,0.113957,0.001367


In [20]:
df_final2 = df_final.drop([('mean', 0.65, 8, 'RMSE', 2, 0.1, 'True', 'False', 'False', 'mean', 'True', 'gemma_short_large_umap_single'), 
               ('mean', 0.65, 6, 'MSE', 2, 0.1, 'False', 'True', 'False', 'mean', 'False', 'gemma_long_large_umap_single')], axis=0, inplace=False)

df_final2.to_csv('beauty_llm_all_runs_NORMAL.csv')

In [16]:
df_final.index

Index([       ('mean', 0.65, 8, 'RMSE', 2, 0.1, 'True', 'False', 'False', 'mean', 'True', 'gemma_short_large_umap_single'),
        ('exponential', 0.5, 8, 'RMSE', 2, 0.1, 'True', 'False', 'False', 'mean', 'True', 'gemma_short_large_umap_single'),
              ('mean', 0.8, 6, 'RMSE', 1, 0.1, 'False', 'True', 'False', 'mean', 'False', 'gemma_short_large_umap_single'),
       ('exponential', 0.65, 6, 'RMSE', 1, 0.1, 'False', 'True', 'False', 'mean', 'True', 'gemma_short_large_umap_single'),
         ('exponential', 0.5, 8, 'RMSE', 2, 0.1, 'True', 'False', 'False', 'mean', 'True', 'gemma_long_large_umap_single'),
               ('mean', 0.8, 8, 'MSE', 1, 0.1, 'False', 'True', 'False', 'mean', 'False', 'gemma_short_large_umap_single'),
               ('mean', 0.5, 4, 'RMSE', 2, 0.1, 'False', 'True', 'False', 'mean', 'False', 'gemma_long_large_umap_single'),
       ('exponential', 0.65, 8, 'RMSE', 1, 0.1, 'False', 'True', 'False', 'mean', 'True', 'gemma_short_large_umap_single'),
        

In [40]:
df['params'].value_counts()

params
(mean, 0.7, 12, MSE, 1, 0.1, True, False, False, mean, True, gemma_short_large_umap_single)         3
(mean, 0.7, 12, MSE, 2, 0.1, True, False, False, mean, False, gemma_long_large_umap_single)         3
(mean, 0.6, 6, RMSE, 2, 0.1, True, False, False, mean, True, gemma_long_large_umap_single)          3
(attention, 0.5, 6, RMSE, 1, 0.1, True, False, False, mean, False, gemma_long_large_umap_single)    3
(mean, 0.7, 12, MSE, 2, 0.1, True, False, False, mean, False, gemma_long_large_single)              3
                                                                                                   ..
(mean, 0.6, 12, MSE, 1, 0.1, True, False, False, mean, False, gemma_long_large_umap_single)         1
(mean, 0.7, 6, RMSE, 1, 0.1, False, False, False, mean, True, gemma_long_large_umap_single)         1
(attention, 0.5, 6, MSE, 2, 0.1, True, False, False, mean, False, gemma_long_large_single)          1
(attention, 0.6, 6, MSE, 2, 0.1, False, False, False, mean, True, gemma_lon

In [18]:
df['']

Unnamed: 0,config_file,weighting_scheme,alpha,fine_tune_epoch,reconstruct_loss,reconstruction_layer,weight_scale,use_down_scale,use_upscale,multi_profile,...,user_profile_embeddings_files,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
663,mean-0.8-6-RMSE-1-0.1-False-True-False-mean-Fa...,mean,0.80,6,RMSE,1,0.1,False,True,False,...,gemma_short_large_umap_single,42,0.09987,0.18422,14,0.06111,0.11661,14,"(mean, 0.8, 6, RMSE, 1, 0.1, False, True, Fals...",[42]
837,exponential-0.8-8-MSE-2-0.1-False-True-False-m...,exponential,0.80,8,MSE,2,0.1,False,True,False,...,gemma_long_large_umap_single,42,0.09898,0.18188,14,0.06083,0.11617,14,"(exponential, 0.8, 8, MSE, 2, 0.1, False, True...",[42]
779,exponential-0.65-8-RMSE-2-0.1-False-True-False...,exponential,0.65,8,RMSE,2,0.1,False,True,False,...,gemma_short_large_umap_single,42,0.09998,0.18356,14,0.06072,0.11579,14,"(exponential, 0.65, 8, RMSE, 2, 0.1, False, Tr...",[42]
649,mean-0.8-6-MSE-2-0.1-False-True-False-mean-Tru...,mean,0.80,6,MSE,2,0.1,False,True,False,...,gemma_long_large_umap_single,42,0.09983,0.18389,14,0.06072,0.11615,14,"(mean, 0.8, 6, MSE, 2, 0.1, False, True, False...",[42]
467,exponential-0.5-8-RMSE-2-0.1-True-False-False-...,exponential,0.50,8,RMSE,2,0.1,True,False,False,...,gemma_short_large_umap_single,42,0.09985,0.18454,14,0.06072,0.11703,14,"(exponential, 0.5, 8, RMSE, 2, 0.1, True, Fals...",[42]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
876,exponential-0.8-8-RMSE-2-0.1-True-False-False-...,exponential,0.80,8,RMSE,2,0.1,True,False,False,...,gemma_long_large_single,42,0.09951,0.18465,19,0.05589,0.10964,19,"(exponential, 0.8, 8, RMSE, 2, 0.1, True, Fals...",[42]
792,exponential-0.8-4-MSE-1-0.1-False-True-False-m...,exponential,0.80,4,MSE,1,0.1,False,True,False,...,gemma_long_large_single,42,0.09851,0.18318,11,0.05586,0.10847,11,"(exponential, 0.8, 4, MSE, 1, 0.1, False, True...",[42]
25,attention-0.65-8-MSE-1-0.1-False-True-False-me...,attention,0.65,8,MSE,1,0.1,False,True,False,...,gemma_long_large_umap_single,42,0.09773,0.18112,18,0.05583,0.10835,18,"(attention, 0.65, 8, MSE, 1, 0.1, False, True,...",[42]
274,attention-0.8-6-MSE-2-0.1-True-False-False-mea...,attention,0.80,6,MSE,2,0.1,True,False,False,...,gemma_short_large_single,42,0.09796,0.18210,7,0.05579,0.10824,7,"(attention, 0.8, 6, MSE, 2, 0.1, True, False, ...",[42]


In [17]:
df[PARAMS[1:]].to_json('best_kion.json', index=False, orient="records")

In [42]:
df_final[PARAMS[1:]].to_json('best_beauty_3seeds.json', index=False, orient="records")

In [17]:
results.keys()

dict_keys(['-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-False-False-False-mean-False-gemma_long_large_umap_single-42.yaml', '-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-False-False-False-mean-False-gemma_short_large_umap_single-42.yaml', '-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-False-False-False-mean-True-gemma_long_large_umap_single-42.yaml', '-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-False-False-False-mean-True-gemma_short_large_umap_single-42.yaml', '-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-True-False-False-mean-False-gemma_long_large_single-42.yaml', '-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-True-False-False-mean-False-gemma_long_large_umap_single-42.yaml', '-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-True-False-Fal

In [23]:
results['-2_0/configs/sasrec/beauty/BEAUTY_INITIAL/single_seed/attention-0.7-12-MSE-2-0.1-False-False-False-mean-False-gemma_long_large_umap_single-42.yaml']

{'validation_metrics': [{'epoch': 1,
   'loss': 10.828,
   'Recall@5': 0.004162330905306972,
   'NDCG@5': 0.0023390187078764875,
   'Recall@10': 0.006243496357960458,
   'NDCG@10': 0.00302255539974203,
   'Recall@20': 0.010851791288836034,
   'NDCG@20': 0.004198139363666408,
   'loss_recsys': np.float64(10.347532872800473),
   'loss_guide': np.float64(0.0)},
  {'epoch': 2,
   'loss': 5.2138,
   'Recall@5': 0.004013676230117437,
   'NDCG@5': 0.0024368647296024288,
   'Recall@10': 0.008324661810613945,
   'NDCG@10': 0.003826736836797046,
   'Recall@20': 0.014122194143005798,
   'NDCG@20': 0.0052561474396673165,
   'loss_recsys': np.float64(10.325713298938892),
   'loss_guide': np.float64(0.0)},
  {'epoch': 3,
   'loss': 4.8,
   'Recall@5': 0.006094841682770923,
   'NDCG@5': 0.003388410317555781,
   'Recall@10': 0.009216589861751152,
   'NDCG@10': 0.004382003442624782,
   'Recall@20': 0.015014122194143005,
   'NDCG@20': 0.005846106928100518,
   'loss_recsys': np.float64(10.315859370761448

In [22]:
df['config_file']

426    mean-0.7-12-MSE-1-0.1-True-False-False-mean-Tr...
11     attention-0.7-12-MSE-2-0.1-True-False-False-me...
88     mean-0.7-6-RMSE-1-0.1-True-False-False-mean-Tr...
283    mean-0.5-6-MSE-1-0.1-True-False-False-mean-Tru...
373    attention-0.5-12-MSE-1-0.1-True-False-False-me...
                             ...                        
73     mean-0.7-6-MSE-2-0.1-True-False-False-mean-Tru...
257    mean-0.7-12-MSE-2-0.1-True-False-False-mean-Tr...
234    mean-0.6-6-MSE-2-0.1-True-False-False-mean-Tru...
259    mean-0.7-12-MSE-2-0.1-True-False-False-mean-Tr...
2      attention-0.7-12-MSE-2-0.1-False-False-False-m...
Name: config_file, Length: 558, dtype: object

In [1]:
PARAMS

NameError: name 'PARAMS' is not defined

In [47]:
df_final.to_csv('beauty_llm_runs.csv')

In [11]:
import numpy as np
np.corrcoef(df_final['test_NDCG@10'].values, df_final['val_NDCG@10'].values)

array([[ 1.        , -0.11176954],
       [-0.11176954,  1.        ]])