In [1]:
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [2]:
from collections import defaultdict
import re
import numpy as np
import pandas as pd



def open_text(filename): 
    with open(filename) as f:
        return f.read()


def parse_file_simple(log_data):
    epoch_pattern = re.compile(r"Epoch (\d+)/\d+ | Train Total Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")
    
    # Parse the log
    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    
    for line in log_data.splitlines():        
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))
        
        metrics_match = metrics_pattern.search(line)
        if metrics_match:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results['simple'][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def parse_file_llm(log_data):
    # Regex patterns
    config_pattern = re.compile(r"experiments(.+?\.yaml)")
    epoch_pattern = re.compile(r"Epoch: (\d+)/\d+ \| Train Total Loss: ([\d.]+)")
    metrics_pattern = re.compile(r"(Validation|Test) Metrics: ({.+?})")
    
    # Parse the log
    results = defaultdict(lambda: {"validation_metrics": [], "test_metrics": []})
    current_config = None
        
    for line in log_data.splitlines():
        config_match = config_pattern.search(line)
        if config_match:
            current_config = config_match.group(1)
        
        epoch_match = epoch_pattern.search(line)
        if epoch_match:
            print(epoch_match.group(1))
            epoch_num = int(epoch_match.group(1))
            loss = float(epoch_match.group(2))
        
        metrics_match = metrics_pattern.search(line)
        if metrics_match and current_config:
            metric_type = metrics_match.group(1).lower()  # "validation" or "test"
            metrics = eval(metrics_match.group(2))  # Convert string to dict
            results[current_config][f"{metric_type}_metrics"].append({"epoch": epoch_num, "loss": loss, **metrics})
    return results


def find_max(results, config, metric='NDCG@10'):
    def get_dct_epoch(lst, epoch):
        for x in lst:
            if x['epoch'] == epoch:
                return x
        return lst[-1]
        # raise Exception()
    
    def postprocess(lst):
        res = []
        for dct in lst:
            res.append({
                'NDCG@10': round(dct['NDCG@10'], 5),
                'Recall@10': round(dct['Recall@10'], 5),
                'epoch': dct['epoch']
            })
        return res
    
    best_val_dct = None
    for dct in results[config]['validation_metrics']:
        if best_val_dct is None:
            best_val_dct = dct
        elif dct[metric] > best_val_dct[metric]:
            best_val_dct = dct
    
    best_val_epoch = best_val_dct['epoch']
    
    return {
        'test': postprocess([get_dct_epoch(results[config]['test_metrics'], best_val_epoch)]),
        'validation': postprocess([best_val_dct]),
    }


def create_dataframe(exp_data_with_max_val):
    # List to store extracted experiment data
    data = []
    
    # Parse experiment results
    for config_path, metrics in exp_data_with_max_val.items():
        # Extract config details from the filename
        config_file = config_path.split("/")[-1]  # Get only the filename
        config_name = config_file.replace(".yaml", "").lstrip('sasrec-').split("-")
        
        hidden_units = int(config_name[0])
        num_blocks = int(config_name[1])
        num_heads = int(config_name[2])
        dropout_rate = float(config_name[3])
        learning_rate = float(config_name[4])
        seed = int(config_name[5]) if len(config_name) == 6 else 42
        
        # Extract validation and test metrics
        val_metrics = metrics.get("validation", [{}])[0]
        test_metrics = metrics.get("test", [{}])[0]
    
        # Append extracted values to data list
        data.append([
            config_file, hidden_units, num_blocks, num_heads, dropout_rate, learning_rate, seed,
            val_metrics.get("NDCG@10", None), val_metrics.get("Recall@10", None), val_metrics.get("epoch", None),
            test_metrics.get("NDCG@10", None), test_metrics.get("Recall@10", None), test_metrics.get("epoch", None)
        ])
    
    # Create DataFrame
    columns = [
        "config_file", "hidden_units", "num_blocks", "num_heads", "dropout_rate", "learning_rate", "seed",
        "val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10", "test_epoch"
        ]
    
    df = pd.DataFrame(data, columns=columns)
    return df


PARAMS = ['hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate']



def apply_seed_info(df):
    df['params'] = df.apply(lambda row: (row['hidden_units'],
                                         row['num_blocks'], 
                                         row['num_heads'],
                                         row['dropout_rate'], 
                                         row['learning_rate']), axis=1)
    params2seeds = {}
    for params, seed in zip(df['params'], df['seed']):
        if params not in params2seeds:
            params2seeds[params] = []
        params2seeds[params].append(seed)
    df['all_seeds'] = df.apply(lambda row: (params2seeds[row['params']]), axis=1)
    return df

In [3]:
MODEL_NAME = 'sasrec'
DATASET = 'ml20m'
EXPERIMENT_NAME = 'baseline_cold_users'
SEED_FOLDER = 'single_seed'
SPLIT_NAME = 'cold_users'
MODE = 'BASELINE'

local_directory = f"experiments-2_0/results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}" 

seed_folders = ['single_seed']

In [4]:
import os


results = {}
for seed_folder in seed_folders:
    cur_folder = os.path.join(local_directory, seed_folder)
    for file in os.listdir(cur_folder):
        log_data = open_text(f'{cur_folder}/{file}')
        cur_results = parse_file_llm(log_data)
        # if cur_results['simple']['validation_metrics'] == []:
        #     continue
        results.update(cur_results)
    
    
exp_data_with_max_val = {}
for config in results:
    exp_data_with_max_val[config] = find_max(results, config, metric='NDCG@10')
exp_data_with_max_val.keys()

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104


dict_keys(['-2_0/configs/sasrec/ml20m/baseline_cold_users/single_seed/256-4-4-0.3-0.00075-1.yaml', '-2_0/configs/sasrec/ml20m/baseline_cold_users/single_seed/256-4-4-0.3-0.00075-256.yaml', '-2_0/configs/sasrec/ml20m/baseline_cold_users/single_seed/256-4-4-0.3-0.00075-42.yaml'])

In [5]:
df = create_dataframe(exp_data_with_max_val)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,256-4-4-0.3-0.00075-1.yaml,256,4,4,0.3,0.00075,1,0.18242,0.28891,188,0.05855,0.09867,188
1,256-4-4-0.3-0.00075-256.yaml,256,4,4,0.3,0.00075,256,0.18025,0.28637,185,0.05526,0.09411,185
2,256-4-4-0.3-0.00075-42.yaml,256,4,4,0.3,0.00075,42,0.18004,0.28584,164,0.04989,0.08685,164


In [6]:
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch
0,256-4-4-0.3-0.00075-1.yaml,256,4,4,0.3,0.00075,1,0.18242,0.28891,188,0.05855,0.09867,188
1,256-4-4-0.3-0.00075-256.yaml,256,4,4,0.3,0.00075,256,0.18025,0.28637,185,0.05526,0.09411,185
2,256-4-4-0.3-0.00075-42.yaml,256,4,4,0.3,0.00075,42,0.18004,0.28584,164,0.04989,0.08685,164


In [7]:
df = apply_seed_info(df)
df.sort_values(by='val_NDCG@10', ascending=False, inplace=True)
df

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
0,256-4-4-0.3-0.00075-1.yaml,256,4,4,0.3,0.00075,1,0.18242,0.28891,188,0.05855,0.09867,188,"(256, 4, 4, 0.3, 0.00075)","[1, 256, 42]"
1,256-4-4-0.3-0.00075-256.yaml,256,4,4,0.3,0.00075,256,0.18025,0.28637,185,0.05526,0.09411,185,"(256, 4, 4, 0.3, 0.00075)","[1, 256, 42]"
2,256-4-4-0.3-0.00075-42.yaml,256,4,4,0.3,0.00075,42,0.18004,0.28584,164,0.04989,0.08685,164,"(256, 4, 4, 0.3, 0.00075)","[1, 256, 42]"


In [8]:
df_all = df[df['all_seeds'].apply(len) == 3]

In [9]:
def get_stats_seeds(df_all):
    metric_cols = ["val_NDCG@10", "val_Recall@10", "val_epoch", "test_NDCG@10", "test_Recall@10"]
        
    grouped_df = df_all.drop(['config_file', 'all_seeds', 'seed'], axis=1).groupby('params')
    df_mean = grouped_df.agg('mean')
    df_std = grouped_df.agg('std')
    for col in metric_cols:
        df_mean[col + '__std'] = df_std[col]
    return df_mean


def reorder_cols(df):
    order = ['val_epoch', 'val_epoch__std', 'hidden_units', 'num_blocks', 'num_heads', 'dropout_rate', 'learning_rate', 'val_NDCG@10', 'val_NDCG@10__std', 'val_Recall@10', 'val_Recall@10__std', 'test_NDCG@10', 'test_NDCG@10__std', 'test_Recall@10', 'test_Recall@10__std']
    return df[order]


df_all = df[df['all_seeds'].apply(len) == 3]
df_final = get_stats_seeds(df_all)
df_final = reorder_cols(df_final)
df_final.sort_values(by='val_NDCG@10', ascending=False, inplace=True)
df_final


# 0.045270
# 0.052483

Unnamed: 0_level_0,val_epoch,val_epoch__std,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_NDCG@10__std,val_Recall@10,val_Recall@10__std,test_NDCG@10,test_NDCG@10__std,test_Recall@10,test_Recall@10__std
params,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
"(256, 4, 4, 0.3, 0.00075)",179.0,13.076697,256.0,4.0,4.0,0.3,0.00075,0.180903,0.001318,0.28704,0.001641,0.054567,0.004371,0.09321,0.005961


In [9]:
df_final.drop(index=(128, 4, 8, 0.2, 0.0005))[PARAMS[1:]].to_json('RESULTS_LATE/m2_base.json', index=False, orient="records")

In [9]:
df_final.drop(index=(128, 4, 8, 0.2, 0.0005)).to_csv('m2_baseline_final.csv')

In [29]:
df[df['params'] == (256, 4, 8, 0.2, 0.00075)]

Unnamed: 0,config_file,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,seed,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params,all_seeds
42,256-4-8-0.2-0.00075-42.yaml,256,4,8,0.2,0.00075,42,0.17929,0.2849,91,0.04604,0.07896,91,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"
126,256-4-8-0.2-0.00075-256.yaml,256,4,8,0.2,0.00075,256,0.17889,0.28459,92,0.04084,0.07173,92,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"
125,256-4-8-0.2-0.00075-1.yaml,256,4,8,0.2,0.00075,1,0.17705,0.28454,96,0.03923,0.07051,96,"(256, 4, 8, 0.2, 0.00075)","[42, 1, 256]"


In [25]:
df_final.columns 

Index(['val_epoch', 'val_epoch__std', 'hidden_units', 'num_blocks',
       'num_heads', 'dropout_rate', 'learning_rate', 'val_NDCG@10',
       'val_NDCG@10__std', 'val_Recall@10', 'val_Recall@10__std',
       'test_NDCG@10', 'test_NDCG@10__std', 'test_Recall@10',
       'test_Recall@10__std'],
      dtype='object')

In [18]:
df_final.to_csv('ml20m_baseline_3seed_runs_NEW.csv', index=False)

In [19]:
df.to_csv('ml20m_baseline_all_runs_NEW.csv', index=False)

In [10]:
# df.to_csv('kion_baseline_all_runs.csv', index=False)
df[PARAMS].to_json('m2_baseline_all_runs.json', index=False, orient="records")


In [56]:
grouped_df.get_group((128, 2, 2, 0.2, 0.0005))

Unnamed: 0,hidden_units,num_blocks,num_heads,dropout_rate,learning_rate,val_NDCG@10,val_Recall@10,val_epoch,test_NDCG@10,test_Recall@10,test_epoch,params
519,128,2,2,0.2,0.0005,0.02381,0.04742,11,0.01059,0.02176,11,"(128, 2, 2, 0.2, 0.0005)"
122,128,2,2,0.2,0.0005,0.0238,0.04668,14,0.01046,0.02176,14,"(128, 2, 2, 0.2, 0.0005)"
520,128,2,2,0.2,0.0005,0.02281,0.04608,11,0.00994,0.02027,11,"(128, 2, 2, 0.2, 0.0005)"


In [15]:
np.corrcoef(df_final['test_NDCG@10'].values[:15], df_final['val_NDCG@10'].values[:15])

array([[1.        , 0.27889656],
       [0.27889656, 1.        ]])