In [3]:
# Move the working directory three levels up; the `src...` import and the relative
# `experiments-2_0/...` / `results/...` paths used below are resolved against it.
# NOTE: the path is relative, so re-running this cell moves up another three levels.
%cd ../../../

/Users/nseverin/MyData/Projects/Science/LLM/sasrec-bert4rec-recsys23


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


In [123]:
import os
from itertools import product
from copy import deepcopy
import os

from src.experiment_tools.utils import read_text, save_text, read_json


def generate_config_params():
    """Expand the global ``CONFIG_PARAMS`` grid into a list of parameter dicts.

    Each key of ``CONFIG_PARAMS`` maps to a list of candidate values; the
    Cartesian product of those lists is taken in key order. When the global
    ``MODE`` equals ``'BASELINE'`` every combination is kept. In any other mode
    a combination is dropped when one of the following holds:

    * ``use_down_scale`` and ``use_upscale`` are both truthy;
    * both equal ``False`` and ``user_profile_embeddings_files`` is not ``None``
      and contains ``'-256.json'`` or ``'-128.json'``;
    * ``multi_profile_aggr_scheme`` is ``'attention'`` while ``multi_profile``
      and ``use_upscale`` are both truthy;
    * ``multi_profile`` is falsy while ``multi_profile_aggr_scheme`` is not
      ``'mean'``.

    Returns:
        list[dict]: the surviving combinations, in product order.
    """
    def is_excluded(combo):
        # Down-scaling and up-scaling together is never generated.
        if combo['use_down_scale'] and combo['use_upscale']:
            return True
        # `== False` (rather than `not`) is kept on purpose: a value such as
        # None does not count as "scaling disabled" here.
        if combo['use_down_scale'] == False and combo['use_upscale'] == False:
            profile_file = combo['user_profile_embeddings_files']
            if profile_file is not None and any(
                    marker in profile_file for marker in ('-256.json', '-128.json')):
                return True
        if (combo['multi_profile_aggr_scheme'] == 'attention'
                and combo['multi_profile']
                and combo['use_upscale']):
            return True
        # Without multi-profile only the 'mean' aggregation scheme is generated.
        return not combo['multi_profile'] and combo['multi_profile_aggr_scheme'] != 'mean'

    grid = deepcopy(CONFIG_PARAMS)
    names, candidates = zip(*grid.items())

    kept = []
    for picked in product(*candidates):
        combo = dict(zip(names, picked))
        if MODE == 'BASELINE' or not is_excluded(combo):
            kept.append(combo)
    return kept


def create_list_of_lists(k, N):
    """Split the range of ``N`` items into consecutive ``[start, end]`` pairs.

    Starts are ``0, k, 2k, ...`` for as long as they are below ``N``; each end
    is ``start + k - 1``, capped at ``N``.

    Args:
        k: Chunk size; must be positive.
        N: Number of items to cover.

    Returns:
        list[list]: two-element ``[start, end]`` lists (empty when ``N <= 0``).

    Raises:
        ValueError: If ``k`` is not positive (the loop below would otherwise
            never terminate).

    NOTE(review): the cap is ``N`` rather than ``N - 1``, so whenever ``N`` is
    not a multiple of ``k`` the last pair ends one past the final 0-based index,
    e.g. ``create_list_of_lists(2, 5)`` gives ``[[0, 1], [2, 3], [4, 5]]``.
    Left unchanged because the bash template consuming ``{start}``/``{end}``
    is not visible here; confirm against it.
    """
    if k <= 0:
        raise ValueError(f'k must be positive, got {k!r}')

    result = []
    start = 0
    while start < N:
        result.append([start, min(start + k - 1, N)])
        start += k
    return result


def create_config_files(generated_configs):
    """Render the config template once per parameter combination and save it.

    The text at ``CONFIG_TEMPLATE_PATH`` is read once and the ``{field}``
    placeholders of ``PREDEFINED_PARAMS`` are filled once; every combination in
    ``generated_configs`` is then filled on top of that and written to
    ``SAVE_CONFIG_FILE.format(**config)``.

    Args:
        generated_configs: Iterable of dicts mapping placeholder names to values.

    Raises:
        ValueError: If a field name does not occur anywhere in the template
            text (previously a bare ``Exception`` after printing the field).

    Warns:
        UserWarning: Once, after all files are written, if several combinations
            were formatted to the same save path. Later files overwrite earlier
            ones, which happens when the file-name template does not contain
            every varying parameter.
    """
    import warnings

    def fill_by_config_dict(config_base, config_vals):
        for field, cur_val in config_vals.items():
            # The check looks for the bare field name anywhere in the text,
            # not specifically for the `{field}` placeholder.
            if field not in config_base:
                raise ValueError(f'config field {field!r} does not occur in the template text')
            config_base = config_base.replace('{' + field + '}', f'{cur_val}')
        return config_base

    # Reading the template and filling the predefined values do not depend on
    # the combination, so they are done once rather than on every iteration.
    prefilled_template = fill_by_config_dict(read_text(CONFIG_TEMPLATE_PATH), PREDEFINED_PARAMS)

    written_paths = set()
    overwritten = 0
    for config in generated_configs:
        config_text = fill_by_config_dict(prefilled_template, config)
        save_path = SAVE_CONFIG_FILE.format(**config)
        if save_path in written_paths:
            overwritten += 1
        written_paths.add(save_path)
        save_text(save_path, config_text)

    if overwritten:
        warnings.warn(
            f'{overwritten} config(s) were written to a path already used by an earlier '
            f'config in this call; only {len(written_paths)} distinct file(s) exist. '
            f'The file-name template {SAVE_CONFIG_FILE!r} probably lacks a placeholder '
            f'for a parameter that varies between configs.'
        )


def create_bash(start, end):
    """Render the bash template for one ``start``-``end`` chunk and save it.

    The text at ``BASH_TEMPLATE_PATH`` gets its ``{start}``, ``{end}`` and
    ``{folder_path}`` placeholders replaced (the latter by ``CONFIG_PATH``) and
    is written to ``<BASH_PATH>/<start>-<end>.sh``.

    Args:
        start: Value substituted for ``{start}``; also used in the file name.
        end: Value substituted for ``{end}``; also used in the file name.
    """
    script = read_text(BASH_TEMPLATE_PATH)

    # Applied in this order, one placeholder at a time.
    substitutions = (
        ('{start}', str(start)),
        ('{end}', str(end)),
        ('{folder_path}', CONFIG_PATH),
    )
    for placeholder, value in substitutions:
        script = script.replace(placeholder, value)

    target_path = os.path.join(BASH_PATH, f'{start}-{end}.sh')
    save_text(target_path, script)


def create_bash_files(generated_confs, n_confs_per_bash):
    """Create one bash script per chunk of generated configs.

    The chunk boundaries come from ``create_list_of_lists``; the number of
    chunks is printed and ``create_bash`` is called for every boundary pair.

    Args:
        generated_confs: Sized collection of configs; only its length is used.
        n_confs_per_bash: Chunk size handed to ``create_list_of_lists``.
    """
    chunk_bounds = create_list_of_lists(n_confs_per_bash, len(generated_confs))
    print(len(chunk_bounds))
    for bounds in chunk_bounds:
        first, last = bounds
        create_bash(first, last)


def create_sbatch_files():
    """Write one sbatch file into ``SBATCH_PATH`` for every entry of ``BASH_PATH``.

    The text at ``SBATCH_TEMPLATE_PATH`` has its ``{model_name}``,
    ``{dataset}``, ``{time_to_take}``, ``{artefact_path}``,
    ``{experiment_name}`` and ``{seed_folder}`` placeholders filled from the
    module-level constants; ``{bash_name}`` is filled with the name of each
    directory entry. Entries whose name starts with ``.`` are skipped.

    A trailing ``.sh`` extension is swapped for ``.sbatch``; any other name is
    kept unchanged. Only the extension is touched, so a name such as
    ``run.shared.sh`` becomes ``run.shared.sbatch`` (the previous
    ``str.replace`` rewrote every ``.sh`` occurrence in the name).
    """
    # Everything but the script name is identical for all files, so those
    # placeholders are substituted once, outside the loop.
    common_sbatch = (read_text(SBATCH_TEMPLATE_PATH)
                     .replace('{model_name}', MODEL_NAME)
                     .replace('{dataset}', DATASET)
                     .replace('{time_to_take}', TIME_TO_TAKE)
                     .replace('{artefact_path}', ARTEFACT_PATH)
                     .replace('{experiment_name}', EXPERIMENT_NAME)
                     .replace('{seed_folder}', SEED_FOLDER))

    for file in os.listdir(BASH_PATH):
        # Skip hidden entries before doing any work for them.
        if file.startswith('.'):
            continue
        cur_sbatch = common_sbatch.replace('{bash_name}', file)
        stem, extension = os.path.splitext(file)
        sbatch_name = stem + '.sbatch' if extension == '.sh' else file
        save_text(os.path.join(SBATCH_PATH, sbatch_name), cur_sbatch)


# Templates shared by every experiment. The paths are relative, so they are
# resolved against the current working directory.
SBATCH_TEMPLATE_PATH = 'experiments-2_0/sbatch/BASE.sbatch'  # read by create_sbatch_files
BASH_TEMPLATE_PATH = 'experiments-2_0/bash/BASE.sh'  # read by create_bash
# Folder that holds one `<DATASET>.json` file mapping per dataset.
FILE_MAPPING_PATH = 'experiments-2_0/configs/file_mapping'

In [124]:
# BASELINE MODE
# NOTE: this cell and the "LLM MODE" cell assign the same global names; whichever
# of the two was executed last decides what the following cells generate.

MODEL_NAME = 'sasrec'
DATASET = 'beauty'
EXPERIMENT_NAME = 'baseline'
SEED_FOLDER = 'other_seed'
SPLIT_NAME = 'general'  # key into the dataset's file mapping
MODE = 'BASELINE'  # generate_config_params keeps every combination in this mode

# No profile embeddings: the 'user_profile_embeddings_files' grid entry becomes [None].
PROFILE_PATH_NAMES = None

# Values substituted into every generated config (not part of the grid).
PREDEFINED_PARAMS = {'epochs': 30}

# Grid: each key lists its candidate values; the Cartesian product is generated.
CONFIG_PARAMS = {
    'hidden_units': [64, 128, 256],
    'num_blocks': [2,4],
    'num_heads': [2,4],
    'dropout_rate': [0.2, 0.3],
    'learning_rate': [0.001, 0.0005],
    'seed': [42] if SEED_FOLDER == 'single_seed' else [1, 256],
}

CONFIG_TEMPLATE_PATH = f'experiments-2_0/configs/{MODEL_NAME}/template/baseline.yaml'
# File name of each generated config; formatted with the combination's values.
SAVE_FILE_TEMPLATE = '{hidden_units}-{num_blocks}-{num_heads}-{dropout_rate}-{learning_rate}-{seed}.yaml'


In [129]:
# LLM MODE
# NOTE: this cell and the "BASELINE MODE" cell assign the same global names; whichever
# of the two was executed last decides what the following cells generate.

MODEL_NAME = 'sasrec'
DATASET = 'beauty'
EXPERIMENT_NAME = 'BEAUTY_INITIAL'
SEED_FOLDER = 'other_seed'
SPLIT_NAME = 'general'  # key into the dataset's file mapping
MODE = 'LLM'  # any value other than 'BASELINE' enables the filters in generate_config_params

# Keys looked up under 'user_profile_embeddings_files' in the dataset's file mapping;
# each one adds a value to the 'user_profile_embeddings_files' grid entry.
PROFILE_PATH_NAMES = ['gemma_long_small_single', 'gemma_long_large_single', "gemma_long_large-umap_single", 'gemma_short_large-umap_single']

# Values substituted into every generated config (not part of the grid).
PREDEFINED_PARAMS = {
    'epochs': 20,
    'hidden_units': 256,
    'num_blocks': 2,
    'num_heads': 2,
    'dropout_rate': 0.2,
    'learning_rate': 0.0005,
}

# Grid: each key lists its candidate values; the Cartesian product is generated
# and then filtered by generate_config_params.
CONFIG_PARAMS = {
    'weighting_scheme': ['mean', 'attention'],
    'alpha': [0.6, 0.5, 0.7],
    'fine_tune_epoch': [6, 12],
    'reconstruct_loss': ['MSE', 'RMSE'],
    'reconstruction_layer': [1,2],
    'weight_scale': [0.1],
    'use_down_scale': [True, False],
    'use_upscale': [False],
    'multi_profile': [False],
    'multi_profile_aggr_scheme': ['mean'],
    # Alternative (currently disabled) multi-profile grid:
    # 'multi_profile': [True, False],
    # 'multi_profile_aggr_scheme': ['mean', 'attention'],
    'seed': [42] if SEED_FOLDER == 'single_seed' else [1, 256],
}

CONFIG_TEMPLATE_PATH = f'experiments-2_0/configs/{MODEL_NAME}/template/llm.yaml'
# File name of each generated config; formatted with the combination's values.
# NOTE(review): the name uses only eight grid keys. 'user_profile_embeddings_files'
# (one value per PROFILE_PATH_NAMES entry) is not among them, so combinations that
# differ only in the profile file are formatted to the same name and the later one
# overwrites the earlier one. 'use_upscale', 'multi_profile' and
# 'multi_profile_aggr_scheme' are also absent; that is harmless only while each of
# them has a single value.
SAVE_FILE_TEMPLATE = '{weighting_scheme}-{alpha}-{fine_tune_epoch}-{reconstruct_loss}-{reconstruction_layer}-{weight_scale}-{use_down_scale}-{seed}.yaml'

In [130]:
# Output locations; all four share the <model>/<dataset>/<experiment>/<seed folder> layout.
ARTEFACT_PATH = f'results/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}/{SEED_FOLDER}'
CONFIG_PATH = f'experiments-2_0/configs/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}/{SEED_FOLDER}'
BASH_PATH = f'experiments-2_0/bash/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}/{SEED_FOLDER}'
SBATCH_PATH = f'experiments-2_0/sbatch/{MODEL_NAME}/{DATASET}/{EXPERIMENT_NAME}/{SEED_FOLDER}'

# Full path template of a generated config file (still contains the placeholders).
SAVE_CONFIG_FILE = os.path.join(CONFIG_PATH, SAVE_FILE_TEMPLATE)

# Make sure every output folder exists.
for _output_dir in (ARTEFACT_PATH, CONFIG_PATH, BASH_PATH, SBATCH_PATH):
    os.makedirs(_output_dir, exist_ok=True)

FILE_MAPPING = read_json(os.path.join(FILE_MAPPING_PATH, f'{DATASET}.json'))
_split_files = FILE_MAPPING[SPLIT_NAME]

# Dataset file entries of the chosen split become predefined config values.
_dataset_params = {
    _key: _split_files[_key]
    for _key in (
        'profile_train_sequences',
        'finetune_train_sequences',
        'valid_sequences',
        'test_sequences',
        'mappings',
        'counts',
    )
}
_dataset_params['experiment_name'] = EXPERIMENT_NAME
PREDEFINED_PARAMS.update(_dataset_params)

# The profile embedding files form one more grid dimension ([None] without profiles).
if PROFILE_PATH_NAMES is None:
    _profile_files = [None]
else:
    _profile_files = [
        _split_files['user_profile_embeddings_files'][_profile_name]
        for _profile_name in PROFILE_PATH_NAMES
    ]
CONFIG_PARAMS['user_profile_embeddings_files'] = _profile_files

#--------------------

In [131]:
# Scheduling knobs: configs per bash script and the value for the sbatch
# template's {time_to_take} placeholder.
N_CONFS_PER_BASH = 20
TIME_TO_TAKE = '0-10:0'

all_config_params = generate_config_params()

# Report the number of configs and the number of bash chunks they are split into.
n_configs = len(all_config_params)
print('Number of configs:', n_configs)
print(len(create_list_of_lists(N_CONFS_PER_BASH, n_configs)))

Number of configs: 576
29


In [132]:
# Write one config file per combination, then one bash script per chunk of
# N_CONFS_PER_BASH configs.
create_config_files(all_config_params)
create_bash_files(all_config_params, n_confs_per_bash=N_CONFS_PER_BASH)
# Must run after create_bash_files: it lists BASH_PATH to find the scripts.
create_sbatch_files()

29


In [133]:
# Print one `sbatch <path>` command line per file in SBATCH_PATH (in listdir order).
for sbatch_name in os.listdir(SBATCH_PATH):
    print('sbatch', f'{SBATCH_PATH}/{sbatch_name}')

sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/380-399.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/60-79.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/220-239.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/460-479.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/160-179.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/40-59.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/280-299.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/320-339.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/180-199.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/20-39.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/260-279.sbatch
sbatch experiments-2_0/sbatch/sasrec/beauty/BEAUTY_INITIAL/other_seed/0