# RecListsTest
Notebook that exports the top-k recommendation lists of the trained RecBole models, as well as the corresponding test sets, as .tsv files.

In [1]:
from recbole.quick_start import load_data_and_model
import pandas as pd
from tqdm import tqdm
from recbole.utils.case_study import full_sort_topk
import os
from recsyslearn.accuracy.metrics import NDCG
from recsyslearn.dataset.utils import find_relevant_items
import numpy as np

In [2]:
from recsyslearn.fairness.metrics import FairnessMetric

In [3]:
from recsyslearn.fairness.utils import eff_matrix
from recsyslearn.utils import check_columns_exist

In [4]:
def rec_lists_over_folder(
    folder,
    k=50,
):
    """
    Iterates over the recbole models in the folder passed as argument,
    computes the top-k recommendation list of each model and saves it
    as a .tsv file inside the same folder.

    Only files whose name ends in 'model.pth' are processed. The output
    file has the name of the model file with the '.pth' extension
    replaced by '.tsv'.

    :param str folder: the folder over which to iterate
    :param int k: the length of the recommendation list computed per user
    """
    for model_file in tqdm(os.listdir(folder)):
        # Skip everything that is not a saved model (e.g. previously
        # exported .tsv files or dataloader dumps).
        if not model_file.endswith('model.pth'):
            continue

        print(model_file)
        model_path = os.path.join(folder, model_file)
        model_name = os.path.splitext(model_file)[0]

        # Get the top-k dataframe from the recbole file
        top_k_df = convert_recbole_tensor_to_recsyslearn_dataframe(model_file=model_path, k=k)

        # os.path.join (instead of plain string concatenation) keeps the
        # output inside `folder` even when it has no trailing separator.
        output_path = os.path.join(folder, model_name + '.tsv')
        top_k_df.to_csv(output_path, sep='\t', index=False)

In [5]:
def chunker(seq, size):
    """
    Lazily split a sliceable sequence into consecutive pieces.

    :param seq: any object supporting len() and slicing
    :param int size: maximum length of each piece
    :return: a generator yielding seq[0:size], seq[size:2*size], ...;
             the last piece may be shorter than size
    """
    offsets = range(0, len(seq), size)
    return (seq[start:start + size] for start in offsets)

In [6]:
def convert_recbole_tensor_to_recsyslearn_dataframe(
        model_file,
        k=10,
        device='cpu',
        chunk_size=1000,
        which_set='test',
):
    """
    Function that converts the tensor of shape
    (n_users, k) storing the top k recommendations
    for each user to a dataframe with columns ['user', 'item', 'rank']
    as required by RecSysLearn.

    The steps are
     - load the stored model, as well as the dataset and the dataloaders
     - process the internal user ids in chunks of `chunk_size`
     - get the top k's of each chunk as a pytorch tensor
     - convert the internal user and item ids to the tokens of the original dataset
     - collect the chunks in one dataframe and explode it to one row per
       (user, item) pair


    Args:
        model_file: path to the model saved by recbole
        k: the length of the list to be returned
        device: device passed on to recbole's full_sort_topk
        chunk_size: number of users scored per call to full_sort_topk
        which_set: 'val' scores against the validation dataloader; any
                   other value scores against the test dataloader
    Returns:
        top_k_df: DataFrame with columns ('user', 'item', 'rank'), where each user
        appears in k rows (one row for each item of the k-length list).

    """

    _, model, dataset, _, val_data, test_data = load_data_and_model(
        model_file=model_file,
    )

    eval_data = val_data if which_set == 'val' else test_data

    # Internal user ids start at 1 here; id 0 is presumably recbole's
    # padding user -- TODO confirm.
    uid_series = range(1, dataset.user_num)

    # Collect the per-chunk frames and concatenate once at the end: calling
    # pd.concat inside the loop re-copies all previous rows on every iteration.
    chunk_frames = []
    for user_chunk in tqdm(chunker(uid_series, chunk_size)):
        _, topk_iid_list = full_sort_topk(
            user_chunk, model, eval_data, k=k, device=device,
        )

        u_token_series = dataset.id2token(dataset.uid_field, user_chunk)
        external_item_list = dataset.id2token(dataset.iid_field, topk_iid_list.cpu())

        # One row per user; 'item' and 'rank' hold a whole k-length
        # sequence per cell and are exploded after concatenation.
        chunk_top_k_df = pd.DataFrame()
        chunk_top_k_df.insert(0, 'user', u_token_series)
        chunk_top_k_df.insert(1, 'item', list(external_item_list))
        chunk_top_k_df.insert(2, 'rank', [range(1, k+1)] * len(chunk_top_k_df))

        chunk_frames.append(chunk_top_k_df)

    if not chunk_frames:
        # No users to score: pd.concat would raise on an empty list.
        return pd.DataFrame(columns=['user', 'item', 'rank'])

    top_k_df = pd.concat(chunk_frames)
    return top_k_df.explode(['item', 'rank'])

def convert_recbole_dataset_to_recsyslearn_dataframe(
        model_file,
        which_set='test',
        return_rank=False,
        chunk_size=10000,
):
    """
    TODO
     if the dataset contains ratings, exclude interactions below some threshold.

    Function that converts the interaction history of a recbole dataset
    to a dataframe with columns ['user', 'item', 'rank'] as required
    by RecSysLearn.

    The history is taken from the first two tensors returned by
    dataset.history_item_matrix(): the internal item ids interacted with
    by the user with the id in that row, and the associated values
    (stored in the 'rank' column).

    REMARK!! The row with index 0 is a dummy padding user
    with no user item interaction entries.

    The steps are
     - load the stored model, as well as the dataset and the dataloaders
     - select the dataset corresponding to `which_set`
     - process the internal user ids in chunks of `chunk_size`
     - convert the internal user and item ids to the tokens of the original dataset
     - write each exploded chunk to '<which_set>/chunk<i>.tsv'
     - read the chunks back, drop the '[PAD]' items and concatenate them

    The temporary chunk files are written relative to the current
    working directory and removed again once they have been read.


    Args:
        model_file: path to the model saved by recbole
        which_set: string marking which dataset has to be converted (full, train, val, or test).
                   default is test. Any other value behaves like 'full'.
        return_rank: if True, the 'rank' column is returned as well
        chunk_size: number of users converted per chunk
    Returns:
        history_df: DataFrame with columns ('user', 'item'), plus 'rank' if
                    return_rank is True, with a row for each element of
                    the user-item interaction history.

    """

    _, _, dataset, train_data, valid_data, test_data = load_data_and_model(
        model_file,
    )

    # 'full' (and any unrecognised value) keeps the complete dataset.
    if which_set == 'train':
        dataset = train_data.dataset
    elif which_set == 'val':
        dataset = valid_data.dataset
    elif which_set == 'test':
        dataset = test_data.dataset

    uid_series = range(1, dataset.user_num)

    # Build the history matrices once instead of twice per chunk: the
    # result does not depend on the chunk, and taking items and values
    # from the same call guarantees that they belong together.
    history = dataset.history_item_matrix()
    history_items = history[0]
    history_values = history[1]

    # The chunk files live in a directory named after the set.
    os.makedirs(which_set, exist_ok=True)

    # Remember exactly which files were written so that they are read back
    # in order and stale 'chunk*' files from other runs are ignored.
    chunk_paths = []
    for i, user_chunk in enumerate(tqdm(chunker(uid_series, chunk_size)), start=1):
        u_token_series = dataset.id2token(dataset.uid_field, user_chunk)

        interacted_item_ids = history_items[user_chunk]
        external_item_list = dataset.id2token(dataset.iid_field, interacted_item_ids.cpu())

        interacted_ratings = history_values[user_chunk].numpy()

        chunk_history_df = pd.DataFrame()
        chunk_history_df.insert(0, 'user', u_token_series)
        chunk_history_df.insert(1, 'item', list(external_item_list))
        chunk_history_df.insert(2, 'rank', list(interacted_ratings))

        chunk_history_df = chunk_history_df.explode(['item', 'rank'])

        chunk_path = os.path.join(which_set, 'chunk' + str(i) + '.tsv')
        chunk_history_df.to_csv(chunk_path, sep='\t', index=False)
        chunk_paths.append(chunk_path)

    # NOTE(review): the round trip through .tsv lets pandas re-infer the
    # column dtypes on read; dropping it would presumably change the
    # dtypes of the returned frame -- confirm before removing.
    chunks = []
    for chunk_path in chunk_paths:
        chunk = pd.read_csv(chunk_path, sep='\t')
        os.remove(chunk_path)
        chunk['rank'] = chunk['rank'].astype(int)
        # Rows whose item is the padding token are not real interactions.
        chunk = chunk[chunk['item'] != '[PAD]']
        chunks.append(chunk)

    if chunks:
        history_df = pd.concat(chunks)
    else:
        # No users in the dataset: pd.concat would raise on an empty list.
        history_df = pd.DataFrame(columns=['user', 'item', 'rank'])

    if return_rank:
        return history_df
    return history_df[['user', 'item']]

In [9]:
# Driver: for every (model, dataset, harmful fraction) combination, export
# the test set and the recommendation lists of the saved model as .tsv.

# Fractions encoded in the dataset names as a two-digit percentage
# (0.05 -> 'harm05').
harmful_list = [0.00, 0.01, 0.05, 0.10]

# DATASET_LIST = ['ml-100k']
DATASET_LIST = ['ml-100k', 'lastfm']
#DATASET_LIST = ['lastfm']
BASE_FOLDER = '/home/marta/jku/activity_fair/'
MODEL_LIST = ['ItemKNN']#, 'MultiVAE', 'BPR', ]
# Maps the model name to the sub-folder of 'saved/' holding its checkpoints.
model_dict = {
    'BPR': 'bpr',
    'ItemKNN': 'iknn',
    'MultiVAE': 'vae',
}


for MODEL in MODEL_LIST:
    MODEL_SUBFOLDER = model_dict[MODEL]
    for DATASET in DATASET_LIST:
        for harmful in harmful_list:
            # round() instead of int(): int() truncates, so a product such as
            # 100 * 0.29 == 28.999999999999996 would end up as 28.
            DS_STRING = f'{DATASET}_harm{round(100 * harmful):02d}'

            MODEL_FOLDER = f'{BASE_FOLDER}saved/{MODEL_SUBFOLDER}/{DS_STRING}/'
            MODEL_FILE = MODEL_FOLDER + 'best_model.pth'
            # BASE_FOLDER already ends with a separator; adding another one
            # produced paths containing '//'.
            TEST_DATA_FILE = f'{BASE_FOLDER}test/{DS_STRING}.tsv'
            print(TEST_DATA_FILE)
            TEST_DATASET_RANK = convert_recbole_dataset_to_recsyslearn_dataframe(
                model_file=MODEL_FILE,
                which_set='test',
                return_rank=True,
            )
            print(TEST_DATASET_RANK.info())
            # Make sure the export does not fail on a missing output folder
            # after the conversion has already been done.
            os.makedirs(os.path.dirname(TEST_DATA_FILE), exist_ok=True)
            TEST_DATASET_RANK.to_csv(TEST_DATA_FILE, sep='\t', index=False)
            rec_lists_over_folder(MODEL_FOLDER)

/home/marta/jku/activity_fair//test/ml-100k_harm00.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm00
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm00
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm00/ml-100k_harm00-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00,  5.91it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16504 entries, 0 to 99854
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    16504 non-null  int64 
 1   item    16504 non-null  object
 2   rank    16504 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 515.8+ KB
None


  0%|                                                    | 0/21 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm00
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm00
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm00/ml-100k_harm00-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]

Error for i =  209
Error for i =  329
Error for i =  340
Error for i =  380
Error for i =  675
Error for i =  738
Error for i =  923
Error for i =  935


1it [00:00, 10.81it/s]
100%|███████████████████████████████████████████| 21/21 [00:01<00:00, 20.74it/s]


/home/marta/jku/activity_fair//test/ml-100k_harm01.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm01
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm01
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm01/ml-100k_harm01-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00,  6.32it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16485 entries, 0 to 99835
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    16485 non-null  int64 
 1   item    16485 non-null  object
 2   rank    16485 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 515.2+ KB
None


  0%|                                                    | 0/21 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm01
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm01
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm01/ml-100k_harm01-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]

Error for i =  633



1it [00:00,  7.17it/s]
100%|███████████████████████████████████████████| 21/21 [00:01<00:00, 18.33it/s]


/home/marta/jku/activity_fair//test/ml-100k_harm05.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm05
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm05
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm05/ml-100k_harm05-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00,  5.51it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16367 entries, 0 to 110981
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    16367 non-null  int64 
 1   item    16367 non-null  object
 2   rank    16367 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 511.5+ KB
None


  0%|                                                    | 0/21 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm05
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm05
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm05/ml-100k_harm05-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]

Error for i =  213
Error for i =  249
Error for i =  438
Error for i =  537
Error for i =  711


1it [00:00, 11.75it/s]
100%|███████████████████████████████████████████| 21/21 [00:01<00:00, 20.85it/s]


/home/marta/jku/activity_fair//test/ml-100k_harm10.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm10
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm10
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm10/ml-100k_harm10-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00,  6.16it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16191 entries, 0 to 90740
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    16191 non-null  int64 
 1   item    16191 non-null  object
 2   rank    16191 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 506.0+ KB
None


  0%|                                                    | 0/12 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/ml-100k_harm10
checkpoint_dir = /home/marta/jku/activity_fair/saved/ml-100k_harm10
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
val

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/ml-100k_harm10/ml-100k_harm10-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]

Error for i =  31
Error for i =  282
Error for i =  335



1it [00:00,  6.62it/s]
100%|███████████████████████████████████████████| 12/12 [00:01<00:00, 10.61it/s]


/home/marta/jku/activity_fair//test/lastfm_harm00.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm00
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm00
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm00/lastfm_harm00-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00, 13.73it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 18431 entries, 0 to 35482
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    18431 non-null  int64 
 1   item    18431 non-null  object
 2   rank    18431 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 576.0+ KB
None


  0%|                                                    | 0/12 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm00
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm00
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm00/lastfm_harm00-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]

Error for i =  689



1it [00:00,  6.65it/s]

Error for i =  162
Error for i =  612



2it [00:00,  6.83it/s]
100%|███████████████████████████████████████████| 12/12 [00:04<00:00,  2.94it/s]


/home/marta/jku/activity_fair//test/lastfm_harm01.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm01
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm01
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm01/lastfm_harm01-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00, 11.84it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 18384 entries, 0 to 38814
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    18384 non-null  int64 
 1   item    18384 non-null  object
 2   rank    18384 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 574.5+ KB
None


  0%|                                                    | 0/12 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm01
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm01
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm01/lastfm_harm01-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]
1it [00:00,  6.66it/s]
2it [00:00,  6.92it/s]
100%|███████████████████████████████████████████| 12/12 [00:03<00:00,  3.19it/s]


/home/marta/jku/activity_fair//test/lastfm_harm05.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm05
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm05
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm05/lastfm_harm05-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00, 13.80it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 17744 entries, 0 to 35484
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    17744 non-null  int64 
 1   item    17744 non-null  object
 2   rank    17744 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 554.5+ KB
None


  0%|                                                    | 0/12 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm05
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm05
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm05/lastfm_harm05-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]
1it [00:00,  6.69it/s]
2it [00:00,  7.31it/s]
100%|███████████████████████████████████████████| 12/12 [00:03<00:00,  3.34it/s]


/home/marta/jku/activity_fair//test/lastfm_harm10.tsv


15 Feb 15:27    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm10
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm10
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:27    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm10/lastfm_harm10-for-ItemKNN-dataloader.pth]
15 Feb 15:27    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:27    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]
1it [00:00, 13.72it/s]


<class 'pandas.core.frame.DataFrame'>
Int64Index: 16806 entries, 0 to 36971
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   user    16806 non-null  int64 
 1   item    16806 non-null  object
 2   rank    16806 non-null  int64 
dtypes: int64(2), object(1)
memory usage: 525.2+ KB
None


  0%|                                                    | 0/12 [00:00<?, ?it/s]

best_model.pth


15 Feb 15:28    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /home/marta/jku/activity_fair/datasets/filtered_datasets/lastfm_harm10
checkpoint_dir = /home/marta/jku/activity_fair/saved/lastfm_harm10
show_progress = True
save_dataset = True
dataset_save_path = /home/marta/jku/activity_fair/recbole_dataset/
save_dataloaders = True
dataloaders_save_path = /home/marta/jku/activity_fair/recbole_dataloader/
log_wandb = False

Training Hyper Parameters:
epochs = 300
train_batch_size = 1024
learner = adam
learning_rate = 0.001
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}
repeatable = False
metrics = ['NDCG']
topk = [10]
valid

phases: 3	 datasets: 3


15 Feb 15:28    INFO  Saving split dataloaders into: [/home/marta/jku/activity_fair/saved/lastfm_harm10/lastfm_harm10-for-ItemKNN-dataloader.pth]
15 Feb 15:28    INFO  [Training]: train_batch_size = [1024] train_neg_sample_args: [{'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}]
15 Feb 15:28    INFO  [Evaluation]: eval_batch_size = [1024] eval_args: [{'split': {'RS': [0.6, 0.2, 0.2]}, 'group_by': 'none', 'order': 'RO', 'mode': 'full'}]

0it [00:00, ?it/s]
1it [00:00,  7.13it/s]
2it [00:00,  8.11it/s]
100%|███████████████████████████████████████████| 12/12 [00:03<00:00,  3.60it/s]
