In [None]:
!pip install recbole==0.2.1

In [None]:
!mkdir /content/x && git clone https://github.com/RecoHut-Stanzas/RGRecSys.git /content/x
%cd /content/x

Cloning into '/content/x'...
remote: Enumerating objects: 25, done.[K
remote: Counting objects: 100% (25/25), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 25 (delta 6), reused 24 (delta 5), pack-reused 0[K
Unpacking objects: 100% (25/25), done.
/content/x


In [None]:
!mkdir -p saved

In [None]:
import copy
import pandas as pd
from sklearn.utils import shuffle as sk_shuffle
from collections.abc import Iterable
from collections import Counter
import random
import numpy as np
import torch
from logging import getLogger, shutdown
import importlib
import pprint as pprint
import pickle

from recbole.data.dataset import Dataset
from recbole.data.utils import get_data_loader
from recbole.data import save_split_dataloaders
from recbole.utils.utils import set_color, ModelType, init_seed
from recbole.utils.enum_type import FeatureType
from recbole.utils import ModelType, init_logger, get_model, get_trainer, init_seed, InputType
from recbole.utils.utils import set_color
from recbole.config import Config, EvalSetting
from recbole.sampler import Sampler, RepeatableSampler, KGSampler

In [None]:
# Directory path constants (all written relative to the current working directory).
LOG_DIR = "./log"
RESULTS_DIR = "./results"
SAVED_DIR = "./saved"
# NOTE(review): unlike the three paths above this one has no "./" prefix, so it names a
# hidden ".datasets/" directory -- confirm this is intended and not a typo for "./datasets/".
DATASETS_DIR = ".datasets/"

# Model names grouped by the category named in each constant.
GENERAL_MODELS = ["Pop", "ItemKNN", "BPR", "NeuMF", "ConvNCF", "DMF", "FISM", "NAIS", "SpectralCF", "GCMC",
                  "NGCF", "LightGCN", "DGCF", "LINE", "MultiVAE", "MultiDAE", "MacridVAE", "CDAE", "ENMF",
                  "NNCF", "RaCT", "RecVAE", "EASE", "SLIMElastic"]

CONTEXT_MODELS = ["LR", "FM", "NFM", "DeepFM", "xDeepFM", "AFM", "FFM", "FwFM", "FNN", "PNN", "DSSM", "WideDeep",
                  "DCN", "AutoInt"]

KNOWLEDGE_MODELS = ["CKE", "CFKG", "KTUP", "KGAT", "RippleNet", "MKR", "KGCN", "KGNNLS"]

SEQUENTIAL_MODELS = ["FPMC", "GRU4REC", "NARM", "STAMP", "Caser", "NextItNet", "TransRec", "SASRec", "BERT4Rec",
                     "SRGNN", "GCSAN", "GRU4RecF", "SASRecF", "FDSA", "S3Rec", "GRU4RecKG", "KSR", "FOSSIL",
                     "SHAN", "RepeatNet", "HGN", "HRM", "NPE"]


In [None]:
class RobustnessGymDataset(Dataset):
    def __init__(self, config):
        """Initialise the dataset by delegating entirely to the parent ``Dataset`` constructor.

        Args:
            config: configuration object, forwarded unchanged to the parent class.
        """
        super(RobustnessGymDataset, self).__init__(config)

    def copy(self, new_inter_feat):
        """Return a deep copy of this dataset that carries ``new_inter_feat`` as its interactions.

        Args:
            new_inter_feat: object stored as ``inter_feat`` on the returned copy.

        Returns:
            A deep copy of ``self`` whose ``inter_feat`` attribute is ``new_inter_feat``.
        """
        duplicate = copy.deepcopy(self)
        duplicate.inter_feat = new_inter_feat
        return duplicate

    def _data_filtering(self):
        """
        Run the data-cleaning pipeline, one step after another:
        NaN user/item filtering, duplicate removal, interaction filtering
        by remaining users/items, and finally an index reset.
        """
        pipeline = (
            '_filter_nan_user_or_item',
            '_remove_duplication',
            '_filter_inter_by_user_or_item',
            '_reset_index',
        )
        for step_name in pipeline:
            # Look each step up lazily so it is resolved right before it runs.
            getattr(self, step_name)()

    def split_by_ratio(self, ratios, group_by=None):
        """Split the interactions into consecutive parts whose sizes follow ``ratios``.

        Args:
            ratios (list): relative sizes of the parts; normalised to sum to 1 before use.
            group_by (str, optional): column of ``inter_feat`` to group by. When given, every
                group is split by the same ratios and the pieces are concatenated per part.

        Returns:
            list: one dataset copy per ratio, each holding its share of ``inter_feat``.
        """
        self.logger.debug(f'split by ratios [{ratios}], group_by=[{group_by}]')
        ratio_sum = sum(ratios)
        ratios = [ratio / ratio_sum for ratio in ratios]

        if group_by is not None:
            groups = self._grouped_index(self.inter_feat[group_by].to_numpy())
            next_index = [[] for _ in range(len(ratios))]
            for group in groups:
                group_size = len(group)
                cuts = self._calcu_split_ids(tot=group_size, ratios=ratios)
                starts = [0] + cuts
                ends = cuts + [group_size]
                for bucket, lo, hi in zip(next_index, starts, ends):
                    bucket.extend(group[lo:hi])
        else:
            total = self.__len__()
            cuts = self._calcu_split_ids(tot=total, ratios=ratios)
            next_index = [range(lo, hi) for lo, hi in zip([0] + cuts, cuts + [total])]

        self._drop_unused_col()
        parts = [self.inter_feat.iloc[positions] for positions in next_index]
        return [self.copy(part) for part in parts]

    def leave_one_out(self, group_by, leave_one_num=1):
        """Split the interactions with the leave-one-out strategy.

        Args:
            group_by (str): column of ``inter_feat`` used to group interactions; required.
            leave_one_num (int): forwarded to ``_split_index_by_leave_one_out``.

        Returns:
            list: dataset copies, one per index list returned by ``_split_index_by_leave_one_out``.

        Raises:
            ValueError: if ``group_by`` is None.
        """
        self.logger.debug(f'leave one out, group_by=[{group_by}], leave_one_num=[{leave_one_num}]')
        if group_by is None:
            raise ValueError('leave one out strategy require a group field')

        # ``inter_feat`` is sliced with ``.iloc`` below, i.e. it is a pandas DataFrame, so the
        # column is a Series. A Series has ``to_numpy()`` but no ``numpy()`` method; this now
        # matches ``split_by_ratio``.
        grouped_inter_feat_index = self._grouped_index(self.inter_feat[group_by].to_numpy())
        next_index = self._split_index_by_leave_one_out(grouped_inter_feat_index, leave_one_num)

        self._drop_unused_col()
        next_df = [self.inter_feat.iloc[index] for index in next_index]
        next_ds = [self.copy(_) for _ in next_df]
        return next_ds

    def _transform_by_field_value_random(self):
        """
        Transforms x% of feature/field values by removing the current value and
        replacing with random value selected from set of all possible values.

        The fields and their fractions are read from ``self.config['transform_val']``
        (a ``{field: fraction}`` mapping). Rows are picked at random from positions
        ``1 .. len(feat) - 2``; the first and last rows are never selected.

        Returns:
            list: the distinct values collected for the last (field, feature table) pair that
            was processed; an empty list when nothing is configured or nothing matched.

        Raises:
            ValueError: if a configured field is not defined in the dataset.
        """
        transform_percents = self.config['transform_val']
        if transform_percents is None:
            return []

        self.logger.debug(set_color('transform_by_field_value', 'blue') + f': val={transform_percents}')
        # Defined up front so the final ``return`` cannot hit an unbound name when the
        # mapping is empty or no feature table contains the field.
        field_values = []
        for field in transform_percents:
            if field not in self.field2type:
                raise ValueError(f'Field [{field}] not defined in dataset.')
            for feat_name in self.feat_name_list:
                feat = getattr(self, feat_name)
                if field not in feat:
                    continue
                col = feat.columns.get_loc(field)

                # gather all possible field values (first-seen order, no duplicates)
                field_values = []
                seen = set()
                for value in feat[field]:
                    if isinstance(value, Iterable):
                        candidates = value
                    elif value != 0:
                        candidates = (value,)
                    else:
                        # scalar 0 is not treated as a real value
                        candidates = ()
                    for candidate in candidates:
                        if candidate not in seen:
                            seen.add(candidate)
                            field_values.append(candidate)

                # Clamp the sample size: the raw formula can go negative for small fractions
                # or exceed the population for large ones, and random.sample raises on both.
                population = range(1, len(feat) - 1)
                requested = round(transform_percents[field] * len(feat) - 1)
                sample_size = max(0, min(requested, len(population)))

                for i in random.sample(population, sample_size):
                    current = feat.iloc[i, col]
                    if isinstance(current, Iterable):
                        # Filter instead of list.remove(): sequences may hold repeated values,
                        # which made remove() raise on the second occurrence.
                        excluded = list(current)
                        choices = [v for v in field_values if v not in excluded]
                        if not choices:
                            continue  # no alternative value exists; leave the row untouched
                        # remove iterable and replace with ONE randomly chosen value
                        feat.iloc[i, col] = np.array([[random.choice(choices)]])
                    else:
                        # Filter instead of list.remove(): the current value may be 0, which
                        # is never collected above, so remove() raised ValueError.
                        choices = [v for v in field_values if v != current]
                        if not choices:
                            continue  # no alternative value exists; leave the row untouched
                        feat.iloc[i, col] = random.choice(choices)
        return field_values

    def _transform_by_field_value_structured(self):
        """
        Transforms field/feature in structured manner.

        (1) If feature value is a single value (float, int), then the value is replaced with a random
        integer within x% of the current value. For example, age = 30, x = 10% --> may be replaced
        with age = 32.
        (2) If feature value is an iterable (list, numpy array) with more than one element, then x%
        of the values are dropped. For example, genre = [Horror, Drama, Romance], x = 33% --> may be
        replaced with genre = [Horror, Romance]

        The ``{field: x}`` mapping is read from
        ``self.config['DropeFraction_or_variance_transform_val']``. Rows are picked at random from
        positions ``1 .. len(feat) - 2``.

        Returns:
            An empty list when nothing is configured, otherwise None.

        Raises:
            ValueError: if a configured field is not defined in the dataset.
        """
        transform_percents = self.config['DropeFraction_or_variance_transform_val']

        if transform_percents is None:
            return []
        self.logger.debug(set_color('_transform_by_field_value', 'blue') + f': val={transform_percents}')

        for field in transform_percents:
            if field not in self.field2type:
                raise ValueError(f'Field [{field}] not defined in dataset.')
            fraction = transform_percents[field]
            for feat_name in self.feat_name_list:
                feat = getattr(self, feat_name)
                if field not in feat:
                    continue
                col = feat.columns.get_loc(field)

                # Clamp the sample size: the raw formula can go negative for small fractions
                # or exceed the population for large ones, and random.sample raises on both.
                population = range(1, len(feat) - 1)
                requested = round(fraction * len(feat) - 1)
                sample_size = max(0, min(requested, len(population)))

                for i in random.sample(population, sample_size):
                    current = feat.iloc[i, col]
                    if not isinstance(current, Iterable):
                        # replaces current value with random integer within x% of current value;
                        # bounds are sorted because they come out reversed for negative values,
                        # which random.randint rejects.
                        low, high = sorted((round((1 - fraction) * current),
                                            round((1 + fraction) * current)))
                        feat.iloc[i, col] = random.randint(low, high)
                    elif len(current) > 1:
                        # randomly sample x% from iterable/list and remove them
                        dropped_values = random.sample(list(current), round(fraction * len(current)))
                        kept = current
                        for item in dropped_values:
                            kept = np.array(kept[kept != item])
                        if dropped_values:
                            feat.iat[i, col] = kept

    def _transform_by_field_value_delete_feat(self):
        """
        Transforms field by "deleting" x% of feature values. Since the feature value cannot be truly deleted,
        we instead remove x% of feature values and replace with the average value of the feature.

        The ``{field: x}`` mapping is read from ``self.config['DeleteFraction_transform_val']``.
        The average is taken over the scalar (non-iterable) values of the field in every feature
        table that contains it; only scalar cells are ever overwritten. Rows are picked at random
        from positions ``1 .. len(feat) - 2``.

        Returns:
            An empty list when nothing is configured, otherwise None.

        Raises:
            ValueError: if a configured field is not defined in the dataset.
        """
        delete_percent = self.config['DeleteFraction_transform_val']
        if delete_percent is None:
            return []

        self.logger.debug(set_color('_transform_by_field_value', 'blue') + f': val={delete_percent}')
        for field in delete_percent:
            if field not in self.field2type:
                raise ValueError(f'Field [{field}] not defined in dataset.')

            holders = []
            for feat_name in self.feat_name_list:
                feat = getattr(self, feat_name)
                if field in feat:
                    holders.append(feat)

            # compute average value of feature/field. Sequence-valued cells are left out: they
            # are never replaced below, and mixing them with scalars breaks np.mean.
            scalar_values = [value for feat in holders for value in feat[field]
                             if not isinstance(value, Iterable)]
            avg_value = np.mean(scalar_values) if scalar_values else None

            for feat in holders:
                col = feat.columns.get_loc(field)
                # Clamp the sample size: the raw formula can go negative for small fractions
                # or exceed the population for large ones, and random.sample raises on both.
                population = range(1, len(feat) - 1)
                requested = round(delete_percent[field] * len(feat) - 1)
                sample_size = max(0, min(requested, len(population)))
                for i in random.sample(population, sample_size):
                    if not isinstance(feat.iloc[i, col], Iterable):
                        # replace with average value of feature
                        feat.iloc[i, col] = avg_value

    def _make_data_more_sparse(self):
        """Remove a random fraction of the interactions of sufficiently active users, in place.

        Reads two config values:
            * ``selected_user_spars_data``: only users with MORE interactions than this are
              sparsified (None is treated as 0, the default ``read_sparsify`` writes).
            * ``fraction_spars_data``: fraction of each selected user's items to remove. When it
              is None (no fraction configured) nothing is removed.

        For each selected user a random sample of their items is drawn and every interaction
        row of that user with one of the sampled items is dropped from ``self.inter_feat``.
        """
        min_inter = self.config['selected_user_spars_data']
        fraction = self.config['fraction_spars_data']
        if fraction is None:
            # No fraction configured: previously this crashed with a TypeError further down.
            return
        if min_inter is None:
            min_inter = 0

        # Use the configured field names rather than hard-coded "user_id"/"item_id".
        user_ids = self.inter_feat[self.uid_field].to_numpy()
        item_ids = self.inter_feat[self.iid_field].to_numpy()

        # user -> list of interacted items, in row order (users keyed in first-seen order)
        items_by_user = {}
        for user_id, item_id in zip(user_ids, item_ids):
            items_by_user.setdefault(user_id, []).append(item_id)

        # Mark rows to remove and drop them once, instead of re-scanning and dropping per item.
        drop_mask = np.zeros(len(self.inter_feat), dtype=bool)
        for user_id, items in items_by_user.items():
            if len(items) > min_inter:
                selected_items = random.sample(items, round(fraction * len(items)))
                if selected_items:
                    drop_mask |= (user_ids == user_id) & np.isin(item_ids, selected_items)

        self.inter_feat.drop(self.inter_feat.index[drop_mask], inplace=True)

    def _transform_interactions_random(self):
        """Randomly perturb a fraction of the interactions in ``self.inter_feat``, in place.

        The fraction is read from ``self.config['transform_inter']``. For GENERAL/TRADITIONAL
        model types the ``rating`` of each sampled row is replaced by a different random value
        in [0.0, 1.0] rounded to two decimals; for CONTEXT model types the ``label`` of each
        sampled row is flipped between 1.0 and 0.0.

        Returns:
            An empty list when no fraction is configured, otherwise None.
        """
        transform_fraction = self.config['transform_inter']
        if transform_fraction is None:
            return []

        possible_values = [0.0, 1.0]
        model_type = self.config['MODEL_TYPE']
        random_rows = random.sample(list(self.inter_feat.index), round(transform_fraction * len(self.inter_feat)))
        for index in random_rows:
            # All reads/writes go through ``.loc[row, col]``: the previous chained form
            # ``df[col].loc[row] = value`` may write to a temporary copy instead of the frame.
            if model_type == ModelType.GENERAL or model_type == ModelType.TRADITIONAL:
                transform_col = "rating"
                while True:
                    random_rating = round(random.uniform(possible_values[0], possible_values[1]), 2)
                    if random_rating != self.inter_feat.loc[index, transform_col]:
                        break
                self.inter_feat.loc[index, transform_col] = random_rating
            if model_type == ModelType.CONTEXT:
                transform_col = "label"
                if self.inter_feat.loc[index, transform_col] == 1.0:
                    self.inter_feat.loc[index, transform_col] = 0.0
                else:
                    self.inter_feat.loc[index, transform_col] = 1.0

    @staticmethod
    def _get_user_or_item_subset(feat_file, field, val_list):
        """Map every value in ``val_list`` to ``list(...)`` of the rows of ``feat_file`` whose ``field`` equals it.

        NOTE(review): if ``feat_file`` is a pandas DataFrame, ``list()`` of the filtered frame
        yields its column labels rather than row values -- confirm that this is intended.
        """
        subsets = {}
        for val in val_list:
            matching = feat_file[feat_file[field] == val]
            subsets[val] = list(matching)
        return subsets

    def create_distribution(self):
        """Sub-sample ``self.inter_feat`` so two values of a token field follow target proportions.

        ``self.config['distribution_shift']`` maps a field to a two-entry ``{value: proportion}``
        dict. For every feature table containing the field, the method counts the interactions
        whose user carries each value, keeps the value whose target proportion is at least its
        current share unchanged, and randomly drops interactions of the other value until the
        requested ratio is reached.

        Returns:
            An empty list when no distribution shift is configured, otherwise None.

        Raises:
            AssertionError: if a distribution dict does not have exactly two entries.
            ValueError: if the field is unknown, the proportions do not sum to 1, or the field
                is not of TOKEN type.
        """
        dist_shift = self.config['distribution_shift']
        if dist_shift is None:
            return []

        for field in dist_shift:
            distribution_dict = dist_shift[field]
            # supports distribution dict of size 2 only
            assert (len(distribution_dict) == 2)
            if field not in self.field2type:
                raise ValueError(f'Field [{field}] not defined in dataset.')
            # Compare with a tolerance: sums of binary floats are not always exactly 1.
            if abs(sum(distribution_dict.values()) - 1) > 1e-9:
                raise ValueError(f'Distribution needs to add up to 1.')
            if self.field2type[field] not in {FeatureType.TOKEN}:
                raise ValueError(f'Currently only works for Token types.')
            for feat_name in self.feat_name_list:
                feat = getattr(self, feat_name)
                if field in feat:
                    user_val_dict = {}
                    user_val_counts = {}
                    user_val_original_proportions = {}
                    inter_users = self.inter_feat[self.uid_field]
                    for val in feat[field].unique():
                        user_val_dict[val] = list(feat[feat[field] == val][self.uid_field])
                        # vectorised count of interactions whose user carries this value
                        user_val_counts[val] = int(inter_users.isin(user_val_dict[val]).sum())
                    for val, proportion in distribution_dict.items():
                        if val != 0.0:
                            token_val = self.field2token_id[field][val]
                            user_val_original_proportions[val] = user_val_counts[token_val] / len(self.inter_feat)
                    no_change_val = 0
                    no_change_quantity = 0
                    for val, proportion in distribution_dict.items():
                        token_val = self.field2token_id[field][val]
                        if proportion >= user_val_original_proportions[val]:
                            # this value keeps all of its interactions
                            no_change_val = val
                            no_change_new_proportion = proportion
                            no_change_quantity = user_val_counts[token_val]
                    # size of the shifted set implied by the unchanged value and its target share
                    num_new_test = int(no_change_quantity / no_change_new_proportion)
                    num_other_class = num_new_test - no_change_quantity
                    for val, proportion in distribution_dict.items():
                        token_val = self.field2token_id[field][val]
                        if val != no_change_val:
                            original_val = user_val_counts[token_val]
                            drop_indices = np.random.choice(
                                self.inter_feat.index[self.inter_feat[self.uid_field].isin(user_val_dict[token_val])],
                                original_val - num_other_class, replace=False)
                            self.inter_feat = self.inter_feat.drop(drop_indices)

    @staticmethod
    def create_distribution_slice(train, test):
        """Build a distribution-shifted copy of ``test``.

        Prints the training distribution statistics of ``train``, then deep-copies ``test``,
        applies ``create_distribution`` to the copy, resets its index and re-runs the
        user/item feature preparation.

        Returns:
            The modified deep copy of ``test``; ``test`` itself is left untouched.
        """
        print("Preparing distributional test slice.")
        train.get_training_distribution_statistics()
        shifted_test = copy.deepcopy(test)
        # Statistics of the shifted slice and user/item filtering are not run here.
        for step_name in ('create_distribution', '_reset_index', '_user_item_feat_preparation'):
            getattr(shifted_test, step_name)()
        return shifted_test

    def get_training_distribution_statistics(self):
        """Print, per configured field, the share of interactions belonging to each field value.

        Fields come from ``self.config['distribution_shift']``. The value 0.0 is skipped both
        when counting and when printing.

        Returns:
            An empty list when no distribution shift is configured, otherwise None.
        """
        dist_slice = self.config['distribution_shift']
        if dist_slice is None:
            print("No Training Stats Computed")
            return []

        for field in dist_slice:
            # field value -> ids of the users carrying that value
            users_by_value = {}
            for feat_name in self.feat_name_list:
                feat = getattr(self, feat_name)
                if field not in feat:
                    continue
                for val in list(feat[field].unique()):
                    users_by_value[val] = list(feat[feat[field] == val][self.uid_field])

            # field value -> number of interactions made by those users
            counts = {
                val: len(self.inter_feat[self.inter_feat[self.uid_field].isin(users)])
                for val, users in users_by_value.items()
                if val != 0.0
            }

            print("Training Distribution:")
            for val in users_by_value:
                if val == 0.0:
                    continue
                print("Val: ", self.field2id_token[field][int(val)], "Percent: ",
                      counts[val] / sum(list(counts.values())))

    def get_attack_statistics(self, train):
        """Print the header of the interaction-transformation summary; ``train`` is not used."""
        header = "Interaction Transformation Robustness Test Summary"
        print(header)

    def get_distribution_shift_statistics(self, train, test):
        """Print the header of the distribution-shift summary; ``train`` and ``test`` are not used."""
        header = "Distribution Shift Robustness Test Summary"
        print(header)

    def get_transformation_statistics(self, test):
        """Print interaction and distinct-user counts of ``test`` (original) and ``self`` (transformed)."""
        # TODO: improve printed information
        uid = self.uid_field
        print("Transformation of Features Robustness Test Summary")

        original_inter = test.inter_feat
        print("Original Test Size: ", len(original_inter))
        print("Original Test Users: ", len(original_inter[uid].unique()))
        print("Original Test Features Distribution")

        transformed_inter = self.inter_feat
        print("Transformed Test Size: ", len(transformed_inter))
        print("Transformed Test Users: ", len(transformed_inter[uid].unique()))
        print("Transformed Test Features Distribution")

    def get_sparsity_statistics(self, train):
        """Print interaction and distinct-user counts of ``train`` (original) and ``self`` (sparsified)."""
        uid = self.uid_field
        print("Sparsity Robustness Test Summary")

        original_inter = train.inter_feat
        print("Original Train Size: ", len(original_inter))
        print("Original Train Users: ", len(original_inter[uid].unique()))

        sparse_inter = self.inter_feat
        print("Sparsified Train Size: ", len(sparse_inter))
        print("Sparsified Train Users: ", len(sparse_inter[uid].unique()))

    @staticmethod
    def create_transformed_test(test):
        """Build a feature-transformed copy of ``test``.

        Deep-copies ``test``, loads the feature-transformation settings into the copy's config,
        applies the random, structured and delete-feature transformations in that order, and
        prints summary statistics.

        Returns:
            The transformed deep copy; ``test`` itself is left untouched.
        """
        print("Preparing test set transformation.")
        perturbed = copy.deepcopy(test)
        perturbed.read_transform_features()
        for step_name in ('_transform_by_field_value_random',
                          '_transform_by_field_value_structured',
                          '_transform_by_field_value_delete_feat'):
            getattr(perturbed, step_name)()
        perturbed.get_transformation_statistics(test)
        return perturbed

    @staticmethod
    def create_transformed_train(train):
        """Build an interaction-transformed copy of ``train``.

        Deep-copies ``train``, loads the interaction-transformation settings into the copy's
        config, randomly transforms its interactions and prints the summary header.

        Returns:
            The transformed deep copy; ``train`` itself is left untouched.
        """
        print("Preparing training set transformation.")
        perturbed = copy.deepcopy(train)
        perturbed.read_transform_interactions()
        perturbed._transform_interactions_random()
        perturbed.get_attack_statistics(train)
        return perturbed

    def read_transform_interactions(self):
        """Copy ``transform_interactions.fraction_transformed`` into the ``transform_inter`` config entry.

        Prints a message and changes nothing when the section is None or the key is missing.

        Returns:
            None in every case.
        """
        final_config = self.config.final_config_dict
        transform_config = final_config["transform_interactions"]

        if transform_config is None:
            print("No transformation configs.")
            return None

        if "fraction_transformed" not in transform_config:
            print("No transformation percent specified.")
            return None

        final_config["transform_inter"] = transform_config["fraction_transformed"]

    def read_sparsify(self):
        """Translate the ``sparsify`` config section into the entries used for sparsification.

        ``min_user_inter`` (default 0) is stored as ``selected_user_spars_data`` and
        ``fraction_removed`` as ``fraction_spars_data``. A message is printed when the section
        is None or the fraction is missing.

        Returns:
            None in every case.
        """
        final_config = self.config.final_config_dict
        sparsify_config = final_config["sparsify"]

        if sparsify_config is None:
            print("No sparsity configs.")
            return None

        # The threshold is written even when the fraction turns out to be missing below.
        threshold = sparsify_config["min_user_inter"] if "min_user_inter" in sparsify_config else 0
        final_config['selected_user_spars_data'] = threshold

        if "fraction_removed" not in sparsify_config:
            print("No sparsity fraction specified.")
            return None

        final_config["fraction_spars_data"] = sparsify_config["fraction_removed"]

    @staticmethod
    def create_sparse_train(train):
        """Build a sparsified copy of ``train``.

        Deep-copies ``train``, loads the sparsity settings into the copy's config, removes
        interactions accordingly and prints summary statistics.

        Returns:
            The sparsified deep copy; ``train`` itself is left untouched.
        """
        print("Preparing sparsified training data set.")
        sparsified = copy.deepcopy(train)
        sparsified.read_sparsify()
        sparsified._make_data_more_sparse()
        sparsified.get_sparsity_statistics(train)
        return sparsified

    def _filter_by_inter_num(self, train):
        """Drop users, items and interactions whose interaction counts in ``train`` are out of bounds.

        The bounds come from the ``min/max_user_inter_num`` and ``min/max_item_inter_num`` config
        entries. Banned users/items are removed from ``user_feat``/``item_feat`` (when present)
        and every interaction involving them is removed from ``inter_feat``, all in place.

        Args:
            train: dataset whose interactions are counted to decide which ids are banned.
        """
        ban_users = self._get_illegal_ids_by_inter_num(dataset=train, field=self.uid_field, feat=self.user_feat,
                                                       max_num=self.config['max_user_inter_num'],
                                                       min_num=self.config['min_user_inter_num'])
        ban_items = self._get_illegal_ids_by_inter_num(dataset=train, field=self.iid_field, feat=self.item_feat,
                                                       max_num=self.config['max_item_inter_num'],
                                                       min_num=self.config['min_item_inter_num'])

        if len(ban_users) == 0 and len(ban_items) == 0:
            return

        if self.user_feat is not None:
            dropped_user = self.user_feat[self.uid_field].isin(ban_users)
            self.user_feat.drop(self.user_feat.index[dropped_user], inplace=True)

        if self.item_feat is not None:
            dropped_item = self.item_feat[self.iid_field].isin(ban_items)
            self.item_feat.drop(self.item_feat.index[dropped_item], inplace=True)

        # Boolean mask over the interactions: True marks a row to be removed.
        dropped_inter = pd.Series(False, index=self.inter_feat.index)
        if self.uid_field:
            dropped_inter |= self.inter_feat[self.uid_field].isin(ban_users)
        if self.iid_field:
            dropped_inter |= self.inter_feat[self.iid_field].isin(ban_items)
        # Log how many rows are actually dropped (the True entries), not the mask length.
        self.logger.debug('[{}] dropped interactions'.format(int(dropped_inter.sum())))
        self.inter_feat.drop(self.inter_feat.index[dropped_inter], inplace=True)

    def _get_illegal_ids_by_inter_num(self, dataset, field, feat, max_num=None, min_num=None):
        """
        Overloaded from RecBole. This version uses *train* interactions for slicing.

        Args:
            dataset: object indexed as ``dataset[field].values`` to obtain the ids to count.
            field (str): id field to count; None yields an empty set.
            feat: optional feature table; ids listed in ``feat[field]`` that occur fewer than
                ``min_num`` times in ``dataset`` (including never) are also reported.
            max_num (int, optional): largest allowed count; None means no upper bound.
            min_num (int, optional): smallest allowed count; None means no lower bound.

        Returns:
            set: ids whose count is below ``min_num`` or above ``max_num``.
        """
        self.logger.debug('\n get_illegal_ids_by_inter_num:\n\t field=[{}], max_num=[{}], min_num=[{}]'.format(
            field, max_num, min_num
        ))

        if field is None:
            return set()
        if max_num is None and min_num is None:
            return set()

        # Test against None explicitly: ``max_num or np.inf`` turned an explicit max_num=0
        # (keep only ids with no interactions) into "no upper bound".
        if max_num is None:
            max_num = np.inf
        if min_num is None:
            min_num = -1

        ids = dataset[field].values
        inter_num = Counter(ids)
        ids = {id_ for id_ in inter_num if inter_num[id_] < min_num or inter_num[id_] > max_num}

        if feat is not None:
            # Counter returns 0 for ids that never occur in ``dataset``.
            for id_ in feat[field].values:
                if inter_num[id_] < min_num:
                    ids.add(id_)
        self.logger.debug('[{}] illegal_ids_by_inter_num, field=[{}]'.format(len(ids), field))
        return ids

    def _drop_by_value(self, val, cmp):
        """
        Overloaded _drop_by_value function from RecBole Dataset base class.
        Here we enable filtering for any field type (not just floats). We also
        enable dropping of categorical features. This function is called by
        _filter_by_field_value() in RecBole.

        Rows of every feature table containing a listed field are dropped in place
        wherever ``cmp(column_values, threshold)`` is true.

        Args:
            val (dict): ``{field: threshold}``; None means nothing to do.
            cmp (Callable): comparison applied as ``cmp(values, threshold)``.

        Returns:
            filter_field (list): field names used in comparison.

        Raises:
            ValueError: if a field is not defined in the dataset.
            NotImplementedError: if a field is of TOKEN_SEQ type.
        """

        if val is None:
            return []

        self.logger.debug(set_color('drop_by_value', 'blue') + f': val={val}')
        filter_field = []
        for field in val:
            if field not in self.field2type:
                raise ValueError(f'Field [{field}] not defined in dataset.')
            for feat_name in self.feat_name_list:
                feat = getattr(self, feat_name)
                if field in feat:
                    if self.field2type[field] == FeatureType.TOKEN_SEQ:
                        raise NotImplementedError
                    if self.field2type[field] == FeatureType.TOKEN:
                        # tokens are mapped to new values by __init__()
                        if isinstance(val[field], str):
                            feat.drop(feat.index[cmp(feat[field].values, self.field2token_id[field][val[field]])],
                                      inplace=True)
                        else:
                            def convert_to_orig_val(x):
                                """Map a token id back to its original numeric value (0.0 as fallback)."""
                                if int(x) == 0:
                                    return 0.0
                                try:
                                    return float(self.field2id_token[field][int(x)])
                                except Exception:
                                    # Best effort: unknown or non-numeric tokens count as 0.0.
                                    # ``Exception`` (not a bare except) lets KeyboardInterrupt
                                    # and SystemExit propagate.
                                    return 0.0

                            original_tokens = np.array([convert_to_orig_val(i) for i in feat[field].values])
                            feat.drop(feat.index[cmp(original_tokens, float(val[field]))], inplace=True)
                    if self.field2type[field] in {FeatureType.FLOAT, FeatureType.FLOAT_SEQ}:
                        feat.drop(feat.index[cmp(feat[field].values, val[field])], inplace=True)
            filter_field.append(field)
        return filter_field

    def get_slice_statistics(self, test):
        """Print interaction and distinct-user counts of ``test`` (original) and ``self`` (subpopulation)."""
        uid = self.uid_field
        print("Slice Robustness Test Summary")

        original_inter = test.inter_feat
        print("Original Test Size: ", len(original_inter))
        print("Original Test Users: ", len(original_inter[uid].unique()))

        slice_inter = self.inter_feat
        print("Subpopulation Size: ", len(slice_inter))
        print("Subpopulation Users: ", len(slice_inter[uid].unique()))

    def create_slice(self, test, train):
        """Build a subpopulation (slice) of ``test`` according to the ``slice`` config section.

        A deep copy of ``test`` is filtered by feature values (``by_feature``) and/or by
        interaction counts in ``train`` (``by_inter``); afterwards its index is reset, the
        user/item features are re-prepared and summary statistics are printed.

        Args:
            test: dataset to slice; left untouched.
            train: dataset whose interactions are used for ``by_inter`` slicing.

        Returns:
            The sliced deep copy of ``test``.
        """
        slice_config = self.config.final_config_dict["slice"]
        slice_test = copy.deepcopy(test)
        print("Preparing subpopulation of Test set.")
        if slice_config is None:
            # An empty ``slice`` section used to crash on the membership tests below;
            # treat it as "no filters", in line with the other config readers.
            print("No slice configs.")
            slice_config = {}
        if "by_feature" in slice_config:
            slice_test = self.create_slice_by_feature(slice_test)
        if "by_inter" in slice_config:
            slice_test = self.create_slice_by_inter(slice_test, train)
        slice_test._reset_index()
        slice_test._user_item_feat_preparation()
        slice_test.get_slice_statistics(test)
        return slice_test

    def create_slice_by_inter(self, slice_test, train):
        """Filter ``slice_test`` in place by interaction counts taken from ``train`` and return it."""
        print("Preparing test set slice based on training set interactions.")
        # Load the min/max bounds into the slice's config before filtering with them.
        slice_test.read_slice_by_inter()
        slice_test._filter_by_inter_num(train)
        return slice_test

    def read_slice_by_inter(self):
        """Copy the ``slice.by_inter`` bounds into the ``{min,max}_{user,item}_inter_num`` config entries.

        Only bounds that are present are written. Prints a message and returns None when the
        section is None.

        Raises:
            AssertionError: if a ``user`` or ``item`` entry is not exactly a ``dict``.
        """
        final_config = self.config.final_config_dict
        feature_config = final_config["slice"]["by_inter"]

        if feature_config is None:
            print("No interaction subset specified.")
            return None

        for entity in ("user", "item"):
            if entity not in feature_config:
                continue
            bounds = feature_config[entity]
            assert (type(bounds) == dict)
            for bound in ("min", "max"):
                if bound in bounds:
                    final_config[f"{bound}_{entity}_inter_num"] = bounds[bound]

    def create_slice_by_feature(self, slice_test):
        """Filter ``slice_test`` in place by the configured feature values and return it."""
        print("Preparing test set slice based on feature values.")
        # Load the feature bounds into the slice's config, filter by them, then drop
        # interactions of users/items that were filtered out.
        for step_name in ('read_slice_by_feature', '_filter_by_field_value', '_filter_inter_by_user_or_item'):
            getattr(slice_test, step_name)()
        return slice_test

    def read_slice_by_feature(self):
        """Translate ``slice.by_feature`` into the ``lowest_val`` / ``highest_val`` / ``equal_val`` entries.

        For every configured field found in a feature table, its ``min``, ``max`` and ``equal``
        specs are merged into the respective config dict (created when currently None).
        Prints a message and returns None when the section is None or a field's spec is not a
        ``dict``.

        Raises:
            ValueError: if a field found in a feature table is not defined in the dataset.
        """
        final_config = self.config.final_config_dict
        feature_config = final_config["slice"]["by_feature"]

        if feature_config is None:
            print("No feature values specified.")
            return None

        # spec key in the slice config -> config entry that receives it
        spec_targets = (("min", "lowest_val"), ("max", "highest_val"), ("equal", "equal_val"))

        for field in feature_config:
            for feat_name in self.feat_name_list:
                if field not in getattr(self, feat_name):
                    continue
                if field not in self.field2type:
                    raise ValueError(f'Field [{field}] not defined in dataset.')
                slice_specs = feature_config[field]
                if type(slice_specs) != dict:
                    print("Incorrect config format.")
                    return None
                for spec_key, config_key in spec_targets:
                    if spec_key not in slice_specs:
                        continue
                    entry = {field: slice_specs[spec_key]}
                    if final_config[config_key] is None:
                        final_config[config_key] = entry
                    else:
                        final_config[config_key].update(entry)

    def read_transform_features(self):
        """Translate the ``transform_features`` config section into per-transform config entries.

        ``structured`` is copied to ``DropeFraction_or_variance_transform_val`` and ``random``
        to ``transform_val`` (each a ``{field: fraction}`` dict). Both sections are honoured
        when both are present. A message is printed when the section is None or contains
        neither key.

        Returns:
            None in every case.
        """
        feature_config = self.config.final_config_dict["transform_features"]

        if feature_config is None:
            print("No feature transformation specified.")
            return None

        # section name in the user config -> config entry read by the matching transform
        section_targets = (
            ("structured", 'DropeFraction_or_variance_transform_val'),
            ("random", 'transform_val'),
        )

        # Sections are read independently: an ``elif`` chain used to ignore ``random``
        # silently whenever ``structured`` was also given.
        found_section = False
        for section, config_key in section_targets:
            if section not in feature_config:
                continue
            found_section = True
            section_config = feature_config[section]
            self.config.final_config_dict[config_key] = {
                field: section_config[field] for field in section_config
            }

        if not found_section:
            print("Transformation of features incorrectly specified.")
            return None

    def create_robustness_datasets(self, train, valid, test):
        """
        Builds the modified dataset for every robustness test named in the final config dict.

        Args:
            train: training split.
            valid: validation split (accepted for symmetry; no robustness test reads it).
            test: test split.

        Returns:
            dict: modified datasets keyed by ``slice``, ``sparsity``, ``transformation_test``,
            ``transformation_train`` and ``distributional_slice``; a key is present only when
            the matching test (``slice``, ``sparsify``, ``transform_features``,
            ``transform_interactions``, ``distribution_shift``) appears in the config.
        """
        requested = self.config.final_config_dict

        # (config key, result key, builder). Builders are wrapped so that nothing runs for a
        # test that was not requested, and the tuple order is the order the tests are built in.
        recipes = (
            ("slice", "slice", lambda: self.create_slice(test, train)),
            ("sparsify", "sparsity", lambda: self.create_sparse_train(train)),
            ("transform_features", 'transformation_test', lambda: self.create_transformed_test(test)),
            ("transform_interactions", 'transformation_train', lambda: self.create_transformed_train(train)),
            ("distribution_shift", 'distributional_slice', lambda: self.create_distribution_slice(train, test)),
        )

        built = {}
        for config_key, result_key, builder in recipes:
            if config_key in requested:
                built[result_key] = builder()
        return built

    def build(self, eval_setting):
        """
        Overloads RecBole build. Our version builds train, valid, test
        and modified versions of train, valid, test as needed according to the
        robustness tests requested in the robustness_dict.

        Args:
            eval_setting (EvalSetting): provides ``ordering_args``, ``group_field`` and
                ``split_args``.

        Returns:
            original_datasets (list): list containing original train, valid, test datasets
            robustness_testing_datasets (dict): {robustness test name: modified dataset} key value pairs

        Raises:
            NotImplementedError: for benchmark files, the 'by' ordering strategy, the 'by_value'
                split strategy, and any split strategy other than 'by_ratio' / 'loo'.
        """
        if self.benchmark_filename_list is not None:
            raise NotImplementedError()

        ordering_args = eval_setting.ordering_args
        if ordering_args['strategy'] == 'shuffle':
            self.inter_feat = sk_shuffle(self.inter_feat)
            self.inter_feat = self.inter_feat.reset_index(drop=True)
        elif ordering_args['strategy'] == 'by':
            raise NotImplementedError()

        group_field = eval_setting.group_field
        split_args = eval_setting.split_args

        if split_args['strategy'] == 'by_ratio':
            original_datasets = self.split_by_ratio(split_args['ratios'], group_by=group_field)
        elif split_args['strategy'] == 'by_value':
            raise NotImplementedError()
        elif split_args['strategy'] == 'loo':
            original_datasets = self.leave_one_out(group_by=group_field, leave_one_num=split_args['leave_one_num'])
        else:
            # The previous fallback assigned `self` here and then failed while unpacking it into
            # train/valid/test below. The robustness tests need three separate splits, so an
            # unsplit dataset is rejected explicitly instead.
            raise NotImplementedError(
                f"Split strategy [{split_args['strategy']}] is not supported: "
                f"robustness tests require separate train, valid and test sets."
            )

        train, valid, test = original_datasets
        robustness_testing_datasets = self.create_robustness_datasets(train, valid, test)

        # Unpacking (rather than list + list) also accepts a tuple of splits.
        for data in [*robustness_testing_datasets.values(), *original_datasets]:
            if data is not None:
                data.inter_feat = data.inter_feat.reset_index(drop=True)
                data._change_feat_format()
                if ordering_args['strategy'] == 'shuffle':
                    # Re-seed before every shuffle so each dataset is permuted from the same state.
                    torch.manual_seed(self.config['seed'])
                    data.shuffle()
                elif ordering_args['strategy'] == 'by':
                    # Currently unreachable: the 'by' strategy raises NotImplementedError above.
                    data.sort(by=ordering_args['field'], ascending=ordering_args['ascending'])

        return original_datasets, robustness_testing_datasets

In [None]:
# Build the robustness datasets on their own (no training) and print what was produced.
config = Config(
    model="DCN",
    dataset="ml-100k",
    config_dict=dict(
        distribution_shift=dict(gender=dict(M=0.9, F=0.1)),
        slice=dict(by_feature=dict(age=dict(min=40))),
        sparsify=dict(fraction_removed=0.05),
        # Disabled: 'transform_features': {'fraction_removed': {'occupation': 0.2}},
        transform_interactions=dict(fraction_transformed=0.2),
    ),
)
init_seed(config['seed'], config['reproducibility'])
data = RobustnessGymDataset(config)
datasets, robust_dict = data.build(EvalSetting(config))
print(robust_dict)

Preparing subpopulation of Test set.
Preparing test set slice based on feature values.
Slice Robustness Test Summary
Original Test Size:  10000
Original Test Users:  927
Subpopulation Size:  2637
Subpopulation Users:  286
Preparing sparsified training data set.
Sparsity Robustness Test Summary
Original Train Size:  80000
Original Train Users:  943
Sparsified Train Size:  75994
Sparsified Train Users:  943
Preparing training set transformation.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Interaction Transformation Robustness Test Summary
Preparing distributional test slice.
Training Distribution:
Val:  M Percent:  0.74395
Val:  F Percent:  0.25605
{'slice': [1;35mml-100k[0m
[1;34mThe number of users[0m: 944
[1;34mAverage actions of users[0m: 9.22027972027972
[1;34mThe number of items[0m: 1683
[1;34mAverage actions of items[0m: 3.124407582938389
[1;34mThe number of inters[0m: 2637
[1;34mThe sparsity of the dataset[0m: 99.8340206652769%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'timestamp', 'age', 'gender', 'occupation', 'release_year', 'class', 'label'], 'sparsity': [1;35mml-100k[0m
[1;34mThe number of users[0m: 944
[1;34mAverage actions of users[0m: 80.58748674443267
[1;34mThe number of items[0m: 1683
[1;34mAverage actions of items[0m: 46.14086217364906
[1;34mThe number of inters[0m: 75994
[1;34mThe sparsity of the dataset[0m: 95.21674874366799%
[1;34mRemain Fields[0m: ['user_id', 'item_id', 'timestamp', 'age', 'gender', 'occupation

In [None]:
def create_dataset(config):
    """
    Creates the dataset object for the configured model.

    A model-specific RecBole dataset class (``<model>Dataset``) is used as-is when one exists.
    Otherwise the RecBole dataset class matching ``MODEL_TYPE`` is re-parented onto
    RobustnessGymDataset (its ``__bases__`` is overwritten, which affects the class globally)
    and instantiated; model types without a dedicated class get a plain RobustnessGymDataset.

    Args:
        config (Config): Config file indicating MODEL_TYPE and model.

    Returns:
        Dataset instance built from ``config``.
    """
    recbole_datasets = importlib.import_module('recbole.data.dataset')
    if hasattr(recbole_datasets, config['model'] + 'Dataset'):
        return getattr(recbole_datasets, config['model'] + 'Dataset')(config)

    # Resolve the type-specific RecBole class; the import stays inside its own branch so that
    # only the class that is actually needed gets looked up.
    model_type = config['MODEL_TYPE']
    if model_type == ModelType.SEQUENTIAL:
        from recbole.data.dataset import SequentialDataset as rebased_cls
    elif model_type == ModelType.KNOWLEDGE:
        from recbole.data.dataset import KnowledgeBasedDataset as rebased_cls
    elif model_type == ModelType.SOCIAL:
        from recbole.data.dataset import SocialDataset as rebased_cls
    elif model_type == ModelType.DECISIONTREE:
        from recbole.data.dataset import DecisionTreeDataset as rebased_cls
    else:
        return RobustnessGymDataset(config)

    rebased_cls.__bases__ = (RobustnessGymDataset,)
    return rebased_cls(config)


def get_transformed_train(config, train_kwargs, train_dataloader, robustness_testing_datasets):
    """
    Converts training data set created by transformations into dataloader object. Uses same config
    settings as original training data.

    Args:
        config (Config): General config; supplies ``train_batch_size`` and ``MODEL_INPUT_TYPE``.
        train_kwargs (dict): Keyword arguments used for the original training dataloader. Its
            ``sampler`` (and, alongside it, ``neg_sample_args``) are reused when present.
        train_dataloader (Dataloader): Training dataloader class / factory.
        robustness_testing_datasets (dict): Modified datasets resulting from robustness tests.

    Returns:
        transformed_train (Dataloader): loader over ``robustness_testing_datasets['transformation_train']``,
        or None when that entry is absent.
    """
    if "transformation_train" not in robustness_testing_datasets:
        return None

    transformation_kwargs = {
        'config': config,
        'dataset': robustness_testing_datasets['transformation_train'],
        'batch_size': config['train_batch_size'],
        'dl_format': config['MODEL_INPUT_TYPE'],
        'shuffle': True,
    }

    # Negative-sampling settings are optional. They are looked up explicitly instead of relying
    # on a bare `except`, which also hid real dataloader errors (and interrupts) and built the
    # loader a second time with the very same arguments.
    if 'sampler' in train_kwargs:
        transformation_kwargs['sampler'] = train_kwargs['sampler']
        if 'neg_sample_args' in train_kwargs:
            transformation_kwargs['neg_sample_args'] = train_kwargs['neg_sample_args']

    return train_dataloader(**transformation_kwargs)


def get_sparsity_train(config, train_kwargs, train_dataloader, robustness_testing_datasets):
    """
    Converts training data set created by sparsity into dataloader object. Uses same config
    settings as original training data.

    Args:
        config (Config): General config; supplies ``train_batch_size`` and ``MODEL_INPUT_TYPE``.
        train_kwargs (dict): Keyword arguments used for the original training dataloader. Its
            ``sampler`` (and, alongside it, ``neg_sample_args``) are reused when present.
        train_dataloader (Dataloader): Training dataloader class / factory.
        robustness_testing_datasets (dict): Modified datasets resulting from robustness tests.

    Returns:
        sparsity_train (Dataloader): loader over ``robustness_testing_datasets['sparsity']``,
        or None when that entry is absent.

    """
    if "sparsity" not in robustness_testing_datasets:
        return None

    sparsity_kwargs = {
        'config': config,
        'dataset': robustness_testing_datasets['sparsity'],
        'batch_size': config['train_batch_size'],
        'dl_format': config['MODEL_INPUT_TYPE'],
        'shuffle': True,
    }

    # Negative-sampling settings are optional. They are looked up explicitly instead of relying
    # on a bare `except`, which also hid real dataloader errors (and interrupts) and built the
    # loader a second time with the very same arguments.
    if 'sampler' in train_kwargs:
        sparsity_kwargs['sampler'] = train_kwargs['sampler']
        if 'neg_sample_args' in train_kwargs:
            sparsity_kwargs['neg_sample_args'] = train_kwargs['neg_sample_args']

    return train_dataloader(**sparsity_kwargs)


def get_distributional_slice_test(eval_kwargs, test_kwargs, test_dataloader, robustness_testing_datasets):
    """
    Wraps the distribution-shift test slice in an evaluation dataloader.

    Args:
        eval_kwargs (dict): Keyword arguments shared by all evaluation dataloaders; applied last,
            so they win over the ``dataset`` / ``sampler`` entries set here.
        test_kwargs (dict): Keyword arguments of the original test dataloader; only its
            ``sampler`` entry is reused, when present.
        test_dataloader (Dataloader): Evaluation dataloader class / factory.
        robustness_testing_datasets (dict): Modified datasets resulting from robustness tests.

    Returns:
        Dataloader over ``robustness_testing_datasets['distributional_slice']``, or None when
        that entry is absent.
    """
    if 'distributional_slice' not in robustness_testing_datasets:
        return None

    dataset_part = {'dataset': robustness_testing_datasets['distributional_slice']}
    sampler_part = {'sampler': test_kwargs['sampler']} if 'sampler' in test_kwargs else {}
    return test_dataloader(**{**dataset_part, **sampler_part, **eval_kwargs})


def get_slice_test(eval_kwargs, test_kwargs, test_dataloader, robustness_testing_datasets):
    """
    Wraps the sliced (subpopulation) test set in an evaluation dataloader.

    Args:
        eval_kwargs (dict): Keyword arguments shared by all evaluation dataloaders; applied last,
            so they win over the ``dataset`` / ``sampler`` entries set here.
        test_kwargs (dict): Keyword arguments of the original test dataloader; only its
            ``sampler`` entry is reused, when present.
        test_dataloader (Dataloader): Evaluation dataloader class / factory.
        robustness_testing_datasets (dict): Modified datasets resulting from robustness tests.

    Returns:
        Dataloader over ``robustness_testing_datasets['slice']``, or None when that entry is absent.
    """
    if 'slice' not in robustness_testing_datasets:
        return None

    dataset_part = {'dataset': robustness_testing_datasets['slice']}
    sampler_part = {'sampler': test_kwargs['sampler']} if 'sampler' in test_kwargs else {}
    return test_dataloader(**{**dataset_part, **sampler_part, **eval_kwargs})


def get_transformation_test(eval_kwargs, test_kwargs, test_dataloader, robustness_testing_datasets):
    """
    Wraps the feature-transformed test set in an evaluation dataloader.

    Args:
        eval_kwargs (dict): Keyword arguments shared by all evaluation dataloaders; applied last,
            so they win over the ``dataset`` / ``sampler`` entries set here.
        test_kwargs (dict): Keyword arguments of the original test dataloader; only its
            ``sampler`` entry is reused, when present.
        test_dataloader (Dataloader): Evaluation dataloader class / factory.
        robustness_testing_datasets (dict): Modified datasets resulting from robustness tests.

    Returns:
        Dataloader over the transformed test set, or None when no such set was built.
    """
    # create_robustness_datasets stores the transformed test set under 'transformation_test',
    # a key this helper never looked at, so it always returned None. The old 'transformation'
    # key is still honoured, and takes precedence, so existing callers keep their behaviour.
    for dataset_key in ('transformation', 'transformation_test'):
        if dataset_key in robustness_testing_datasets:
            break
    else:
        return None

    transformation_kwargs = {'dataset': robustness_testing_datasets[dataset_key]}
    if 'sampler' in test_kwargs:
        transformation_kwargs['sampler'] = test_kwargs['sampler']
    transformation_kwargs.update(eval_kwargs)
    return test_dataloader(**transformation_kwargs)


def data_preparation(config, dataset, save=False):
    """
    Builds datasets, including datasets built by applying robustness tests, configures train, validation, test
    sets, converts to tensors. Overloads RecBole data_preparation - we include the preparation of the robustness test
    train/test/valid sets here.

    Args:
        config (Config): General config (model, batch sizes, negative sampling settings).
        dataset (RobustnessGymDataset): Dataset whose ``build`` returns the original splits and
            the robustness-test datasets.
        save (bool): When True, the original train/valid/test dataloaders are saved with
            ``save_split_dataloaders``.

    Returns:
        tuple: ``(train_data, valid_data, test_data, robustness_testing_data)`` where the first
        three are the dataloaders of the original splits and ``robustness_testing_data`` is a dict
        with the keys ``slice``, ``distributional_slice``, ``transformation_train``,
        ``transformation_test`` and ``sparsity``, each holding a dataloader or None.

    Raises:
        ValueError: if negative sampling is requested (for training or evaluation) while the
            interaction features already contain the label field.
    """
    model_type = config['MODEL_TYPE']
    model = config['model']
    es = EvalSetting(config)

    original_datasets, robustness_testing_datasets = dataset.build(es)
    train_dataset, valid_dataset, test_dataset = original_datasets
    phases = ['train', 'valid', 'test']
    sampler = None
    logger = getLogger()
    train_neg_sample_args = config['train_neg_sample_args']
    eval_neg_sample_args = es.neg_sample_args

    # Training
    train_kwargs = {
        'config': config,
        'dataset': train_dataset,
        'batch_size': config['train_batch_size'],
        'dl_format': config['MODEL_INPUT_TYPE'],
        'shuffle': True,
    }

    if train_neg_sample_args['strategy'] != 'none':
        if dataset.label_field in dataset.inter_feat:
            raise ValueError(
                f'`training_neg_sample_num` should be 0 '
                f'if inter_feat have label_field [{dataset.label_field}].'
            )
        if model_type != ModelType.SEQUENTIAL:
            sampler = Sampler(phases, original_datasets, train_neg_sample_args['distribution'])
        else:
            sampler = RepeatableSampler(phases, dataset, train_neg_sample_args['distribution'])
        # These models do not receive a sampler / negative sampling arguments for training.
        if model not in ["MultiVAE", "MultiDAE", "MacridVAE", "CDAE", "ENMF", "RaCT", "RecVAE"]:
            train_kwargs['sampler'] = sampler.set_phase('train')
            train_kwargs['neg_sample_args'] = train_neg_sample_args
        if model_type == ModelType.KNOWLEDGE:
            kg_sampler = KGSampler(dataset, train_neg_sample_args['distribution'])
            train_kwargs['kg_sampler'] = kg_sampler

    dataloader = get_data_loader('train', config, train_neg_sample_args)
    logger.info(
        set_color('Build', 'pink') + set_color(f' [{dataloader.__name__}]', 'yellow') + ' for ' +
        set_color('[train]', 'yellow') + ' with format ' + set_color(f'[{train_kwargs["dl_format"]}]', 'yellow')
    )
    if train_neg_sample_args['strategy'] != 'none':
        logger.info(
            set_color('[train]', 'pink') + set_color(' Negative Sampling', 'blue') + f': {train_neg_sample_args}'
        )
    else:
        logger.info(set_color('[train]', 'pink') + set_color(' No Negative Sampling', 'yellow'))
    logger.info(
        set_color('[train]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' +
        set_color(f'[{train_kwargs["batch_size"]}]', 'yellow') + ', ' + set_color('shuffle', 'cyan') + ' = ' +
        set_color(f'[{train_kwargs["shuffle"]}]\n', 'yellow')
    )

    train_data = dataloader(**train_kwargs)
    # The modified training sets use the same dataloader class and sampling settings as train_data.
    transformed_train = get_transformed_train(config, train_kwargs, dataloader, robustness_testing_datasets)
    sparsity_train = get_sparsity_train(config, train_kwargs, dataloader, robustness_testing_datasets)

    # Evaluation
    eval_kwargs = {
        'config': config,
        'batch_size': config['eval_batch_size'],
        'dl_format': InputType.POINTWISE,
        'shuffle': False,
    }
    valid_kwargs = {'dataset': valid_dataset}
    test_kwargs = {'dataset': test_dataset}

    if eval_neg_sample_args['strategy'] != 'none':
        if dataset.label_field in dataset.inter_feat:
            raise ValueError(
                f'It can not validate with `{es.es_str[1]}` '
                f'when inter_feat have label_field [{dataset.label_field}].'
            )
        if sampler is None:
            if model_type != ModelType.SEQUENTIAL:
                sampler = Sampler(phases, original_datasets, eval_neg_sample_args['distribution'])
            else:
                sampler = RepeatableSampler(phases, dataset, eval_neg_sample_args['distribution'])
        else:
            sampler.set_distribution(eval_neg_sample_args['distribution'])
        eval_kwargs['neg_sample_args'] = eval_neg_sample_args
        valid_kwargs['sampler'] = sampler.set_phase('valid')
        test_kwargs['sampler'] = sampler.set_phase('test')

    valid_kwargs.update(eval_kwargs)
    test_kwargs.update(eval_kwargs)

    # From here on `dataloader` is the evaluation dataloader class.
    dataloader = get_data_loader('evaluation', config, eval_neg_sample_args)
    logger.info(
        set_color('Build', 'pink') + set_color(f' [{dataloader.__name__}]', 'yellow') + ' for ' +
        set_color('[evaluation]', 'yellow') + ' with format ' + set_color(f'[{eval_kwargs["dl_format"]}]', 'yellow')
    )
    logger.info(es)
    logger.info(
        set_color('[evaluation]', 'pink') + set_color(' batch_size', 'cyan') + ' = ' +
        set_color(f'[{eval_kwargs["batch_size"]}]', 'yellow') + ', ' + set_color('shuffle', 'cyan') + ' = ' +
        set_color(f'[{eval_kwargs["shuffle"]}]\n', 'yellow')
    )

    valid_data = dataloader(**valid_kwargs)
    test_data = dataloader(**test_kwargs)

    transformed_test = None
    if 'transformation_test' in robustness_testing_datasets:
        # Work on a copy: assigning through an alias used to overwrite test_kwargs['dataset'],
        # leaving test_kwargs pointing at the transformed set for any code that follows.
        transformed_test_kwargs = dict(test_kwargs)
        transformed_test_kwargs['dataset'] = robustness_testing_datasets['transformation_test']
        transformed_test = dataloader(**transformed_test_kwargs)

    slice_test = get_slice_test(eval_kwargs, test_kwargs, dataloader, robustness_testing_datasets)
    distributional_slice_test = get_distributional_slice_test(eval_kwargs, test_kwargs, dataloader,
                                                              robustness_testing_datasets)

    if save:
        save_split_dataloaders(config, dataloaders=(train_data, valid_data, test_data))

    robustness_testing_data = {'slice': slice_test,
                               'distributional_slice': distributional_slice_test,
                               'transformation_train': transformed_train,
                               'transformation_test': transformed_test,
                               'sparsity': sparsity_train}

    return train_data, valid_data, test_data, robustness_testing_data


def get_config_dict(robustness_tests, base_config_dict):
    """
    Merges the robustness-test settings and the base settings into one config dict.

    Args:
        robustness_tests (dict): robustness test config dict, or None
        base_config_dict (dict): train/data/eval/model/hyperparam config dict, or None

    Returns:
        config_dict (dict): a new dict holding both inputs when both are given (entries of
        ``base_config_dict`` win on duplicate keys); the single given input itself when only
        one is given; an empty dict when both are None.
    """
    has_tests = robustness_tests is not None
    has_base = base_config_dict is not None

    if has_tests and has_base:
        return {**robustness_tests, **base_config_dict}
    if has_tests:
        return robustness_tests
    if has_base:
        return base_config_dict
    return {}


def train_and_test(model, dataset, robustness_tests=None, base_config_dict=None, save_model=True):
    """
    Train a recommendation model and run robustness tests.

    The model is trained and evaluated on the original splits, evaluated again on every modified
    test set that was built, and re-trained from scratch on every modified training set.

    Args:
        model (str): Name of model to be trained.
        dataset (str): Dataset name; must match the dataset's folder name located in 'data_path' path.
        robustness_tests (dict): Configuration dictionary for robustness tests.
        base_config_dict: Configuration dictionary. If no config passed, takes default values.
        save_model (bool): Determines whether or not to externally save the model after training.

    Returns:
        dict: evaluation results under the keys ``test_result``, ``distributional_test_result``,
        ``transformation_test_result``, ``sparsity_test_result`` and ``slice_test_result``;
        a robustness entry is None when the corresponding test did not run.

    """

    config_dict = get_config_dict(robustness_tests, base_config_dict)
    config = Config(model=model, dataset=dataset, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    logger = getLogger()
    # Drop the second handler left on the root logger before re-initialising logging. The check
    # used to be "any handler at all", which raised IndexError when exactly one was attached.
    if len(logger.handlers) > 1:
        logger.removeHandler(logger.handlers[1])
    init_logger(config)

    logger.info(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    train_data, valid_data, test_data, robustness_testing_data = data_preparation(config, dataset, save=True)

    for robustness_test in robustness_testing_data:
        if robustness_testing_data[robustness_test] is not None:
            logger.info(set_color('Robustness Test', 'yellow') + f': {robustness_test}')

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model training
    best_valid_score, best_valid_result = trainer.fit(
        train_data, valid_data, saved=save_model, show_progress=config['show_progress']
    )

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=save_model,
                                   show_progress=config['show_progress'])
    logger.info(set_color('best valid ', 'yellow') + f': {best_valid_result}')
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    test_result_transformation = None
    test_result_sparsity = None
    test_result_slice = None
    test_result_distributional_slice = None

    # Modified test sets: evaluated with the model trained on the original training data.
    if robustness_testing_data['slice'] is not None:
        test_result_slice = trainer.evaluate(robustness_testing_data['slice'], load_best_model=save_model,
                                             show_progress=config['show_progress'])
        logger.info(set_color('test result for slice', 'yellow') + f': {test_result_slice}')

    if robustness_testing_data['distributional_slice'] is not None:
        test_result_distributional_slice = trainer.evaluate(robustness_testing_data['distributional_slice'],
                                                            load_best_model=save_model,
                                                            show_progress=config['show_progress'])
        logger.info(set_color('test result for distributional slice', 'yellow') +
                    f': {test_result_distributional_slice}')

    if robustness_testing_data['transformation_test'] is not None:
        test_result_transformation = trainer.evaluate(robustness_testing_data['transformation_test'],
                                                      load_best_model=save_model,
                                                      show_progress=config['show_progress'])
        logger.info(set_color('test result for transformation on test', 'yellow') + f': {test_result_transformation}')

    # Modified training sets: a fresh model is trained on each and evaluated on the original test set.
    if robustness_testing_data['transformation_train'] is not None:
        transformation_model = get_model(config['model'])(config, robustness_testing_data['transformation_train']).to(
            config['device'])
        logger.info(transformation_model)
        transformation_trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, transformation_model)
        best_valid_score_transformation, best_valid_result_transformation = transformation_trainer.fit(
            robustness_testing_data['transformation_train'], valid_data, saved=save_model,
            show_progress=config['show_progress'])
        # NOTE(review): this reuses test_result_transformation, so when both a transformed test set
        # and a transformed training set exist, the former's result is replaced in the returned dict.
        test_result_transformation = transformation_trainer.evaluate(test_data, load_best_model=save_model,
                                                                     show_progress=config['show_progress'])
        logger.info(
            set_color('best valid for transformed training set', 'yellow') + f': {best_valid_result_transformation}')
        logger.info(set_color('test result for transformed training set', 'yellow') + f': {test_result_transformation}')

    if robustness_testing_data['sparsity'] is not None:
        sparsity_model = get_model(config['model'])(config, robustness_testing_data['sparsity']).to(config['device'])
        logger.info(sparsity_model)
        sparsity_trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, sparsity_model)
        best_valid_score_sparsity, best_valid_result_sparsity = sparsity_trainer.fit(
            robustness_testing_data['sparsity'], valid_data, saved=save_model,
            show_progress=config['show_progress'])
        test_result_sparsity = sparsity_trainer.evaluate(test_data, load_best_model=save_model,
                                                         show_progress=config['show_progress'])
        logger.info(set_color('best valid for sparsified training set', 'yellow') + f': {best_valid_result_sparsity}')
        logger.info(set_color('test result for sparsified training set', 'yellow') + f': {test_result_sparsity}')

    # Detach all handlers and shut logging down so the next run starts from a clean logger.
    logger.handlers.clear()
    shutdown()
    del logger

    return {
        'test_result': test_result,
        'distributional_test_result': test_result_distributional_slice,
        'transformation_test_result': test_result_transformation,
        'sparsity_test_result': test_result_sparsity,
        'slice_test_result': test_result_slice
    }


def test(model, dataset, model_path, dataloader_path=None, robustness_tests=None, base_config_dict=None):
    """
    Test a pre-trained model from file path. Note that the only robustness test applicable here
    is slicing.

    Args:
        model (str): Name of model.
        dataset (str): Name of dataset.
        model_path (str): Path to saved model.
        dataloader_path (str): Path of previously saved (train, valid, test) dataloaders, appended
            verbatim to SAVED_DIR. When None, the dataloaders are rebuilt from the dataset. No slice
            test is run when dataloaders are loaded from file.
        robustness_tests (dict): Configuration dictionary for robustness tests.
        base_config_dict (dict): Configuration dictionary for data/model/training/evaluation.

    Returns:
        dict: ``test_result`` for the original test set and ``slice_test_result`` for the sliced
        test set (None when no slice was built).

    """
    config_dict = get_config_dict(robustness_tests, base_config_dict)
    config = Config(model=model, dataset=dataset, config_dict=config_dict)
    init_seed(config['seed'], config['reproducibility'])

    # logger initialization
    logger = getLogger()
    # Drop the second handler left on the root logger before re-initialising logging. The check
    # used to be "any handler at all", which raised IndexError when exactly one was attached.
    if len(logger.handlers) > 1:
        logger.removeHandler(logger.handlers[1])
    init_logger(config)

    # dataset filtering
    dataset = create_dataset(config)
    logger.info(dataset)

    # dataset splitting
    if dataloader_path is None:
        train_data, _, test_data, robustness_testing_data = data_preparation(config, dataset, save=False)
    else:
        # NOTE(review): unpickling runs arbitrary code from the file; only load dataloaders that
        # were saved by a trusted run.
        # The path is still built by plain concatenation, so existing callers are unaffected.
        with open(SAVED_DIR + dataloader_path, "rb") as saved_dataloaders:
            train_data, valid_data, test_data = pickle.load(saved_dataloaders)
        robustness_testing_data = {"slice": None, "transformation": None, "sparsity": None}

    # model loading and initialization
    model = get_model(config['model'])(config, train_data).to(config['device'])
    logger.info(model)

    # trainer loading and initialization
    trainer = get_trainer(config['MODEL_TYPE'], config['model'])(config, model)

    # model evaluation
    test_result = trainer.evaluate(test_data, load_best_model=True, model_file=model_path,
                                   show_progress=config['show_progress'])
    logger.info(set_color('test result', 'yellow') + f': {test_result}')

    test_result_slice = None
    if robustness_testing_data['slice'] is not None:
        test_result_slice = trainer.evaluate(robustness_testing_data['slice'], load_best_model=True,
                                             model_file=model_path,
                                             show_progress=config['show_progress'])
        logger.info(set_color('test result for slice', 'yellow') + f': {test_result_slice}')

    return {
        'test_result': test_result,
        'slice_test_result': test_result_slice
    }

In [None]:
# Train every listed model with the same base settings and robustness tests,
# collecting the returned metrics per model.
all_results = {}
for model in ["BPR"]:
    dataset = "ml-100k"
    base_config_dict = {
        'data_path': DATASETS_DIR,
        'show_progress': False,
        'save_dataset': True,
        'load_col': {'inter': ['user_id', 'item_id', 'rating', 'timestamp'],
                     'user': ['user_id', 'age', 'gender', 'occupation'],
                     'item': ['item_id', 'release_year', 'class']}
    }
    robustness_dict = {
        'distribution_shift': {'gender': {"M": .9, "F": .1}},
        'slice': {'by_feature': {'age': {'min': 40}}},
        'sparsify': {'fraction_removed': 0.05},
        # 'transform_features': {'fraction_removed': {'occupation': 0.2}},
        'transform_interactions': {'fraction_transformed': 0.2},
    }
    results = train_and_test(model=model, dataset=dataset, robustness_tests=robustness_dict,
                             base_config_dict=base_config_dict)
    # `all_results` was created but never filled; without this only the last model's
    # metrics (in `results`) would survive the loop.
    all_results[model] = results

14 Jan 06:48    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 2020
state = INFO
reproducibility = True
data_path = /usr/local/lib/python3.7/dist-packages/recbole/config/../dataset_example/ml-100k
show_progress = False

Training Hyper Parameters:
checkpoint_dir = saved
epochs = 300
train_batch_size = 2048
learner = adam
learning_rate = 0.001
training_neg_sample_num = 1
training_neg_sample_distribution = uniform
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
draw_loss_pic = False
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_setting = RO_RS,full
group_by_user = True
split_ratio = [0.8, 0.1, 0.1]
leave_one_num = 2
real_time_process = False
metrics = ['Recall', 'MRR', 'NDCG', 'Hit', 'Precision']
topk = [10]
valid_metric = MRR@10
eval_batch_size = 4096
metric_decimal_place = 4

Dataset Hyper Parameters:
field_separator = 	
seq_separator =  
USER_ID_FIELD = user_id
ITEM_ID_FIELD = item_id
RATING_FIELD = rating
TIME_FIELD = timestamp


Preparing subpopulation of Test set.
Preparing test set slice based on feature values.
Slice Robustness Test Summary
Original Test Size:  9596
Original Test Users:  943
Subpopulation Size:  2535
Subpopulation Users:  293
Preparing sparsified training data set.
Sparsity Robustness Test Summary
Original Train Size:  80808
Original Train Users:  943
Sparsified Train Size:  76783
Sparsified Train Users:  943
Preparing training set transformation.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


Interaction Transformation Robustness Test Summary
Preparing distributional test slice.
Training Distribution:
Val:  M Percent:  0.7423274923274923
Val:  F Percent:  0.25767250767250766


14 Jan 06:49    INFO  Build [GeneralNegSampleDataLoader] for [train] with format [InputType.PAIRWISE]
14 Jan 06:49    INFO  [train] Negative Sampling: {'strategy': 'by', 'by': 1, 'distribution': 'uniform'}
14 Jan 06:49    INFO  [train] batch_size = [2048], shuffle = [True]

14 Jan 06:49    INFO  Build [GeneralFullDataLoader] for [evaluation] with format [InputType.POINTWISE]
14 Jan 06:49    INFO  Evaluation Setting:
	Group by user_id
	Ordering: {'strategy': 'shuffle'}
	Splitting: {'strategy': 'by_ratio', 'ratios': [0.8, 0.1, 0.1]}
	Negative Sampling: {'strategy': 'full', 'distribution': 'uniform'}
14 Jan 06:49    INFO  [evaluation] batch_size = [4096], shuffle = [False]

14 Jan 06:49    INFO  Saved split dataloaders: saved/ml-100k-for-BPR-dataloader.pth
14 Jan 06:49    INFO  Robustness Test: slice
14 Jan 06:49    INFO  Robustness Test: distributional_slice
14 Jan 06:49    INFO  Robustness Test: transformation_train
14 Jan 06:49    INFO  Robustness Test: sparsity
14 Jan 06:49    INFO  B

In [None]:
# Display the metrics dict returned by the last train_and_test call in the loop above.
results

{'distributional_test_result': {'hit@10': 0.6877,
  'mrr@10': 0.4116,
  'ndcg@10': 0.2593,
  'precision@10': 0.174,
  'recall@10': 0.2333},
 'slice_test_result': {'hit@10': 0.727,
  'mrr@10': 0.4078,
  'ndcg@10': 0.2644,
  'precision@10': 0.1737,
  'recall@10': 0.2497},
 'sparsity_test_result': {'hit@10': 0.7529,
  'mrr@10': 0.4637,
  'ndcg@10': 0.282,
  'precision@10': 0.1931,
  'recall@10': 0.2362},
 'test_result': {'hit@10': 0.755,
  'mrr@10': 0.46,
  'ndcg@10': 0.2811,
  'precision@10': 0.1933,
  'recall@10': 0.2357},
 'transformation_test_result': {'hit@10': 0.7646,
  'mrr@10': 0.4621,
  'ndcg@10': 0.2825,
  'precision@10': 0.194,
  'recall@10': 0.2404}}

> **References**
1. Zohreh Ovaisi, Shelby Heinecke, Jia Li, Yongfeng Zhang, Elena Zheleva, and Caiming Xiong. *RGRecSys: A Toolkit for Robustness Evaluation of Recommender Systems*. WSDM, 2022. https://arxiv.org/abs/2201.04399
2. Source code: https://github.com/salesforce/RGRecSys