# Validation

    a. Accuracy: NDCG@10 for all "faithful" users, and for "core" users
    b. Coverage NDCG@10 for all "faithful" users
    c. Novelty, with Popularity of items defined on the full training set
    d. Item fairness: Interaction Segmentation as defined in 3.c (percentiles) on the full training set, according to a [60, 30, 10] percentile split. Keyword: [Short Head, Mid Tail, Long Tail].
    e. User fairness: Activity Segmentation as defined in 3.b (percentiles), according to a [60, 30, 10] percentage of users split. Keyword: [active,  semi-active, inactive]

In [1]:
from recbole.quick_start import load_data_and_model
import pandas as pd
from tqdm import tqdm
from recbole.utils.case_study import full_sort_topk
import os
from recsyslearn.accuracy.metrics import NDCG
from recsyslearn.dataset.utils import find_relevant_items
from recsyslearn.beyond_accuracy.metrics import Coverage
from recsyslearn.beyond_accuracy.metrics import Novelty
from recsyslearn.fairness.metrics import KullbackLeibler
from recsyslearn.dataset.segmentations import PopularityPercentage
from recsyslearn.dataset.segmentations import InteractionSegmentation
# from recsyslearn.dataset.segmentations import ActivitySegmentation
import numpy as np
import json

In [2]:
from recsyslearn.fairness.metrics import FairnessMetric

In [3]:
from recsyslearn.fairness.utils import eff_matrix
from recsyslearn.utils import check_columns_exist

In [None]:
class ActivitySegmentation(Segmentation):

    """
    Segmentation of users based on their number of interaction.
    """

    @classmethod
    def segment(cls, dataset: pd.DataFrame, proportions=None, min_interaction: int = 0) -> pd.DataFrame:
        """
        Segmentation of users based on their interactions with different items.
        Parameters
        ----------
        dataset : pd.DataFrame
            The complete dataset.
        proportions : list, default [0.8, 0.2]
            The proportion of interactions wanted for every group.
        min_interaction : int, default 0
            The minimum number of interaction allowed per user. Users below this threshold will be removed.
        Raises
        ------
        SegmentationNotSupportedException
            If len(proportion) not in (1, 2, 3).
        WrongProportionsException
            If sum(proportion) is not 1, which means it doesn't cover all the items/users.
        Return
        ------
        DataFrame with users and belonging group.
        """

        if proportions is None:
            proportions = [0.1, 0.9]

        if len(proportions) == 1:
            return dataset

        if len(proportions) not in (2, 3):
            raise SegmentationNotSupportedException(
                "Number of supported group is between 1 and 3.")

        if np.sum(proportions * 10) / 10 != 1:
            raise WrongProportionsException()

        user_groups = dataset.groupby('user').size().reset_index(name='count')
        user_groups = user_groups.loc[user_groups['count']
                                      >= min_interaction, :]

        user_groups.loc[:, 'count'] = user_groups.loc[:, 'count'].apply(
            lambda x: x + np.random.choice(list(range(10))))
        user_groups = user_groups.sort_values('count', ascending=False)
        user_groups.loc[:, 'count'] = np.arange(user_groups.shape[0]) + 1
        first_thr = np.rint(proportions[0] * user_groups.shape[0])
        second_thr = np.rint(proportions[1] * user_groups.shape[0]) + first_thr
        first_thr = first_thr if first_thr > 0 else 1
        first_group = user_groups.loc[user_groups['count'] <=
            first_thr, 'user']
        second_group = user_groups.loc[user_groups['count'].lt(
            second_thr), 'user']

        conditions = [user_groups['user'].isin(
            first_group), user_groups['user'].isin(second_group)]
        choices = (1, 2)
        default = len(proportions)
        user_groups.loc[:, 'group'] = np.select(
            conditions, choices, default=default)

        return user_groups[['user', 'group']].astype({'user': str, 'group': str})

In [4]:
class UserLevelEntropy(FairnessMetric):

    """
    Entropy evaluator for recommender systems, returning an entropy value for each user.
    """

    def evaluate(self, top_n: pd.DataFrame, rel_matrix: pd.DataFrame = None) -> float:
        """
        Compute the entropy of a model by using its recommendation list.


        Parameters
        ----------
        top_n : pd.DataFrame
            Top N recommendations' lists for every user with items or users already segmented.

        rel_matrix : pd.DataFrame, default None
            Relevant items for users. It could be, for example, the items with a rating >= threshold.


        Raises
        ------
        ColumnsNotExistException
            If top_n not in the form ('user', 'item', 'rank', 'group').


        Return
        ------
        The computed entropy.
        """

        check_columns_exist(top_n, ['user', 'item', 'rank', 'group'])

        top_n = eff_matrix(
            top_n, rel_matrix) if rel_matrix is not None else top_n

        top_n = top_n.groupby(['user', 'group'], as_index=False).sum()
        top_n['sum'] = [top_n[top_n.user == user]['rank'].sum() for user in top_n.user.values]

        top_n['rank'] = top_n['rank'] / top_n['sum']
        top_n['rank'] = - top_n['rank'] * np.log2(top_n['rank'])

        top_n = top_n[['user', 'rank']].groupby(['user'], as_index=False).sum()

        return top_n

In [5]:
def chunker(seq, size):
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))

In [6]:
harmful_list = [0.00, 0.01, 0.05, 0.10]
ITEM_TARGET = [0.6, 0.3, 0.1]
ITEM_TARGET_DF = pd.DataFrame()
ITEM_TARGET_DF['group'] = list(range(1, len(ITEM_TARGET) + 1))
ITEM_TARGET_DF['target_representation'] = ITEM_TARGET


USER_TARGET = [0.6, 0.3, 0.1]
USER_TARGET_DF = pd.DataFrame()
USER_TARGET_DF['group'] = list(range(1, len(USER_TARGET) + 1))
USER_TARGET_DF['target_representation'] = USER_TARGET


# DATASET_LIST = ['ml-100k']
DATASET_LIST = ['ml-100k', 'lastfm']
BASE_FOLDER = '/home/marta/jku/activity_fair/'

metrics_df = pd.DataFrame(columns=[
    'model',
    'harm',
    'dataset',
    'NDCG@10_full',
    'NDCG@10_core',
    'coverage',
    'novelty',
    'kl_item',
    'kl_user',
])
for DATASET in DATASET_LIST:
    FULL_DATASETS_FOLDER = BASE_FOLDER + f'datasets/full_datasets/{DATASET}/'
    BINARIZED_CORE_INTERACTION_FILE = FULL_DATASETS_FOLDER + f'{DATASET}_bin_core.inter'
    binarized = pd.read_csv(BINARIZED_CORE_INTERACTION_FILE, sep='\t')
    # print(binarized.info())
    binarized.columns = ['user', 'item']
    

    ITEM_GROUPS = InteractionSegmentation().segment(
        binarized, 
        proportions=ITEM_TARGET,
    )
    ITEM_GROUPS = ITEM_GROUPS.astype({
        'item': int,
    })
    USER_GROUPS = ActivitySegmentation().segment(
        binarized, 
        proportions=USER_TARGET,
    )
    USER_GROUPS = USER_GROUPS.astype({
        'user': str,
    })
    
    ### 
    harmful = 0.10
    DS_STRING = f'{DATASET}_harm{str(int(100*harmful)).zfill(2)}'
    TEST_DATA_FILE = f'{BASE_FOLDER}/test/{DS_STRING}.tsv'
    TEST_DATASET_CORE = pd.read_csv(TEST_DATA_FILE, sep='\t')
    POS_ITEMS_TEST_CORE = find_relevant_items(TEST_DATASET_CORE).astype({
        'user': str,
    })
    test_users_core = POS_ITEMS_TEST_CORE.user.values

    for harmful in harmful_list:
    
        DS_STRING = f'{DATASET}_harm{str(int(100*harmful)).zfill(2)}'
        
        row_dict = {}
        row_dict['model'] = 'bpr'
        row_dict['dataset'] = DATASET
        row_dict['harm'] = harmful
        
        
        MODEL_FOLDER = f'{BASE_FOLDER}saved/{DS_STRING}/'
        top_k_df = pd.read_csv(f'{MODEL_FOLDER}/best_model.tsv', sep='\t')
        #print(top_k_df.info())
        top_k_df = top_k_df.astype({
            'user': str,
            'item': int,
        })
        
        TEST_DATA_FILE = f'{BASE_FOLDER}/test/{DS_STRING}.tsv'
        TEST_DATASET_RANK = pd.read_csv(TEST_DATA_FILE, sep='\t')
        POS_ITEMS_TEST = find_relevant_items(TEST_DATASET_RANK).astype({
            'user': str,
        })
        test_users = top_k_df.user.values

        POS_ITEMS_TEST = POS_ITEMS_TEST.astype({
            'user': str,
        })
        POS_ITEMS_TEST = POS_ITEMS_TEST[POS_ITEMS_TEST.user.isin(test_users)]
        
        # NDCG Full
        ndcg = NDCG().evaluate(
            top_n=top_k_df,
            pos_items=POS_ITEMS_TEST,
            ats=(10,),
        ).mean()
        # print(top_k_df.info())
        # print(POS_ITEMS_TEST.info())
        row_dict['NDCG@10_full'] = ndcg['NDCG@10']
        
        
        # NDCG Core
        POS_ITEMS_TEST_CORE = POS_ITEMS_TEST[POS_ITEMS_TEST.user.isin(test_users_core)]
        top_k_core_df = top_k_df[top_k_df.user.isin(test_users_core)]
        
        ndcg = NDCG().evaluate(
            top_n=top_k_core_df,
            pos_items=POS_ITEMS_TEST_CORE,
            ats=(10,),
        ).mean()
        # print(top_k_df.info())
        # print(POS_ITEMS_TEST.info())
        row_dict['NDCG@10_core'] = ndcg['NDCG@10']
        
        
        # COVERAGE
        # print(binarized.info())
        items = list(binarized['item'].unique())
        coverage = Coverage.evaluate(
            top_n=top_k_df,
            items=items,
        )
        row_dict['coverage'] = coverage
        
        # NOVELTY

        ITEM_POPULARITY = PopularityPercentage().segment(binarized, group='item')
        popularity_top_k = top_k_df.merge(ITEM_POPULARITY, how='inner', left_on='item', right_on='item')
        novelty = Novelty.evaluate(
            top_n=popularity_top_k,
            popularity_definition='percentage',
        )
        row_dict['novelty'] = novelty

        # Item KL
        item_segmented_top_k = top_k_df.merge(ITEM_GROUPS, how='inner', left_on='item', right_on='item')
        item_segmented_top_k['rank'] = item_segmented_top_k['rank'].astype(float)
        item_segmented_top_k['group'] = item_segmented_top_k['group'].astype(int)

        item_kl = KullbackLeibler().evaluate(
            top_n=item_segmented_top_k,
            target_representation=ITEM_TARGET_DF,
        )
        row_dict['kl_item'] = item_kl
        # User KL
        user_segmented_top_k = top_k_df.merge(USER_GROUPS, how='inner', left_on='user', right_on='user')
        user_segmented_top_k['rank'] = user_segmented_top_k['rank'].astype(float)
        user_segmented_top_k['group'] = user_segmented_top_k['group'].astype(int)

        user_segmented_testset = top_k_df.merge(USER_GROUPS, how='inner', left_on='user', right_on='user')
        user_segmented_top_k['rank'] = user_segmented_top_k['rank'].astype(float)
        user_segmented_top_k['group'] = user_segmented_top_k['group'].astype(int)

        user_segmented_rel_matrix = TEST_DATASET_RANK.astype({'user': object}).merge(USER_GROUPS, how='inner', left_on='user',
                                                              right_on='user')

        user_segmented_rel_matrix['group'] = user_segmented_rel_matrix['group'].astype(int)
        
        user_kl = KullbackLeibler().evaluate(
            top_n=user_segmented_top_k,
            target_representation=USER_TARGET_DF,
            rel_matrix=user_segmented_rel_matrix,
        )
        row_dict['kl_user'] = user_kl

        metrics_df.loc[len(metrics_df)] = row_dict

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


In [7]:
pd.reset_option("display.precision")
metrics_df

Unnamed: 0,model,harm,dataset,NDCG@10_full,NDCG@10_core,coverage,novelty,kl_item,kl_user
0,bpr,0.0,ml-100k,0.308688,0.319263,0.530496,8.951056,0.196501,0.0
1,bpr,0.01,ml-100k,0.300252,0.306165,0.472681,8.891897,0.263559,0.0
2,bpr,0.05,ml-100k,0.295771,0.305701,0.479034,8.905236,0.240036,0.0
3,bpr,0.1,ml-100k,0.311504,0.311504,0.517154,8.944851,0.219525,0.0
4,bpr,0.0,lastfm,0.176455,0.177332,0.169202,9.488832,0.526923,0.0
5,bpr,0.01,lastfm,0.174362,0.171891,0.170803,9.494341,0.528596,0.0
6,bpr,0.05,lastfm,0.180102,0.179386,0.167486,9.864206,0.473751,0.0
7,bpr,0.1,lastfm,0.178875,0.178875,0.156793,9.663178,0.508998,0.0
