# Gini-w, MME, IBO, IWO, OD

In [None]:
import warnings
warnings.simplefilter("ignore")

import pickle
import pandas as pd
import numpy as np

import os

from recbole.config import Config
from recbole.evaluator.evaluator import Evaluator
    
import torch

In [None]:
# Select the pickled struct that belongs to the dataset under study.
dataset = "Amazon_Luxury_Beauty"
model_name = "Pop"
list_file = os.listdir("../struct/")
file_for_dataset = list(filter(lambda fname: dataset in fname, list_file))
# Exactly one file name may contain the dataset name; anything else is ambiguous.
assert len(file_for_dataset) == 1


def get_measure(num_item:int, curr_rec, curr_rel, k, metrics=None):
    """Evaluate a hand-built recommendation scenario with the RecBole evaluator.

    A struct is unpickled from ``../struct/`` (the file selected in
    ``file_for_dataset``), its recommendation fields are overwritten with the
    given toy data, and the configured metrics are computed on it.

    Args:
        num_item: Number of items in the toy scenario.
        curr_rec: Tensor of recommended item ids, one row per user.
        curr_rel: Tensor of relevance flags aligned with ``curr_rec``; the
            callers in this notebook append one extra last column holding the
            number of relevant items per user.
        k: Cut-off passed to the evaluator as ``topk``.
        metrics: Metric names passed to the RecBole config. Defaults to
            ``["IBOIWO", "MME_IIF_AIF"]``.

    Returns:
        Whatever ``Evaluator.evaluate`` returns; the callers index it with
        keys such as ``f"MME_ori@{k}"``.
    """
    # A fresh list per call avoids sharing one mutable default between calls.
    if metrics is None:
        metrics = ["IBOIWO", "MME_IIF_AIF"]

    # NOTE: pickle is only safe here because the struct files are produced locally.
    with open("../struct/"+file_for_dataset[0],"rb") as f:
        struct = pickle.load(f)
    config = Config(
                    model=model_name,
                    dataset=dataset,
                    config_file_list=["../RecBole/recbole/properties/overall.yaml"],
                    config_dict={
                        "topk":k,
                        "metrics":metrics}
                                )
    evaluator = Evaluator(config)
    struct.set("data.num_items", num_item+1) #because -1 in metrics.py
    struct.set("rec.items", curr_rec)
    struct.set("rec.topk", curr_rel)
    struct.set("rec.score",None) #we don't need this

    # Keep, per user, only the recommended item ids whose relevance flag is 1:
    # non-relevant positions become NaN and are filtered out row by row.
    masked = torch.where(curr_rel[:,:num_item]==1, curr_rec, np.nan).numpy()
    rows = [row[~np.isnan(row)].astype(int) for row in masked]

    if len({len(row) for row in rows}) <= 1:
        # All users have the same number of relevant items: a regular 2-D array.
        pos_items = np.asarray(rows)
    else:
        # Rows of different length: build the 1-D object array explicitly,
        # because recent NumPy refuses to create ragged arrays implicitly.
        pos_items = np.empty(len(rows), dtype=object)
        for idx, row in enumerate(rows):
            pos_items[idx] = row
    struct.set("data.pos_items",pos_items.copy())

    return evaluator.evaluate(struct)

# Check Gini-w & OD

In [None]:
import itertools

# Toy setting: 2 users, 3 items, every item relevant to every user.
k = 3
num_item = 3
rel = [1, 1, 1, 1, 1, 1]
permute_item = list(itertools.permutations([1, 2, 3]))

# Enumerate every pair of rankings (user 1, user 2) and report Gini-w for each.
for Ru1, Ru2 in itertools.product(permute_item, repeat=2):
    curr_rec = torch.tensor([Ru1, Ru2], dtype=torch.int32)

    # Relevance of each ranked item, one row per user.
    rel_flags = torch.tensor(rel, dtype=torch.float32).reshape(curr_rec.shape[0], num_item)
    # Number of relevant items per user, appended as the last column.
    num_pos_item = rel_flags.sum(dim=1, keepdim=True)
    curr_rel = torch.cat([rel_flags, num_pos_item], dim=1).int()

    result = get_measure(num_item, curr_rec, curr_rel, k, metrics=["FairWORel"])

    print(curr_rec, result[f"Gini-w_ori@{k}"])


nan
tensor([[1, 2, 3],
        [1, 2, 3]], dtype=torch.int32) 0.156426242
nan
tensor([[1, 2, 3],
        [1, 3, 2]], dtype=torch.int32) 0.1359453927
nan
tensor([[1, 2, 3],
        [2, 1, 3]], dtype=torch.int32) 0.0986939703
nan
tensor([[1, 2, 3],
        [2, 3, 1]], dtype=torch.int32) 0.078213121
nan
tensor([[1, 2, 3],
        [3, 1, 2]], dtype=torch.int32) 0.078213121
nan
tensor([[1, 2, 3],
        [3, 2, 1]], dtype=torch.int32) 0.0372514224
nan
tensor([[1, 3, 2],
        [1, 2, 3]], dtype=torch.int32) 0.1359453927
nan
tensor([[1, 3, 2],
        [1, 3, 2]], dtype=torch.int32) 0.156426242
nan
tensor([[1, 3, 2],
        [2, 1, 3]], dtype=torch.int32) 0.078213121
nan
tensor([[1, 3, 2],
        [2, 3, 1]], dtype=torch.int32) 0.0372514224
nan
tensor([[1, 3, 2],
        [3, 1, 2]], dtype=torch.int32) 0.0986939703
nan
tensor([[1, 3, 2],
        [3, 2, 1]], dtype=torch.int32) 0.078213121
nan
tensor([[2, 1, 3],
        [1, 2, 3]], dtype=torch.int32) 0.0986939703
nan
tensor([[2, 1, 3],
        

In [None]:
import itertools

k = 3
num_item = 3

# Every ranking of the three items and every binary relevance pattern for 2 users.
permute_item = list(itertools.permutations([1, 2, 3]))
permute_rel = list(itertools.product([0, 1], repeat=2 * num_item))

result_list = []

for rel in permute_rel:
    # Patterns without a single relevant item are skipped.
    if 1 not in rel:
        continue

    for Ru1, Ru2 in itertools.product(permute_item, repeat=2):
        curr_rec = torch.tensor([Ru1, Ru2], dtype=torch.int32)

        # Relevance of each ranked item, one row per user.
        rel_flags = torch.tensor(rel, dtype=torch.float32).reshape(curr_rec.shape[0], num_item)
        # Number of relevant items per user, appended as the last column.
        num_pos_item = rel_flags.sum(dim=1, keepdim=True)
        curr_rel = torch.cat([rel_flags, num_pos_item], dim=1).int()

        result = get_measure(num_item, curr_rec, curr_rel, k, metrics=["MDG_OD"])

        result_list.append([rel, curr_rec, result[f"OD-E_ori@{k}"], result[f"OD-F_ori@{k}"]])

In [None]:
# Tabulate the collected runs and drop every row that contains a NaN entry.
df_result_OD = pd.DataFrame(result_list).dropna()

Unnamed: 0,0,1,2,3
216,"(0, 0, 0, 1, 1, 1)","[[tensor(1, dtype=torch.int32), tensor(2, dtyp...",0.666667,0.333333
217,"(0, 0, 0, 1, 1, 1)","[[tensor(1, dtype=torch.int32), tensor(2, dtyp...",0.579380,0.333333
218,"(0, 0, 0, 1, 1, 1)","[[tensor(1, dtype=torch.int32), tensor(2, dtyp...",0.420620,0.333333
219,"(0, 0, 0, 1, 1, 1)","[[tensor(1, dtype=torch.int32), tensor(2, dtyp...",0.333333,0.333333
220,"(0, 0, 0, 1, 1, 1)","[[tensor(1, dtype=torch.int32), tensor(2, dtyp...",0.333333,0.333333
...,...,...,...,...
2263,"(1, 1, 1, 1, 1, 1)","[[tensor(3, dtype=torch.int32), tensor(2, dtyp...",0.166667,0.166667
2264,"(1, 1, 1, 1, 1, 1)","[[tensor(3, dtype=torch.int32), tensor(2, dtyp...",0.166667,0.166667
2265,"(1, 1, 1, 1, 1, 1)","[[tensor(3, dtype=torch.int32), tensor(2, dtyp...",0.210310,0.210310
2266,"(1, 1, 1, 1, 1, 1)","[[tensor(3, dtype=torch.int32), tensor(2, dtyp...",0.289690,0.289690


In [None]:
# Smallest value of column 2 (the OD-E score) among the rows kept after dropna.
df_result_OD.iloc[df_result_OD[2].argmin()][2]

0.0793801643

In [None]:
# Smallest value of column 3 (the OD-F score) among the rows kept after dropna.
df_result_OD.iloc[df_result_OD[3].argmin()][3]

0.0793801643

# Check if MME, IBO, IWO = 0 or 1 is possible for k=n=3, m=2

In [None]:
import itertools

# All 3! orderings of the item ids 1..3.
permute_item = [ranking for ranking in itertools.permutations((1, 2, 3))]
# All 2**6 binary relevance patterns (3 flags for each of the 2 users).
permute_rel = [pattern for pattern in itertools.product((0, 1), repeat=6)]

In [None]:
# One dictionary per tracked extreme, keyed later by relevance pattern.
max_mme, min_mme = {}, {}
max_IBO, min_IBO = {}, {}
max_IWO, min_IWO = {}, {}

# Cut-off and number of items of the toy scenario.
k, num_item = 3, 3


In [None]:
for rel in permute_rel:
    # For this relevance pattern, scan every pair of rankings and remember the
    # largest and smallest MME, IBO and IWO that occur.
    print(f"Finding best and worst permutation for {rel}")
    max_mme_this_rel = max_IBO_this_rel = max_IWO_this_rel = -np.inf
    min_mme_this_rel = min_IBO_this_rel = min_IWO_this_rel = np.inf

    for Ru1, Ru2 in itertools.product(permute_item, repeat=2):
        curr_rec = torch.tensor([Ru1, Ru2], dtype=torch.int32)

        # Relevance of each item in the full recommendation list, one row per user.
        rel_flags = torch.tensor(rel, dtype=torch.float32).reshape(curr_rec.shape[0], num_item)
        # Number of relevant items per user, appended as the last column.
        num_pos_item = rel_flags.sum(dim=1, keepdim=True)
        curr_rel = torch.cat([rel_flags, num_pos_item], dim=1).int()

        result = get_measure(num_item, curr_rec, curr_rel, k)
        mme_score = result[f"MME_ori@{k}"]
        ibo_score = result[f"IBO_ori@{k}"]
        iwo_score = result[f"IWO_ori@{k}"]

        # max/min keep the current extreme unless the new score is strictly
        # better, so NaN scores never replace it.
        max_mme_this_rel = max(max_mme_this_rel, mme_score)
        max_IBO_this_rel = max(max_IBO_this_rel, ibo_score)
        max_IWO_this_rel = max(max_IWO_this_rel, iwo_score)

        min_mme_this_rel = min(min_mme_this_rel, mme_score)
        min_IBO_this_rel = min(min_IBO_this_rel, ibo_score)
        min_IWO_this_rel = min(min_IWO_this_rel, iwo_score)

    max_mme[rel], min_mme[rel] = max_mme_this_rel, min_mme_this_rel
    max_IBO[rel], min_IBO[rel] = max_IBO_this_rel, min_IBO_this_rel
    max_IWO[rel], min_IWO[rel] = max_IWO_this_rel, min_IWO_this_rel

Finding best and worst permutation for (0, 0, 0, 0, 0, 0)
Finding best and worst permutation for (0, 0, 0, 0, 0, 1)
Finding best and worst permutation for (0, 0, 0, 0, 1, 0)
Finding best and worst permutation for (0, 0, 0, 0, 1, 1)
Finding best and worst permutation for (0, 0, 0, 1, 0, 0)
Finding best and worst permutation for (0, 0, 0, 1, 0, 1)
Finding best and worst permutation for (0, 0, 0, 1, 1, 0)
Finding best and worst permutation for (0, 0, 0, 1, 1, 1)
Finding best and worst permutation for (0, 0, 1, 0, 0, 0)
Finding best and worst permutation for (0, 0, 1, 0, 0, 1)
Finding best and worst permutation for (0, 0, 1, 0, 1, 0)
Finding best and worst permutation for (0, 0, 1, 0, 1, 1)
Finding best and worst permutation for (0, 0, 1, 1, 0, 0)
Finding best and worst permutation for (0, 0, 1, 1, 0, 1)
Finding best and worst permutation for (0, 0, 1, 1, 1, 0)
Finding best and worst permutation for (0, 0, 1, 1, 1, 1)
Finding best and worst permutation for (0, 1, 0, 0, 0, 0)
Finding best a

In [None]:
# Turn each {relevance pattern: extreme score} dictionary into a pandas Series.
df_max_mme, df_max_IBO, df_max_IWO = map(pd.Series, (max_mme, max_IBO, max_IWO))
df_min_mme, df_min_IWO, df_min_IBO = map(pd.Series, (min_mme, min_IWO, min_IBO))

Most unfair (Max MME) for this case, across all permutations of relevance and recommendation list

In [None]:
df_max_mme.max()

0.3888888889

In [None]:
7/18

0.3888888888888889

Most unfair IBO (min) and IWO (max) for this case, across all permutations of relevance and recommendation list

In [None]:
df_min_IBO.min() #OK

0.0

In [None]:
df_max_IWO.max() #OK

1.0

Most fair IBO (max) and IWO (min) for this case, across all permutations of relevance and recommendation list

In [None]:
df_max_IBO.max() #not ok: the recorded maximum is 0.667, i.e. IBO = 1 is not attained in this setting

0.6666666667

In [None]:
df_min_IWO.min() #not ok: the recorded minimum is 0.333, i.e. IWO = 0 is not attained in this setting

0.3333333333

Most fair MME (min), and most unfair IBO (min), IWO (max) for the case where all items are relevant to all users

In [None]:
df_min_mme[(1,1,1,1,1,1)]

0.0555555556

In [None]:
df_min_mme[(0,0,1,0,0,1)]

0.1388888889

In [None]:
5/36

0.1388888888888889

In [None]:
1/18

0.05555555555555555

In [None]:
df_max_IWO[(1,1,1,1,1,1)]

0.6666666667

# Is IBO/IWO always fair?

In [None]:
import itertools

k = 2
num_item = 3
IWO_result = {}

# Every ranking of the item ids 1..num_item.
permute_item = list(itertools.permutations(range(1, num_item + 1)))

for rel in [(1,1,0,1,1,0)]:
    # Score IWO for every pair of rankings under this single relevance pattern.
    for Ru1, Ru2 in itertools.product(permute_item, repeat=2):
        curr_rec = torch.tensor([Ru1, Ru2], dtype=torch.int32)

        # Relevance flags, one row per user.
        rel_flags = torch.tensor(rel, dtype=torch.float32).reshape(curr_rec.shape[0], num_item)
        # Number of relevant items per user, appended as the last column.
        num_pos_item = rel_flags.sum(dim=1, keepdim=True)
        curr_rel = torch.cat([rel_flags, num_pos_item], dim=1).int()

        result = get_measure(num_item, curr_rec, curr_rel, k,metrics=["IBOIWO"])

        IWO_result[str([Ru1,Ru2])] = result[f"IWO_ori@{k}"]

In [None]:
pd.Series(IWO_result).sort_values().unique()

array([ 0., nan])

In [None]:
import itertools

k = 1
num_item = 3
IBO_result = {}

# Every ranking of the item ids 1..num_item.
permute_item = list(itertools.permutations(range(1, num_item + 1)))

for rel in [(1,0,0,1,0,0,1,0,0)]:
    # Score IBO for every triple of rankings (three users) under this pattern.
    for Ru1, Ru2, Ru3 in itertools.product(permute_item, repeat=3):
        curr_rec = torch.tensor([Ru1, Ru2, Ru3], dtype=torch.int32)

        # Relevance flags, one row per user.
        rel_flags = torch.tensor(rel, dtype=torch.float32).reshape(curr_rec.shape[0], num_item)
        # Number of relevant items per user, appended as the last column.
        num_pos_item = rel_flags.sum(dim=1, keepdim=True)
        curr_rel = torch.cat([rel_flags, num_pos_item], dim=1).int()

        result = get_measure(num_item, curr_rec, curr_rel, k,metrics=["IBOIWO"])
        IBO_result[str([Ru1,Ru2,Ru3])] = result[f"IBO_ori@{k}"]

# Distinct IBO values observed over all enumerated rankings.
pd.Series(IBO_result).sort_values().unique()

array([ 1., nan])

# Why does IBO increase for ml-1m?

In [6]:
import pickle
from recbole.config import Config
import pandas as pd
import numpy as np
from collections import Counter
from itertools import chain

list_dataset = [
    "lastfm",
    "ml-1m",
]

# Number of items per dataset; item ids 1..item_count[dataset] are checked below.
item_count = {
    "lastfm":2823,
    "ml-1m":3307,
}


# dataset name -> list of item ids that never occur in that dataset's test split
item_with_no_rel_user = dict()

for dataset in list_dataset:
    config = Config(
                model="Pop", 
                dataset=dataset, 
                config_file_list=["../RecBole/recbole/properties/overall.yaml"],
                )

    # NOTE: pickle is only safe here because the split files are produced locally.
    with open(f"../train_val_test/{dataset}_test.pickle","rb") as f:
        data = pickle.load(f)
    test = pd.DataFrame(data)

    item_id = config.final_config_dict["ITEM_ID_FIELD"]

    # One list of test items per user; only the item column is aggregated.
    test = test.groupby("user_id")[item_id].agg(list)

    # Occurrences of each item over all users' test lists. Chaining the lists
    # is linear, whereas summing them concatenates repeatedly.
    count_rel_user_per_item = Counter(chain.from_iterable(test))

    # A separate loop name keeps `item_id` bound to the item field name.
    item_with_no_rel_user[dataset] = [
        candidate_item
        for candidate_item in range(1, item_count[dataset]+1)
        if candidate_item not in count_rel_user_per_item
    ]

In [33]:
import torch
list_dataset = [
    "lastfm",
    "ml-1m",
]

model_list = [
    "BPR",
    ]

# Width of the sliding window over the ranking positions.
window = 5
total_k = 10

filenames_of_best_model = [
 'filename_best_model_for_lastfm.pickle',
 'filename_best_model_for_ml-1m.pickle',
 ]

for dataset in list_dataset:

    filename = [f for f in filenames_of_best_model if dataset in f][0]

    # NOTE: pickle is only safe here because the result files are produced locally.
    with open("../results/"+filename,"rb") as f:
        models = pickle.load(f)

    #non-pop model
    for model in models:

        # NOTE: this rebinds the notebook-global `model_name` that get_measure reads.
        model_name = model.split("-")[0]
        if model_name not in model_list:
            continue

        with open(f"../hendrix/struct/struct_{dataset}_"+model,"rb") as f:
            struct = pickle.load(f)
        item_matrix = struct.get('rec.items')
        rec_mat = struct.get('rec.topk')

        # Slide the window over start positions 0..4.
        for k in range(0,5,1):
            updated_rec_mat_wo_last = rec_mat[:,k:k+window]
            updated_item_matrix = item_matrix[:,k:k+window]

            # Count distinct items flagged relevant inside the window. Selecting
            # them with a boolean mask avoids a placeholder id, so the count is
            # also right when every entry in the window is relevant.
            relevant_mask = updated_rec_mat_wo_last.bool()
            num_unique_rel_item = updated_item_matrix[relevant_mask].unique().numel()

            #get unique items
            unique_items = updated_item_matrix.unique()
            # How many recommended items have no occurrence in the test split.
            num_item_with_no_rel_user = np.isin(unique_items, item_with_no_rel_user[dataset]).sum()

            print(f"{dataset}, {model_name}, {k}:{k+window}; num_unique_rel_item: {num_unique_rel_item}; num_item_with_no_rel_user: {num_item_with_no_rel_user}")

lastfm, BPR, 0:5; num_unique_rel_item: 324; num_item_with_no_rel_user: 143
lastfm, BPR, 1:6; num_unique_rel_item: 308; num_item_with_no_rel_user: 157
lastfm, BPR, 2:7; num_unique_rel_item: 303; num_item_with_no_rel_user: 168
lastfm, BPR, 3:8; num_unique_rel_item: 291; num_item_with_no_rel_user: 165
lastfm, BPR, 4:9; num_unique_rel_item: 276; num_item_with_no_rel_user: 186
ml-1m, BPR, 0:5; num_unique_rel_item: 464; num_item_with_no_rel_user: 0
ml-1m, BPR, 1:6; num_unique_rel_item: 484; num_item_with_no_rel_user: 0
ml-1m, BPR, 2:7; num_unique_rel_item: 506; num_item_with_no_rel_user: 1
ml-1m, BPR, 3:8; num_unique_rel_item: 505; num_item_with_no_rel_user: 1
ml-1m, BPR, 4:9; num_unique_rel_item: 519; num_item_with_no_rel_user: 1


# Is IBO+IWO = 1 ?

In [None]:
from recbole.evaluator.evaluator import Evaluator
from recbole.config import Config
import torch
import pickle
import pandas as pd
import numpy as np

list_dataset = [
    "ml-1m",
    "lastfm"
]

model_list = [
    "BPR",
    ]

# Width of the sliding window over the ranking positions.
window = 5
total_k = 10

filenames_of_best_model = [
 'filename_best_model_for_ml-1m.pickle',
 'filename_best_model_for_lastfm.pickle',
 ]

for dataset in list_dataset:
    config = Config(
                model="Pop", 
                dataset=dataset, 
                config_file_list=["../RecBole/recbole/properties/overall.yaml"],
                # Every evaluated slice has exactly `window` columns, so the
                # cut-off is tied to it instead of repeating the literal.
                config_dict={"topk":window, "metrics":["IBOIWO"]
                }
                )

    # NOTE: pickle is only safe here because the split files are produced locally.
    with open(f"../train_val_test/{dataset}_test.pickle","rb") as f:
        data = pickle.load(f)
    test = pd.DataFrame(data)

    item_id = config.final_config_dict["ITEM_ID_FIELD"]

    # One list of test items per user; only the item column is aggregated.
    test = test.groupby("user_id")[item_id].agg(list)

    filename = [f for f in filenames_of_best_model if dataset in f][0]

    with open("../results/"+filename,"rb") as f:
        models = pickle.load(f)

    #non-pop model
    for model in models:

        # NOTE: this rebinds the notebook-global `model_name` that get_measure reads.
        model_name = model.split("-")[0]
        if model_name not in model_list:
            continue

        with open(f"../hendrix/struct/struct_{dataset}_"+model,"rb") as f:
            struct = pickle.load(f)

        item_matrix = struct.get('rec.items')
        rec_mat = struct.get('rec.topk')
        # presumably the users in `test` are ordered like the rows of the struct — TODO confirm
        struct.set("data.pos_items",test.apply(np.array).values)

        # Slide the window over start positions 0..4.
        for k in range(0,5,1):
            print(f"Doing {dataset}, {model_name}, {k}:{k+window}")

            #slice item_matrix and relevance_matrix
            updated_item_matrix = item_matrix[:,k:k+window]
            # Keep the last column of rec_mat next to the sliced relevance flags.
            updated_rec_mat = torch.cat((rec_mat[:,k:k+window], rec_mat[:,-1:]), axis=1)

            struct.set("rec.items",updated_item_matrix)
            struct.set("rec.topk",updated_rec_mat)

            # A fresh evaluator per window, as before; the unused one that was
            # built once per dataset has been dropped.
            evaluator = Evaluator(config)
            result = evaluator.evaluate(struct)
            print(result)