In [1]:
import warnings
warnings.simplefilter("ignore")

import pickle
import pandas as pd
import numpy as np

import os

from recbole.config import Config
from recbole.evaluator.evaluator import Evaluator
    
import torch
import time

# Actual ($km$ items)

## Insert the most fair and relevant items

In [None]:
# Experiment: every user starts with the same k recommended items (ids 1..k).
# Working from the last rank up to the first, one rank at a time is replaced,
# for all users at once, by an item taken from that user's own block of k ids
# and marked relevant.  The evaluator is run before any replacement and after
# each one; the collected results are pickled once per user count.
k = 10

for num_user in [
    100,
    500,
    1000,
]:

    num_item = num_user * k

    print(f"There are {num_user} users, {num_item} items, and k={k}")

    #initialize user-k recommendation matrix by recommending the same k items to all users
    same_k_items = np.arange(k) + 1 #item index starts from 1
    # torch.tensor(..., dtype=torch.int32) converts the integer ids exactly;
    # torch.Tensor(...).int() would detour through float32 (exact only up to 2**24)
    curr_rec = torch.tensor(np.tile(same_k_items, (num_user, 1)), dtype=torch.int32)

    # row u holds the ids u*k+1 .. u*k+k, so no id is shared between users
    incoming_k_items = torch.tensor(
        np.arange(num_item).reshape(num_user, k) + 1, dtype=torch.int32
    )

    #initialize user-item_at_k relevance matrix (all irrelevant)
    curr_rel = torch.zeros(num_user, k + 1, dtype=torch.int32) #+1 because recbole saves number of rel items at the last column
    # the first user's own block is 1..k, i.e. exactly the initial list, so it is relevant from the start
    curr_rel[0, :-1] = 1
    curr_rel[:, -1] = k #each user has exactly k relevant items

    incoming_k_relevance = torch.ones(num_user, k) #every incoming item is relevant

    #take struct from a dataset
    dataset = "Amazon_Luxury_Beauty"
    model_name = "Pop"
    list_file = os.listdir("../struct/")
    file_for_dataset = [x for x in list_file if dataset in x]
    assert len(file_for_dataset) == 1, (
        f"expected exactly one struct file matching {dataset!r} in ../struct/, "
        f"found {len(file_for_dataset)}"
    )

    # NOTE: pickle.load can execute arbitrary code; only load struct files you created yourself
    with open(os.path.join("../struct/", file_for_dataset[0]), "rb") as f:
        struct = pickle.load(f)

    config = Config(
        model=model_name,
        dataset=dataset,
        config_file_list=["../RecBole/recbole/properties/overall.yaml"],
        config_dict={
            "topk": k,
            "metrics": [
                "RelMetrics",
                "FairWORel",
                "IBOIWO",
                "IAA_true",
                "MME_IIF_AIF",
            ],
        },
    )

    evaluator = Evaluator(config)
    struct.set("data.num_items", num_item + 1) #because -1 in metrics.py
    struct.set("rec.items", curr_rec)
    struct.set("rec.topk", curr_rel)
    struct.set("rec.score", None)
    struct.set("data.pos_items", incoming_k_items.numpy()) #incoming items are also the only relevant items, but numpy datatype

    # keys are the number of ranks replaced so far, as strings: "0" (baseline) .. str(k)
    insertion_result = dict()
    insertion_result["0"] = evaluator.evaluate(struct)

    for pos_rank in range(k - 1, -1, -1):
        print(f"Inserting fair and rel items for all users at {pos_rank}")
        curr_rec[:, pos_rank] = incoming_k_items[:, pos_rank]
        curr_rel[:, pos_rank] = incoming_k_relevance[:, pos_rank]

        struct.set("rec.items", curr_rec)
        struct.set("rec.topk", curr_rel)

        # perf_counter is monotonic, so the measured interval cannot be skewed by wall-clock adjustments
        start_time = time.perf_counter()
        result = evaluator.evaluate(struct)
        print("total time taken: ", time.perf_counter() - start_time)

        insertion_result[f"{k-pos_rank}"] = result

    with open(f'artificial_insert_fair_user_{str(num_user).zfill(4)}_exact_km.pickle', 'wb') as f:
        pickle.dump(insertion_result, f, pickle.HIGHEST_PROTOCOL)
    

## Insert the most unfair and irrelevant items

In [None]:
# Experiment: every user starts with its own block of k recommended items
# (num_user*k distinct ids in total), all marked relevant.  Working from the
# last rank up to the first, one rank at a time is replaced, for all users at
# once, by the shared items 1..k, marked irrelevant for everyone except the
# first user.  The evaluator is run before any replacement and after each one;
# the collected results are pickled once per user count.
k = 10

for num_user in [
    100,
    500,
    1000,
]:

    num_item = num_user * k

    print(f"There are {num_user} users, {num_item} items, and k={k}")

    #initialize user-k recommendation matrix by giving every user its own k items
    diff_k_items = np.arange(k * num_user).reshape(num_user, k) + 1 #index starts from 1
    # torch.tensor(..., dtype=torch.int32) converts the integer ids exactly;
    # torch.Tensor(...).int() would detour through float32 (exact only up to 2**24)
    curr_rec = torch.tensor(diff_k_items, dtype=torch.int32)

    same_k_items = np.arange(k) + 1 #item index starts from 1
    incoming_k_items = torch.tensor(np.tile(same_k_items, (num_user, 1)), dtype=torch.int32)

    #initialize user-item_at_k relevance matrix (all relevant)
    curr_rel = torch.ones(num_user, k + 1, dtype=torch.int32) #+1 because recbole saves number of rel items at the last column
    curr_rel[:, -1] = k #each user has exactly k relevant items

    incoming_k_relevance = torch.zeros(num_user, k) #all incoming items are irrelevant
    # the first user's original items are already 1..k, so the incoming items stay relevant for that user
    incoming_k_relevance[0] = 1

    #take struct from a dataset
    dataset = "Amazon_Luxury_Beauty"
    model_name = "Pop"
    list_file = os.listdir("../struct/")
    file_for_dataset = [x for x in list_file if dataset in x]
    assert len(file_for_dataset) == 1, (
        f"expected exactly one struct file matching {dataset!r} in ../struct/, "
        f"found {len(file_for_dataset)}"
    )

    # NOTE: pickle.load can execute arbitrary code; only load struct files you created yourself
    with open(os.path.join("../struct/", file_for_dataset[0]), "rb") as f:
        struct = pickle.load(f)

    config = Config(
        model=model_name,
        dataset=dataset,
        config_file_list=["../RecBole/recbole/properties/overall.yaml"],
        config_dict={
            "topk": k,
            "metrics": [
                "RelMetrics",
                "FairWORel",
                "IBOIWO",
                "IAA_true",
                "MME_IIF_AIF",
            ],
        },
    )

    evaluator = Evaluator(config)
    struct.set("data.num_items", num_item + 1) #because -1 in metrics.py
    struct.set("rec.items", curr_rec)
    struct.set("rec.topk", curr_rel)
    struct.set("rec.score", None)
    # .copy() detaches the array from curr_rec, which is overwritten in place by the loop below
    struct.set("data.pos_items", curr_rec.numpy().copy()) #current items are also the only relevant items, but numpy datatype

    # keys are the number of ranks replaced so far, as strings: "0" (baseline) .. str(k)
    insertion_result = dict()
    insertion_result["0"] = evaluator.evaluate(struct)

    for pos_rank in range(k - 1, -1, -1):
        print(f"Inserting unfair and irrelevant items for all users at {pos_rank}")
        curr_rec[:, pos_rank] = incoming_k_items[:, pos_rank]
        curr_rel[:, pos_rank] = incoming_k_relevance[:, pos_rank]

        struct.set("rec.items", curr_rec)
        struct.set("rec.topk", curr_rel)

        # perf_counter is monotonic, so the measured interval cannot be skewed by wall-clock adjustments
        start_time = time.perf_counter()
        result = evaluator.evaluate(struct)
        print("total time taken: ", time.perf_counter() - start_time)

        insertion_result[f"{k-pos_rank}"] = result

    with open(f'artificial_insert_unfair_user_{str(num_user).zfill(4)}_exact_km.pickle', 'wb') as f:
        pickle.dump(insertion_result, f, pickle.HIGHEST_PROTOCOL)
    