In [1]:
import warnings
warnings.simplefilter("ignore")

import pickle
import numpy as np
import pandas as pd

import os

from recbole.config import Config
from recbole.evaluator.evaluator import Evaluator
    
import torch
import time

## Insert the most fair and relevant items (total of $km$ items)

In [2]:
def get_rel_items_not_in_rec(u, incoming_k_items, curr_rec):
    """Return the incoming items of user ``u`` that are not yet recommended.

    Args:
        u: Row index of the user.
        incoming_k_items: Tensor indexed by user; row ``u`` holds that user's
            incoming items.
        curr_rec: Tensor indexed by user; row ``u`` holds that user's current
            recommendation.

    Returns:
        The entries of ``incoming_k_items[u]`` that do not occur in
        ``curr_rec[u]``, in their original order.
    """
    candidates = incoming_k_items[u]
    already_recommended = torch.isin(candidates, curr_rec[u])
    return candidates[torch.logical_not(already_recommended)]

def get_remaining_items(u, all_item_id, curr_rec, rel_not_in_rec):
    """Return the items that are neither recommended to user ``u`` nor pending for them.

    Args:
        u: Row index of the user.
        all_item_id: 1-D tensor with every item id.
        curr_rec: Tensor indexed by user; row ``u`` holds that user's current
            recommendation.
        rel_not_in_rec: Per-user sequence; entry ``u`` holds the items that are
            still waiting to be inserted for that user.

    Returns:
        The entries of ``all_item_id`` that appear in neither ``curr_rec[u]``
        nor ``rel_not_in_rec[u]``, in their original order.
    """
    leftover = all_item_id
    # Filter successively: first drop what is already recommended, then drop
    # the not-yet-inserted relevant items.
    for excluded in (curr_rec[u], rel_not_in_rec[u]):
        leftover = leftover[torch.isin(leftover, excluded, invert=True)]
    return leftover

In [None]:
k = 10
path = "artificial_insertion_extra"

# Make sure the output directory exists before any evaluation runs; otherwise the
# first pickle dump below raises FileNotFoundError and the computed result is lost.
os.makedirs(path, exist_ok=True)


def _build_full_ranking(all_item_id, incoming_k_items, curr_rec):
    """Build the per-user full item ranking from the current recommendation.

    Each row is the concatenation of (1) the user's current recommendation,
    (2) every item that is neither recommended nor a pending incoming item,
    and (3) the incoming items that have not been inserted yet.

    Args:
        all_item_id: 1-D tensor with every item id.
        incoming_k_items: (num_user, k) tensor of the items to insert per user.
        curr_rec: (num_user, k) tensor with the current recommendation.

    Returns:
        Tuple ``(rel_not_in_rec, remaining_items, full_rec_mat)``: two per-user
        lists of 1-D tensors and the stacked full-ranking matrix.
    """
    users = range(len(curr_rec))
    rel_not_in_rec = [get_rel_items_not_in_rec(u, incoming_k_items, curr_rec) for u in users]
    remaining_items = [get_remaining_items(u, all_item_id, curr_rec, rel_not_in_rec) for u in users]
    full_rec_mat = torch.stack(
        [torch.cat([curr_rec[u], remaining_items[u], rel_not_in_rec[u]]) for u in users]
    )
    return rel_not_in_rec, remaining_items, full_rec_mat


for num_user in [
   1000,  
]:

    num_item = (num_user * k) 
    all_item_id = torch.arange(num_item) + 1

    print(f"There are {num_user} users, {num_item} items, and k={k}")

    #initialize user-k recommendation matrix by recommending the same k items to all users
    same_k_items = np.arange(k)+1 #item index starts from 1
    curr_rec = np.tile(same_k_items, (num_user,1))
    # Convert straight to int32: torch.Tensor(...) would go through float32,
    # which cannot represent every integer above 2**24 exactly.
    curr_rec = torch.tensor(curr_rec, dtype=torch.int32)

    #each user gets its own disjoint set of k incoming items (ids 1..num_item)
    incoming_k_items = np.arange(num_item).reshape(num_user,k) + 1
    incoming_k_items = torch.tensor(incoming_k_items, dtype=torch.int32)

    #initialize user-item_at_k relevance matrix (all irrelevant)
    curr_rel = torch.zeros(num_user,k+1,dtype=torch.int32) #+1 because recbole saves number of rel items at the last column
    curr_rel[0, :-1] = 1 #first user already has relevant items
    curr_rel[:,-1] = k #each user has exactly k relevant items

    incoming_k_relevance = torch.ones(num_user,k)

    #take struct from a dataset
    dataset = "Amazon-lb" 
    model_name = "Pop"
    list_file = os.listdir("../struct/")
    file_for_dataset = [x for x in list_file if dataset in x]
    assert len(file_for_dataset) == 1, (
        f"expected exactly one struct file for {dataset}, found {file_for_dataset}"
    )

    # NOTE: pickle.load can execute arbitrary code; only load struct files you produced yourself.
    with open("../struct/"+file_for_dataset[0],"rb") as f:
        struct = pickle.load(f)

    config = Config(
                    model=model_name, 
                    dataset=dataset, 
                    config_file_list=["../RecBole/recbole/properties/overall.yaml"],
                    config_dict={
                        "topk":k,
                        "metrics":[
                                "FixedIAAinsert",
                                "FixedIFDrerank",
                                "FixedIIF",
                                ]}
                                )

    evaluator = Evaluator(config)
    struct.set("data.num_items", num_item+1) #because -1 in metrics.py
    struct.set("rec.items", curr_rec)
    struct.set("rec.topk", curr_rel)
    struct.set("rec.score",torch.empty((num_user, num_item+1))) #needed for FixedIAAinsert, +1 to add a dummy col for pred_rel (pred_rel only taken from 1: onwards)
    struct.set("data.pos_items", incoming_k_items.numpy()) #incoming items are also the only relevant items, but numpy datatype
    struct.set("data.name", "artificial") 
    
    rel_not_in_rec, remaining_items, full_rec_mat = _build_full_ranking(all_item_id, incoming_k_items, curr_rec)
    struct.set("rec.all_items", full_rec_mat)

    #baseline: evaluate before any item has been inserted (stored under key "0")
    insertion_result = dict()
    result = evaluator.evaluate(struct)
    insertion_result["0"] =  result
    with open(f'{path}/fair_user_{str(num_user).zfill(4)}_exact_km_0.pickle', 'wb') as f:
        pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)

    #insert from the last rank position up to the first, re-evaluating after every step
    for pos_rank in range(k-1,-1, -1):
        print(f"Inserting fair and rel items for all users at {pos_rank}")
        curr_rec[:,pos_rank] = incoming_k_items[:,pos_rank]
        curr_rel[:,pos_rank] = incoming_k_relevance[:,pos_rank]

        struct.set("rec.items", curr_rec)
        struct.set("rec.topk", curr_rel)

        #for IAA and IFD that need full ranking
        rel_not_in_rec, remaining_items, full_rec_mat = _build_full_ranking(all_item_id, incoming_k_items, curr_rec)
        struct.set("rec.all_items", full_rec_mat)
        print(full_rec_mat)
  
        start_time = time.time()
        result = evaluator.evaluate(struct)
        print(result)
        print("total time taken: ", time.time() - start_time)
        
        #key is the number of rank positions replaced so far
        insertion_result[f"{k-pos_rank}"] = result

        #dump/save per pos_rank
        with open(f'{path}/fair_user_{str(num_user).zfill(4)}_exact_km_{k-pos_rank}.pickle', 'wb') as f:
            pickle.dump(result, f, pickle.HIGHEST_PROTOCOL)

    #save all steps for this number of users in a single file
    with open(f'{path}/fair_user_{str(num_user).zfill(4)}_exact_km.pickle', 'wb') as f:
        pickle.dump(insertion_result, f, pickle.HIGHEST_PROTOCOL)
        