In [1]:
# Work from the parent directory: the relative paths used in later cells
# (train_val_test/, struct/, RecBole/) are resolved against the current directory.
%cd ../

h:\Codes\fairreceval


In [2]:
import pickle
import pandas as pd
import numpy as np
from collections import Counter
import os

import warnings
warnings.simplefilter("ignore")

from recbole.config import Config
from recbole.evaluator.evaluator import Evaluator
    
import torch
from collections import OrderedDict


def load_dataset(dataset, list_k):
    """Read the pickled splits of ``dataset`` and build a FairWORel evaluator.

    Parameters
    ----------
    dataset : str
        Dataset name; it is part of the pickle file names under
        ``train_val_test/`` and is handed to the RecBole ``Config``.
    list_k : list
        Cut-offs passed to the config as ``topk``.

    Returns
    -------
    tuple
        ``(df, df_test, test, evaluator)``:

        * ``df`` -- frame with columns ``train``, ``valid`` and ``pure_test``
          holding per-user item sets; cells that were missing become empty sets.
          Its index comes from the grouped train split.
        * ``df_test`` -- the rows of ``df`` whose ``pure_test`` was not missing.
        * ``test`` -- per-user lists of test items (grouped by ``user_id``).
        * ``evaluator`` -- ``Evaluator`` built from the config.
    """

    def read_split(path):
        # Unpickle one split file and wrap its content in a DataFrame.
        with open(path, "rb") as handle:
            return pd.DataFrame(pickle.load(handle))

    # All three files are read before the config is created.
    train_frame = read_split(f"train_val_test/{dataset}_train.pickle")
    valid_frame = read_split(f"train_val_test/{dataset}_valid.pickle")
    test_frame = read_split(f"train_val_test/{dataset}_test.pickle")

    config = Config(
        model="Pop",
        dataset=dataset,
        config_file_list=["RecBole/recbole/properties/overall.yaml"],
        config_dict={"topk": list_k, "metrics": "FairWORel"},
    )
    evaluator = Evaluator(config)

    # Name of the item column, as declared by the RecBole configuration.
    item_id = config.final_config_dict["ITEM_ID_FIELD"]

    def items_per_user(frame):
        # Collect every column into a per-user list, then keep only the item column.
        collected = frame.groupby("user_id").agg(lambda column: [value for value in column])
        return collected[item_id]

    train = items_per_user(train_frame)
    val = items_per_user(valid_frame)
    test = items_per_user(test_frame)

    # The first assigned column fixes the index; later columns are aligned to it.
    df = pd.DataFrame()
    for column_name, per_user in (("train", train), ("valid", val), ("pure_test", test)):
        df[column_name] = per_user.apply(set)

    # Users that actually have test items (selected before NaNs are replaced).
    df_test = df[df.pure_test.notna()]

    def nan_to_empty_set(cell):
        # Missing cells show up as float NaN; every other cell is already a set.
        return set() if type(cell) == float else cell

    df = df.applymap(nan_to_empty_set)
    df_test = df_test.applymap(nan_to_empty_set)
    return df, df_test, test, evaluator


def get_least_pop_filtered_item(df, user_id, k, rec_count=None, no_item_users=None):
    """Pick the ``k`` least-recommended items among those available to a user.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``recommendable_items`` column; the cell at ``user_id``
        holds the list of items that may be recommended to that user.
    user_id : hashable
        Row label of the user in ``df``.
    k : int
        Number of items to return.
    rec_count : collections.Counter, optional
        How often each item has been recommended so far. Defaults to the
        module-level ``curr_rec_count``. It is not modified.
    no_item_users : list, optional
        List that collects users without any recommendable item. Defaults to
        the module-level ``user_with_no_item_to_recommend``.

    Returns
    -------
    list of (item, count)
        Up to ``k`` pairs, least recommended first. An empty list when the
        user has no recommendable item (the user is then appended to
        ``no_item_users``); previously this case raised ``UnboundLocalError``.
    """
    avail_item = df.loc[user_id, "recommendable_items"]

    if len(avail_item) == 0:
        if no_item_users is None:
            no_item_users = user_with_no_item_to_recommend
        no_item_users.append(user_id)
        return []

    if rec_count is None:
        rec_count = curr_rec_count

    # Keep only positively-counted items that this user may receive. A set makes
    # the membership test O(1); iteration order of rec_count is preserved so
    # ties are broken exactly as before.
    avail_lookup = set(avail_item)
    freq_count = Counter({
        item: count
        for item, count in rec_count.items()
        if count > 0 and item in avail_lookup
    })

    # Available items that were never recommended enter with a count of zero,
    # in the order in which they appear in avail_item.
    for item in avail_item:
        if item not in freq_count:
            freq_count[item] = 0

    # most_common() sorts by descending count; the reversed tail slice yields
    # the k smallest counts, smallest first.
    return freq_count.most_common()[:-k-1:-1]

def check(test_frame=None, rec_frame=None, list_size=None):
    """Sanity-check the non-repeatable recommendation lists.

    Asserts that no user is recommended an item from their own train or valid
    set, and that every recommendation list holds ``list_size`` distinct items.

    Parameters
    ----------
    test_frame : pandas.DataFrame, optional
        Frame with ``train`` and ``valid`` columns of item sets. Defaults to
        the module-level ``df_test``.
    rec_frame : pandas.DataFrame, optional
        Frame with a ``non_repeatable_fair`` column of recommendation lists,
        row-aligned with ``test_frame`` after its index is reset. Defaults to
        the module-level ``temp_df``.
    list_size : int, optional
        Expected number of distinct items per list. Defaults to the
        module-level ``k``.

    Raises
    ------
    AssertionError
        If any overlap is non-empty or any list does not contain exactly
        ``list_size`` unique items.
    """
    if test_frame is None:
        test_frame = df_test
    if rec_frame is None:
        rec_frame = temp_df
    if list_size is None:
        list_size = k

    users = test_frame.reset_index(drop=True)
    frame = pd.DataFrame()
    frame["train"] = users["train"]
    frame["valid"] = users["valid"]
    frame["curr_test"] = rec_frame["non_repeatable_fair"].apply(set)
    frame = frame.dropna()

    # The overlap must be empty for every user. The previous check only verified
    # that all overlap sizes were equal, so a uniform non-zero overlap passed.
    for split in ("train", "valid"):
        overlap_sizes = [
            len(seen.intersection(recommended))
            for seen, recommended in zip(frame[split], frame["curr_test"])
        ]
        assert all(size == 0 for size in overlap_sizes), \
            f"recommended items overlap with the {split} items of at least one user"

    #ensure the k recommended items are unique
    assert all(frame["curr_test"].apply(len) == list_size), \
        "at least one recommendation list does not hold the expected number of unique items"


In [3]:
# Cut-offs at which the measures are evaluated; max_k sizes the "most unfair" lists.
list_k = [
    1,2,3,5,10,15,20
    ]
max_k = max(list_k)
# Accumulates one block of rows per dataset.
all_results = pd.DataFrame()
for dataset in [
            "Amazon_Luxury_Beauty",
            "lastfm", 
            "ml-1m",
            "book-crossing",
            "Amazon_Industrial_and_Scientific",
            "Amazon_Digital_Music",
            ]:

    print(f"Doing {dataset}")

    df, df_test, test, evaluator = load_dataset(dataset, list_k)

    # Flatten the per-user item sets in a single pass each. Summing a Series of
    # lists concatenates them one by one, which is quadratic in the number of
    # interactions; the comprehension yields the same elements in the same order.
    train_items_flat = [item for user_items in df.train for item in user_items]
    valid_items_flat = [item for user_items in df.valid for item in user_items]
    items_in_train = set(train_items_flat)
    items_in_val  = set(valid_items_flat)
    items_in_test  = set([item for user_items in df_test.pure_test for item in user_items])

    # Repeatable recommendation Most unfair
    all_items = items_in_train | items_in_val | items_in_test #add sets

    list_all_items = list(all_items)

    # Every test user receives the same first max_k items of the item list.
    df_test["unfair_repeatable_rec"] = pd.Series(dtype=object)
    df_test["unfair_repeatable_rec"] = df_test["unfair_repeatable_rec"].apply(lambda x: list_all_items[:max_k])

    # Exactly one file under struct/ must contain the dataset name.
    list_file = os.listdir("struct/")
    file_for_dataset = [x for x in list_file if dataset in x]
    assert len(file_for_dataset) == 1

    # NOTE: pickle.load executes arbitrary code; only use files you produced yourself.
    with open("struct/"+file_for_dataset[0],"rb") as f:
        struct = pickle.load(f)

    # Overwrite the recommended items in the loaded struct and evaluate it.
    rec = torch.Tensor(df_test.unfair_repeatable_rec.apply(lambda x: x).to_list())
    struct.set("rec.items",rec)

    most_unfair_result_repeatable = evaluator.evaluate(struct)

    # NON-REPEATABLE Most unfair
    items_in_train_val = train_items_flat + valid_items_flat
    test_item_not_in_train_val = set([item for user_items in df.pure_test for item in user_items]) - set(items_in_train_val)
    print("Number of items in test that have not been recommended in train or val: ", len(test_item_not_in_train_val))

    # Interaction frequency in train+val; items that only occur in test count as 0.
    freq_count = Counter(items_in_train_val)
    for item in test_item_not_in_train_val:
        freq_count[item] = 0
    
    #get $k$ least popular items in train+val
    least_recommended_freq_count = freq_count.most_common()[::-1]

    least_recommended_items = [item[0] for item in least_recommended_freq_count]

    # Per user: all items ordered from least to most frequent, minus the user's
    # own train and valid items.
    df_test["recommendable_items"] = df_test.apply(lambda x: [item for item in least_recommended_items if item not in x.train and item not in x.valid], axis=1)
    #most_unfair

    df_test["unfair_nonrepeatable_rec"] = pd.Series(dtype=object)
    df_test["unfair_nonrepeatable_rec"] = df_test["recommendable_items"].apply(lambda x: x[:max_k])

    rec = torch.Tensor(df_test.unfair_nonrepeatable_rec.to_list())
    struct.set("rec.items",rec)

    most_unfair_result_nonrepeatable = evaluator.evaluate(struct)

    ## Most fair
    most_fair_result_repeatable = OrderedDict()
    most_fair_result_nonrepeatable = OrderedDict()
    m = len(df_test)
    for k in list_k:
        print(k)
        
        # k*m recommendation slots are filled by cycling through the item list.
        slots = k*m
        multiples = slots//len(list_all_items) + 1
        # A dedicated evaluator per cut-off: the "most fair" lists depend on k.
        config = Config(
                model="Pop", 
                dataset=dataset, 
                config_file_list=["RecBole/recbole/properties/overall.yaml"],
                config_dict={"topk":[k],"metrics":"FairWORel"}
                )
        evaluator = Evaluator(config)

        #REPEATABLE
        rec_list = np.array(list_all_items*multiples)[:slots].reshape(m, k)
        rec = torch.Tensor(rec_list)
        struct.set("rec.items",torch.clone(rec))

        result = evaluator.evaluate(struct)
        most_fair_result_repeatable.update(result)

        #NONREPEATABLE
        # Read as globals by get_least_pop_filtered_item() and check().
        user_with_no_item_to_recommend = []
        temp = pd.Series(rec_list.tolist())
        temp_df = pd.DataFrame()
        temp_df["repeatable_fair"] = temp
        temp_df["recommendable_items"] = df_test["recommendable_items"].reset_index(drop=True)

        temp_df["non_repeatable_fair"] = pd.Series([[]]*len(temp_df))
        
        # Greedy assignment: the first user takes their first k recommendable
        # items; each later user takes the k available items that have been
        # recommended least often so far.
        for i, row in temp_df.iterrows():
            if i == 0:
                init_rec = temp_df.at[i,"recommendable_items"][:k]
                temp_df.at[i,"non_repeatable_fair"] = init_rec
                curr_rec_count = Counter(init_rec)
            else:
                items_to_add_and_count = get_least_pop_filtered_item(temp_df, i, k)
                items_to_add = [x[0] for x in items_to_add_and_count]
                temp_df.at[i, "non_repeatable_fair"] = items_to_add

                #update curr_rec_count
                for_update = Counter(items_to_add)
                curr_rec_count.update(for_update)  

            
        check()   


        rec_list = temp_df.non_repeatable_fair.to_list()
        rec = torch.Tensor(rec_list)
        struct.set("rec.items",torch.clone(rec))

        result = evaluator.evaluate(struct)
        most_fair_result_nonrepeatable.update(result)
        if len(user_with_no_item_to_recommend) > 0:
            print(user_with_no_item_to_recommend)

    # One column per scenario; the rows are the measure labels returned by the evaluator.
    result_df = pd.DataFrame(columns=["most_unfair_repeatable","most_fair_repeatable"])
    result_df.most_fair_repeatable = most_fair_result_repeatable
    result_df.most_unfair_repeatable = most_unfair_result_repeatable
    result_df["most_fair_nonrepeatable"] = most_fair_result_nonrepeatable
    result_df["most_unfair_nonrepeatable"] = most_unfair_result_nonrepeatable

    
    #save result_df for each dataset
    result_df = result_df.reset_index()
    # Short dataset labels used in the result tables.
    if dataset == "Amazon_Luxury_Beauty":
        dataset = "amazon-lb"
    elif dataset == "Amazon_Industrial_and_Scientific":
        dataset = "amazon-is"
    elif dataset == "book-crossing":
        dataset = "book-x"
    elif dataset == "Amazon_Digital_Music":
        dataset = "amazon-dm"
    result_df.index = [dataset]*len(result_df)
    result_df.rename(columns={"index":"measure"},inplace=True)
    result_df.index.set_names("dataset", inplace=True)
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent call.
    all_results = pd.concat([all_results, result_df])

Doing Amazon_Luxury_Beauty
Number of items in test that have not been recommended in train or val:  1
1
2
3
5
10
15
20
Doing lastfm
Number of items in test that have not been recommended in train or val:  0
1
2
3
5
10
15
20
Doing ml-1m
Number of items in test that have not been recommended in train or val:  0
1
2
3
5
10
15
20
Doing book-crossing
Number of items in test that have not been recommended in train or val:  0
1
2
3
5
10
15
20
Doing Amazon_Industrial_and_Scientific
Number of items in test that have not been recommended in train or val:  1
1
2
3
5
10
15
20
Doing Amazon_Digital_Music
Number of items in test that have not been recommended in train or val:  1
1
2
3
5
10
15
20


In [5]:
# Split the measure labels into three columns. The code below expects labels of
# the form "<measure>_<version>@<k>": the part after "@" becomes `k`, the part
# after the first "_" becomes `version`, and what remains is the measure name.
# NOTE: this cell is not idempotent - once "@" has been stripped, running it
# again raises IndexError in the first comprehension.
k = [sublist[1] for sublist in all_results.measure.str.split("@")]
all_results["k"] = k
# Raw strings: "\d" inside a normal string literal is an invalid escape sequence.
all_results.measure = all_results.measure.str.replace(r"@\d*","",regex=True)
ori_our = [sublist[1] for sublist in all_results.measure.str.split("_")]
all_results["version"] = ori_our
all_results.measure = all_results.measure.str.replace(r"_.*","",regex=True)


In [6]:
# Write the flat result table (one row per dataset / measure / version / k) to an Excel file.
all_results.to_excel("most_fair_unfair_results.xlsx")

In [7]:
# Measure names in order of first appearance.
original_order = all_results["measure"].unique()

# Move k, version and measure into the index next to the existing dataset index,
# then label the first three index levels explicitly.
index_parts = [all_results.index, "k", "version", "measure"]
grouped = all_results.set_index(index_parts)
grouped.index = grouped.index.set_names(['dataset','k', 'version'], level=[0,1,2])


In [8]:
# Write the multi-indexed (dataset, k, version, measure) table to an Excel file.
grouped.to_excel("grouped_most_fair_unfair_all.xlsx")

In [9]:
# Render floats with thousands separators and five decimals in displayed frames.
pd.set_option("display.float_format", "{:,.5f}".format)

In [10]:
# Display the grouped result table as the cell output.
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,most_unfair_repeatable,most_fair_repeatable,most_fair_nonrepeatable,most_unfair_nonrepeatable
dataset,k,version,measure,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
amazon-lb,1,ori,Jain,0.00126,0.98355,0.98355,0.00126
amazon-lb,1,our,Jain,0.00000,1.00000,1.00000,0.00000
amazon-lb,1,ori,QF,0.00126,1.00000,1.00000,0.00126
amazon-lb,1,our,QF,0.00000,1.00000,1.00000,0.00000
amazon-lb,1,ori,Ent,,0.99889,0.99889,
...,...,...,...,...,...,...,...
amazon-dm,20,our,Gini,1.00000,0.00000,0.00000,1.00000
amazon-dm,20,ori,Gini-w,0.99836,0.05001,0.04973,0.99836
amazon-dm,20,ori,FSat,0.00211,1.00000,1.00000,0.00222
amazon-dm,20,our,FSat,0.00000,1.00000,1.00000,0.00011
