# **IMPORT LIBS**

## Import

In [1]:
import numpy as np
from openrec.tf1.legacy.utils import ImplicitDataset
from openrec.tf1.legacy.recommenders import CML, BPR, PMF
from tqdm.notebook import tqdm
import numpy as np
import math
import pandas as pd
import os
import pickle

In [2]:
import sys

# Make the project-local modules under ../Lib importable; the notebook imports
# `helper` right after this cell (the earlier comment mentioned utilities.py —
# TODO confirm which files actually live in that folder).
# The membership check keeps sys.path free of duplicate entries when the cell is re-run.
if '../Lib' not in sys.path:
    sys.path.append('../Lib')

In [3]:
from helper import *

# **GENERATE THE DATASET**

## Init

In [4]:
# Seed NumPy's global RNG so the np.random draws made later in the notebook are reproducible
seed = 2384795
np.random.seed(seed=seed)

# Folder holding the generated data files read by the following cells.
# exist_ok=True makes the call a no-op when the folder is already there, which replaces
# the separate exists() check; unlike that check, it raises if the path exists but is
# not a directory instead of silently continuing.
output_name = "./generated_data/"
os.makedirs(output_name, exist_ok=True)

# **MODEL CHOICE**

In [5]:
# Prevent tensorflow from using cached embeddings: start from a fresh default graph
import tensorflow as tf
tf.compat.v1.reset_default_graph()
# Seed the new graph. The compat.v1 spelling matches the reset call above; the bare
# tf.set_random_seed name only exists in the TensorFlow 1.x top-level namespace.
tf.compat.v1.set_random_seed(seed)




In [6]:
# Setup taken as-is from the code provided by the authors of the paper.

# Arrays stored under output_name, plus the user/item counts passed to ImplicitDataset
raw_data = {
    'train_data': np.load(f"{output_name}training_arr.npy"),
    'test_data_pos_biased': np.load(f"{output_name}biased-test_arr_pos.npy"),
    'test_data_neg_biased': np.load(f"{output_name}biased-test_arr_neg.npy"),
    'test_data_pos_unbiased': np.load(f"{output_name}unbiased-test_arr_pos.npy"),
    'test_data_neg_unbiased': np.load(f"{output_name}unbiased-test_arr_neg.npy"),
    'max_user': 7177,
    'max_item': 10729,
}
batch_size, test_batch_size, display_itr = 8000, 1000, 1000

train_dataset = ImplicitDataset(
    raw_data['train_data'],
    raw_data['max_user'],
    raw_data['max_item'],
    name='Train',
)

# Model / dataset identifiers; they determine where the model's files are looked up
MODEL_CLASS = CML
MODEL_PREFIX = "cml"
DATASET_NAME = "KuaiRec"
OUTPUT_FOLDER = output_name
# e.g. "./generated_data/cml-KuaiRec/" and "./generated_data/cml-KuaiRec/cml-KuaiRec"
OUTPUT_PATH = f"{OUTPUT_FOLDER}{MODEL_PREFIX}-{DATASET_NAME}/"
OUTPUT_PREFIX = f"{OUTPUT_PATH}{MODEL_PREFIX}-{DATASET_NAME}"

# Create the model-specific folder on first use
if not os.path.exists(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)


# **EVALUATION**

In [7]:
# Load the saved arrays again (same files as in the model-choice cell) and record
# the user/item counts handed to ImplicitDataset.
raw_data = {
    'train_data': np.load(f"{output_name}training_arr.npy"),
    'test_data_pos_biased': np.load(f"{output_name}biased-test_arr_pos.npy"),
    'test_data_neg_biased': np.load(f"{output_name}biased-test_arr_neg.npy"),
    'test_data_pos_unbiased': np.load(f"{output_name}unbiased-test_arr_pos.npy"),
    'test_data_neg_unbiased': np.load(f"{output_name}unbiased-test_arr_neg.npy"),
    'max_user': 7177,
    'max_item': 10729,
}
batch_size, test_batch_size, display_itr = 8000, 1000, 1000

# Wrap the training array and the four test arrays in ImplicitDataset objects
train_dataset = ImplicitDataset(
    raw_data['train_data'],
    raw_data['max_user'],
    raw_data['max_item'],
    name='Train',
)
(
    test_dataset_pos_biased,
    test_dataset_neg_biased,
    test_dataset_pos_unbiased,
    test_dataset_neg_unbiased,
) = (
    ImplicitDataset(raw_data[split], raw_data['max_user'], raw_data['max_item'])
    for split in (
        'test_data_pos_biased',
        'test_data_neg_biased',
        'test_data_pos_unbiased',
        'test_data_neg_unbiased',
    )
)

In [8]:
# Gamma values evaluated below. Whole numbers stay ints on purpose: result keys are
# built with str(gamma).replace(".", ""), so 2 gives "..._2" whereas 2.0 would give "..._20".
GAMMAS = [1.5, 2, 2.5, 3]

# Calculate Propensities

In [9]:
# NOTE(review): 7176 and 10728 are one less than raw_data['max_user'] / raw_data['max_item'];
# presumably the largest user / item ids — confirm against calculate_propensities in helper.
propensities = calculate_propensities(
    7176,
    10728,
    f"{output_name}training_arr.npy",
    normalize=True,
)
# Show the result; it is indexed by gamma further down (propensities[gamma])
propensities

{1.5: array([[9.60897519e-04, 6.16416046e-05, 7.01853525e-04, ...,
         6.16416046e-05, 3.48697573e-04, 0.00000000e+00],
        [9.60897519e-04, 6.16416046e-05, 7.01853525e-04, ...,
         6.16416046e-05, 3.48697573e-04, 0.00000000e+00],
        [9.60897519e-04, 6.16416046e-05, 7.01853525e-04, ...,
         6.16416046e-05, 3.48697573e-04, 0.00000000e+00],
        ...,
        [9.60897519e-04, 6.16416046e-05, 7.01853525e-04, ...,
         6.16416046e-05, 3.48697573e-04, 0.00000000e+00],
        [9.60897519e-04, 6.16416046e-05, 7.01853525e-04, ...,
         6.16416046e-05, 3.48697573e-04, 0.00000000e+00],
        [9.60897519e-04, 6.16416046e-05, 7.01853525e-04, ...,
         6.16416046e-05, 3.48697573e-04, 0.00000000e+00]]),
 2: array([[2.39448702e-04, 8.86847043e-06, 1.64246371e-04, ...,
         8.86847043e-06, 7.09477635e-05, 0.00000000e+00],
        [2.39448702e-04, 8.86847043e-06, 1.64246371e-04, ...,
         8.86847043e-06, 7.09477635e-05, 0.00000000e+00],
        [2.394487

## Biased Evaluation

# **COMPUTE RESULTS**

Compute the AOA and unbiased-evaluator metrics on the biased test set.

In [10]:
# Results on the biased test split, one entry per estimator
biased_results = {}

biased_results["AOA"] = aoa(
    f"{OUTPUT_PREFIX}-test-pos-biased_evaluate_partial.pickle",
    f"{OUTPUT_PREFIX}-test-neg-biased_evaluate_partial.pickle",
    f"{output_name}training_arr.npy",
    K=10,
)

for gamma in GAMMAS:
    # Key is "UB_" plus the gamma value with its decimal point removed (1.5 -> "UB_15")
    key = "UB_" + str(gamma).replace(".", "")
    biased_results[key] = eq(
        f"{OUTPUT_PREFIX}-test-pos-biased_evaluate_partial.pickle",
        f"{OUTPUT_PREFIX}-test-neg-biased_evaluate_partial.pickle",
        f"{output_name}training_arr.npy",
        propensities[gamma],
        K=10,
    )

Compute the AOA and unbiased-evaluator metrics on the unbiased test set.

In [11]:
# Results on the unbiased test split, one entry per estimator
unbiased_results = {}

unbiased_results["AOA"] = aoa(
    f"{OUTPUT_PREFIX}-test-pos-unbiased_evaluate_partial.pickle",
    f"{OUTPUT_PREFIX}-test-neg-unbiased_evaluate_partial.pickle",
    f"{output_name}training_arr.npy",
    K=10,
)

for gamma in GAMMAS:
    # Key is "UB_" plus the gamma value with its decimal point removed (1.5 -> "UB_15")
    key = "UB_" + str(gamma).replace(".", "")
    unbiased_results[key] = eq(
        f"{OUTPUT_PREFIX}-test-pos-unbiased_evaluate_partial.pickle",
        f"{OUTPUT_PREFIX}-test-neg-unbiased_evaluate_partial.pickle",
        f"{output_name}training_arr.npy",
        propensities[gamma],
        K=10,
    )

Get partitions.

In [12]:
# Number of items, as recorded when the data was loaded
num_items = raw_data['max_item']

# Draw n_p distinct candidate values from 1..num_items (sampling without replacement)
n_p = 200
nums = np.arange(1, num_items + 1)
partitions = np.random.choice(nums, size=n_p, replace=False)

# Show the sampled candidates
partitions

array([10393,  3122,  3945,  5355,  5477,  6413,  3649,  4348,  1189,
        1815,  6342,  8656,  5778,  8103,  6939,  5165,  6961,  8447,
        7271,  4144,  8969,  7957,  6721,  5714,  9512,  3462,  6610,
        8165,   630,  1460, 10510,  9667,   938, 10454,   615,  6336,
        9287,  9686,  5768,   823,  8662,  5922,  4994,  6879,  4765,
        8393,  3452,  4764,  2047,  4189,  9741,  5288,  8022,  4579,
        3263,  4572,  3963,   754,  2072,  1867,  2822,  9926,  7794,
        3833,  4800,  5253, 10281,  9049, 10707,  7062,   203,  5673,
        8982,  4057,  6177,   527,  4346,  3661,  4005,  1802,  5220,
        3027,  6612,  9276,  6622,  3516,  5349,  6256,  7421,  2157,
       10658,  8175,  6753,  2406,  9792,   685,  2389,  4269,   117,
          54,  4645,  1299,  4217,  6501,  8253,  8186,  9435,  3473,
        5281,  7925,  1612,  3317,  6570,  5574,  2147, 10084,  4962,
        7940,  5767,  2091,  7764,  9680,  2412,  5290,  1952,  4227,
        1668,  9403,

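Find the partition that minimizes the combined score, i.e. the sum of the `bias` and `concentration` values returned by `stratified` on the biased and the unbiased test sets.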

In [13]:
# For every candidate partition, run `stratified` on the unbiased and on the biased
# test split and keep the candidate with the lowest combined score, where the score is
# the sum of the 'bias' and 'concentration' entries of both results.

# Value of gamma to use for minimization
gamma = 1.5

# To print :)
key = "STRATIFIED_" + str(gamma).replace(".","")
print(key)

# Placeholder entries for this key in both result tables
unbiased_results[key] = {}
biased_results[key] = {}

# Fallback if no candidate improves on +inf: a random value from nums
best_partition = np.random.choice(nums, 1)[0]
best_score = float('inf')

# One slot per value in nums (candidate p is stored at index p-1). Sized from nums
# rather than a literal so it stays in step with the item count; slots of candidates
# that are never evaluated keep the value inf.
history = np.full(nums.size, np.inf)

for p in tqdm(partitions):
    # `stratified` is not defined in this notebook; it is expected to come from the
    # `from helper import *` at the top — verify there.
    temp_unbiased = stratified(OUTPUT_PREFIX + "-test-pos-unbiased_evaluate_partial.pickle",
                               OUTPUT_PREFIX + "-test-neg-unbiased_evaluate_partial.pickle",
                               output_name + "training_arr.npy", propensities[gamma], K=10, partition=p)
    temp_biased = stratified(OUTPUT_PREFIX + "-test-pos-biased_evaluate_partial.pickle",
                             OUTPUT_PREFIX + "-test-neg-biased_evaluate_partial.pickle",
                             output_name + "training_arr.npy", propensities[gamma], K=10, partition=p)

    # Calculate combined score
    combined_score = temp_unbiased['bias'] + temp_unbiased['concentration'] + \
                     temp_biased['bias'] + temp_biased['concentration']

    history[p-1] = combined_score  # Store the combined score

    # Update the best_partition and best_score if the current partition's score is lower
    if combined_score < best_score:
        best_score = combined_score
        best_partition = p

print(f"Best partition: {best_partition} with combined score: {best_score}")
print(f"Minimum score from history: {np.min(history)}")


STRATIFIED_15


  0%|          | 0/200 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
# Additional outputs for verification: dump the per-candidate score array.
# history starts out filled with np.inf and only the slots of evaluated candidates
# (index p-1) are overwritten, so entries that were never evaluated still read inf.
print("Detailed scores from history:", history)

So, for the chosen value of gamma, the best partition is...

In [None]:
# Visualize the candidate with the lowest combined score seen by the search loop.
# If that loop did not run to completion, this is the best candidate seen so far, or
# the random initial pick when no candidate was scored.
best_partition

Compute stratified metrics with unbiased testset.

In [None]:
# Evaluate `stratified` with the selected partition for every gamma, on both test splits
for gamma in GAMMAS:
    # 1.5 -> "STRATIFIED_15", 2 -> "STRATIFIED_2", ...
    key = "STRATIFIED_" + str(gamma).replace(".", "")
    unbiased_results[key] = stratified(
        f"{OUTPUT_PREFIX}-test-pos-unbiased_evaluate_partial.pickle",
        f"{OUTPUT_PREFIX}-test-neg-unbiased_evaluate_partial.pickle",
        f"{output_name}training_arr.npy",
        propensities[gamma],
        K=10,
        partition=best_partition,
    )
    biased_results[key] = stratified(
        f"{OUTPUT_PREFIX}-test-pos-biased_evaluate_partial.pickle",
        f"{OUTPUT_PREFIX}-test-neg-biased_evaluate_partial.pickle",
        f"{output_name}training_arr.npy",
        propensities[gamma],
        K=10,
        partition=best_partition,
    )

This version (`stratified_2`) builds the partition from a linspace over the items instead of a linspace over the propensities.

In [None]:
# Same sweep as for `stratified`, using the `stratified_2` variant
for gamma in GAMMAS:
    # 1.5 -> "STRATIFIED_v2_15", 2 -> "STRATIFIED_v2_2", ...
    key = "STRATIFIED_v2_" + str(gamma).replace(".", "")
    unbiased_results[key] = stratified_2(
        f"{OUTPUT_PREFIX}-test-pos-unbiased_evaluate_partial.pickle",
        f"{OUTPUT_PREFIX}-test-neg-unbiased_evaluate_partial.pickle",
        f"{output_name}training_arr.npy",
        propensities[gamma],
        K=10,
        partition=best_partition,
    )
    biased_results[key] = stratified_2(
        f"{OUTPUT_PREFIX}-test-pos-biased_evaluate_partial.pickle",
        f"{OUTPUT_PREFIX}-test-neg-biased_evaluate_partial.pickle",
        f"{output_name}training_arr.npy",
        propensities[gamma],
        K=10,
        partition=best_partition,
    )


Prepare table for results.

In [None]:
# Table dimensions: one column per estimator (key of biased_results) and one row per
# metric, taking the largest metric count reported by any estimator on either split.
columns = len(biased_results)

# Both result tables are scanned. The previous version iterated the keys of
# unbiased_results but measured biased_results[key] again, so the unbiased tables
# never influenced the row count.
rows = max(
    (
        len(metrics)
        for results in (biased_results, unbiased_results)
        for metrics in results.values()
    ),
    default=0,  # same value the running maximum used to start from
)

rows, columns

In [None]:
# Init dictionary
mae_results = dict()

# Get the names of the rows
list_biased_res = list(biased_results.keys())

# Init results
results_array = np.zeros((rows,columns))

Fill the table with the MAE results.

In [None]:
# For each row
for i in range(len(list_biased_res)):
    key = list_biased_res[i]

    # For each column
    for j in range(len(list(biased_results[key].keys()))):
        key_2 = list(biased_results[key].keys())[j]

        # Compute MAE
        results_array[j][i] = abs(biased_results[key][key_2] - unbiased_results[key][key_2])

# Make it a DataFrame
mae_df = pd.DataFrame(columns=list(biased_results.keys()), data=results_array)
metric_values = list(biased_results[list(biased_results.keys())[0]].keys())
mae_df.insert(0, "metric", metric_values)

# **RESULTS**

In [None]:
# Visualize the first rows of the error table (DataFrame.head() returns 5 rows by default)
mae_df.head()

Unnamed: 0,metric,AOA,UB_15,UB_2,UB_25,UB_3,STRATIFIED_15,STRATIFIED_2,STRATIFIED_25,STRATIFIED_3,STRATIFIED_v2_15,STRATIFIED_v2_2,STRATIFIED_v2_25,STRATIFIED_v2_3
0,auc,0.15724,0.27763,0.29181,0.29922,0.30254,0.29681,0.47593,1.36697,6.12093,0.27763,0.29181,0.29922,0.30254
1,recall,0.01971,0.00915,0.00802,0.00703,0.00623,0.00915,0.00802,0.00703,0.00623,0.00915,0.00802,0.00703,0.00623
2,biased_bias,To be compute,Nope,Nope,Nope,Nope,3451663.26356,20184762.59991,93130045.83054,433559092.77438,0.0,0.0,0.0,0.0
3,biased_concentration,Nope,Nope,Nope,Nope,Nope,113435.15748,66868.45999,43364.51613,32965.15099,154463.5961,153588.14088,153037.84968,152671.0668
4,unbiased_bias,To be compute,Nope,Nope,Nope,Nope,19434.09558,190039.46594,936784.57082,5960091.41135,0.0,0.0,0.0,0.0
