##### This code is based on "CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems" by ["Mohammadmehdi Naghiaei, Hossein A. Rahmani, and Yashar Deldjoo"], available at: [rahmanidashti/CPFairRecSys: Official Codes](https://github.com/rahmanidashti/CPFairRecSys). 

Some parts of this code (functions, parameters, algorithms) are taken directly from the original work. Additional modifications, new functions, and changes have been made by Reza Shafiloo as part of this project.


## Install Recommendation library

In [None]:
! pip install cornac

## Import packages

In [None]:
# import packages
import os
import numpy as np
from collections import defaultdict
from tqdm import tqdm

from itertools import product
from sys import stdout as out

import pandas as pd
import ast  # Safely evaluate string to dictionary if necessary
import copy

from tqdm import tqdm
import random
import math
from ast import literal_eval

import cornac
from cornac.eval_methods import BaseMethod, RatioSplit
from cornac.models import MostPop, UserKNN, ItemKNN, MF, PMF, BPR, NeuMF, WMF, HPF, CVAE, VAECF, NMF
from cornac.metrics import Precision, Recall, NDCG, AUC, MAP, FMeasure, MRR
from cornac.data import Reader
from cornac.utils import cache

## Configuration 

In [None]:
# dataset config
ds_names = ["Food"]
ds_users = ['005']
ds_items = ['020']

###
no_user_groups = 2
no_item_groups = 2
topk = 50  # this is not the length of the recommendation list; it is only the number of top-ranked items considered by the optimisation

## Load `Cornac` data and model

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# reading the train, test, and val sets
def read_data(dataset):
    """
    Load the train, tune, and test interaction files of a dataset with Cornac's Reader.

    The files are read from ``datasets/<dataset>/<dataset>_{train,tune,test}.txt``
    as tab-separated (user, item, rating) triples.

    Parameters
    ----------
    dataset : the name of the dataset
      example: 'MovieLens100K'

    Returns
    ----------
    train_data:
      The parsed content of the train file
    tune_data:
      The parsed content of the tune file
    test_data:
      The parsed content of the test file
    """
    reader = Reader()
    path_prefix = f"datasets/{dataset}/{dataset}"
    # Read the three splits in the same order as they are returned.
    train_data, tune_data, test_data = [
        reader.read(fpath=f"{path_prefix}_{split}.txt", fmt='UIR', sep='\t')
        for split in ("train", "tune", "test")
    ]
    return train_data, tune_data, test_data

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# load data into Cornac evaluation method
def load_data(train_data, test_data):
    """
    Wrap pre-split train/test data in a Cornac ``BaseMethod`` evaluation method.

    Parameters
    ----------
    train_data:
      train_data from Reader Class
    test_data:
      test_data from Reader Class

    Returns
    ----------
    eval_method:
      The evaluation method built by ``BaseMethod.from_splits`` with a rating
      threshold of 5, unknown users/items excluded, and verbose output enabled
    """
    # exclude_unknowns=True: users/items that are unknown are left out of the evaluation.
    split_options = dict(rating_threshold=5, exclude_unknowns=True, verbose=True)
    return BaseMethod.from_splits(train_data=train_data, test_data=test_data, **split_options)

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# running the cornac
def run_model(eval_method):
    """
    Build a Cornac experiment for the configured model(s) and metrics, then run it.

    Parameters
    ----------
    eval_method:
      Cornac's evaluation protocol

    Returns
    ----------
    exp:
      The Cornac Experiment after ``run()`` has been called
    """
    # The model that is currently enabled.
    vaecf = VAECF(k=10, autoencoder_structure=[20], act_fn="tanh", likelihood="mult", n_epochs=100,
                  batch_size=100, learning_rate=0.001, beta=1.0, seed=123, use_gpu=True, verbose=False)

    # Alternative configurations, kept for reference (uncomment to enable):
    # MostPop()
    # UserKNN(k=20, similarity="cosine", weighting="bm25", name="UserKNN-BM25")
    # ItemKNN(k=20, similarity="cosine", name="ItemKNN-Cosine")
    # BPR(k=50, max_iter=200, learning_rate=0.001, lambda_reg=0.001, verbose=True)
    # WMF(k=50, max_iter=50, learning_rate=0.001, lambda_u=0.01, lambda_v=0.01, verbose=True, seed=123)
    # HPF(k=50, seed=123, hierarchical=False, name="PF")
    # NeuMF(num_factors=9, layers=[32, 16, 8], act_fn="tanh", num_epochs=5, num_neg=3, batch_size=256, lr=0.001, seed=42, verbose=True)
    enabled_models = [vaecf]

    # Metrics reported by the experiment.
    # Other cut-offs used previously: Precision/Recall/FMeasure/NDCG at k in {5, 10, 20, 50}.
    reported_metrics = [AUC(), MAP(), MRR(), NDCG(k=10), Recall(k=10)]

    experiment = cornac.Experiment(eval_method=eval_method, models=enabled_models, metrics=reported_metrics)
    experiment.run()
    return experiment


## Load user and item groups

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# Create a set of IDs for each users group
# Fill the matrix U, which marks the group(s) each user belongs to
def read_user_groups(user_group_fpath: str, gid) -> set:
    """
    Read the user ids of one user group and mark them in the global matrix U

    Parameters
    ----------
    user_group_fpath:
      The path of the user group file (one raw user id per line)
    gid:
      The column of U to set to 1 for every user read from the file

    U (global variable):
      The global matrix of users and their group; updated in place
    eval_method (global variable):
      Provides ``train_set.uid_map`` used to convert raw user ids to indices

    Returns
    ----------
    user_ids:
      The set of user indices corresponding to the group

    Raises
    ----------
    KeyError:
      If a user id of the file is not present in ``train_set.uid_map``
    """
    uid_map = eval_method.train_set.uid_map
    user_ids = set()
    # The context manager closes the file even if an id lookup fails.
    with open(user_group_fpath, 'r') as user_group:
        for eachline in user_group:
            raw_uid = eachline.strip()
            if not raw_uid:
                # tolerate blank lines instead of failing on the lookup of ''
                continue
            # convert uids to uidx
            uid = int(uid_map[raw_uid])
            user_ids.add(uid)
            U[uid][gid] = 1
    return user_ids

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# read test data
def read_ground_truth(test_file):
    """
    Read test set data

    Parameters
    ----------
    test_file:
      The path of the test set file; each line holds three whitespace-separated
      fields (user id, item id, and a third field that is ignored)

    eval_method (global variable):
      Provides ``train_set.uid_map`` / ``train_set.iid_map`` used to convert
      raw ids to indices

    Returns
    ----------
    ground_truth:
      A dictionary (defaultdict of sets) mapping each user index to the item
      indices the user has in the test data

    Raises
    ----------
    KeyError:
      If a user or item id of the file is not present in the train set maps
    """
    uid_map = eval_method.train_set.uid_map
    iid_map = eval_method.train_set.iid_map
    ground_truth = defaultdict(set)
    # The context manager closes the file even if parsing fails midway.
    with open(test_file, 'r') as truth_data:
        for eachline in truth_data:
            fields = eachline.strip().split()
            if not fields:
                # tolerate blank lines instead of failing on the unpacking below
                continue
            uid, iid, _ = fields
            # convert uids to uidx and iids to iidx
            uid, iid = int(uid_map[uid]), int(iid_map[iid])
            ground_truth[uid].add(iid)

    return ground_truth

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# read train data
def read_train_data(train_file):
    """
    Read train set data

    Parameters
    ----------
    train_file:
      The path of the train set file; each line holds three whitespace-separated
      fields (user id, item id, and a third field that is ignored)

    eval_method (global variable):
      Provides ``train_set.uid_map`` / ``train_set.iid_map`` used to convert
      raw ids to indices

    Returns
    ----------
    train_checkins:
      A dictionary (defaultdict of sets) mapping each user index to the item
      indices the user has in the train data
    pop: dictionary
      A dictionary of all items alongside of its occurrences counter in the training data
      example: {1198: 893, 1270: 876, 593: 876, 2762: 867}

    Raises
    ----------
    KeyError:
      If a user or item id of the file is not present in the train set maps
    """
    uid_map = eval_method.train_set.uid_map
    iid_map = eval_method.train_set.iid_map
    train_checkins = defaultdict(set)
    pop_items = dict()

    # The context manager closes the file even if parsing fails midway.
    # (The per-line debug print of the original was dropped: it emitted one
    # line of output for every training interaction.)
    with open(train_file, 'r') as train_data:
        for eachline in train_data:
            fields = eachline.strip().split()
            if not fields:
                # tolerate blank lines instead of failing on the unpacking below
                continue
            uid, iid, _ = fields
            # convert uids to uidx and iids to iidx
            uid, iid = int(uid_map[uid]), int(iid_map[iid])
            # a dictionary of popularity of items
            pop_items[iid] = pop_items.get(iid, 0) + 1
            train_checkins[uid].add(iid)
    return train_checkins, pop_items

## Metrics

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def catalog_coverage(predicted: list, catalog: list) -> float:
    """
    Computes the share of the catalog that appears in at least one recommendation list
    Parameters
    ----------
    predicted : a list of lists
        Ordered predictions, one list per user
        example: [['X', 'Y', 'Z'], ['X', 'Y', 'Z']]
    catalog: list
        A collection of all unique items in the training data (only its length is used)
        example: ['A', 'B', 'C', 'X', 'Y', Z]
    Returns
    ----------
    catalog_coverage:
        The number of distinct recommended items divided by the catalog size,
        as a percent rounded to 2 decimal places
    ----------
    Metric Definition:
    Ge, M., Delgado-Battenfeld, C., & Jannach, D. (2010, September).
    Beyond accuracy: evaluating recommender systems by coverage and serendipity.
    In Proceedings of the fourth ACM conference on Recommender systems (pp. 257-260). ACM.
    """
    # Collect every distinct item that was recommended to anyone.
    distinct_recommended = set()
    for user_list in predicted:
        distinct_recommended.update(user_list)

    covered_fraction = len(distinct_recommended) / (len(catalog) * 1.0)
    # output: percent (%)
    return round(covered_fraction * 100, 2)

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def novelty(predicted: list, pop: dict, u: int, k: int) -> float:
    """
    Computes the novelty (mean self-information) of one user's recommended items
    Parameters
    ----------
    predicted : a list of recommended items
        Ordered predictions
        example: ['X', 'Y', 'Z']
    pop: dictionary
        A dictionary of all items alongside of its occurrences counter in the training data
        example: {1198: 893, 1270: 876, 593: 876, 2762: 867}
    u: integer
        The number of users in the training data
    k: integer
        The length of recommended lists per user (used as the divisor)
    Returns
    ----------
    novelty:
        The sum of -log2(pop[item] / u) over the recommended items found in
        ``pop`` (items missing from ``pop`` contribute 0), divided by k
    ----------
    Metric Definition:
    Zhou, T., Kuscsik, Z., Liu, J. G., Medo, M., Wakeling, J. R., & Zhang, Y. C. (2010).
    Solving the apparent diversity-accuracy dilemma of recommender systems.
    Proceedings of the National Academy of Sciences, 107(10), 4511-4515.
    """
    # Self-information of each known item; unknown items add nothing to the sum.
    total_self_information = sum(
        -np.log2(pop[item] / u) for item in predicted if item in pop
    )
    return total_self_information / k

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def precisionk(actual, predicted):
    """Return the number of distinct items shared by `actual` and `predicted`, divided by len(predicted)."""
    hits = set(actual).intersection(set(predicted))
    return float(len(hits)) / len(predicted)

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def recallk(actual, predicted):
    """Return the number of distinct items shared by `actual` and `predicted`, divided by len(actual)."""
    hits = set(actual).intersection(set(predicted))
    return float(len(hits)) / len(actual)

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def ndcgk(actual, predicted):
    """
    Normalised discounted gain of a ranked list against a set of relevant items.

    The item at position 1 has weight 1; the item at position r >= 2 has weight
    1 / ln(r) (natural logarithm). The normaliser is the sum of the weights of
    all positions of `predicted`, i.e. the score of a list whose every item is
    relevant. `predicted` must contain at least one item.
    """
    gain = 1.0 if predicted[0] in actual else 0.0
    ideal_gain = 1.0
    for position, item in enumerate(predicted[1:], start=2):
        weight = 1.0 / np.log(position)
        ideal_gain += weight
        if item in actual:
            gain += weight
    return gain / ideal_gain

## Load User and Item Matrices

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.

# The computed scores are also saved in the repo, so you can load them from there for your experiments.
# S is a matrix that stores each user's score for every item
# P includes the indices of the topk ranked items
# Sprime saves the scores of the topk ranked items

def load_ranking_matrices(model, total_users, total_items, topk):
    """
    Build the score matrix S and the top-k item index matrix P for all users

    Parameters
    ----------
    model:
      A trained model exposing ``name`` and ``score(uid)``; ``score(uid)`` must
      return an array of ``total_items`` scores that supports ``argsort()``
    total_users:
      The number of users (rows of S and P)
    total_items:
      The number of items (columns of S)
    topk:
      The number of best-scored items to keep per user (columns of P)

    Returns
    ----------
    S:
      (total_users, total_items) array with the model score of every item
    P:
      (total_users, topk) array with the indices of each user's topk items in
      descending score order; stored as floats because P is created with np.zeros
    """
    S = np.zeros((total_users, total_items))
    P = np.zeros((total_users, topk))

    print(model.name)
    for uid in tqdm(range(total_users)):
        # Score the user once and reuse the result for both matrices
        # (the model was previously queried twice per user).
        user_scores = model.score(uid)
        S[uid] = user_scores
        # argsort is ascending: reverse it and keep the first topk indices
        P[uid] = user_scores.argsort()[::-1][:topk]

    return S, P

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# Ahelp is a binary matrix in which an element is 1 if the corresponding element of P (which is an item index) is in the ground truth.
# That is, it shows whether each ranked item in P is included in the ground truth or not.

def load_ground_truth_index(total_users, topk, P, train_checkins):
    """
    Build the binary matrix Ahelp of shape (total_users, topk).

    Ahelp[uid][j] is 1 when the item stored at P[uid][j] is contained in
    train_checkins[uid], and 0 otherwise.
    """
    Ahelp = np.zeros((total_users, topk))
    for uid in tqdm(range(total_users)):
        for rank in range(topk):
            candidate = P[uid][rank]
            # both sides are expected to already be internal item indices
            is_known_item = candidate in train_checkins[uid]
            if is_known_item:
                Ahelp[uid, rank] = 1
    return Ahelp

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
# create a set of IDs for each item group
def read_item_groups(item_group_fpath: str, gid) -> set:
    """
    Read the item ids of one item group and mark them in the global matrix I

    Parameters
    ----------
    item_group_fpath:
      The path of the item group file (one raw item id per line)
    gid:
      The column of I to set to 1 for every item read from the file

    I (global variable):
      The global matrix of items and their group; updated in place
    eval_method (global variable):
      Provides ``train_set.iid_map`` used to convert raw item ids to indices

    Returns
    ----------
    item_ids:
      The set of item indices corresponding to the group

    Raises
    ----------
    KeyError:
      If an item id of the file is not present in ``train_set.iid_map``
    """
    iid_map = eval_method.train_set.iid_map
    item_ids = set()
    # The context manager closes the file even if an id lookup fails.
    with open(item_group_fpath, 'r') as item_group:
        for eachline in item_group:
            raw_iid = eachline.strip()
            if not raw_iid:
                # tolerate blank lines instead of failing on the lookup of ''
                continue
            # convert iids to iidx
            iid = int(iid_map[raw_iid])
            item_ids.add(iid)
            I[iid][gid] = 1
    return item_ids

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def read_item_index(total_users, topk, no_item_groups):
    """
    Build the binary tensor Ihelp of shape (total_users, topk, no_item_groups).

    For the item at P[uid][lid] (global P), slot 0 is set when the item is in
    the global `shorthead_item_ids`, otherwise slot 1 is set when it is in the
    global `longtail_item_ids`; items in neither set keep an all-zero row.
    """
    Ihelp = np.zeros((total_users, topk, no_item_groups))
    for uid in range(total_users):
        for lid in range(topk):
            item = P[uid][lid]
            if item in shorthead_item_ids:
                group_slot = 0
            elif item in longtail_item_ids:
                group_slot = 1
            else:
                continue
            Ihelp[uid][lid][group_slot] = 1
    return Ihelp

In [None]:

# New script created by Reza Shafiloo
# This script loads the user training data and the item metadata, merges them, and counts
# the training ratings of each item per provider. The counts are aggregated into one
# {IID: RatingCount} dictionary per provider, and that dictionary is attached to every
# item row of itemMeta in the 'itemsAndRatings' column.
# Each row receives its own dictionary copy, so modifying the inventory of one row never
# changes another row. Items whose provider has no rating in the training data get an
# empty inventory ({}).
# Loading the data
TrainingData = pd.read_csv(f"datasets/{ds_names[0]}/{ds_names[0]}_train.txt", sep="\t",
                           names=["User-ID", "IID", "Rating"], index_col=False,
                           encoding="unicode-escape", on_bad_lines='skip')

itemMeta = pd.read_csv(f"datasets/{ds_names[0]}/{ds_names[0]}_meta.txt", sep="\t",
                       names=["IID", "Provider"], index_col=False,
                       encoding="unicode-escape", on_bad_lines='skip')

# Merge TrainingData with itemMeta
merged_data = pd.merge(TrainingData, itemMeta, on="IID", how="left")

# Group by Provider and IID, and count ratings for each IID
item_rating_counts = merged_data.groupby(['Provider', 'IID']).size().reset_index(name='RatingCount')

# Aggregate these counts at the provider level into a dictionary (IID, RatingCount)
providers_inventory = item_rating_counts.groupby('Provider').apply(
    lambda x: dict(zip(x['IID'], x['RatingCount']))
).reset_index(name='itemsAndRatings')

# Add this aggregated data as a new column in itemMeta DataFrame
itemMeta = itemMeta.merge(providers_inventory, on='Provider', how='left')

# The left merge leaves NaN for providers without training ratings, and it makes all rows
# of a provider share one dictionary object. Normalise both: every row gets an independent
# dictionary, and missing inventories become {}. (The previous str -> literal_eval round
# trip produced the same copies but raised ValueError on the NaN rows.)
itemMeta['itemsAndRatings'] = itemMeta['itemsAndRatings'].apply(
    lambda inventory: dict(inventory) if isinstance(inventory, dict) else {}
)



## Evaluation

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def metric_per_group(group, W):
    """
    Evaluate the selected recommendations for the users of one group

    Parameters
    ----------
    group:
      An iterable of user indices
    W:
      Decision variables; the item at P[uid][j] counts as recommended to the
      user when ``W[uid][j].x == 1``

    ground_truth, P, pop_items, eval_method (global variables):
      Test items per user, candidate item matrix, item popularity counters,
      and the evaluation method providing ``total_users``

    Returns
    ----------
    A tuple (NDCG, precision, recall, novelty, catalog coverage): the first
    four are means over the users of `group` that appear in ground_truth,
    rounded to 5 decimals; the coverage is computed over all their lists.
    Novelty is always computed with k=10.
    """
    NDCG10 = list()
    Pre10 = list()
    Rec10 = list()
    Novelty10 = list()
    All_Predicted = list()
    for uid in tqdm(group):
        if uid not in ground_truth:
            continue
        # Walk over every candidate position of the user instead of a
        # hard-coded 50, so the function follows the configured topk.
        predicted = [P[uid][j] for j in range(len(W[uid])) if W[uid][j].x == 1]
        All_Predicted.append(predicted)

        actual = ground_truth[uid]
        NDCG10.append(ndcgk(actual=actual, predicted=predicted))
        Pre10.append(precisionk(actual=actual, predicted=predicted))
        Rec10.append(recallk(actual=actual, predicted=predicted))
        Novelty10.append(novelty(predicted=predicted, pop=pop_items, u=eval_method.total_users, k=10))

    catalog = catalog_coverage(predicted=All_Predicted, catalog=pop_items.keys())
    return round(np.mean(NDCG10), 5), round(np.mean(Pre10), 5), round(np.mean(Rec10), 5), round(np.mean(Novelty10),
                                                                                                5), catalog

In [None]:
# Function from Original Work by Hossein A. Rahmani rahmanidashti/CPFairRecSys: [Official Codes] CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# This function has been used without modifications.
def metric_on_all(W):
    """
    Evaluate the selected recommendations for all users

    Parameters
    ----------
    W:
      Decision variables; the item at P[uid][j] counts as recommended to the
      user when ``W[uid][j].x == 1``

    ground_truth, P, pop_items, eval_method (global variables):
      Test items per user, candidate item matrix, item popularity counters,
      and the evaluation method providing ``total_users``

    Returns
    ----------
    A tuple (NDCG, precision, recall, novelty, catalog coverage): the first
    four are means over the users that appear in ground_truth, rounded to
    5 decimals; the coverage is computed over all their lists.
    Novelty is always computed with k=10.
    """
    NDCG_all = list()
    PRE_all = list()
    REC_all = list()
    Novelty_all = list()
    All_Predicted = list()

    for uid in tqdm(range(eval_method.total_users)):
        if uid not in ground_truth:
            continue
        # Walk over every candidate position of the user instead of a
        # hard-coded 50, so the function follows the configured topk.
        predicted_user = [P[uid][j] for j in range(len(W[uid])) if W[uid][j].x == 1]
        All_Predicted.append(predicted_user)

        actual = ground_truth[uid]
        NDCG_all.append(ndcgk(actual=actual, predicted=predicted_user))
        PRE_all.append(precisionk(actual=actual, predicted=predicted_user))
        REC_all.append(recallk(actual=actual, predicted=predicted_user))
        Novelty_all.append(novelty(predicted=predicted_user, pop=pop_items, u=eval_method.total_users, k=10))

    catalog = catalog_coverage(predicted=All_Predicted, catalog=pop_items.keys())
    return round(np.mean(NDCG_all), 5), round(np.mean(PRE_all), 5), round(np.mean(REC_all), 5), round(
        np.mean(Novelty_all), 5), catalog

## Optimization Functions

In [None]:
# New function created by Reza Shafiloo
#  Updates the provider inventories by incrementing the count of items based on the recommendation list.
# For each item in the recommendation list, it finds the provider that owns the item and updates the count.
# If the item is not found in any provider's inventory, a message is printed.
    
def update_provider_inventory(recommendation_list):
    """
    Increment, in the global `providers` inventories, the counter of every recommended item.

    Only the first inventory that contains the item is updated. When no
    inventory contains the item, a message is printed and nothing changes.
    """
    for item_id in recommendation_list:
        # first provider dictionary that owns this item, if any
        owner = next((inventory for inventory in providers if item_id in inventory), None)
        if owner is None:
            print(f"Item {item_id} not found in any provider's inventory")
        else:
            owner[item_id] += 1


In [None]:
# New function created by Reza Shafiloo
# Calculates the deviation from item fairness (DIF) for each item in the recommendation list.
# For every item it takes the inventory of the provider that owns the item, computes the entropy
# the inventory would have if the item were recommended once more, and stores the difference
# from the optimal entropy (uniform distribution over the provider's items).
# The inventories themselves are never modified.

def calculate_deviation_from_item_fairness(recommendation_list, providers):
    """
    Compute one item-fairness deviation per recommended item

    Parameters
    ----------
    recommendation_list:
      The candidate items, in ranking order
    providers:
      An iterable of dictionaries {item: count}, one per provider inventory

    Returns
    ----------
    DIF_list:
      A list with exactly one value per entry of `recommendation_list`, in the
      same order, so that DIF_list[j] belongs to recommendation_list[j].
      The value is log2(number of items of the owning provider) minus the
      entropy of the provider's counts after adding 1 to the item's count.
      The owning provider is the first inventory that contains the item (the
      same rule update_provider_inventory applies); items that no inventory
      contains get 0.0, i.e. no penalty.
    """
    DIF_list = []

    for item_id in recommendation_list:
        owner = next((inventory for inventory in providers if item_id in inventory), None)
        if owner is None:
            # Keep the output aligned with the input even for unknown items.
            DIF_list.append(0.0)
            continue

        # Entropy of the inventory as if the item had been recommended once
        # more; computed on the fly so the shared inventory is not mutated.
        total_recommendations = sum(owner.values()) + 1
        entropy = 0
        for key, count in owner.items():
            if key == item_id:
                count += 1
            p = count / total_recommendations
            if p > 0:
                entropy -= p * math.log2(p)

        # Optimal entropy (assuming uniform distribution over the provider's items)
        optimal_entropy = math.log2(len(owner)) if total_recommendations > 0 else 0

        DIF_list.append(optimal_entropy - entropy)

    return DIF_list

In [None]:

# New function created by Reza Shafiloo
# Calculates the deviation from provider fairness (DPF) for each item in the recommendation list.
# For every item it computes the entropy of the distribution of recommendation counts across
# providers as if the item's provider had received one more recommendation, and stores the
# difference from the optimal entropy (uniform distribution over providers).
# The inventories themselves are never modified.

def calculate_deviation_from_provider_fairness(recommendation_list, providers):
    """
    Compute one provider-fairness deviation per recommended item

    Parameters
    ----------
    recommendation_list:
      The candidate items, in ranking order
    providers:
      An iterable of dictionaries {item: count}, one per provider inventory

    Returns
    ----------
    DPF_list:
      A list with exactly one value per entry of `recommendation_list`, in the
      same order, so that DPF_list[j] belongs to recommendation_list[j].
      The value is log2(number of providers) minus the entropy of the
      per-provider totals after adding 1 to the total of the owning provider.
      The owning provider is the first inventory that contains the item (the
      same rule update_provider_inventory applies); items that no inventory
      contains get 0.0, i.e. no penalty.
    """
    DPF_list = []
    # The per-provider totals do not change while scoring, so sum them once.
    provider_totals = [sum(provider_dict.values()) for provider_dict in providers]
    total_length = sum(provider_totals)
    # Optimal entropy (assuming uniform distribution over providers)
    optimal_entropy = math.log2(len(providers)) if len(providers) > 0 else 0

    for item_id in recommendation_list:
        owner_idx = next(
            (idx for idx, provider_dict in enumerate(providers) if item_id in provider_dict),
            None,
        )
        if owner_idx is None:
            # Keep the output aligned with the input even for unknown items.
            DPF_list.append(0.0)
            continue

        # Entropy across providers as if the owner had one more recommendation.
        entropy = 0
        for idx, count in enumerate(provider_totals):
            if idx == owner_idx:
                count += 1
            p = count / (total_length + 1)
            if p > 0:
                entropy -= p * math.log2(p)

        DPF_list.append(optimal_entropy - entropy)

    return DPF_list

In [None]:

# New Class created by Reza Shafiloo
# Represents a decision variable for selecting items in the optimization process.
# Initializes with a default value of 0, indicating that the item is not selected.
    
class DecisionVariable:
    """Selection flag of one candidate item, read and written through the attribute `x`."""

    def __init__(self):
        # 0 means "not selected"; the optimizer sets it to 1 for chosen items
        self.x = 0

In [None]:

# New function created by Reza Shafiloo
# Optimizes the recommendation process based on the specified fairness criteria ('N', 'I', 'P', 'C', 'CI', 'CP', 'IP', 'CIP'). It adjusts item scores using different fairness measures, selects the top-k items for each user,and updates user, group, and provider metrics accordingly. The function returns the decision variables and item group metrics.
    
def CIP_Recommendation_optimizer(fairness='N', cepsilon=0.000005, iepsilon=0.0000005, pepsilon=0.0000005):
    """
    Greedily select up to 10 items per user from the candidates in P

    Users are processed in index order. For each user the candidate scores are
    reduced by the penalties selected through `fairness`, the best-scored
    items are marked as selected, and the running state (exposure of the user
    groups, item-group counters, provider inventories) is updated before the
    next user is processed.

    Parameters
    ----------
    fairness:
      Which penalties to apply: 'N' (none), 'C' (consumer), 'I' (item),
      'P' (provider), or the combinations 'CI', 'CP', 'IP', 'CIP'
    cepsilon, iepsilon, pepsilon:
      Weights of the consumer, item, and provider penalties

    total_users, no_user_groups, no_item_groups, P, S, U, Ahelp, Ihelp,
    providers (global variables):
      Read from the notebook scope; `providers` is updated through
      update_provider_inventory

    Returns
    ----------
    W:
      Dictionary {uid: {j: DecisionVariable}}; ``W[uid][j].x`` is 1 when the
      item at P[uid][j] was selected for the user
    item_group:
      Dictionary {item group: number of selected items flagged in Ihelp}

    Raises
    ----------
    ValueError:
      If `fairness` is not one of the supported criteria
    """
    supported_criteria = ('N', 'I', 'P', 'C', 'CI', 'CP', 'IP', 'CIP')
    if fairness not in supported_criteria:
        # Fail early with a clear message instead of a NameError inside the loop.
        raise ValueError(f"Unknown fairness criterion {fairness!r}; expected one of {supported_criteria}")

    use_consumer = 'C' in fairness
    use_item = 'I' in fairness
    use_provider = 'P' in fairness

    list_size = 10  # maximum number of items selected per user
    user_groups = range(no_user_groups)
    item_groups = range(no_item_groups)

    # Accumulated hits of the selected items per user group (feeds the consumer penalty).
    group_ndcg = {g: 0 for g in user_groups}
    item_group = {g: 0 for g in item_groups}

    W = {uid: {j: DecisionVariable() for j in range(len(P[uid]))} for uid in range(total_users)}

    def consumer_penalty(uid, j):
        # Gap between user group 1 and user group 0 if this candidate were
        # selected (the formula assumes exactly these two groups exist).
        return cepsilon * (
                (group_ndcg[1] + Ahelp[uid][j] * U[uid][1]) - (group_ndcg[0] + Ahelp[uid][j] * U[uid][0]))

    for uid in tqdm(range(total_users), leave=False):
        positions = range(len(P[uid]))  # Top items for the user 'uid'

        # The deviations are only computed when the chosen criterion uses them.
        DIF = calculate_deviation_from_item_fairness(P[uid], providers) if use_item else None
        DPF = calculate_deviation_from_provider_fairness(P[uid], providers) if use_provider else None

        # NOTE(review): S[uid][j] is indexed with the candidate position j, not
        # with the item id stored in P[uid][j]. That is only correct if the
        # global S holds the scores of the top-k candidates in ranking order
        # - confirm against the cell that defines S.
        adjusted_scores = {}
        for j in positions:
            score = S[uid][j]
            if fairness == 'CIP':
                # Same subtraction order as the original 'CIP' expression.
                score = score - iepsilon * DIF[j] - pepsilon * DPF[j] - consumer_penalty(uid, j)
            else:
                # Same subtraction order as the original expressions:
                # consumer first, then item, then provider.
                if use_consumer:
                    score = score - consumer_penalty(uid, j)
                if use_item:
                    score = score - iepsilon * DIF[j]
                if use_provider:
                    score = score - pepsilon * DPF[j]
            adjusted_scores[P[uid][j]] = score

        # Select the top items based on the adjusted scores
        selected_items = sorted(adjusted_scores, key=adjusted_scores.get, reverse=True)[:min(list_size, len(P[uid]))]
        for j in positions:
            if P[uid][j] in selected_items:
                W[uid][j].x = 1

        # Number of selected candidates flagged in Ahelp for this user
        user_dcg = sum(W[uid][j].x * Ahelp[uid][j] for j in positions)

        # Update group metrics
        for g in user_groups:
            group_ndcg[g] += user_dcg * U[uid][g]

        for g in item_groups:
            item_group[g] += sum(W[uid][j].x * Ihelp[uid][j][g] for j in positions)

        # Later users see the inventories including this user's selection.
        update_provider_inventory(selected_items)
    return W, item_group


In [None]:
# New function created by Reza Shafiloo
# This function simulates the update of provider inventories based on the generated recommendation lists.
# It works on a deep copy of the original provider inventory (PI) to ensure
# the original data remains unchanged. It returns both the original and updated inventories.


def simulate_inventory_update(W, PI):
    """Simulate how the generated recommendation lists change provider inventories.

    Reads the notebook-level globals ``eval_method``, ``ground_truth`` and
    ``itemMeta``.

    Parameters
    ----------
    W : indexable by user id
        ``W[uid]`` is iterated and every entry is cast with ``int``; an entry
        equal to 1 marks the item at the same position as selected.
    PI : list
        Provider inventories. Each entry is either a dict keyed by item iid or
        a string holding the literal form of such a dict.

    Returns
    -------
    tuple
        ``(PI, updated)`` where ``PI`` is the untouched input and ``updated``
        is a deep copy in which every inventory entry whose iid was
        recommended has the number of recommendations added to it.
    """
    from collections import Counter

    # Work on a deep copy of the providers inventory so the original remains unchanged.
    temp_PI = copy.deepcopy(PI)

    RecLists = []  # one list of recommended iids per user that has ground truth
    for uid in tqdm(range(eval_method.total_users)):
        if uid not in ground_truth:
            continue
        mask = [int(var) for var in W[uid]]
        # NOTE(review): the mask is zipped against ground_truth[uid]; confirm
        # that W[uid] is really positionally aligned with that list.
        sublist = [item for item, m in zip(ground_truth[uid], mask) if m == 1]
        # Filtering through itemMeta keeps only iids known to the metadata,
        # in itemMeta row order (one entry per matching metadata row).
        filtered_item_df = itemMeta[itemMeta['iid'].isin(sublist)]
        RecLists.append(filtered_item_df['iid'].tolist())

    # Number of times each iid appears across all recommendation lists.
    IID_counts = dict(Counter(IID for recommendation in RecLists for IID in recommendation))
    print("IID_counts:",IID_counts)

    # Add the counts to the copied provider inventories.
    for i, provider_entry in enumerate(temp_PI):
        try:
            if isinstance(provider_entry, str):
                provider_dict = ast.literal_eval(provider_entry)
            else:
                provider_dict = provider_entry

            if not isinstance(provider_dict, dict):
                raise ValueError("Provider inventory entry is not a dictionary or convertible string!")

            # Only values change here, never the key set, so iterating the
            # dict directly is safe.
            for IID in provider_dict:
                if IID in IID_counts:
                    provider_dict[IID] += IID_counts[IID]

            # Store the updated dictionary back into the temporary inventory
            temp_PI[i] = provider_dict

        except (ValueError, SyntaxError, TypeError) as e:
            # Best effort: report the malformed entry and leave it as it is.
            print(f"Error processing inventory entry at index {i}: {e}")

    # Return the unchanged original inventory and the updated copy
    return PI, temp_PI

In [None]:
# New function created by Reza Shafiloo
# Calculates the Gini coefficient of each provider's inventory to measure inequality.
# The function also calculates the overall Gini coefficient of the total inventory across all providers
# and returns it together with the mean of the per-provider Gini coefficients and the individual values.

def calculate_gini_coefficients(inventories):
    """Compute Gini coefficients within and across provider inventories.

    Parameters
    ----------
    inventories : iterable
        Provider inventories. Entries that are dicts contribute their values;
        any other entry is reported on stdout and skipped.

    Returns
    -------
    tuple
        ``(total_gini_index, mean_gini_index, gini_indices)`` where
        ``total_gini_index`` is the Gini coefficient of the per-provider
        inventory totals, ``gini_indices`` holds one Gini coefficient per
        dict entry and ``mean_gini_index`` is their mean (0 when there are
        none).
    """
    def gini(array):
        """Calculate the Gini coefficient of a numpy array."""
        # All values are treated equally, arrays must be 1d:
        array = array.flatten()
        if (array < 0).any():
            array = array[array >= 0]  # Remove negative values for calculation
        if len(array) == 0:
            return 0  # Return 0 if array is empty
        total = array.sum()
        if total == 0:
            # Every value is zero: the distribution is perfectly equal, and
            # dividing by the zero total would otherwise produce NaN.
            return 0.0
        array = np.sort(array)
        n = array.shape[0]
        index = np.arange(1, n + 1)
        return ((2 * index - n - 1) * array).sum() / (n * total)

    gini_indices = []     # Gini coefficient of each provider's own inventory
    all_inventories = []  # total inventory of each provider

    # Single pass, so one-shot iterables work and each skipped entry is
    # reported once.
    for inventory in inventories:
        if not isinstance(inventory, dict):
            print("Inventory data is not a dictionary. Skipping...")
            continue
        values = list(inventory.values())
        gini_indices.append(gini(np.array(values)))
        all_inventories.append(sum(values))

    # Calculate Gini index for the total inventory across all providers
    total_gini_index = gini(np.array(all_inventories))

    # Calculate and return the mean of all Gini indices
    mean_gini_index = np.mean(gini_indices) if gini_indices else 0
    return total_gini_index,mean_gini_index, gini_indices

## Run

In [None]:
# Modified version of a function from rahmanidashti/CPFairRecSys: [Official Codes] 
# CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# by Hossein A. Rahmani.
# Modifications: In this function, we have added evaluation of the Gini index for provider and item inventories, 
# and these evaluations are included in the final results.
def write_results():
    """Append one CSV row with accuracy and Gini metrics for the current run.

    Takes no arguments; everything is read from notebook-level globals:
    ``providers``, ``fair_mode``, ``user_eps``, ``item_eps``, ``provider_eps``,
    ``active_user_ids``, ``inactive_user_ids``, ``W``, ``item_group``,
    ``dataset``, ``model``, ``u_group``, ``i_group``, ``eval_method`` and the
    open file object ``results``.

    An epsilon column holds the value formatted with seven decimals when the
    matching letter ('C' user, 'I' item, 'P' provider) is part of
    ``fair_mode`` and ``-`` otherwise. For a ``fair_mode`` outside the eight
    known modes only the line terminator is written.
    """
    gini_provider, mean_gini_inventory, _ = calculate_gini_coefficients(providers)
    print("mean_gini ",fair_mode," provider eps ", provider_eps," item eps ", item_eps," user eps ", user_eps,':', 
          " mean_gini_inventory: ",mean_gini_inventory," gini_provider: ",gini_provider)
    ndcg_ac, pre_ac, rec_ac, novelty_ac, coverage_ac = metric_per_group(group=active_user_ids, W=W)
    ndcg_iac, pre_iac, rec_iac, novelty_iac, coverage_iac = metric_per_group(group=inactive_user_ids, W=W)
    ndcg_all, pre_all, rec_all, novelty_all, coverage_all = metric_on_all(W=W)

    if fair_mode in ('N', 'C', 'I', 'P', 'CI', 'CP', 'IP', 'CIP'):
        # The epsilon globals are only formatted for the terms that are
        # active in this mode, so inactive ones are never read here.
        user_col = format(user_eps, '.7f') if 'C' in fair_mode else '-'
        item_col = format(item_eps, '.7f') if 'I' in fair_mode else '-'
        provider_col = format(provider_eps, '.7f') if 'P' in fair_mode else '-'
        # The last column is the literal text "<total_users * 10>==<short + long>".
        results.write(
            f"{dataset},{model.name},{u_group}%,{i_group}%,{fair_mode},"
            f"{user_col},{item_col},{provider_col},"
            f"{gini_provider},{mean_gini_inventory},"
            f"{ndcg_all},{ndcg_ac},{ndcg_iac},"
            f"{pre_all},{pre_ac},{pre_iac},"
            f"{rec_all},{rec_ac},{rec_iac},"
            f"{novelty_all},{novelty_ac},{novelty_iac},"
            f"{coverage_all},{coverage_ac},{coverage_iac},"
            f"{item_group[0]},{item_group[1]},"
            f"{eval_method.total_users * 10}=={item_group[0] + item_group[1]}")
    results.write('\n')

In [None]:

# New function created by Reza Shafiloo
# Function to add an internal item index (iid) to the item metadata

def addItemIndex():
    """Add an ``iid`` column to the global ``itemMeta`` frame.

    Each value of the ``IID`` column is converted with ``str`` and looked up
    in ``eval_method.train_set.iid_map``; ids missing from the map get 0.

    The ids are read from the ``IID`` column itself rather than through
    ``iterrows()``: a row Series has a single dtype, so an integer id can be
    upcast to float there and ``str(5.0)`` would no longer match the map keys.

    Returns
    -------
    int
        Always 0.
    """
    iid_map = eval_method.train_set.iid_map
    # NOTE(review): the fallback 0 may coincide with a real internal index;
    # confirm whether unmapped items should be marked differently.
    itemMeta["iid"] = [iid_map.get(str(original_id), 0) for original_id in itemMeta["IID"]]
    return 0

In [None]:

# New function created by Reza Shafiloo
# Function to add a new column with item IDs converted to internal IDs (iids) for provider inventories

def add_iid_column():
    """Rebuild ``providers_inventory['itemsAndRatings_iid']`` from scratch.

    Every entry of the ``itemsAndRatings`` column (a dict, or a string holding
    a dict literal) is re-keyed from real-world item ids to the ``iid`` values
    found in the global ``itemMeta`` frame. Ids without an ``iid`` are
    reported on stdout and left out of the converted dict.
    """
    # Lookup table: real-world item id -> iid.
    iid_values = itemMeta['iid'].values
    lookup = pd.Series(iid_values, index=itemMeta['IID']).to_dict()

    def _as_dict(raw):
        # String entries hold a dict literal; anything else is used as-is.
        return ast.literal_eval(raw) if isinstance(raw, str) else raw

    def _translate(item_dict):
        translated = {}
        for rwid, count in item_dict.items():
            mapped = lookup.get(rwid)
            if mapped is None:
                print(f"No iid found for RWID: {rwid}")
                continue
            translated[mapped] = count
        return translated

    providers_inventory['itemsAndRatings_iid'] = providers_inventory['itemsAndRatings'].apply(
        lambda entry: _translate(_as_dict(entry))
    )



In [None]:
# Modified version of a function from rahmanidashti/CPFairRecSys: [Official Codes] 
# CPFair: Personalized Consumer and Producer Fairness Re-ranking for Recommender Systems (SIGIR2022) (github.com) 
# by Hossein A. Rahmani.
# Modifications: 
# 1. Added a function for integrating providers' inventory into the provider dataset.
# 2. Modified the optimizer function to accept modes: ['N', 'C', 'I', 'P', 'CI', 'CP', 'IP', 'CIP'].
# 3. Updated providers' inventories after each run for Gini index calculation and then reset inventories 
#    to their initial state before running the algorithm in different modes to evaluate their impact on provider inventories.

# 1: Iterate over the datasets
# Driver: for every dataset / user grouping / item grouping / model, run the
# re-ranking optimizer in each fairness mode and write one CSV row per
# epsilon configuration via write_results(), which reads the loop variables
# set here (fair_mode, *_eps, W, item_group, providers, ...) as globals.
for dataset in ds_names:
    print(f"Datasets: {dataset}")
    # read train, tune, test datasets
    # (tune_data is not referenced again in this cell)
    train_data, tune_data, test_data = read_data(dataset=dataset)
    # load data into Cornac and create eval_method
    eval_method = load_data(train_data=train_data, test_data=test_data)
    total_users = eval_method.total_users
    total_items = eval_method.total_items
    # load train_checkins and pop_items dictionary
    train_checkins, pop_items = read_train_data(train_file=f"datasets/{dataset}/{dataset}_train.txt")
    # load ground truth dict
    ground_truth = read_ground_truth(test_file=f"datasets/{dataset}/{dataset}_test.txt")
    # run Cornac models and create experiment object including models' results
    exp = run_model(eval_method=eval_method)
    # Integrate inventory into the provider dataset:
    # addItemIndex() fills itemMeta["iid"], add_iid_column() then builds
    # providers_inventory["itemsAndRatings_iid"] from it.
    addItemIndex()            
    add_iid_column()
    Train_List_providers=providers_inventory["itemsAndRatings_iid"].tolist()
    # The first run works on a deep copy of the inventory list.
    providers=copy.deepcopy(Train_List_providers)
    # iterate over the user groupings
    for u_group in ds_users:
        # allocate matrix U (users x user groups), initially all zeros
        # NOTE(review): U is not passed to read_user_groups explicitly;
        # presumably it is filled through the global — verify.
        U = np.zeros((total_users, no_user_groups))
        # load active and inactive users
        active_user_ids = read_user_groups(user_group_fpath=f"user_groups/{dataset}/{u_group}/active_ids.txt", gid=0)
        inactive_user_ids = read_user_groups(user_group_fpath=f"user_groups/{dataset}/{u_group}/inactive_ids.txt",
                                             gid=1)
        print(
            f"ActiveU: {len(active_user_ids)}, InActive: {len(inactive_user_ids)}, All: {len(active_user_ids) + len(inactive_user_ids)}")
        # (len_sizes is not referenced again in this cell)
        len_sizes = [len(active_user_ids), len(inactive_user_ids)]
        # iterate over the item groupings
        for i_group in ds_items:
            # allocate matrix I (items x item groups), initially all zeros
            # NOTE(review): as with U, presumably filled by read_item_groups
            # through the global — verify.
            I = np.zeros((total_items, no_item_groups))
            # read item groups
            shorthead_item_ids = read_item_groups(
                item_group_fpath=f"item_groups/{dataset}/{i_group}/shorthead_items.txt", gid=0)
            longtail_item_ids = read_item_groups(item_group_fpath=f"item_groups/{dataset}/{i_group}/longtail_items.txt",
                                                 gid=1)
            print(
                f"No. of Shorthead Items: {len(shorthead_item_ids)} and No. of Longtaill Items: {len(longtail_item_ids)}")
            # iterate over the trained models
            for model in exp.models:
                # One result file per dataset and model, opened in 'w' mode.
                # NOTE(review): the file name contains neither u_group nor
                # i_group, so with several groupings a later combination
                # overwrites the file of an earlier one. The handle is also
                # only closed on the normal path (no with-block / finally).
                results = open(f"CIP_results_{dataset}_{model.name}_Dynamic.csv", 'w')
                results.write(
                    "Dataset,Model,GUser,GItem,Type,User_EPS,Item_EPS,Provider_EPS,gini_provider,gini_inventory,ndcg_ALL,ndcg_ACT,ndcg_INACT,Pre_ALL,Pre_ACT,Pre_INACT,Rec_ALL,Rec_ACT,Rec_INACT,Nov_ALL,Nov_ACT,Nov_INACT,Cov_ALL,Cov_ACT,Cov_INACT,Short_Items,Long_Items,All_Items\n")
                print(f"> Model: {model.name}")
                # load matrix S and P
                S, P = load_ranking_matrices(model=model, total_users=total_users, total_items=total_items,
                                                         topk=topk)
                
                # load matrix Ahelp
                Ahelp = load_ground_truth_index(total_users=total_users, topk=topk, P=P, train_checkins=train_checkins)
                # load matrix Ihelp
                # NOTE(review): topk is hard-coded to 50 here while the two
                # loaders above use the `topk` setting — confirm this is intended.
                Ihelp = read_item_index(total_users=total_users, topk=50, no_item_groups=no_item_groups)
                print("Matrix Ihelp:", Ihelp)
                # epsilon values tried for every fairness term that is active in a mode
                Coeff_list = [0.005, 0.05,0.09, 0.5]
                # iterate on fairness mode: Consumers, items, and providers
                # In every branch the sequence is: optimise, write the CSV
                # row, then re-read `providers` from the
                # providers_inventory["itemsAndRatings_iid"] column for the
                # next run.
                # NOTE(review): the *_eps variables keep their last value
                # after a loop ends, so the console line printed by
                # write_results() can show stale values for terms that are
                # not active in the current mode.
                for fair_mode in ['N', 'C', 'I', 'P', 'CI', 'CP', 'IP', 'CIP']:
                    if fair_mode == 'N':
                        # baseline without any fairness term
                        print("N")
                        provider_eps,item_eps,user_eps=0,0,0
                        W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode)
                        write_results()
                        # NOTE(review): this branch calls addItemIndex()
                        # whereas all other branches call add_iid_column()
                        # before re-reading the inventory — confirm.
                        addItemIndex()
                        providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'C':
                        # consumer-side term only
                        for user_eps in Coeff_list:
                            W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, cepsilon=user_eps)
                            write_results()
                            add_iid_column()
                            providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'I':
                        # item-side term only
                        for item_eps in Coeff_list:
                            W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, iepsilon=item_eps)
                            write_results()
                            add_iid_column()
                            providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'P':
                        # provider-side term only
                        for provider_eps in Coeff_list:
                            W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, pepsilon=provider_eps)
                            write_results()
                            add_iid_column()
                            providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'CI':
                        # consumer + item terms: full grid over both epsilons
                        for user_eps in Coeff_list:
                            for item_eps in Coeff_list:
                                W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, cepsilon=user_eps,
                                                                          iepsilon=item_eps)
                                write_results()
                                add_iid_column()
                                providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'CP':
                        # consumer + provider terms: full grid over both epsilons
                        for user_eps in Coeff_list:
                            for provider_eps in Coeff_list:
                                W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, cepsilon=user_eps,
                                                                          pepsilon=provider_eps)
                                write_results()
                                add_iid_column()
                                providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'IP':
                        # item + provider terms: full grid over both epsilons
                        for item_eps in Coeff_list:
                            for provider_eps in Coeff_list:
                                W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, iepsilon=item_eps,
                                                                          pepsilon=provider_eps)
                                write_results()
                                add_iid_column()
                                providers=providers_inventory["itemsAndRatings_iid"].tolist()
                    elif fair_mode == 'CIP':
                        # all three terms: full grid over the three epsilons
                        for user_eps in Coeff_list:
                            for item_eps in Coeff_list:
                                for provider_eps in Coeff_list:
                                    W, item_group = CIP_Recommendation_optimizer(fairness=fair_mode, cepsilon=user_eps,
                                                                              iepsilon=item_eps, pepsilon=provider_eps)
                                    write_results()
                                    add_iid_column()
                                    providers=providers_inventory["itemsAndRatings_iid"].tolist()
                results.close()