# Fairness Metrics

In [2]:
import numpy as np
from scipy.sparse import csr_matrix, load_npz

import sys
import os
import pandas as pd

# Make the `python/` directory that sits one level above the notebook importable.
module_path = os.path.abspath(os.path.join('..'))
python_lib_path = module_path+"/python"
# Guard on the exact entry that gets appended: checking `module_path` alone
# never matches, so every re-run of this cell would add a duplicate entry.
if python_lib_path not in sys.path:
    sys.path.append(python_lib_path)


from dm_function_lib import read_table, get_ratings_df
from fairness_evaluation import GCE

In [3]:
# Sort rank of each proposal type (lower rank comes first).
TYPES_ORDER = {
    name: rank
    for rank, name in enumerate(("minority", "nimby", "no_type"))
}

# Sort rank of each recommender model; used to order the rows of the result tables.
MODEL_ORDER = {
    name: rank
    for rank, name in enumerate((
        "rand", "pop", "pop_nc",
        "ib", "ub", "mf", "bpr",
        "cb_cat", "cb_top", "cb_loc",
        "cbib_cat", "cbib_top", "cbib_loc",
        "cbub_cat", "cbub_top", "cbub_loc",
    ))
}

In [4]:
def create_dataframes(c, types):
    """Load the rating-matrix files of dataset ``c`` and attach a type to every proposal.

    Parameters
    ----------
    c : str
        Dataset identifier. It selects the ``../../data/rm/{c}/`` directory and
        the ``rm_{c}_*`` file names read below.
    types : pandas.DataFrame
        Table with ``proposalId``, ``type`` and ``ranking`` columns.

    Returns
    -------
    tuple
        ``(proposal_cnv, types_prop, df_test_types_prop, df_train_types_prop)``:

        * ``proposal_cnv`` -- the proposal-id conversion table read from CSV.
        * ``types_prop`` -- one row per proposal in ``proposal_cnv``; proposals
          missing from ``types`` get type ``'no_type'`` and ranking ``0``.
        * ``df_test_types_prop`` / ``df_train_types_prop`` -- the frames returned
          by ``get_ratings_df`` for the test / train matrices, left-joined with
          ``types_prop`` on ``proposalId``.
    """
    proposal_cnv = pd.read_csv(f'../../data/rm/{c}/rm_{c}_proposalId_cnv.csv', sep=';')

    # Proposals of this dataset that appear in ``types`` keep their rows as-is.
    typed_rows = types[types.proposalId.isin(proposal_cnv.proposalId)]

    # Proposals without an entry in ``types`` are labelled 'no_type' with ranking 0.
    untyped_ids = proposal_cnv[~proposal_cnv['proposalId'].isin(types['proposalId'])].proposalId
    n_untyped = untyped_ids.shape[0]
    untyped_rows = pd.DataFrame(data={'proposalId': untyped_ids,
                                      'type': ['no_type'] * n_untyped,
                                      'ranking': [0] * n_untyped})

    types_prop = pd.concat([typed_rows, untyped_rows])\
        .sort_values('proposalId')\
        .reset_index(drop=True)

    # Delete duplicates: for each proposalId keep only the row with the smallest ranking.
    min_rank_labels = types_prop.groupby('proposalId').ranking.idxmin()
    types_prop = types_prop.loc[min_rank_labels.values].reset_index(drop=True)

    # Test split: ratings frame joined with the proposal types.
    test_ratings = get_ratings_df(
        load_npz(f'../../data/rm/{c}/rm_{c}_test.npz'),
        rm_info={'userId_cnv': pd.read_csv(f'../../data/rm/{c}/rm_{c}_userId_cnv.csv', sep=';'),
                 'proposalId_cnv': proposal_cnv})
    df_test_types_prop = test_ratings.merge(types_prop, how='left', on='proposalId')

    # Train split: same treatment.
    train_ratings = get_ratings_df(
        load_npz(f'../../data/rm/{c}/rm_{c}_train.npz'),
        rm_info={'userId_cnv': pd.read_csv(f'../../data/rm/{c}/rm_{c}_userId_cnv.csv', sep=';'),
                 'proposalId_cnv': proposal_cnv})
    df_train_types_prop = train_ratings.merge(types_prop, how='left', on='proposalId')

    return proposal_cnv, types_prop, df_test_types_prop, df_train_types_prop

In [5]:
def _target_distributions(type_values, test_counts):
    """Build the candidate target distributions ``p_f``, keyed by type name.

    Every distribution is assigned by type *name* rather than by position, so
    the values stay attached to the right type even when ``type_values`` does
    not contain all the types of ``TYPES_ORDER``.

    Parameters
    ----------
    type_values : list of str
        Types present for the current model, sorted by ``TYPES_ORDER``.
    test_counts : dict
        Number of test rows per type.

    Returns
    -------
    dict
        Maps the distribution name to a ``{type: probability}`` dict.
    """
    # Total number of typed test rows, summed over every known type.
    test_total = np.sum([test_counts.get(k, 0)
                         for k in sorted(TYPES_ORDER, key=TYPES_ORDER.get)])
    typed_values = [t for t in type_values if t != 'no_type']

    return {
        # Uniform over the types that are present.
        'p_uniform': {t: 1 / len(type_values) for t in type_values},
        # Share of each type among the test rows.
        'p_test': {t: test_counts.get(t, 0) / test_total for t in type_values},
        # minority = 0.8, rest = 0.1
        'p_minority': {t: 0.8 if t == 'minority' else 0.1 for t in type_values},
        # nimby = 0.8, rest = 0.1
        'p_nimby': {t: 0.8 if t == 'nimby' else 0.1 for t in type_values},
        # no_type = 0.1, the remaining 0.9 split evenly among the other types
        'p_min_nimby': {t: 0.1 if t == 'no_type' else 0.9 / len(typed_values)
                        for t in type_values},
    }


def fairness_results(c, proposal_cnv, types_prop, df_test_types_prop):
    """Compute GCE for every recommendation file under several target distributions.

    Each ``rec_<model>.*`` file in ``../../data/recommendations/`` is read,
    joined with the proposal types and passed to ``GCE`` once per target
    distribution. ``.DS_Store``, ``model_history.csv`` and any other file whose
    stem does not contain ``rec_`` are ignored.

    Parameters
    ----------
    c : str
        Dataset identifier. Currently unused: the recommendations directory
        does not depend on it. Kept so existing callers keep working.
    proposal_cnv : pandas.DataFrame
        Proposal-id conversion table; its ``proposalId`` column is used.
    types_prop : pandas.DataFrame
        Per-proposal type table with ``proposalId`` and ``type`` columns.
    df_test_types_prop : pandas.DataFrame
        Test ratings joined with ``types_prop``.

    Returns
    -------
    tuple
        ``(gce_results, pm_t, pf)``:

        * ``gce_results`` -- one row per model, one column per distribution.
        * ``pm_t`` -- one ``pm_<model>`` row per model with the ``pm`` values
          returned by ``GCE``, columns ordered as ``TYPES_ORDER``.
        * ``pf`` -- the target distributions built for the last processed file.

        Rows follow ``MODEL_ORDER``. Models listed there but without a
        recommendation file are left out instead of raising ``KeyError``.

    Raises
    ------
    ValueError
        If the directory contains no recommendation file.
    """
    rec_dir = '../../data/recommendations/'
    ignored = {'.DS_Store', 'model_history.csv'}
    # Sorted so that the iteration order (and therefore the returned ``pf``)
    # does not depend on the arbitrary order of os.listdir.
    model_list = sorted(f for f in os.listdir(rec_dir)
                        if f not in ignored and 'rec_' in f.split('.')[0])
    if not model_list:
        raise ValueError(f'No recommendation files (rec_*) found in {rec_dir}')

    # each = num_likes_test_cat/num_likes_test (independent of the model, so computed once)
    test_counts = df_test_types_prop.groupby('type').count()['proposalId'].to_dict()

    gce_results = []
    pm_t = []
    for m in model_list:
        m_name = m.split('.')[0].split('rec_')[1]
        df_rec_m = pd.read_csv(f'{rec_dir}{m}', sep=';')
        df_rec_m_types_prop = df_rec_m.merge(types_prop, how='left', on='proposalId')

        # Typed proposals that belong to the dataset and are recommended by this model.
        types_prop_m = types_prop[types_prop.proposalId.isin(proposal_cnv.proposalId)
                                  & types_prop.proposalId.isin(df_rec_m.proposalId)]\
            .reset_index(drop=True)

        types_prop_m_values = sorted(types_prop_m['type'].unique(),
                                     key=TYPES_ORDER.__getitem__)

        pf = _target_distributions(types_prop_m_values, test_counts)

        gce_df = pd.DataFrame(index=[m_name])
        for n, p in pf.items():
            gce_df[n], pm = GCE(proposalIds=proposal_cnv.proposalId.unique(),
                                df_rec_attributes=df_rec_m_types_prop,
                                df_test_attributes=df_test_types_prop,
                                proposal_attributes=types_prop_m,
                                p_f=p,
                                fun='ndcg', beta=2, h=0.95, pc=0.0001)
        # ``pm`` is overwritten on every GCE call; the one kept is from the last
        # distribution. Presumably it does not depend on p_f -- TODO confirm.
        pm_t.append(pd.DataFrame(data=pm, index=[f'pm_{m_name}']))
        gce_results.append(gce_df)

    gce_results = pd.concat(gce_results)
    pm_t = pd.concat(pm_t)[list(TYPES_ORDER)]

    # Order rows by MODEL_ORDER, restricted to the models that were actually found.
    present = [s for s in MODEL_ORDER if s in gce_results.index]
    return gce_results.loc[present], pm_t.loc[[f'pm_{s}' for s in present]], pf

In [6]:
# Read the proposal type sheet, keep the id / type / ranking columns and
# rename ``id`` to ``proposalId`` so it matches the other tables.
types = (
    pd.read_excel('../../data/group_proposals4[3].xlsx', decimal=',')
    [['id', 'type', 'ranking']]
    .rename(columns={'id': 'proposalId'})
)

# GCE for NIMBY, Minority and NO_TYPE

In [7]:
# Dataset identifier; create_dataframes uses it to build the ../../data/rm/{c}/rm_{c}_* paths.
c = 'c1'

In [8]:
# Load the proposal conversion table, the per-proposal type table and the
# test / train ratings joined with those types for dataset ``c``.
proposal_cnv, types_prop, df_test_types, df_train_types = create_dataframes(c, types)

In [9]:
# Display the per-proposal type table.
types_prop

Unnamed: 0,proposalId,type,ranking
0,1,no_type,0
1,2,no_type,0
2,3,no_type,0
3,4,no_type,0
4,5,no_type,0
...,...,...,...
9672,13517,no_type,0
9673,13521,no_type,0
9674,13522,no_type,0
9675,13525,no_type,0


In [10]:
# Number of proposals per type (count() reports the non-null entries of each column).
num_group = types_prop.groupby('type').count()
num_group

Unnamed: 0_level_0,proposalId,ranking
type,Unnamed: 1_level_1,Unnamed: 2_level_1
minority,776,776
nimby,639,639
no_type,8262,8262


In [11]:
# GCE per model and target distribution, the per-model pm table, and the target distributions used.
gce_results, pm, pf = fairness_results(c, proposal_cnv, types_prop, df_test_types)

In [12]:
# Add the nDCG column read from the ranking metrics file (index column 0 of the CSV is used as its index).
gce_results['nDCG'] = pd.read_csv('../../data/result_metrics/ranking_50.csv',index_col=0)[['nDCG']]
# Put nDCG first, followed by one GCE column per target distribution.
gce_results = gce_results[['nDCG', 'p_uniform', 'p_test', 'p_minority', 'p_nimby', 'p_min_nimby']]
# Persist the combined table.
gce_results.to_csv(f'../../data/result_metrics/gce_50.csv')

In [13]:
# Gradient-shaded view of the results with every value shown to three decimals.
gce_results_styled = gce_results.style.background_gradient().format('{:.3f}')
gce_results_styled

Unnamed: 0,nDCG,p_uniform,p_test,p_minority,p_nimby,p_min_nimby
rand,0.001,-1.088,-0.012,-3.536,-4.382,-2.281
pop,0.06,-1.369,-0.014,-7.611,-1.939,-2.789
pop_nc,0.049,-0.786,-0.029,-4.898,-1.2,-1.714
ib,0.01,-0.892,-0.0,-4.238,-2.523,-1.92
ub,0.039,-0.831,-0.006,-4.526,-1.862,-1.804
mf,0.05,-0.928,-0.0,-4.226,-2.749,-1.987
bpr,0.02,-1.308,-0.028,-3.41,-5.804,-2.684
cb_cat,0.009,-1.258,-0.006,-5.11,-3.809,-2.592
cb_top,0.017,-1.19,-0.005,-4.776,-3.744,-2.468
cb_loc,0.007,-0.896,-0.001,-3.845,-2.94,-1.927


In [18]:
# Gradient-shaded view of the results without number formatting.
gce_results.style.background_gradient()

Unnamed: 0,nDCG,p_uniform,p_test,p_minority,p_nimby,p_min_nimby
rand,0.001448,-1.087611,-0.011501,-3.535717,-4.382354,-2.280533
pop,0.059993,-1.368958,-0.014193,-7.611395,-1.938612,-2.789027
pop_nc,0.048868,-0.785854,-0.028869,-4.897858,-1.199562,-1.713661
ib,0.010232,-0.89211,-0.000188,-4.23774,-2.523365,-1.920173
ub,0.039093,-0.830708,-0.00582,-4.526222,-1.862361,-1.804197
mf,0.050099,-0.928132,-3.4e-05,-4.226428,-2.748873,-1.986882
bpr,0.019554,-1.30836,-0.028434,-3.410485,-5.804112,-2.68442
cb_cat,0.00858,-1.25774,-0.006363,-5.110366,-3.808813,-2.592381
cb_top,0.016613,-1.189723,-0.005231,-4.775555,-3.743696,-2.467801
cb_loc,0.006525,-0.895553,-0.000707,-3.844722,-2.939884,-1.927474


In [19]:
# axis=None shades the whole table on one common scale instead of column by column.
pm.style.background_gradient(axis=None)

Unnamed: 0,minority,nimby,no_type
pm_rand,0.080918,0.066463,0.852619
pm_pop,0.039657,0.138742,0.821602
pm_pop_nc,0.059635,0.198884,0.741481
pm_ib,0.068293,0.108693,0.823014
pm_ub,0.064201,0.140462,0.795337
pm_mf,0.06851,0.10095,0.83054
pm_bpr,0.084051,0.051292,0.864658
pm_cb_cat,0.057775,0.075892,0.866332
pm_cb_top,0.061482,0.076987,0.861532
pm_cb_loc,0.07466,0.095043,0.830297


In [20]:
# Turn the dict of target distributions into a table: one row per distribution, one column per type.
# NOTE(review): this rebinds ``pf`` from a dict to a DataFrame, so re-running this cell
# without first re-running the fairness_results cell feeds the DataFrame back in and
# will presumably not reproduce this table -- consider a separate name.
pf = pd.DataFrame(data=pf, index=TYPES_ORDER.keys()).T

In [21]:
# Show the target distributions rounded to three decimals.
pf.round(3)

Unnamed: 0,minority,nimby,no_type
p_uniform,0.333,0.333,0.333
p_test,0.067,0.103,0.83
p_minority,0.8,0.1,0.1
p_nimby,0.1,0.8,0.1
p_min_nimby,0.45,0.45,0.1
