# Simulated Experiments (Section 4.2)

First, generate the dataset.

Then, define the new model class.

Finally, generate the plots from the paper.

In [1]:
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib import colors, ticker, cm

from pprint import pprint
from progressbar import progressbar

%matplotlib inline

In [2]:
from IPython.display import set_matplotlib_formats

# Ask the inline backend for both 'pdf' and 'png' figure output.
# NOTE(review): newer IPython releases deprecate this helper in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats -- switch when upgrading.
set_matplotlib_formats('pdf', 'png')

# Global figure styling shared by every plot in this notebook.
plt.rcParams.update({
    'savefig.dpi': 75,
    'figure.autolayout': False,
    'figure.figsize': (10, 6),
    'axes.labelsize': 14,
    'axes.titlesize': 16,
    'font.size': 16,
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
    'legend.fontsize': 10,
    # Text is typeset by LaTeX in a Computer Modern serif face.
    'text.usetex': True,
    'font.family': "serif",
    'font.serif': "cm",
    # One \usepackage per line. The previous comma-separated form only works on
    # Matplotlib versions that split the preamble on commas; on versions that
    # pass the preamble through as a single string the comma lands in the LaTeX
    # preamble as literal text.
    'text.latex.preamble': "\n".join([r"\usepackage{subdepth}",
                                      r"\usepackage{type1cm}"]),
})

  set_matplotlib_formats('pdf', 'png')


In [2]:
num_queries = 100   # number of simulated queries
cs_size = 10        # candidate-set size of every query
DATA_SEED = 0       # fixed seed so the simulated dataset is identical on every run


def get_relevance_labels(xs):
    """Return the relevance label of every candidate in ``xs``.

    ``xs`` is a 2-D array whose columns are (group, x1, x2).  The label is
    ``x1 + x2`` clipped to the range [0, 4]; the group column is not used.
    """
    return np.clip(xs[:, 2] + xs[:, 1], 0.0, 4.0)


# A dedicated generator keeps the dataset reproducible without touching the
# global NumPy random state.
rng = np.random.default_rng(DATA_SEED)

feats = []   # one (cs_size, 3) array per query: columns (group, x1, observed x2)
rels = []    # one (cs_size,) array of relevance labels per query
for _ in range(num_queries):
    # Group id per candidate: 0 with probability 0.8, 1 with probability 0.2.
    groups = rng.choice(2, size=cs_size, p=[0.8, 0.2])
    x1s = 3 * rng.random(cs_size)
    x2s = 3 * rng.random(cs_size)
    # Labels are computed from the true x2 ...
    labels = get_relevance_labels(np.vstack((groups, x1s, x2s)).transpose())
    # ... while the observed x2 is zeroed for group 1.  Work on a copy so the
    # true values in x2s are not overwritten through an alias.
    x2observed = x2s.copy()
    x2observed[groups == 1] = 0
    xs = np.vstack((groups, x1s, x2observed)).transpose()
    feats.append(xs)
    rels.append(labels)

In [3]:
print(type(feats))
print(type(rels))

<class 'list'>
<class 'list'>


In [4]:
len(feats)

100

In [5]:
feats[0].shape

(10, 3)

In [6]:
len(rels)

100

In [7]:
pwd

'/home/ramon/Fair-PGRank'

In [8]:
from train_yahoo_dataset import on_policy_training
from YahooDataReader import YahooDataReader
import torch
from models import NNModel, LinearModel
from evaluation import evaluate_model

In [9]:
# Build a reader with no source (None) and attach the simulated
# (features, relevances) lists directly to its `data` attribute.
dr = YahooDataReader(None)
dr.data = (feats, rels)
# vdr = YahooDataReader(None)
# vdr.data = (val_feats, val_rels)
# No separate validation split is generated: the validation reader is the
# same object as the training reader.
vdr = dr

In [11]:
feats

[array([[1.        , 1.51111222, 0.        ],
        [0.        , 0.81271508, 2.5263497 ],
        [0.        , 1.25919992, 2.88967714],
        [0.        , 1.97996137, 0.2068889 ],
        [0.        , 2.47836018, 0.4095595 ],
        [0.        , 0.05292051, 2.77154677],
        [0.        , 2.18743358, 2.43094326],
        [1.        , 2.83643575, 0.        ],
        [0.        , 2.22714057, 1.46578957],
        [0.        , 1.36794714, 0.46297639]]),
 array([[0.        , 2.10246921, 2.47883948],
        [0.        , 0.44701749, 1.27723232],
        [0.        , 2.56028127, 2.17658657],
        [0.        , 2.21533978, 2.42154103],
        [1.        , 2.46251336, 0.        ],
        [1.        , 2.92091103, 0.        ],
        [0.        , 1.73624295, 2.82663186],
        [0.        , 0.64570789, 0.63558781],
        [0.        , 0.61490388, 0.31462883],
        [0.        , 2.55591427, 1.53476771]]),
 array([[0.        , 2.36524384, 2.33197044],
        [0.        , 0.0719163

In [7]:
#plt.grid()

# Stack the per-query arrays into one (num_queries * cs_size, 3) feature matrix
# and one flat relevance vector covering the whole dataset.
feats_ = np.vstack(feats)
rels_ = np.concatenate(rels)
#plt.scatter(feats_[:,1], feats_[:,2], c=rels_)
#plt.colorbar()

In [8]:
#plt.grid()

# Same flattening as for the relevance scatter; the (disabled) plot below
# colours the points by the group column instead.
feats_ = np.vstack(feats)
rels_ = np.concatenate(rels)
#plt.scatter(feats_[:,1], feats_[:,2], c=feats_[:,0])
#plt.colorbar()

In [11]:
class Namespace:
    """Plain attribute container: every keyword argument becomes an attribute."""

    def __init__(self, **kwargs):
        for key, value in kwargs.items():
            setattr(self, key, value)
# Shared configuration object passed as `args=` to the training and evaluation
# helpers. input_dim=3 matches the three feature columns (group, x1, x2) and
# group_feat_id=0 is the index of the group column.
args = Namespace(conditional_model=True, gpu_id=None, progressbar=True, evaluate_interval=250, input_dim=3, 
                 eval_rank_limit=1000,
                fairness_version="asym_disparity", entropy_regularizer=0.0, save_checkpoints=False, num_cores=1,
                pooling='concat_avg', dropout=0.0, hidden_layer=8, group_feat_id=0, summary_writing=False, 
                 group_fairness_version="asym_disparity",early_stopping=False, lr_scheduler=False, 
                 validation_deterministic=False, evalk=1000, reward_type="ndcg", baseline_type="value", 
                 use_baseline=True, entreg_decay=0.0, skip_zero_relevance=True, eval_temperature=1.0, optimizer="Adam",
                clamp=False)
torch.set_num_threads(args.num_cores)
# The next two assignments override values already passed to the constructor
# above (group_feat_id is unchanged; progressbar flips from True to False).
args.group_feat_id = 0
args.progressbar = False 

# Optimisation settings; lr_scheduler is switched on here although the
# constructor call above set it to False.
args.lr = 0.1
args.lr_scheduler = True
args.weight_decay = 0.0
args.lr_decay = 0.5

In [12]:
from models import CustomLinearModel

In [11]:
def eval_params(w1, w2, vdr, det=False):
    """Evaluate a CustomLinearModel whose two weights are set to (w1, w2).

    The model is built with D=3 and fix_weight_dim=0, its weight tensor is
    overwritten with [[w1, w2]], and it is scored on ``vdr`` with both fairness
    evaluations enabled.  ``det`` is forwarded as ``deterministic``.  Returns
    whatever ``evaluate_model`` returns.
    """
    linear_model_ = CustomLinearModel(D=3, fix_weight_dim=0)
    fixed_weights = torch.tensor([[w1, w2]]).float()
    linear_model_.w.weight.data = fixed_weights
    return evaluate_model(
        linear_model_,
        vdr,
        deterministic=det,
        group_fairness_evaluation=True,
        args=args,
        fairness_evaluation=True,
    )

In [12]:
# Evaluate NDCG and asymmetric group disparity on a grid of (w1, w2) weights.
w1s = np.linspace(0, 5, num=20)
w2s = np.linspace(0, 5, num=20)
# Size the result grids from the axes themselves so that changing `num` above
# cannot leave them with a stale hard-coded shape.
Z = np.zeros((len(w1s), len(w2s)))    # NDCG, filled as Z[i, j] for (w1s[i], w2s[j])
Z2 = np.zeros((len(w1s), len(w2s)))   # asymmetric group disparity, same layout
for i, w1 in enumerate(w1s):
    print("At {}/{} w1s".format(i, len(w1s)))
    for j, w2 in enumerate(w2s):
        res = eval_params(w1, w2, vdr)
        Z[i,j] = res["ndcg"]
        Z2[i,j] = res["avg_group_asym_disparity"]
# After the transpose, rows index w2s and columns index w1s.
Z = Z.transpose()
Z2 = Z2.transpose()
#plt.matshow(Z)
#plt.colorbar()
#plt.show()
#plt.matshow(Z2)
#plt.colorbar()

At 0/20 w1s


  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


At 1/20 w1s
At 2/20 w1s
At 3/20 w1s
At 4/20 w1s
At 5/20 w1s
At 6/20 w1s
At 7/20 w1s
At 8/20 w1s
At 9/20 w1s
At 10/20 w1s
At 11/20 w1s
At 12/20 w1s
At 13/20 w1s
At 14/20 w1s
At 15/20 w1s
At 16/20 w1s
At 17/20 w1s
At 18/20 w1s
At 19/20 w1s


In [13]:
Z2.max(), Z2.min()

(0.012199005238261113, 0.0007556589354849546)

In [14]:
args.input_dim

3

In [13]:
# Train one CustomLinearModel per group-fairness weight and record the learnt
# weights and the resulting asymmetric group disparity.
model_params_list = []   # learnt [w1, w2] per lambda, in lambdas_list order
disparities = []         # results["avg_group_asym_disparity"] per lambda
lambdas_list = [0, 1, 2, 3,5,7,8, 10, 15, 20, 25]
for lgroup in lambdas_list:
    torch.set_num_threads(args.num_cores)
    # Every setting below except lambda_group_fairness takes the same value on
    # each iteration; they are (re)assigned here before every training run.
    args.epochs = 5
    args.progressbar = False
    args.weight_decay = 0.0
    args.sample_size = 10
    # Replaces the 'concat_avg' value given when `args` was constructed.
    args.pooling = False
    args.skip_zero_relevance = True
    args.validation_deterministic = False
    args.lambda_reward = 1.0
    args.lambda_ind_fairness = 0.0
    args.lambda_group_fairness = lgroup


    # A fresh model is created for every lambda.
    model = CustomLinearModel(D=args.input_dim, use_bias=False, fix_weight_dim=0)
    
    # vdr is the same reader object as dr, so validation uses the training data.
    model = on_policy_training(dr, vdr, model, args=args)
    results = evaluate_model(model, vdr, fairness_evaluation=False, group_fairness_evaluation=True, 
                             deterministic=False, args=args, num_sample_per_query=20)
    print(results)
    # First (only) row of the weight matrix as a plain Python list.
    model_params_list.append(model.w.weight.data.tolist()[0])
    print("Learnt model for lambda={} has model weights as {}".format(lgroup, model_params_list[-1]))
    disparities.append(results["avg_group_asym_disparity"])

Starting training with the following config
Learning rate 0.1, Weight decay 0.0, Sample size 10
Lambda_reward: 1.0, lambda_ind_fairness:0.0, lambda_group_fairness:0
Training....
Evaluating on validation set: iteration 0/100 of epoch 0
Epoch 0, Average Validation: NDCG: 0.8435767254625286, DCG 37.17944884721447, Average Rank 4.5, ERR 0.736007947230709
Training....
LR is set to 0.05
Evaluating on validation set: iteration 0/100 of epoch 1
Epoch 1, Average Validation: NDCG: 0.9738305878496969, DCG 42.68962099971477, Average Rank 4.5, ERR 0.9370229385785507
Training....
LR is set to 0.025
Evaluating on validation set: iteration 0/100 of epoch 2
Epoch 2, Average Validation: NDCG: 0.9748337444087831, DCG 42.73089862185146, Average Rank 4.5, ERR 0.9393134203302677
Training....
LR is set to 0.0125
Evaluating on validation set: iteration 0/100 of epoch 3
Epoch 3, Average Validation: NDCG: 0.9766810219824802, DCG 42.80405835549083, Average Rank 4.5, ERR 0.9424039897625272
Training....
LR is set 

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


{'ndcg': 0.9772550170727514, 'dcg': 42.82949177803192, 'avg_rank': 4.5, 'err': 0.942675888853739, 'avg_group_disparity': 0.0047821638248172395, 'avg_group_asym_disparity': 0.014362022638654638}
Learnt model for lambda=0 has model weights as [4.0142598152160645, 3.3697500228881836]
Starting training with the following config
Learning rate 0.1, Weight decay 0.0, Sample size 10
Lambda_reward: 1.0, lambda_ind_fairness:0.0, lambda_group_fairness:1
Training....
Evaluating on validation set: iteration 0/100 of epoch 0
Epoch 0, Average Validation: NDCG: 0.7780202072894028, DCG 34.323942736104435, Average Rank 4.5, ERR 0.6324374534528042
Average Validation Group Exposure disparity: 0.009092167972813172, Group Asymmetric disparity: 0.006902571716235128
Training....
LR is set to 0.05
Evaluating on validation set: iteration 0/100 of epoch 1
Epoch 1, Average Validation: NDCG: 0.9716085711515925, DCG 42.59681168125323, Average Rank 4.5, ERR 0.9338988997324462
Average Validation Group Exposure dispar

Training....
LR is set to 0.0125
Evaluating on validation set: iteration 0/100 of epoch 3
Epoch 3, Average Validation: NDCG: 0.9607075136026018, DCG 42.13863451423673, Average Rank 4.5, ERR 0.9172179661293192
Average Validation Group Exposure disparity: 0.003312455740241509, Group Asymmetric disparity: 0.01142644860138021
Training....
LR is set to 0.00625
Evaluating on validation set: iteration 0/100 of epoch 4
Epoch 4, Average Validation: NDCG: 0.9602589772873487, DCG 42.123212636468764, Average Rank 4.5, ERR 0.9160301565518114
Average Validation Group Exposure disparity: 0.0032863587775139123, Group Asymmetric disparity: 0.010826891360324671
Epoch 4, Average Validation: NDCG: 0.9579872154419131, DCG 42.03264176080643, Average Rank 4.5, ERR 0.9114784499024776
Average Validation Group Exposure disparity: 0.0031486744184645297, Group Asymmetric disparity: 0.010106509778895635
{'ndcg': 0.9595248407611788, 'dcg': 42.09164893721127, 'avg_rank': 4.5, 'err': 0.913787031569706, 'avg_group_dis

KeyboardInterrupt: 

In [15]:
#scatter plot
# One labelled point per trained model, placed at its learnt (w1, w2).
for idx in range(len(model_params_list)):
    theta = model_params_list[idx]
    lam = lambdas_list[idx]
    point = (theta[0], theta[1])
    plt.scatter(point[0], point[1], label=lam)
    plt.annotate(str(lam), point)

RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(/home/ramon/.cache/matplotlib/tex.cache/099208f0b5df9c85455e5e5e9ff93d99.tex
LaTeX2e <2020-02-02> patch level 2
L3 programming layer <2020-02-14>
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)

! LaTeX Error: File `type1ec.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on 099208f0b5df9c85455e5e5e9ff93d99.log.




RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(/home/ramon/.cache/matplotlib/tex.cache/099208f0b5df9c85455e5e5e9ff93d99.tex
LaTeX2e <2020-02-02> patch level 2
L3 programming layer <2020-02-14>
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)

! LaTeX Error: File `type1ec.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on 099208f0b5df9c85455e5e5e9ff93d99.log.




<Figure size 720x432 with 1 Axes>

In [16]:
# Keep a subset of the trained models for the figure; with the lambdas_list
# used for training these positions correspond to lambda = 0, 1, 3, 5, 8, 10.
selected = np.array([0, 1, 3, 4, 6, 7])
model_params_list_updated = np.array(model_params_list)[selected]
lambdas_list_updated = np.array(lambdas_list)[selected]
pprint(list(zip(lambdas_list_updated, model_params_list_updated)))

[(0, array([3.88123703, 3.158705  ])),
 (1, array([4.01937914, 2.92454553])),
 (3, array([3.53417754, 1.84737372])),
 (5, array([3.56596613, 1.47670901])),
 (8, array([2.63665271, 1.20407641])),
 (10, array([2.09977102, 0.66162068]))]


In [17]:
# Two-panel figure: filled contours of NDCG (left) and group disparity (right)
# over the weight grid, with the weights learnt for the selected lambdas
# overlaid as '+' markers and labelled.
fig= plt.figure(figsize=(7, 4))
ax1 = fig.add_subplot(121)
# Z and Z2 were transposed after the sweep, so their columns index w1s and their
# rows index w2s.  contourf(X, Y, Z) expects len(X) columns and len(Y) rows,
# hence w1s goes on x and w2s on y (the two grids are currently identical, so
# the picture is unchanged, but the call is now correct if they ever differ).
im = ax1.contourf(w1s, w2s, Z, cmap=cm.PuBu)
ax1.xaxis.set_label_position('bottom')
ax1.yaxis.set_label_position('left')
ax1.tick_params(labelbottom=True, labeltop=False, labelleft=True , labelright=False )
ax1.set_xlabel(r'$\theta_1$', fontsize=16)
ax1.set_ylabel(r'$\theta_2$', fontsize=16)
# The panel caption is placed below the axes (negative y offset).
ax1.set_title('(a) NDCG', y=-0.35)
# Dedicated colourbar axes on the far left of the figure.
cbaxes = fig.add_axes([0.02, 0.20, 0.02, 0.6])
fig.colorbar(im, ax=ax1, cax=cbaxes)
cbaxes.yaxis.set_label_position('left')
cbaxes.yaxis.set_ticks_position('left')
ax1.axis('equal')

ax2 = fig.add_subplot(122)
im = ax2.contourf(w1s, w2s, Z2, cmap=cm.Reds)
ax2.xaxis.set_label_position('bottom')
ax2.yaxis.set_label_position('left')
ax2.tick_params(labelbottom=True, labeltop=False, labelleft=True , labelright=False )
ax2.set_xlabel(r'$\theta_1$', fontsize=16)
ax2.set_ylabel(r'$\theta_2$', fontsize=16)
ax2.set_title('(b) Disparity', y=-0.35)
# Dedicated colourbar axes on the far right of the figure.
cbaxes = fig.add_axes([0.92, 0.20, 0.02, 0.6])

ax2.axis('equal')

fig.colorbar(im, ax=ax2, cax=cbaxes)
cbaxes.yaxis.set_label_position('right')
cbaxes.yaxis.set_ticks_position('right')

# Learnt weights: column 0 on the x-axis, column 1 on the y-axis.
ax1.scatter(model_params_list_updated[:,0], model_params_list_updated[:,1], marker='+', color='k')
ax2.scatter(model_params_list_updated[:,0], model_params_list_updated[:,1], marker='+', color='k')
texts = []
texts2 = []

# Faint diagonal reference line theta_1 == theta_2.
ax1.plot([0, 5], [0, 5], ls="--", c=".1", alpha=0.2)
ax2.plot([0, 5], [0, 5], ls="--", c=".1", alpha=0.2)
    
for i in range(len(lambdas_list_updated)):
    texts.append(ax1.text(model_params_list_updated[i,0], model_params_list_updated[i,1], 
                          r"$\lambda$="+str(lambdas_list_updated[i]), fontsize=16))
    texts2.append(ax2.text(model_params_list_updated[i,0], model_params_list_updated[i,1], 
                           r"$\lambda$="+str(lambdas_list_updated[i]), fontsize=16))
# Third-party helper that repositions the collected text labels on each axes.
from adjustText import adjust_text
adjust_text(texts, ax=ax1)#, x=w1s, y=w2s)
adjust_text(texts2, ax=ax2)#,  x=w1s, y=w2s)
ax1.set_aspect('equal', adjustable='box')
ax2.set_aspect('equal', adjustable='box')
ax1.set_xlim(0.0, 5.0)
ax2.set_xlim(0.0, 5.0)
ax1.set_ylim(0.0, 5.0)
ax2.set_ylim(0.0, 5.0)
ax1.set_aspect('equal', adjustable='box')
ax2.set_aspect('equal', adjustable='box')
    
#fig.tight_layout()
# The target directory "plots/" must already exist.
plt.savefig("plots/group_fairness_synthetic_contourf.pdf", bbox_inches='tight')
plt.show()

ModuleNotFoundError: No module named 'adjustText'

RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(/home/ramon/.cache/matplotlib/tex.cache/099208f0b5df9c85455e5e5e9ff93d99.tex
LaTeX2e <2020-02-02> patch level 2
L3 programming layer <2020-02-14>
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)

! LaTeX Error: File `type1ec.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on 099208f0b5df9c85455e5e5e9ff93d99.log.




RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full report generated by latex:
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(/home/ramon/.cache/matplotlib/tex.cache/099208f0b5df9c85455e5e5e9ff93d99.tex
LaTeX2e <2020-02-02> patch level 2
L3 programming layer <2020-02-14>
(/usr/share/texlive/texmf-dist/tex/latex/base/article.cls
Document Class: article 2019/12/20 v1.4l Standard LaTeX document class
(/usr/share/texlive/texmf-dist/tex/latex/base/size10.clo))
(/usr/share/texlive/texmf-dist/tex/latex/type1cm/type1cm.sty)

! LaTeX Error: File `type1ec.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on 099208f0b5df9c85455e5e5e9ff93d99.log.




<Figure size 504x288 with 4 Axes>

In [None]:
from itertools import permutations 
from scipy.optimize import linprog

In [None]:


def assign_groups(groups):
    """Split candidate indices by group id.

    Returns a two-element list ``[idx_of_group_0, idx_of_group_1]`` where each
    entry lists, in order, the positions in ``groups`` holding that group id.
    """
    members = [[], []]
    for position, group_id in enumerate(groups):
        members[group_id].append(position)
    return members


def fair_rank(relevances, groups ,lmda = 1):
    """Compute a fairness-regularised probabilistic ranking for one query via an LP.

    The decision vector holds the ``n * n`` entries of a matrix ``P`` (flattened
    row-major; row = candidate, column = rank position) followed by one
    non-negative slack variable per ordered pair of groups.  The objective
    minimises ``-sum_ij relevances[i] * pos_bias[j] * P[i, j] + lmda * sum(slacks)``
    with every row and every column of ``P`` constrained to sum to one.

    For each ordered group pair ``(a, b)`` in which both groups are non-empty
    and ``a`` has at least the mean relevance of ``b``, one inequality requires
    the position-weighted exposure of ``a`` scaled by ``len(G[a]) / sum_rels[a]``
    minus that of ``b`` scaled by ``len(G[b]) / sum_rels[b]`` to be at most the
    pair's slack variable.  Other pairs contribute an all-zero row.

    Args:
        relevances: per-candidate relevance scores (length ``n``).
        groups: per-candidate group ids (0 or 1), as consumed by ``assign_groups``.
        lmda: weight of the slack terms in the objective.

    Returns:
        ``(probabilistic_ranking, res, res.fun)`` where ``probabilistic_ranking``
        is the ``(n, n)`` matrix ``P``, ``res`` is the SciPy optimisation result
        and ``res.fun`` its objective value.

    Raises:
        RuntimeError: if the solver returns no solution vector at all.
    """
    n = len(relevances)
    pos_bias = vvector(n)
    G = assign_groups(groups)
    n_g = (len(G) - 1) * len(G)   # one slack variable per ordered group pair
    n_c = n**2 + n_g              # total number of LP variables

    # Objective coefficients: -1 (later scaled by relevance and position
    # weight) for the entries of P, +lmda for the slack variables.
    c = np.ones(n_c)
    c[:n**2] *= -1
    c[n**2:] *= lmda

    A_eq = []
    # Row constraints: each candidate's probabilities over positions sum to 1.
    for i in range(n):
        row_constraint = np.zeros(n_c)
        row_constraint[i*n:(i+1)*n] = 1
        assert(sum(row_constraint)==n)
        A_eq.append(row_constraint)
        c[i*n:(i+1)*n] *= relevances[i]

    # Column constraints: each position's probabilities over candidates sum to 1.
    for i in range(n):
        col_constraint = np.zeros(n_c)
        col_constraint[i:n**2:n] = 1
        assert(sum(col_constraint)==n)
        A_eq.append(col_constraint)
        c[i:n**2:n] *= pos_bias[i]
    b_eq = np.ones(n*2)
    A_eq = np.asarray(A_eq)
    bounds = [(0,1) for _ in range(n**2)] + [(0,None) for _ in range(n_g)]

    A_ub = []
    b_ub = np.zeros(n_g)
    sum_rels = []
    for group in G:
        # Floor at 0.01 to avoid division by zero below.
        sum_rel = np.max([np.sum(np.asarray(relevances)[group]), 0.01])
        sum_rels.append(sum_rel)

    for j, (a, b) in enumerate(permutations(np.arange(len(G)), 2)):
        f = np.zeros(n_c)
        if len(G[a]) > 0 and len(G[b])>0 and sum_rels[a]/len(G[a]) >= sum_rels[b]/len(G[b]):
            for i in range(n):
                tmp1 = len(G[a]) / sum_rels[a] if i in G[a] else 0
                tmp2 = len(G[b]) / sum_rels[b] if i in G[b] else 0
                f[i*n:(i+1)*n] = (tmp1 - tmp2)
            for i in range(n):
                f[i:n**2:n] *= pos_bias[i]
            # Slack variable of this ordered pair.
            f[n**2+j] = -1
        A_ub.append(f)

    lp_kwargs = dict(A_eq=A_eq, b_eq=b_eq, A_ub=A_ub, b_ub=b_ub, bounds=bounds)
    try:
        res = linprog(c, method="interior-point", **lp_kwargs)
    except ValueError:
        # Recent SciPy releases reject the legacy "interior-point" solver name;
        # fall back to the HiGHS solvers so the cell keeps running there.
        res = linprog(c, method="highs", **lp_kwargs)
    if res.success is False:
        print("Constraint not satisfied!!")
    if res.x is None:
        raise RuntimeError("linprog returned no solution: {}".format(res.message))
    probabilistic_ranking = np.reshape(res.x[:n**2],(n,n))
    return probabilistic_ranking, res, res.fun

In [None]:
def get_best_rankmatrix(true_rel_vector):
    """Return the permutation matrix of the relevance-sorted (descending) ranking.

    Entry ``[item, position]`` is 1 when ``item`` is placed at ``position``.
    The order is the reverse of ``np.argsort(true_rel_vector)``.
    """
    size = len(true_rel_vector)
    descending = np.argsort(true_rel_vector)[::-1]
    rank_matrix = np.zeros((size, size))
    rank_matrix[descending, np.arange(size)] = 1
    return rank_matrix

#returns DCG value
def get_DCG(ranking, relevances, vvector):
    """Return the DCG of a (possibly probabilistic) ranking matrix.

    Computes ``(2**relevances - 1) @ ranking @ vvector``: the gain of every item
    is distributed over positions by ``ranking`` and weighted by ``vvector``.
    """
    gains = 2**relevances - 1
    gain_per_position = np.matmul(gains, ranking)
    return np.matmul(gain_per_position, vvector.transpose())

def get_ndcg(ranking, relevances, vvector):
    """Return the DCG of ``ranking`` divided by the DCG of the relevance-sorted ranking."""
    ideal_ranking = get_best_rankmatrix(relevances)
    achieved = get_DCG(ranking, relevances, vvector)
    ideal = get_DCG(ideal_ranking, relevances, vvector)
    return achieved / ideal


def get_fairness_loss(ranking, relevances, vvector, groups):
    """Return the one-sided exposure disparity between group 0 and group 1.

    The exposure of every item is ``ranking @ vvector``.  For each group the
    mean exposure is divided by the group's mean relevance; the loss is the
    amount by which the group with the higher mean relevance exceeds the other
    in that ratio, floored at 0.

    Args:
        ranking: (n, n) ranking matrix (rows = items, columns = positions).
        relevances: array of n relevance values.
        vvector: array of n position weights.
        groups: array of n group ids (0 or 1).

    Returns:
        The non-negative loss; 0.0 when either group has no members.
    """
    in_group = [groups == g for g in range(2)]
    # With only one group present there is nothing to compare.  Returning 0.0
    # explicitly matches the previous result for this case without taking the
    # mean of an empty slice (which produced NaN and a RuntimeWarning).
    if not all(np.any(mask) for mask in in_group):
        return 0.0
    avg_rels = [np.mean(relevances[mask]) for mask in in_group]
    # +1 when group 0 is the more relevant group, otherwise -1.
    sign = +1 if avg_rels[0] > avg_rels[1] else -1
    exposures = np.matmul(ranking, vvector)
    group_avg_exposures = [np.mean(exposures[mask]) for mask in in_group]
    loss = max([0.0, sign*(group_avg_exposures[0]/avg_rels[0] - group_avg_exposures[1]/avg_rels[1])])
    return loss

def get_avg_fairness_loss(dr, predicted_rels, vvector, lmbda):
    """Average fairness loss of the LP-based fair rankings over all queries in ``dr``.

    For every query, a fair ranking is computed from the *predicted* relevances
    with ``fair_rank`` and its loss is measured against the *true* relevances
    with ``get_fairness_loss``.

    Args:
        dr: object whose ``data`` attribute is ``(feats, rel)``; column 0 of
            each feature array holds the group id.
        predicted_rels: per-query predicted relevances, indexed like ``rel``.
        vvector: array of position weights, sliced to each query's length
            (note that inside this function the name refers to this array).
        lmbda: fairness weight forwarded to ``fair_rank``.

    Returns:
        The mean of the per-query losses.
    """
    feats, rel = dr.data
    test_losses = []
    for i in range(len(rel)):
        N = len(rel[i])
        pred_rels = predicted_rels[i]
        # Built-in int instead of the np.int alias, which NumPy 1.24 removed.
        groups = np.array(feats[i][:,0], dtype=int)
        P, _, _ = fair_rank(pred_rels, groups, lmbda )
        test_loss = get_fairness_loss(P, rel[i], vvector[:N], groups)
        test_losses.append(test_loss)
    return np.mean(test_losses)

def vvector(N):
    """Return the N position weights ``1 / log2(2 + k)`` for ``k = 0 .. N-1``."""
    positions = np.arange(N)
    return 1. / np.log2(2 + positions)

In [None]:
# Linear regression: fit relevance from the non-group feature columns, then
# predict per-query relevances on the validation reader.
from sklearn import linear_model
# `normalize` is no longer accepted by LinearRegression in recent scikit-learn
# releases; False was its default, so dropping the argument keeps the same model.
model = linear_model.LinearRegression(fit_intercept=False)
# Flatten the per-query training data under separate names so `feats` / `rel`
# are never left holding the flattened arrays.
train_feats, train_rel = dr.data
train_X = np.array([item for sublist in train_feats for item in sublist])
train_y = np.array([item for sublist in train_rel for item in sublist])
# Column 0 (the group id) is excluded from the regressors.
model.fit(train_X[:, 1:], train_y)
# predictions on validation; `feats`, `rel` and `predicted_rels` are used by later cells
feats, rel = vdr.data
se_sum = 0
length = 0
predicted_rels = []
for i, query in enumerate(feats):
    rel_pred = model.predict(query[:,1:])
    predicted_rels.append(rel_pred)
    se_sum += np.sum((rel_pred - rel[i])**2)
    length += len(rel[i])
# Mean squared error over all validation candidates.
print(se_sum/ length)

In [None]:
# Inspect the fitted regression coefficients and score them as ranking weights.
print(model.coef_)
# Two coefficients, because the regression was fit on feature columns 1 and 2 only.
w1, w2 = model.coef_
# Fourth positional argument is `det`, i.e. deterministic evaluation.
print(eval_params(w1, w2, vdr, True))
# vvector(200) supplies position weights; get_avg_fairness_loss slices them to
# each query's length. The last argument is the fairness weight (0.0 here).
print(get_avg_fairness_loss(vdr, predicted_rels, vvector(200), 0.0))

In [None]:
# # for each lambda
# now for each query, estimate all relevances
# we find a fair ranking, find it's loss w.r.t true relevances
# Post-processing baseline: for every lambda, build a fair ranking per query from
# the predicted relevances and score it against the true relevances.
# Relies on `rel`, `feats` and `predicted_rels` from the linear-regression cell.
lmbdas = np.linspace(0, 6, 20)

# One row per lambda: [mean NDCG, mean fairness loss].
plt_data = np.zeros((len(lmbdas), 2))

for j, lmbda in enumerate(lmbdas):
    test_losses = []
    test_ndcgs = []
    
    for i in range(len(predicted_rels)):
        true_rels = rel[i]
        pred_rels = predicted_rels[i]
        # Built-in int instead of the np.int alias, which NumPy 1.24 removed.
        groups = np.array(feats[i][:, 0], dtype=int)
        n = len(groups)
        
        P, _, _ = fair_rank(pred_rels, groups, lmbda)
        
        test_loss = get_fairness_loss(P, true_rels, vvector(n), groups)
        test_losses.append(test_loss)
        
        test_ndcg = get_ndcg(P, true_rels, vvector(n))
        test_ndcgs.append(test_ndcg)
        
    plt_data[j] = [np.mean(test_ndcgs), np.mean(test_losses)]
        
        
    print("Lambda: {}, Average Test Fairness Loss: {}, Average Test NDCG: {}".format(lmbda, 
                                                     np.mean(test_losses), np.mean(test_ndcgs)))

plt.scatter(plt_data[:, 0], plt_data[:, 1])

In [None]:
model_params_list

In [None]:
def _ndcg_and_disparity(result):
    """Pick the [NDCG, asymmetric group disparity] pair out of an evaluation result."""
    return [result["ndcg"], result["avg_group_asym_disparity"]]


# plt_data_2: stochastic evaluation of every learnt model, preceded by the
#             deterministic evaluation of the first model.
# plt_data_3: deterministic evaluation of every learnt model.
plt_data_2, plt_data_3 = [], []
for i, (w1, w2) in enumerate(model_params_list):
    if i == 0:
        plt_data_2.append(_ndcg_and_disparity(eval_params(w1, w2, vdr, det=True)))
    plt_data_2.append(_ndcg_and_disparity(eval_params(w1, w2, vdr, det=False)))
    plt_data_3.append(_ndcg_and_disparity(eval_params(w1, w2, vdr, det=True)))

In [None]:
plt_data_2 = np.array(plt_data_2)

In [None]:
plt_data_2 = np.array(plt_data_2)
plt_data_3 = np.array(plt_data_3)

In [None]:
# Quick comparison in the (NDCG, disparity) plane; column 0 is NDCG, column 1 the disparity/loss.
# Post-processing sweep:
plt.scatter(plt_data[:, 0], plt_data[:, 1])
# Learnt policies (stochastic evaluations, plus the first model's deterministic one):
plt.scatter(plt_data_2[:, 0], plt_data_2[:, 1])
# Only the first deterministic evaluation from plt_data_3 is shown:
plt.scatter(plt_data_3[0, 0], plt_data_3[0, 1])

In [None]:
CustomLinearModel(D=3, fix_weight_dim=0)

In [None]:
# Baseline trained with demographic_parity_train for a range of fairness weights.
# NOTE(review): demographic_parity_train is presumably provided by this star
# import -- an explicit import would make the dependency visible.
from zehlike import *
model = CustomLinearModel(D=3, fix_weight_dim=0)
# lr, weight_decay and epochs are set to single-element lists here (they were
# scalars for on_policy_training) -- presumably what demographic_parity_train
# expects; verify against zehlike.
args.lr = [0.001]
args.lambda_reward = 1.0
plt_data_4 = []   # [ndcg, avg_group_asym_disparity] per lambda
lambdas = [0.0, 0.1, 1.0, 10, 100, 1000, 10000, 100000, 1000000]
args.weight_decay = [0.0]
args.epochs = [10]
args.evaluate_interval = 100
disparities_mat = np.zeros((len(lambdas), 1))
ndcg_mat = np.zeros((len(lambdas), 1))
for i, lg in enumerate(lambdas):
    args.lambda_group_fairness = lg
    # NOTE(review): the model is created once above and the returned model is fed
    # into the next iteration, so each lambda appears to start from the previous
    # lambda's result unless demographic_parity_train re-initialises it -- confirm
    # this is intended.
    model = demographic_parity_train(model, dr, vdr, vvector(200), args)
    results = evaluate_model(
            model,
            vdr,
            fairness_evaluation=False,
            group_fairness_evaluation=True,
            deterministic=True,
            args=args,
            num_sample_per_query=100)
    plt_data_4.append([results["ndcg"], results["avg_group_asym_disparity"]])
    ndcg_mat[i, 0], disparities_mat[i,0] = results["ndcg"], results["avg_group_asym_disparity"]

In [None]:
print(plt_data_4)

In [None]:
plt_data_4 = np.array(plt_data_4)

In [None]:
from IPython.display import set_matplotlib_formats

# Ask the inline backend for both 'pdf' and 'png' figure output.
# NOTE(review): newer IPython releases deprecate this helper in favour of
# matplotlib_inline.backend_inline.set_matplotlib_formats -- switch when upgrading.
set_matplotlib_formats('pdf', 'png')

# Re-apply the notebook-wide figure styling before the final trade-off plot.
plt.rcParams.update({
    'savefig.dpi': 75,
    'figure.autolayout': False,
    'figure.figsize': (10, 6),
    'axes.labelsize': 14,
    'axes.titlesize': 16,
    'font.size': 16,
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
    'legend.fontsize': 10,
    # Text is typeset by LaTeX in a Computer Modern serif face.
    'text.usetex': True,
    'font.family': "serif",
    'font.serif': "cm",
    # One \usepackage per line. The previous comma-separated form only works on
    # Matplotlib versions that split the preamble on commas; on versions that
    # pass the preamble through as a single string the comma lands in the LaTeX
    # preamble as literal text.
    'text.latex.preamble': "\n".join([r"\usepackage{subdepth}",
                                      r"\usepackage{type1cm}"]),
})

In [None]:
def ndcg_vs_disparity_plot(plt_data_mats, names, join=False, ranges=None):
    """Draw the utility-fairness trade-off figure, save it as a PDF and show it.

    Args:
        plt_data_mats: sequence of 2-D arrays; column 0 is plotted on the x-axis
            (labelled NDCG) and column 1 on the y-axis (labelled as the group
            disparity).
        names: legend label for each entry of ``plt_data_mats``.
        join: when true, connect each series with a dashed line; otherwise draw
            a scatter of '+' markers only.
        ranges: optional ``(xlim, ylim)`` pair applied to the axes.

    The figure is written to ``./plots/toy_tradeoff.pdf``.
    """
    plt.figure(figsize=(5.5,3.65))
    if ranges:
        x_range, y_range = ranges[0], ranges[1]
        plt.xlim(x_range)
        plt.ylim(y_range)
    for idx, series in enumerate(plt_data_mats):
        ndcg_values = series[:, 0]
        disparity_values = series[:, 1]
        if join:
            plt.plot(
                ndcg_values,
                disparity_values,
                marker="+",
                linestyle='--',
                label=names[idx], alpha=0.75)
        else:
            plt.scatter(
                ndcg_values,
                disparity_values,
                marker="+",
                label=names[idx])
    plt.legend(fontsize=12)
    # Caption placed below the axes (negative y offset).
    plt.title("(c) Utility-Fairness trade-off",y=-0.30)
    plt.xlabel("NDCG", fontsize=16)
    plt.ylabel(r'$\hat{\mathcal{D}}_{\rm group}$', fontsize=16)
    plt.grid()
    plt.savefig('./plots/toy_tradeoff.pdf', bbox_inches='tight')
    plt.show()

In [None]:
# Final trade-off figure comparing the three methods.  The labels are raw
# strings so the LaTeX backslashes (\lambda, \in) are not parsed as (invalid)
# Python escape sequences; the resulting text is unchanged.
ndcg_vs_disparity_plot([plt_data, plt_data_2,plt_data_4],
                      [r"Post-Processing ($\lambda \in [0, 6]$)",
                      r"Our Method ($\lambda \in [0,25]$)", 
                      r"Zehlike et al. ($\lambda \in [0, 10^6]$)"],
                      join=True)