In [2]:
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import scipy
import numpy as np
from itertools import combinations
from scipy.optimize import curve_fit
import statsmodels.api as sm
from tqdm import trange, tqdm
from distributions.sampling import sample_mallows_with_noise
from model_selection.rank_aggregation import borda, trimmed_borda, kemeny, trimmed_kemeny

# Memo table used by kernel_weight: maps a (u, t) pair to the weight already
# computed for it, so each distinct pair is evaluated at most once per kernel.
KERNEL_CACHE = {}

In [None]:
def objective(ranks, aggregation_type='kemeny'):
    """Average distance between each ranking and the aggregate of all of them.

    The rankings are first aggregated into a single consensus ranking
    (``sigma_star``) with the requested method, then ``mk.distance`` is
    evaluated between every input ranking and that consensus and the mean is
    returned.

    Parameters
    ----------
    ranks : iterable of rankings
        Handed unchanged to the aggregator and then iterated one ranking at a
        time (assumed to be a 2-D array with one permutation per row --
        TODO confirm against the aggregators).
    aggregation_type : {'kemeny', 'borda'}, default 'kemeny'
        Which aggregator produces the consensus ranking.

    Returns
    -------
    float
        Mean of ``mk.distance(r, sigma_star)`` over all ``r`` in ``ranks``.

    Raises
    ------
    ValueError
        If ``aggregation_type`` is not one of the supported names. Without
        this check ``sigma_star`` would never be assigned and the failure
        would surface later as an obscure NameError.
    """
    if aggregation_type == 'borda':
        _, sigma_star = borda(ranks=ranks, weights=None)
    elif aggregation_type == 'kemeny':
        _, sigma_star = kemeny(ranks=ranks, weights=None, verbose=False)
    else:
        raise ValueError(
            f"Unknown aggregation_type {aggregation_type!r}; "
            "expected 'borda' or 'kemeny'."
        )
    # NOTE(review): `mk` is not imported in the cells visible here -- confirm it
    # is brought into scope elsewhere in the notebook.
    return np.mean([mk.distance(r, sigma_star) for r in ranks])
    
def kernel_weight(u, t):
    """Regression weight for a coalition of size ``t`` drawn from ``u`` players.

    For ``0 < t < u`` the weight is ``(u - 1) / (C(u, t) * t * (u - t))``.
    The denominator vanishes at ``t == 0`` and ``t == u``, so for those two
    sizes the large finite constant ``1e12`` is returned instead of dividing
    by zero.

    Parameters
    ----------
    u : int
        Total number of players.
    t : int
        Size of the coalition.

    Returns
    -------
    float
        The weight; computed values are memoised in ``KERNEL_CACHE`` under the
        key ``(u, t)``.
    """
    # Degenerate coalition sizes: the formula below would divide by zero.
    if t == u or t == 0:
        return 1e12

    key = (u, t)
    if key not in KERNEL_CACHE:
        KERNEL_CACHE[key] = (u - 1) / (scipy.special.comb(u, t) * t * (u - t))
    return KERNEL_CACHE[key]

In [None]:
def kernel_shapely(ranks, aggregation_type='borda'):
    """Estimate per-ranking Shapley values by weighted least squares.

    Every non-empty subset ``T`` of the rows of ``ranks`` is enumerated. For
    each subset the value ``g(T) = objective(ranks[T])`` is regressed on the
    0/1 membership indicators of ``T`` (plus an intercept), with observation
    weights given by ``kernel_weight(u, |T|)``.

    The number of subsets is ``2**u - 1``, so the cost grows exponentially
    with the number of rankings.

    (The function name keeps its original spelling so existing callers
    continue to work.)

    Parameters
    ----------
    ranks : array-like with one ranking per row
        Converted with ``np.asarray``; rows are selected by subset.
    aggregation_type : str, default 'borda'
        Forwarded to ``objective``.

    Returns
    -------
    numpy.ndarray
        Fitted coefficients of length ``u + 1``: index 0 is the intercept,
        indices ``1..u`` are the coefficients of the individual rankings.

    Raises
    ------
    ValueError
        If ``ranks`` contains no rankings.
    """
    ranks = np.asarray(ranks)
    u = ranks.shape[0]
    if u == 0:
        raise ValueError("kernel_shapely needs at least one ranking")

    X = []  # membership indicators: row[j] == 1 iff ranking j is in T
    y = []  # g(T)
    w = []  # k(U, T)
    for t in trange(1, u + 1):
        # The weight depends only on the coalition size, not its members.
        weight = kernel_weight(u, t)
        for T in combinations(range(u), t):
            members = list(T)
            y.append(objective(ranks[members, :], aggregation_type=aggregation_type))
            indicator = np.zeros(u)
            indicator[members] = 1
            X.append(indicator)
            w.append(weight)

    X = np.vstack(X)
    # has_constant='add' guarantees the intercept column is always prepended;
    # the default ('skip') silently omits it when a column is already constant,
    # which happens when there is a single ranking.
    X = sm.add_constant(X, has_constant='add')

    wls_results = sm.WLS(y, X, weights=w).fit()
    return wls_results.params


In [None]:
# Sampling configuration; every entry is forwarded as a keyword argument to
# sample_mallows_with_noise.
parameters = {
    'N': 15,  # Number of permutations
    'n': 30,  # Number of items
    'theta': 0.1,
    'noise': 0.5,
    'type': 'random',
    'seed': 0,
}

ranks, n_noisy, n_mallows = sample_mallows_with_noise(**parameters)
N, n = ranks.shape

# One colour per ranking for the scatter plot: n_mallows blue entries followed
# by n_noisy red ones (assumes the rows of `ranks` come in that same order --
# TODO confirm against sample_mallows_with_noise).
colors = n_mallows * ['darkblue'] + n_noisy * ['red']

In [None]:
# Exhaustive fit over every non-empty subset of the rows of `ranks`, so the
# run time grows exponentially with the number of rankings.
# NOTE: despite its name, this variable receives the fitted coefficient array
# returned by kernel_shapely (intercept at index 0), not a results object.
wls_model = kernel_shapely(ranks, aggregation_type='borda')

In [None]:
# Single-argument mk.distance evaluated on every sampled ranking (presumably
# the distance to a reference/identity permutation -- confirm against mk).
# NOTE(review): `mk` is not imported in the cells visible here.
dist_from_true_rank = np.array(list(map(mk.distance, ranks)))

In [None]:
# Scatter of each ranking's fitted coefficient against its distance value,
# coloured by the entries of `colors`.
# NOTE(review): `plt` is not imported in the cells visible here.
plt.style.use('ggplot')
figure, axis = plt.subplots(1, 1, figsize=(4, 4))
# kernel_shapely returns the coefficient array itself, so there is no
# `.params` attribute to read; index 0 is the intercept and is skipped.
shapley_values = wls_model[1:]
axis.scatter(shapley_values, dist_from_true_rank, c=colors, alpha=0.7)
axis.set_xlabel('Shapley Value of Permutation', fontsize=12)
axis.set_ylabel('Distance from Central Permutation', fontsize=12);