This notebook contains code for some versions of slate clustering that we considered but that are not used in the paper.

In [None]:
from Clustering_Functions import *
import math
import more_itertools

The following version uses Borda or HH embeddings and finds the partition of the candidates into slates that minimizes the summed distances from the ballots to their nearest slate. Here both the ballots and the slates (considered as generalized ballots) are represented in the proxy space.

In [None]:
# version that exhaustively optimizes over all partitions of the candidates
# the score = summed L^1 distances of ballot proxies to their closest slate proxy
# (regarding slates as generalized ballots, and therefore as having proxies)
def kris_slate_cluster(election, k, proxy='HH', num_cands=None):
    """
    Exhaustively searches all partitions of the candidates into k slates and returns the partition 
    minimizing the summed L^1 distance from each ballot's proxy to its closest slate proxy, together 
    with the clustering of the ballots induced by that partition.

    Each slate is regarded as the generalized ballot (set(slate),), so that it has a proxy in the same 
    space as the ballots.  The same slate proxies are used for scoring the partitions and for 
    assigning the ballots to clusters.

    Args:
        election : dictionary matching ballots to weights.
        k : the number of slates (and therefore of clusters).
        proxy : 'HH' to use HH_proxy; any other value uses Borda_proxy.
        num_cands : (optional) the number of candidates; candidates are labelled 1,...,num_cands.
                    If None, it is taken to be the largest candidate label appearing in the election.

    Returns:
        best_slates, C
            best_slates = the optimal partition, as a list of k lists of candidates.
            C = a list of k dictionaries matching ballots to weights; C[i] holds the ballots closest 
                to best_slates[i].  A ballot that is equally close to several slates has its weight 
                split evenly among them.

    Raises:
        ValueError : if the election is empty and num_cands is not given, or if no partition of the 
                     candidates into k slates yields a finite best score (for example, when k exceeds 
                     the number of candidates).
    """
    if num_cands is None:
        if not election:
            raise ValueError("num_cands must be given when the election is empty")
        num_cands = max(item for ranking in election for item in ranking)

    proxy_fn = HH_proxy if proxy == 'HH' else Borda_proxy

    # the ballot proxies do not depend on the partition, so compute each of them only once
    ballot_proxies = {ballot: proxy_fn(ballot, num_cands=num_cands) for ballot in election}

    best_score = math.inf
    best_slates = None
    best_slate_proxies = None
    for slates in more_itertools.set_partitions(range(1, num_cands+1), k):
        slate_proxies = [proxy_fn((set(slate),), num_cands=num_cands) for slate in slates]
        this_score = 0
        for ballot, weight in election.items():
            ballot_proxy = ballot_proxies[ballot]
            this_score += weight * min(np.linalg.norm(ballot_proxy - slate_proxy, ord=1)
                                       for slate_proxy in slate_proxies)
        if this_score < best_score:
            best_score = this_score
            best_slates = slates
            best_slate_proxies = slate_proxies

    if best_slates is None:
        raise ValueError(f"no partition of {num_cands} candidates into k={k} slates was found")

    # construct the clustering assignment, measuring distances to the same slate proxies that were scored
    C = [dict() for _ in range(k)]
    for ballot, weight in election.items():
        ballot_proxy = ballot_proxies[ballot]
        dists = [np.linalg.norm(ballot_proxy - slate_proxy, ord=1) for slate_proxy in best_slate_proxies]
        # split the weight for ties
        min_dist = min(dists)
        closest_slate_indices = [i for i, dist in enumerate(dists) if dist == min_dist]
        weight_per_slate = weight / len(closest_slate_indices)
        for i in closest_slate_indices:
            C[i][ballot] = C[i].get(ballot, 0) + weight_per_slate
    return best_slates, C

The following method was used in the 2025 version of the paper. It only works with k=2 slates. Over all bipartitions of the candidates, it minimizes a summed (strong or weak) distance from the ballots to their nearest slate. This notion of distance is based on HH-embeddings: to measure the distance from a ballot to a bipartition {A,B}, it essentially projects onto the subspace corresponding to A-vs-B comparisons.

In [None]:
# Helper functions for Slate_cluster_old
def powerset(iterable):
    """
    Helper function for Slate_cluster_old.

    Returns a list of the nontrivial non-full subsets of the given iterable (every subset except the 
    empty set and the full set), each as a tuple.  The subsets are ordered by size, and within each 
    size in the order produced by itertools.combinations.

    Returns an empty list when the iterable has fewer than two elements.
    """
    s = list(iterable)
    # sizes 1,...,len(s)-1 are exactly the subsets that are neither empty nor full
    return list(chain.from_iterable(combinations(s, r) for r in range(1, len(s))))

def _slate_distances(ballot_proxy, A, B, dist, normalized):
    """
    Helper function for Slate_cluster_old.

    Returns (A_dist, B_dist): the (strong or weak) distances from a ballot to the conditions A>B and B>A.

    Every pair (i,j) with i in A and j in B is classified by comparing entries i-1 and j-1 of 
    ballot_proxy: a larger entry for i counts towards B_over_A, a smaller one towards A_over_B, and 
    equal entries count as a tie.
    NOTE(review): this presumes that a smaller proxy entry means a higher ranking -- confirm against Borda_vector.

    Args:
        ballot_proxy : indexable vector with one entry per candidate (candidate c is at index c-1).
        A, B : the two slates, as iterables of candidates.
        dist : 'strong' (each tie adds .5 to both distances) or 'weak' (ties are ignored).
        normalized : (boolean) whether to divide by the number of A-vs-B pairs (strong) or by the 
                     number of untied A-vs-B pairs (weak).
    """
    A_over_B = 0
    B_over_A = 0
    AB_tie = 0
    for i in A:
        for j in B:
            if ballot_proxy[i-1] > ballot_proxy[j-1]:
                B_over_A += 1
            elif ballot_proxy[i-1] < ballot_proxy[j-1]:
                A_over_B += 1
            else:
                AB_tie += 1

    if dist == 'strong':
        A_dist = B_over_A + .5*AB_tie
        B_dist = A_over_B + .5*AB_tie
        if normalized:
            num_pairs = len(A)*len(B)
            return A_dist/num_pairs, B_dist/num_pairs
        return A_dist, B_dist

    # weak distance: only the untied comparisons matter
    if not normalized:
        return B_over_A, A_over_B
    num_untied = A_over_B + B_over_A
    if num_untied == 0:
        # every A-vs-B pair is tied, so the ballot contradicts neither A>B nor B>A
        return 0.0, 0.0
    return B_over_A/num_untied, A_over_B/num_untied


def Slate_cluster_old(election, slate = None, verbose = False, dist = 'strong', normalized = True,
                  share_ties = True, return_data = False):
    """
    Returns a clustering with k=2 clusters using a slate-based method based on the distance that ballots are 
    from being consistent.
    
    For each slate S={A,B} (each bi-partition of the candidates), the slate's score is computed as the sum 
    (over the weighted ballots in the election) of the ballot's (strong or weak) distance to the closest condition: $A>B$ or $B>A$.
    
    The slate with the minimal score is used to partition the ballots into 2 clusters.

    Args:
        election : dictionary matching ballots to weights.
        slate: (optional - to cluster based on a prescribed slate) a tuple of the candidates in the first slate.
        verbose : boolean; whether to print the slates and the summary statistics.
        dist : one of {'strong','weak'} determines whether to penalize a ballot for having an A-candidate tie with a B-candidate 
        normalized : (boolean) whether to normalize the distance by the number of A-vs-B comparisons.
                     This only affects the scores used to select the best slate; once the slate is fixed, 
                     the clusters and the returned statistics are always computed from normalized distances.
        share_ties  : (boolean) whether to divide between the clusters the weight of a ballot that's equidistant to A>B and B>A (otherwise, tied ballots are assigned to cluster B)
        return_data : (boolean) whether to return the slate and summary statistics along with the clustering.
        
    Returns:
        (if return_data == False) clustering
        (if return_data == True) slate_dictionary, avg_dist_to_ordered, consistent_portion, clustering
            clustering = the tuple (CA, CB) of dictionaries matching ballots to weights
            slate_dictionary = {0:A_slate, 1:B_slate}
            avg_dist_to_ordered = the average distance of the ballots to the closest of A>B or B>A  
            consistent_portion = the portion of ballots that are (strongly or weakly) consistent with the slate

    Raises:
        ValueError : if dist is not one of {'strong','weak'}.
    """
    if dist not in ('strong', 'weak'):
        raise ValueError("dist must be one of {'strong','weak'}")

    num_cands = max([item for ranking in election.keys() for item in ranking])
    all_cands = set(range(1,num_cands+1))

    # match each ballot type with its Borda vector, computed only once
    proxies = {ballot: Borda_vector(ballot,num_cands=num_cands) for ballot in election}
    
    if slate:
        best_subset = slate
    else:
        best_score = float('inf')
        best_subset = tuple()
        
        # Determine the best slate
        for A in powerset(range(1,num_cands+1)):
            B = tuple(all_cands-set(A)) # the complement of A
            slate_score = 0
            for ballot, weight in election.items():
                A_dist, B_dist = _slate_distances(proxies[ballot], A, B, dist, normalized)
                slate_score += min(A_dist,B_dist)*weight

            if slate_score<best_score:
                best_score = slate_score
                best_subset = A

    # Form clusters from the best slate
    A = best_subset
    B = tuple(all_cands-set(A)) # the complement of A
    CA = dict()
    CB = dict()
    total_weight = sum(election.values())
    total_shared_weight = 0
    total_consistent_ballots = 0
    total_dist_to_ordered = 0
    
    for ballot, weight in election.items():
        # the reporting stage always works with normalized distances
        A_dist, B_dist = _slate_distances(proxies[ballot], A, B, dist, True)

        if A_dist == 0 or B_dist == 0:
            total_consistent_ballots += weight
        # normalized distances sum to 1 (or are both 0 when every pair is tied), 
        # so 1-A_dist stands in for B_dist here
        total_dist_to_ordered += min(A_dist,1-A_dist)*weight

        if share_ties and A_dist == B_dist:
            CA[ballot]=weight/2
            CB[ballot]=weight/2
            total_shared_weight +=weight
        elif A_dist < B_dist:
            CA[ballot]=weight
        else:
            CB[ballot]=weight
    avg_dist_to_ordered = total_dist_to_ordered/total_weight
    consistent_portion = total_consistent_ballots/total_weight
    
    if verbose:
        print(f"Slates = {A,B}.")
        print(f"Portion of ballots that tied = {total_shared_weight/sum(election.values())}")
        print(f"Portion of ballots that are {dist}ly consistent = {consistent_portion}")
        print(f"Average distance to ordered = {avg_dist_to_ordered}")
    
    if return_data:
        slate_dict = {0:A, 1:B}
        return slate_dict, avg_dist_to_ordered, consistent_portion, (CA,CB)
    else:
        return CA,CB