In [64]:
from random import randint
from math import factorial
import numpy as np
from itertools import permutations

### Write methods for each approach

In [None]:
def gen_freqs_indices(num_cands, num_ballots):
    '''
        roll a 0 to {remaning} sided dice
        roll an index from 0 to n!-1 (until we see a new one)
        add each roll to appropriate lists

    '''
    indices_to_frequencies = {}
    remaning = int(num_ballots)
    while remaning > 0:
        freq = randint(0,remaning)
        index = randint(0, factorial(num_cands)-1)
        # adds to dictionary if the key does not exist, otherwise
        # increments it by freq
        indices_to_frequencies[index] = indices_to_frequencies.get(index, 0) + freq
        remaning -= freq
    return indices_to_frequencies

def Sn_indices_to_perms(indices, cands):
    '''
        Given a list of indices on S_n, returns a list of permutations
        of cands

        create a new ballot by shuffling cands,
        add it to the list if it doesnt already exist
    '''
    ballots = []
    while len(ballots) < len(indices):
        new_perm = np.random.permutation(cands).tolist()
        if not new_perm in ballots:
            ballots.append(new_perm)
    return ballots



In [65]:
# methods for multinomial approach
def sample_integer_vector_sum_k(num_cands, num_ballots):
    N = num_ballots
    k = factorial(num_cands)

    # Sample k-1 cut points from N + k - 1 positions
    cuts = np.sort(np.random.choice(N + k - 1, k - 1, replace=False))
    # Pad with 0 at start and N + k - 1 at end
    padded = np.concatenate(([0], cuts, [N + k - 1]))
    # The differences between cuts minus 1 give the bin counts
    almost_result = np.diff(padded)-1

    #
    if almost_result[0] == -1:
        almost_result[0] += 1
    else:
        # choose an index to randomly increment
        almost_result[np.random.choice(k)] +=1
        
    return almost_result

def convert_freq_vector_to_indices(int_vector_sum_k):
    # input is an integer vector whose values sum to something
    indices_freqs = {}
    for i, val in enumerate(int_vector_sum_k):
        if val != 0:
            indices_freqs[i] = val
    return indices_freqs

def convert_freq_vector_ballot_freq_map(int_vector_sum_k, cands):
    result = {} #map from ballots to freqs 
    all_perms = permutations(cands)
    for i, perm in enumerate(all_perms):
        freq = int_vector_sum_k[i]
        if freq != 0:
            if perm in result:
                print("Ayuuhh")
            result[perm] = freq
    return result

### fiddle with methods to make sure they're outputting correctly

In [None]:
n_cands = 25
n_ballots = 100
cands = [f"Cand{i}" for i in range(n_cands)]

freq_ind = gen_freqs_indices(num_cands=n_cands, num_ballots=n_ballots)
print(freq_ind)

ballots = Sn_indices_to_perms(list(freq_ind.keys()), cands)
print(ballots)
print(len(ballots))



{1421476424164159587554836: 89, 12191756976404486226995705: 4, 4874008981453881433776029: 0, 9872944197385619092016310: 3, 7649407217881305276786648: 0, 10962022561724217369689292: 4}
[['Cand8', 'Cand18', 'Cand2', 'Cand12', 'Cand1', 'Cand6', 'Cand14', 'Cand22', 'Cand13', 'Cand7', 'Cand17', 'Cand24', 'Cand21', 'Cand16', 'Cand23', 'Cand9', 'Cand4', 'Cand0', 'Cand19', 'Cand15', 'Cand10', 'Cand3', 'Cand5', 'Cand20', 'Cand11'], ['Cand19', 'Cand15', 'Cand9', 'Cand11', 'Cand4', 'Cand2', 'Cand7', 'Cand14', 'Cand1', 'Cand3', 'Cand13', 'Cand8', 'Cand23', 'Cand5', 'Cand20', 'Cand22', 'Cand21', 'Cand16', 'Cand0', 'Cand12', 'Cand24', 'Cand17', 'Cand18', 'Cand6', 'Cand10'], ['Cand17', 'Cand11', 'Cand23', 'Cand21', 'Cand3', 'Cand22', 'Cand9', 'Cand16', 'Cand1', 'Cand6', 'Cand4', 'Cand7', 'Cand5', 'Cand2', 'Cand20', 'Cand19', 'Cand24', 'Cand15', 'Cand12', 'Cand10', 'Cand14', 'Cand0', 'Cand18', 'Cand13', 'Cand8'], ['Cand3', 'Cand1', 'Cand18', 'Cand5', 'Cand14', 'Cand22', 'Cand23', 'Cand13', 'Cand21', '

In [70]:
multi_freqs = sample_integer_vector_sum_k(num_cands = 8, num_ballots=100)
#multi_indices_freqs = convert_freq_vector_to_indices(multi_freqs)

convert_freq_vector_ballot_freq_map(multi_freqs, [f"{i}" for i in range(8)] )



{('0', '1', '2', '5', '4', '7', '6', '3'): np.int64(1),
 ('0', '4', '3', '5', '1', '2', '6', '7'): np.int64(1),
 ('0', '4', '6', '2', '3', '5', '7', '1'): np.int64(1),
 ('0', '5', '6', '4', '3', '7', '1', '2'): np.int64(1),
 ('0', '6', '7', '3', '2', '4', '5', '1'): np.int64(1),
 ('0', '7', '4', '2', '5', '3', '1', '6'): np.int64(1),
 ('0', '7', '4', '5', '1', '2', '6', '3'): np.int64(1),
 ('1', '0', '7', '2', '6', '4', '3', '5'): np.int64(1),
 ('1', '2', '3', '5', '7', '0', '4', '6'): np.int64(1),
 ('1', '2', '5', '3', '0', '7', '6', '4'): np.int64(1),
 ('1', '3', '4', '2', '6', '5', '7', '0'): np.int64(1),
 ('1', '3', '6', '7', '0', '4', '5', '2'): np.int64(1),
 ('1', '3', '7', '6', '4', '5', '0', '2'): np.int64(1),
 ('1', '4', '0', '7', '2', '3', '5', '6'): np.int64(1),
 ('1', '4', '2', '6', '3', '5', '7', '0'): np.int64(1),
 ('1', '5', '2', '0', '6', '3', '4', '7'): np.int64(1),
 ('1', '6', '0', '7', '4', '3', '5', '2'): np.int64(1),
 ('1', '6', '3', '0', '4', '7', '2', '5'): np.in

In [None]:
'''
    how to compare the distribution of the above?
    well, i suppose we can paste in the multinomial code
        conver multinomail code into: freqs and indices form
    shall we compare on tv?
    once we have ballots and frequencies for both: new and multinomial
    we can extend these to complete maps
    and then we can take the tv distance
'''