## Measuring the divergence between empirical and theoretical probabilities of permutations using squared difference

$$
\Delta(a, b) = \sum_{i}\left(Prob(i, a) - Prob(i, b)\right)^2
$$

In [1]:
import import_ipynb
from Mallows_Notebook import *
from PL_Notebook import *
import metropolis
import math
from tqdm import tqdm_notebook
import itertools

importing Jupyter notebook from Mallows_Notebook.ipynb
importing Jupyter notebook from PL_Notebook.ipynb
0.125


HBox(children=(IntProgress(value=0, description='plackettCost'), HTML(value='')))




In [2]:
# PrefLib .soi input files for dataset ED-00002, numbered 1 through 7.
files = ['ED-00002-%08d.soi' % election for election in range(1, 8)]

# Per-file results; every list is filled in the same order as `files`.
list_of_votes = []   # (num_votes, lengths, votes) triples from the reader
mallows_params = []  # one runMallows result per file
pl_params = []       # one runPL result per file

# Iteration count handed to both fitting routines.
nruns = 100_000

# Rough run-time estimate, printed before the long-running fits start.
print(
    'Projected Time =', 120/1000.0 * nruns,
    'seconds, which is ~', 2/1000.0 * nruns, 'minutes',
)

for file in tqdm_notebook(files, desc='All Files'):
    # The first value returned by the reader is not used here.
    _, lengths, num_votes, votes = readPreflib.soiInputwithNumVotes(f'preflib_soi/{file}')
    print(num_votes)
    list_of_votes.append((num_votes, lengths, votes))

    # Fit Mallows first, then Plackett-Luce, on the same votes.
    p_mal = runMallows(votes, nruns, lengths)
    mallows_params.append(p_mal)

    p_pl = runPL(votes, nruns, lengths)
    pl_params.append(p_pl)

Projected Time = 12000.0 seconds, which is ~ 200.0 minutes


HBox(children=(IntProgress(value=0, description='All Files', max=7), HTML(value='')))

475


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))

488


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))

504


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))

421


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))

482


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))

436


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))

403


HBox(children=(IntProgress(value=0, description='mallowsCost', max=100000), HTML(value='')))

HBox(children=(IntProgress(value=0, description='plackettCost', max=100000), HTML(value='')))




In [3]:
# mallows_params
# prob pickle things here

In [4]:
# Scratch check on the first election: gather the second field of every vote
# entry as a tuple, then count the candidate votes over 4 alternatives.
# NOTE: all_possible_votes must already be defined when this cell runs,
# otherwise the call below raises NameError.
_, _, temp = list_of_votes[0]
known = list(map(tuple, list(zip(*temp))[1]))
pos = all_possible_votes(4)
len(pos)

NameError: name 'all_possible_votes' is not defined

In [6]:
def all_possible_votes(n):
    """Enumerate every ordered selection of 1..n items from the labels 1..n.

    The result lists all permutations of length 1 first, then length 2, and
    so on up to length ``n``; within one length the order is the one produced
    by ``itertools.permutations``.

    Args:
        n: Number of alternatives; the labels used are ``1, 2, ..., n``.

    Returns:
        A list of tuples of ints. Empty when ``n`` is less than 1.
    """
    alternatives = range(1, n + 1)
    return [
        ranking
        for length in alternatives
        for ranking in itertools.permutations(alternatives, length)
    ]

# Size check: how many candidate votes (every length 1..9) exist for 9 alternatives.
len(all_possible_votes(9))

986409

In [7]:
def squaredDif(a, b):
    """Return the square of the difference between ``a`` and ``b``."""
    return pow(a - b, 2)

# For every election, sum the squared differences between the empirical
# frequency of each candidate vote and the probability that each fitted model
# (Mallows, Plackett-Luce) assigns to it. One row per election goes in `table`.
# NOTE(review): the sum runs over votes of every length; whether the model
# probabilities are normalized over that whole space is not visible here - confirm.
table = []
col_names = ['Number_Votes','Mallow\'s_Divergence','Plackett-Luce_Divergence']

for i, (num_votes, lengths, votes) in enumerate(tqdm_notebook(list_of_votes)):
    num_alternatives = len(lengths)
    sigma, phi = mallows_params[i]
    pl_weights = pl_params[i]

    # Index the observed votes once so the inner loop does a constant-time
    # dict lookup instead of a linear `in` + `.index` scan per candidate vote.
    # Each entry is read as (count, ranking). setdefault keeps the first count
    # if a ranking is listed twice, which matches what list.index would return.
    # Assumes ranking elements are hashable (e.g. ints) - TODO confirm.
    freq_by_vote = {}
    for entry in votes:
        freq_by_vote.setdefault(tuple(entry[1]), entry[0])

    delta_mallows = 0
    delta_plackett = 0

    for vote in tqdm_notebook(all_possible_votes(num_alternatives)):
        # A vote that never occurs in the data has empirical probability 0.
        if vote in freq_by_vote:
            empirical = freq_by_vote[vote] / num_votes
        else:
            empirical = 0

        delta_mallows += squaredDif(mallowsProb(vote, sigma, phi), empirical)
        delta_plackett += squaredDif(probPlackett(vote, pl_weights), empirical)

    table.append([num_votes, delta_mallows, delta_plackett])

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))

HBox(children=(IntProgress(value=0, max=325), HTML(value='')))

HBox(children=(IntProgress(value=0, max=13699), HTML(value='')))

HBox(children=(IntProgress(value=0, max=109600), HTML(value='')))

HBox(children=(IntProgress(value=0, max=986409), HTML(value='')))

HBox(children=(IntProgress(value=0, max=325), HTML(value='')))

HBox(children=(IntProgress(value=0, max=64), HTML(value='')))




In [8]:
import pandas as pd

# Turn the per-election rows into a labelled DataFrame whose index starts at 1
# rather than 0.
npdata = np.array(table)
results_df = pd.DataFrame(npdata, columns=col_names)
results_df.index = results_df.index + 1
# pickle.dump(results_df, open('pickle/divergence10k.p','wb'))
results_df

Unnamed: 0,Number_Votes,Mallow's_Divergence,Plackett-Luce_Divergence
1,475.0,233.060052,7.778155
2,488.0,779.340626,12.644121
3,504.0,320.412273,26.22931
4,421.0,4497.960653,37.407906
5,482.0,8651.540042,51.029067
6,436.0,591.484631,12.426945
7,403.0,139.157712,7.884394


In [None]:
# Scratch check: a tuple can be looked up by value in a list of permutations.
perms = [*itertools.permutations(range(1, 4))]
print(perms)
(1, 2, 3) in perms

In [9]:
# Persist the fitted parameters of both models together with the divergence
# table. The context manager closes the file handle even if pickling raises;
# the original left the handle returned by open() unclosed.
with open('./pickle/11-14_100kruns_sumofsquares.p', 'wb') as out_file:
    pickle.dump([mallows_params, pl_params, table], out_file)