In [47]:
import urllib3
import numpy as np
import logging
from io import BytesIO
from zipfile import ZipFile
from urllib.request import urlopen

def calc_pl_maj_paradox(res, filename):
       
    candidates = dict(map(lambda x: x.split(','), res[1:int(res[0])+1]))
    rankings = list(map(lambda x: x.split(','), res[int(res[0])+2:len(res)-1]))
    
    lenc = len(candidates)
    c = list(candidates.keys())

    freqs = dict(zip(c, np.zeros(lenc)))
    
    for item in rankings:
        freqs[item[1]] += int(item[0])
    
    vwinner = max(freqs.values())
   
    winner = max(freqs, key=freqs.get)
    
    paradox = False
    
    l = dict(zip(c, np.zeros(lenc)))

    sfreq = sum(freqs.values())
    
    if vwinner > sfreq / 2: return paradox
        
    for item in rankings:
        ilen = len(item)
        if len(item) == 2: continue
        if len(item) == 3:
            if item[2] == winner:
                l[item[1]] += int(item[0])
        else: 
            for i in range(1,ilen):
                    if item[1] == winner: continue
                    else :
                        if item[i] == winner:
                            for j in range(1, i):
                                l[item[j]] += int(item[0])
    
    for i in l:
        if l[i] > sfreq/2:
                paradox = True
                print("{0}: {1} total votes. The candidate {2} is preferred {3} times over the winner {4} who won with {5} votes\n"
                      .format(filename, sfreq, candidates[i], l[i], candidates[winner],  freqs[winner]))
                
        if l[i] == sfreq/2:
                if i[0]<winner[0]:
                    paradox+=1
                print("{0}: {1} total votes. The candidate {2} is preferred {3} times over the winner {4} who won with {5} votes\n"
                          .format(filename, sfreq, candidates[i], l[i], candidates[winner],  freqs[winner]))
    return paradox


def read_zip_file(filepath):
          
    res = urlopen(filepath)
    
    zfile = ZipFile(BytesIO(res.read()))
    n = zfile.namelist()
    
    num = 0
    total_p = 0
    
    for i in n:
        f=zfile.open(i)
        contents=f.read()
        try:
            total_p+=calc_pl_maj_paradox(contents.decode('utf-8').split('\n'), i)
            num += 1
        except Exception:
            logging.info("{0} was not a valid dataset".format(i))
            pass

        finally:
            
            f.close() 
            
    print("\nFrequency of the PM paradox is {0}%".format(round((total_p/num)*100),2))
    
print('Reading SOI files...\n\n')   
read_zip_file('http://www.preflib.org/data/packs/soi.zip') 
print('\n\nReading SOC files...\n\n')   
read_zip_file('http://www.preflib.org/data/packs/soc.zip') 

Reading SOI files...


ED-00010-00000004.soi: 10.0 total votes. The candidate Dan Gurney  is preferred 5.0 times over the winner Jim Clark  who won with 3.0 votes

ED-00010-00000004.soi: 10.0 total votes. The candidate Graham Hill  is preferred 5.0 times over the winner Jim Clark  who won with 3.0 votes

ED-00010-00000004.soi: 10.0 total votes. The candidate Richie Ginther  is preferred 6.0 times over the winner Jim Clark  who won with 3.0 votes

ED-00010-00000004.soi: 10.0 total votes. The candidate Lorenzo Bandini  is preferred 5.0 times over the winner Jim Clark  who won with 3.0 votes

ED-00010-00000004.soi: 10.0 total votes. The candidate John Surtees  is preferred 5.0 times over the winner Jim Clark  who won with 3.0 votes

ED-00010-00000014.soi: 15.0 total votes. The candidate Emerson Fittipaldi  is preferred 11.0 times over the winner Ronnie Peterson  who won with 3.0 votes

ED-00010-00000014.soi: 15.0 total votes. The candidate Jody Scheckter  is preferred 8.0 times over the w


ED-00011-00000038.soi: 4.0 total votes. The candidate http://31453.r.msn.com/  is preferred 4.0 times over the winner http://en.wikipedia.org/  who won with 2.0 votes

ED-00011-00000038.soi: 4.0 total votes. The candidate http://www.surreyclassicwedding.ndo.co.uk/  is preferred 3.0 times over the winner http://en.wikipedia.org/  who won with 2.0 votes

ED-00011-00000038.soi: 4.0 total votes. The candidate http://www.dallasfortworthtravel.com/  is preferred 2.0 times over the winner http://en.wikipedia.org/  who won with 2.0 votes

ED-00011-00000038.soi: 4.0 total votes. The candidate http://vintagesportscars.com/  is preferred 2.0 times over the winner http://en.wikipedia.org/  who won with 2.0 votes

ED-00011-00000038.soi: 4.0 total votes. The candidate http://vintageirongolf.com/  is preferred 2.0 times over the winner http://en.wikipedia.org/  who won with 2.0 votes

ED-00011-00000038.soi: 4.0 total votes. The candidate http://www.tias.com/  is preferred 3.0 times over the winner h


Frequency of the PM paradox is 32%


Reading SOC files...


ED-00015-00000003.soc: 4.0 total votes. The candidate France  is preferred 2.0 times over the winner Indonesia  who won with 1.0 votes

ED-00015-00000003.soc: 4.0 total votes. The candidate United+States  is preferred 2.0 times over the winner Indonesia  who won with 1.0 votes

ED-00015-00000003.soc: 4.0 total votes. The candidate Japan  is preferred 2.0 times over the winner Indonesia  who won with 1.0 votes

ED-00015-00000003.soc: 4.0 total votes. The candidate Mexico  is preferred 2.0 times over the winner Indonesia  who won with 1.0 votes

ED-00015-00000003.soc: 4.0 total votes. The candidate Italy  is preferred 2.0 times over the winner Indonesia  who won with 1.0 votes

ED-00015-00000003.soc: 4.0 total votes. The candidate Portugal  is preferred 2.0 times over the winner Indonesia  who won with 1.0 votes

ED-00015-00000003.soc: 4.0 total votes. The candidate Spain  is preferred 2.0 times over the winner Indonesia  who w