# Proportional Approval Voting implementation
TODO: When it gets slow, score each unique CVR once, weighted by its count, instead of iterating over all CVRs

When that gets slow, come up with a more efficient algorithm

In [2]:
import pandas as pd
import collections
import itertools
import time

In [11]:
def harmonic(matches):
    """Return the harmonic number 1 + 1/2 + ... + 1/matches as a float.

    Any ``matches`` below 1 gives 0.0 because there are no terms to add.

    >>> harmonic(0)
    0.0
    >>> harmonic(2)
    1.5
    """
    # Terms are produced in increasing-denominator order: 1/1, 1/2, ...
    terms = (1 / denominator for denominator in range(1, matches + 1))

    total = 0.0
    for term in terms:
        total = total + term
    return total

In [4]:
# Lookup table: UTILITY[k] is harmonic(k) for k = 0..99, so scoring can index
# into it instead of recomputing the series for every ballot.
UTILITY = list(map(harmonic, range(100)))

In [8]:
UTILITY[:6]

[0.0, 1.0, 1.5, 1.8333333333333333, 2.083333333333333, 2.283333333333333]

In [9]:
def utility(permutation, cvr):
    """Look up the utility one ballot derives from one set of winners.

    ``permutation`` is the proposed set of winners and ``cvr`` holds the
    candidates the voter selected. The utility is the precomputed harmonic
    value for the number of candidates the two have in common.

    >>> utility(frozenset(["c1", "c2", "c4"]), frozenset(["c1", "c3", "c4"]))
    1.5
    """
    approved_winners = permutation.intersection(cvr)
    overlap = len(approved_winners)
    return UTILITY[overlap]

In [12]:
import doctest
# Run the doctest examples embedded in the docstrings of the current module.
doctest.testmod()

TestResults(failed=0, attempted=3)

In [56]:
def tally_pav(file, num_winners):
    """Tally given csv file with Proportional Approval Voting method.

    File should have one column per candidate, identified in headers,
    and have either a "0" or a "1" for each candidate in each row.

    Every possible set of ``num_winners`` candidates is scored by adding up,
    over all ballots, ``utility(candidate_set, ballot)``. Identical ballots
    are scored once and weighted by how often they occur, so the cost grows
    with the number of *unique* ballots rather than the number of rows.
    Because of that weighting, totals may differ in the last few floating
    point digits from a ballot-by-ballot sum.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        num_winners: Number of seats to fill.

    Returns:
        A tuple ``(winner, cvrs, scores, df)`` where ``winner`` is the
        ``(frozenset_of_candidates, score)`` pair with the highest score,
        ``cvrs`` is a list with one frozenset of selected candidates per row,
        ``scores`` maps every candidate set to its total score, and ``df``
        is the DataFrame that was read.

    Raises:
        ValueError: If the file has no ballot rows, or if ``num_winners``
            exceeds the number of candidate columns.
    """

    df = pd.read_csv(file)

    plurality = df.sum().sort_values(ascending=False)
    print("Plurality results in order")
    print(plurality)
    print()

    rows = list(df.itertuples(index=False))
    if not rows:
        raise ValueError("{} contains no ballots".format(file))

    # Candidate identifiers are the namedtuple field names pandas derives
    # from the column headers.
    all_candidates = rows[0]._fields

    if num_winners > len(all_candidates):
        raise ValueError(
            "num_winners ({}) exceeds the number of candidates ({})".format(
                num_winners, len(all_candidates)))

    # One frozenset per ballot; any truthy cell counts as a selection.
    cvrs = [
        frozenset(name for name, value in zip(all_candidates, row) if value)
        for row in rows
    ]

    uniq = collections.Counter(cvrs)
    print("{} CVRs, {} unique selections of candidates".format(len(cvrs), len(uniq)))

    scores = {}
    for result in itertools.combinations(all_candidates, num_winners):
        resultset = frozenset(result)
        # Score each distinct ballot once and weight it by its multiplicity.
        scores[resultset] = sum(
            count * utility(resultset, cvr) for cvr, count in uniq.items())

    def score_of(entry):
        """Return the score from a (candidate_set, score) pair."""
        return entry[1]

    print("Top 10 scores:")
    for entry in sorted(scores.items(), key=score_of, reverse=True)[:10]:
        print(entry)

    winner = max(scores.items(), key=score_of)
    print("\nMax score: {} for {}".format(winner[1], winner))

    return winner, cvrs, scores, df

In [68]:
winner, cvrs, scores, df = tally_pav("Westminster_Adams.csv", 3)

Plurality results in order
Anita_Seitz         4921
Shannon_Bird        4823
Maria_De_Cambra     3921
Mark_Clark          3500
David_DeMott        3126
Debbie_Bergamo      2668
Mike_Melvin         2112
Jason_Blanckaert    2026
Steve_Caulk         2026
Nathan_Pearce       1229
dtype: int64

12684 CVRs, 169 unique selections of candidates
Top 10 scores:
(frozenset({'Anita_Seitz', 'Mark_Clark', 'Shannon_Bird'}), 10799.500000000013)
(frozenset({'Anita_Seitz', 'Shannon_Bird', 'Maria_De_Cambra'}), 10619.16666666674)
(frozenset({'Anita_Seitz', 'Shannon_Bird', 'Debbie_Bergamo'}), 10607.50000000001)
(frozenset({'Anita_Seitz', 'Shannon_Bird', 'David_DeMott'}), 10588.83333333334)
(frozenset({'Mark_Clark', 'Shannon_Bird', 'Maria_De_Cambra'}), 10366.333333333334)
(frozenset({'Anita_Seitz', 'Shannon_Bird', 'Mike_Melvin'}), 10349.499999999996)
(frozenset({'Anita_Seitz', 'Shannon_Bird', 'Jason_Blanckaert'}), 10260.666666666666)
(frozenset({'Anita_Seitz', 'Mark_Clark', 'Maria_De_Cambra'}), 10213.999999

In [60]:
df.head()

Unnamed: 0,Mark_Clark,Mike_Melvin,Maria_De_Cambra,Shannon_Bird,Steve_Caulk,Debbie_Bergamo,Anita_Seitz,Jason_Blanckaert,David_DeMott,Nathan_Pearce
0,0,0,0,0,0,0,0,0,0,0
1,0,0,1,1,1,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,1,0,1,1,0,0
4,0,0,0,1,0,1,0,0,1,0


In [69]:
# Candidate identifiers, taken from the field names of the first row tuple.
all_candidates = next(df.itertuples(index=False))._fields

In [67]:
winner, cvrs, scores, df = tally_pav("Byers_SD_32J_Adams.csv", 4)

Plurality results in order
Tom_Thompson_III        81
Jerry_L_Sauer           75
Christopher_P_Cary      57
Julie_Smith             56
Donna_J_Sauer           54
Yvonne_M_Gerhardt       47
Jennifer_Simanovicki    36
Margaret_A_Holeman      23
dtype: int64

121 CVRs, 50 unique selections of candidates
Top 10 scores:
(frozenset({'Jerry_L_Sauer', 'Tom_Thompson_III', 'Julie_Smith', 'Donna_J_Sauer'}), 182.33333333333343)
(frozenset({'Jerry_L_Sauer', 'Christopher_P_Cary', 'Tom_Thompson_III', 'Julie_Smith'}), 182.3333333333334)
(frozenset({'Jerry_L_Sauer', 'Tom_Thompson_III', 'Julie_Smith', 'Yvonne_M_Gerhardt'}), 180.83333333333337)
(frozenset({'Jerry_L_Sauer', 'Christopher_P_Cary', 'Tom_Thompson_III', 'Yvonne_M_Gerhardt'}), 179.5833333333334)
(frozenset({'Jerry_L_Sauer', 'Christopher_P_Cary', 'Tom_Thompson_III', 'Donna_J_Sauer'}), 179.33333333333343)
(frozenset({'Jerry_L_Sauer', 'Tom_Thompson_III', 'Donna_J_Sauer', 'Yvonne_M_Gerhardt'}), 177.58333333333348)
(frozenset({'Christopher_P_Cary', 

# Timing results
Takes about 12 seconds(?) for 20 candidates, 10 winners, 121 ballots

Memory pressure with 24 candidates and 12 winners: nearly 3 GB used

24 choose 12 = 2704156

In [71]:
timings = []

In [77]:
# Size of the contest timed in the cells below: total number of candidates
# to choose from, and the number of winners in each candidate set.
candidates = 12
winners = 6

In [78]:
# Pad the real candidate names with integer placeholders until there are
# `candidates` entries, then enumerate every set of `winners` of them.
possible_results = itertools.combinations(
    all_candidates + tuple(range(candidates - len(all_candidates))),
    winners,
)

In [79]:
# Build the combinations iterator in this cell: itertools.combinations can be
# consumed only once, so re-running the cell against an already exhausted
# iterator would time an empty loop and append a meaningless entry.
possible_results = itertools.combinations(
    all_candidates + tuple(range(candidates - len(all_candidates))),
    winners,
)

# perf_counter is the clock intended for measuring short durations; unlike
# time.time() it is unaffected by system clock adjustments.
t1 = time.perf_counter()
scores = {}
for result in possible_results:
    resultset = frozenset(result)
    scores[resultset] = sum(utility(resultset, cvr) for cvr in cvrs)
timings.append("{} seconds for {} candidates, {} winners, {} ballots".format(time.perf_counter() - t1, candidates, winners, len(cvrs)))

In [80]:
timings

['1031.4427270889282 seconds for 20 candidates, 10 winners, 121 ballots',
 '5.157426595687866 seconds for 12 candidates, 6 winners, 12684 ballots']

In [285]:
# older results
timings

['0.005549907684326172 seconds for 8 candidates, 4 winners, 121 ballots',
 '0.0004897117614746094 seconds for 8 candidates, 4 winners, 121 ballots',
 '0.022046566009521484 seconds for 10 candidates, 5 winners, 121 ballots',
 '0.22809982299804688 seconds for 14 candidates, 7 winners, 121 ballots',
 '2.5786209106445312 seconds for 18 candidates, 9 winners, 121 ballots',
 '9.772108793258667 seconds for 20 candidates, 10 winners, 121 ballots',
 '39.48445129394531 seconds for 22 candidates, 11 winners, 121 ballots',
 '161.45553970336914 seconds for 24 candidates, 12 winners, 121 ballots',
 '4.725412607192993 seconds for 24 candidates, 5 winners, 121 ballots',
 '6.555292844772339 seconds for 30 candidates, 5 winners, 121 ballots']

In [229]:
# Pick the (candidate set, score) pair with the highest score.
winner = max(scores.items(), key=lambda entry: entry[1])

In [230]:
# Report the best score, followed by the full (candidate set, score) pair.
print("Max score: {} for {}".format(scores[winner[0]], winner))

Max score: 234.00000000000037 for (frozenset({0, 'Choice_573_1', 'Choice_564_1', 'Choice_585_1', 10, 'Choice_570_1', 'Choice_582_1', 'Choice_576_1', 'Choice_567_1', 'Choice_579_1'}), 234.00000000000037)


In [195]:
resultset

frozenset({'Choice_576_1', 'Choice_579_1', 'Choice_582_1', 'Choice_585_1'})

In [203]:
# NOTE(review): `c` is not defined in any cell visible in this notebook;
# presumably it is a collection of ballots (frozensets). Confirm before re-running.
utility(resultset, c[0])

1.5