In [245]:
import pandas as pd
import numpy as np

import copy
import itertools
import os
import re

# Erroneous input (two policies ranked the same) is resolved randomly.
# Set the seed to prevent flaky voting.
np.random.seed(42)

In [3]:
working_dir = '/home/rgiordan/Documents/git_repos/Presentations/CCC_covid_vote'

In [7]:
responses = pd.read_csv(os.path.join(working_dir, 'example.csv'))

In [112]:
# Select the columns that compile the votes

#print(responses.columns)
a_col = responses.columns[3]
b_col = responses.columns[4]
c_col = responses.columns[5]
d_col = responses.columns[6]
# print('\n'.join([a_col, b_col, c_col, d_col]))

# Assert that I have selected the columns correctly
assert(re.search("Policy A", a_col))
assert(re.search("Policy B", b_col))
assert(re.search("Policy C", c_col))
assert(re.search("Policy D", d_col))

In [275]:
# Load the CSV and convert the votes to numeric ranks.

def ConvertToNumericVote(pd_col):
    # Empty rows (not voted on) are storted as nan, not strings.
    vote_str = pd_col.fillna('0').to_numpy()
    
    # Keep only the numbers.
    vote_str = np.array([ re.sub(r'[^0-9]', '', v) for v in vote_str ])
    
    return vote_str.astype('int')

original_votes = np.vstack([
    ConvertToNumericVote(responses[col]) for col in [ a_col, b_col, c_col, d_col ]
])
print(original_votes)

# Assert that the A vote is the first row as expected.
assert(np.all(ConvertToNumericVote(responses[a_col]) == original_votes[0, :]))

[[2 4 4 4 0 4 3 4 4 4 4 2 4 4 3 2 4 3 2]
 [3 3 3 3 0 2 4 3 3 2 1 1 3 2 4 1 3 4 3]
 [1 2 2 2 2 1 2 2 2 3 2 3 2 3 3 3 2 2 1]
 [4 1 1 1 1 3 1 1 1 1 3 4 1 1 1 4 1 1 4]]


In [260]:
def AssertEquivalentVotes(old_vote, new_vote, exclude=np.array([])):
    # Assert that the strict pairwise ranks of the new_vote matches the old_vote,
    # excluding the indices in exclude.
    
    # Though computatoinally wasteful, I'll use this in place of unit tests when
    # modifying the votes for ranked choice and resolving errors.
    
    assert(len(old_vote) == len(new_vote))

    num_policies = len(old_vote)
    if (len(exclude) > 0):
        assert(np.max(exclude) < num_policies)
        assert(np.min(exclude) >= 0)
    
    policies = np.setdiff1d(np.arange(num_policies), exclude)
    
    for i, j in itertools.product(policies, policies):
        if old_vote[i] < old_vote[j]:
            assert(new_vote[i] < new_vote[j])


In [309]:
#vote = votes[:, 0]
vote = np.array([0, 2, 2, 4])

def RepairVote(old_vote, verbose=False):
    # Check for valid input and correct voting errors, following the office hours document.
    # Returns a new set of rankings which is in order and has no duplicates.
    # For a description of what's going on, set verbose=True.
    
    def VerbosePrint(s):
        if verbose:
            print(s)

    vote = copy.copy(old_vote)
    VerbosePrint(f'Original vote: {old_vote}')
    num_votes = sum(vote > 0)
    rank = 1
    while rank <= num_votes:
        VerbosePrint(f'Rank {rank}.  Current vote: {vote}')
        if not np.any(vote == rank):
            # There is no policy with this rank; decrement the other votes and try again.
            VerbosePrint(f'Rank {rank} missing, decrementing other votes')
            dec_inds = np.logical_and(vote > rank, vote > 0) 
            vote[dec_inds] = vote[dec_inds] - 1
        else:
            rank_inds = np.argwhere(vote == rank).flatten()
            if len(rank_inds) > 1:
                # This rank was duplicated.  Randomly split up this rank among the
                # policies.
                VerbosePrint(f'Rank {rank} duplicated, randomly splitting indices {rank_inds}')

                num_dups = len(rank_inds)
                new_ranks = np.random.choice(num_dups, num_dups, replace=False) + rank

                # Increment other votes to make room for num_dups - 1 extra votes
                dec_inds = np.logical_and(vote > rank, vote > 0) 
                vote[dec_inds] = vote[dec_inds] + num_dups - 1
                vote[rank_inds] = new_ranks
                rank += num_dups - 1
            else:
                # This rank is okay, go to the next one.
                rank += 1

    VerbosePrint(f'Final vote: {vote}')
    AssertEquivalentVotes(old_vote, vote)
    return vote

print(original_votes[:, 14])
print(RepairVote(original_votes[:, 14], verbose=True))

# vote = copy.copy(original_votes)
# for voter in range(vote.shape[1]):
#     print(voter)
#     vote[:, voter] = RepairVote(vote[:, voter])

[3 4 3 1]
Original vote: [3 4 3 1]
Rank 1.  Current vote: [3 4 3 1]
Rank 2.  Current vote: [3 4 3 1]
Rank 2 missing, decrementing other votes
Rank 2.  Current vote: [2 3 2 1]
Rank 2 duplicated, randomly splitting indices [0 2]
Rank 3.  Current vote: [3 4 2 1]
Rank 4.  Current vote: [3 4 2 1]


NameError: name 'verbosePrint' is not defined

[3 4 3 1]


In [None]:
print(vote)
print(original_votes)

In [273]:
def FindWinnerAndLoser(votes):
    # Identify a winner (if there is one) and the loser among the votes array.
    # Return (winner, loser), where both may be arrays in the case of a tie.

    num_voters = votes.shape[1]
    num_votes = np.sum(votes == 1)
    assert(num_votes <= num_voters)
    if num_votes != num_voters:
        print(f'This many voters did not vote this round: {num_voters - num_votes}')
    vote_count = np.sum(votes == 1, axis=1)

    loser = np.argwhere(vote_count == np.min(vote_count)).flatten()
    if len(loser) > 1:
        print(f'There was a tie among losers: {loser}')
    majority = vote_count / num_votes >= 0.5
    if np.any(majority):
        winner = np.argwhere(majority).flatten()
        if (len(winner) > 1):
            print(f'There was a tie among winners: {winner}.')
        else:
            print(f'The winner was {winner}')
        return (winner, loser)
    else:
        print('There was no majority')
        return (-1, loser)
    

def RemovePolicy(votes, drop_index):
    new_votes = copy.copy(votes)
    drop_index = 3
    increment_cols = np.argwhere(new_votes[drop_index, :] == 1).flatten()
    new_votes[:, increment_cols] = votes[:, increment_cols] - 1
    new_votes[new_votes < 0] = 0
    new_votes[drop_index, :] = 0
    
    for voter in range(votes.shape[1]):
        AssertEquivalentVotes(votes[:, voter], new_votes[:, voter], exclude=[drop_index])
    return new_votes

print('\nOriginal:')
FindWinnerAndLoser(votes)

print('\nThree stripped:')
FindWinnerAndLoser(RemovePolicy(votes, 3))


Original:
The winner was [3]

Three stripped:
This many voters did not vote this round: 1
There was a tie among losers: [0 3]
The winner was [2]


(array([2]), array([0, 3]))

In [263]:
votes
print(np.sum(votes == 1))
print(np.sum(RemovePolicy(votes, 3) == 1))

19
18


In [264]:
print(votes)

print(RemovePolicy(votes, 3))

[[2 4 4 4 0 4 3 4 4 4 4 2 4 4 3 2 4 3 2]
 [3 3 3 3 0 2 4 3 3 2 1 1 3 2 4 1 3 4 3]
 [1 2 2 2 2 1 2 2 2 3 2 3 2 3 3 3 2 2 1]
 [4 1 1 1 1 3 1 1 1 1 3 4 1 1 1 4 1 1 4]]
[[2 3 3 3 0 4 2 3 3 3 4 2 3 3 2 2 3 2 2]
 [3 2 2 2 0 2 3 2 2 1 1 1 2 1 3 1 2 3 3]
 [1 1 1 1 1 1 1 1 1 2 2 3 1 2 2 3 1 1 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
