In [16]:
import pandas as pd
from collections import Counter
from votekit.utils import mentions
from votekit.elections import STV
import pickle

In [None]:

# overvote_set_size_by_cand[d][c] is a Counter for candidate c in district d mapping
#   (number of candidates marked at an overvoted rank position) -> (times c was one of them).
overvote_set_size_by_cand = {d:{} for d in range(1,5)}

for d in range(1,5):
    print(d)  # progress marker: district currently being processed
    df = pd.read_csv(f"../CVRs/raw_city_csv/Portland_D{d}_raw_from_city.csv")

    # group the mark columns by rank position (1-6) via the '<rank>:Number' fragment of the header
    rank_columns = {i:[col for col in df.columns if f'{i}:Number' in col] for i in range(1,7)}
    all_rank_cols = [col for col_list in rank_columns.values() for col in col_list]

    # keep only ballots with at least one mark in some rank column
    voters_df = df[df[all_rank_cols].sum(axis=1) > 0].reset_index(drop=True)

    # Count the marks per (ballot, rank position) column-wise and keep only the ballots that
    # have more than one mark at some position. Ballots without an overvote contribute nothing
    # to the tally, so skipping them avoids the slow row-by-row pass over every ballot while
    # leaving the order in which overvotes are encountered unchanged.
    marks_per_rank = pd.DataFrame(
        {rank_position: voters_df[cols].sum(axis=1) for rank_position, cols in rank_columns.items()}
    )
    overvoted_ballots = voters_df[(marks_per_rank > 1).any(axis=1)]

    set_sizes_by_cand = {}
    for _, row in overvoted_ballots.iterrows():
        for cols in rank_columns.values():
            marks = row[cols]  # this ballot's marks at a single rank position

            if marks.sum() > 1:
                # the candidate name is the second-to-last ':'-separated field of the column header
                candidates = [col_name.split(":")[-2] for col_name in marks[marks == 1].index]

                for c in candidates:
                    set_sizes_by_cand.setdefault(c, Counter())[len(candidates)] += 1

    # total number of overvotes involving a candidate: sum(counter.values());
    # the Counter itself gives the breakdown by how many candidates shared the position
    overvote_set_size_by_cand[d] = set_sizes_by_cand




In [None]:
# district level: the full {candidate: Counter(overvote set size -> count)} mapping for district 1
overvote_set_size_by_cand[1]

20

In [17]:
# candidate specific: look the Counter up once and report on it
peggy_set_sizes = overvote_set_size_by_cand[1]["Peggy Sue Owens"]

print("Peggy overvotes by set size:", peggy_set_sizes)

# occurrences in which exactly three candidates shared the overvoted position
three_way_count = peggy_set_sizes[3]
print("Peggy Sue Owens was in an overvote with 3 candidates this many times: ", three_way_count)

# every overvoted position that included Peggy, regardless of set size
total_overvoted_positions = sum(peggy_set_sizes.values())
print("total number of positions that ranked Peggy wasted to overvotes: ", total_overvoted_positions)

Peggy overvotes by set size: Counter({3: 201, 4: 158, 2: 144, 5: 75, 6: 72, 16: 58, 19: 55, 7: 36, 8: 30, 9: 22, 14: 12, 18: 12, 15: 12, 10: 11, 11: 11, 17: 8, 13: 8, 12: 4})
Peggy Sue Owens was in an overvote with 3 candidates this many times:  201
total number of positions that ranked Peggy wasted to overvotes:  929


Things to note about current computation:

- Overvotes are not one-to-one with wasted votes: a voter can include candidate A in an overvote at several rank positions on the same ballot. Even if the overvote had been allowed to count, the repeated marks for A would have been removed later on.

# Only computing First Place overvotes

In [None]:
# first_place_overvote_set_size_by_cand[d][c] is a Counter for candidate c in district d mapping
#   (number of candidates marked at an overvoted first rank) -> (times c was one of them).
first_place_overvote_set_size_by_cand = {d:{} for d in range(1,5)}

for d in range(1,5):
    print(d)  # progress marker: district currently being processed
    df = pd.read_csv(f"../CVRs/raw_city_csv/Portland_D{d}_raw_from_city.csv")

    # group the mark columns by rank position (1-6) via the '<rank>:Number' fragment of the header
    rank_columns = {i:[col for col in df.columns if f'{i}:Number' in col] for i in range(1,7)}
    all_rank_cols = [col for col_list in rank_columns.values() for col in col_list]

    # keep only ballots with at least one mark in some rank column
    voters_df = df[df[all_rank_cols].sum(axis=1) > 0].reset_index(drop=True)

    # Only rank position 1 matters here: slice its columns once and keep the ballots with more
    # than one first-place mark, instead of walking every ballot row by row.
    first_rank_marks = voters_df[rank_columns[1]]
    overvoted_first_rank = first_rank_marks[first_rank_marks.sum(axis=1) > 1]

    set_sizes_by_cand = {}
    for _, marks in overvoted_first_rank.iterrows():
        # the candidate name is the second-to-last ':'-separated field of the column header
        candidates = [col_name.split(":")[-2] for col_name in marks[marks == 1].index]

        for c in candidates:
            set_sizes_by_cand.setdefault(c, Counter())[len(candidates)] += 1

    # total number of first-place overvotes involving a candidate: sum(counter.values());
    # the Counter itself gives the breakdown by how many candidates shared first place
    first_place_overvote_set_size_by_cand[d] = set_sizes_by_cand

    


In [19]:
# candidate specific: look the first-place Counter up once and report on it
peggy_first_place_sizes = first_place_overvote_set_size_by_cand[1]["Peggy Sue Owens"]

print("Peggy first place overvotes by set size:", peggy_first_place_sizes)

# occurrences in which exactly three candidates shared the first-place position
three_way_first_place = peggy_first_place_sizes[3]
print("Peggy Sue Owens was in a first place overvote with 3 candidates this many times: ", three_way_first_place)

# every overvoted first-place position that included Peggy, regardless of set size
total_first_place_overvotes = sum(peggy_first_place_sizes.values())
print("total number of first place votes for Peggy wasted to overvotes: ", total_first_place_overvotes)

Peggy first place overvotes by set size: Counter({3: 71, 4: 52, 2: 49, 6: 24, 19: 22, 16: 21, 5: 18, 7: 9, 9: 7, 8: 7, 10: 5, 18: 5, 11: 4, 15: 2, 13: 2, 17: 2, 12: 1, 14: 1})
Peggy Sue Owens was in a first place overvote with 3 candidates this many times:  71
total number of first place votes for Peggy wasted to overvotes:  302
