In [8]:
import os
import pandas as pd
from elections.Ballot import Ballot
from elections.Candidate import Candidate
from elections.CandidateScore import CandidateScore
from elections.Party import Party  
from elections.HeadToHeadElection import HeadToHeadElection
from elections.DiversityRunoff import DiversityRunoffElection
from elections.InstantRunoffElection import InstantRunoffElection
from CVRLoader import CVRLoader


In [9]:

def load_xlsx(directory: str) -> "list[Ballot]":
    """Read every ``.xlsx`` file in ``directory`` and turn each row into a Ballot.

    Columns from index 3 onward are treated as ranking columns (presumably the
    first three columns are ballot metadata -- confirm against the data files).
    A candidate found in column ``col`` receives the score ``7 - col``, so the
    first ranking column scores 4, the next 3, and so on.

    Candidate names are normalised by dropping anything from the first ``(``
    onward and stripping surrounding whitespace.  Cells equal to "undervote" or
    "overvote", blank cells, and repeated mentions of a candidate on the same
    row are ignored.  Rows that yield no scores produce no ballot.

    Args:
        directory: Folder containing the spreadsheet files.

    Returns:
        A list of Ballot objects, one per row with at least one usable ranking.
        One Candidate instance is shared across all ballots for a given name.
    """
    # Skip Excel lock files ('~$...').  Sorting makes the processing order
    # reproducible; os.listdir returns entries in arbitrary order.
    xlsx_files = sorted(
        f for f in os.listdir(directory)
        if f.endswith('.xlsx') and not f.startswith('~$')
    )

    # Maps a candidate name to its single shared Candidate object.
    candidates = {}
    ballots = []
    for xlsx_file in xlsx_files:
        print(f"Processing file: '{xlsx_file}'")
        df = pd.read_excel(os.path.join(directory, xlsx_file))
        candidate_columns = range(3, len(df.columns))
        for _, row in df.iterrows():
            scores = []
            seen = set()
            for col in candidate_columns:
                cell = row.iloc[col]
                # Blank spreadsheet cells come back as NaN (a float), which has
                # no .split(); treat them as "no ranking in this column".
                if pd.isna(cell):
                    continue
                # Trim anything after a '(' and surrounding whitespace.
                candidate_name = str(cell).split('(')[0].strip()
                if not candidate_name or candidate_name in seen:
                    continue
                if candidate_name in ("undervote", "overvote"):
                    continue
                if candidate_name not in candidates:
                    print(f"Creating new candidate: '{candidate_name}'")
                    candidates[candidate_name] = Candidate(candidate_name, Party("None", "none"))
                candidate = candidates[candidate_name]
                # Earlier columns earn higher scores.
                score = 7 - col
                scores.append(CandidateScore(candidate, score))
                seen.add(candidate_name)
            if scores:
                ballots.append(Ballot(scores))

    return ballots

In [10]:
from dataclasses import dataclass
@dataclass
class Results:
    """Running tally of how often three election methods pick the same winner.

    All counters start at zero, so ``Results()`` gives an empty tally; the
    counters may also be supplied positionally or by keyword.
    """
    # Number of elections in which the IRV and Condorcet winners were equal.
    irv_matches_condorcet: int = 0
    # Number of elections in which the IRV and Diversity winners were equal.
    irv_matches_diversity: int = 0
    # Number of elections in which the Diversity and Condorcet winners were equal.
    diversity_matches_condorcet: int = 0
    # Total number of elections recorded through add().
    elections: int = 0

    def add(self, irv_result: str, condorcet_result: str, diversity_result: str):
        """Record one election by comparing the three winners pairwise.

        Args:
            irv_result: Winner reported by the IRV method.
            condorcet_result: Winner reported by the Condorcet method.
            diversity_result: Winner reported by the Diversity method.
        """
        if irv_result == condorcet_result:
            self.irv_matches_condorcet += 1
        if irv_result == diversity_result:
            self.irv_matches_diversity += 1
        if diversity_result == condorcet_result:
            self.diversity_matches_condorcet += 1
        self.elections += 1

    def print(self):
        """Print the tally, with each count also shown as a percentage of elections."""
        print("Results summary:")
        print(f"elections {self.elections}")
        # Avoid dividing by zero when nothing has been recorded yet.
        if self.elections == 0:
            print("No elections to analyze")
            return

        print(f"IRV matches Condorcet: {self.irv_matches_condorcet} {self.irv_matches_condorcet / self.elections * 100}%")
        print(f"IRV matches Diversity: {self.irv_matches_diversity} {self.irv_matches_diversity / self.elections * 100}%")
        print(f"Diversity matches Condorcet: {self.diversity_matches_condorcet} {self.diversity_matches_condorcet / self.elections * 100}%")

In [11]:

def analyze_election(results: Results, ballots: list[Ballot], diversity_threshold=.035, diversity_depth=2, debug=False):
    """Run three election methods over the same ballots and record where they agree.

    Builds a DiversityRunoffElection, a HeadToHeadElection and an
    InstantRunoffElection from ``ballots``, prints each winner's name, and
    passes the three names to ``results.add``.  When the ballots mention fewer
    than three distinct candidates, a message is printed and nothing is
    recorded.

    Args:
        results: Tally object updated through its ``add`` method.
        ballots: Ballots exposing ``ordered_candidates``.
        diversity_threshold: Forwarded to DiversityRunoffElection.
        diversity_depth: Forwarded to DiversityRunoffElection.
        debug: Forwarded to DiversityRunoffElection.
    """
    # Every distinct candidate object mentioned on any ballot.
    field = {entry.candidate for ballot in ballots for entry in ballot.ordered_candidates}

    if len(field) < 3:
        print("There are not enough candidates for analysis.")
        return

    # Each election gets its own copy of the candidate set.
    runoff_election = DiversityRunoffElection(
        ballots,
        set(field),
        diversity_threshold=diversity_threshold,
        diversity_depth=diversity_depth,
        debug=debug,
    )
    head_to_head = HeadToHeadElection(ballots, set(field))
    instant_runoff = InstantRunoffElection(ballots, set(field))

    print(f"Condorcet Winner: {head_to_head.result().winner().name}")
    print(f"IRV Winner: {instant_runoff.result().winner().name}")
    print(f"Diversity Runoff Winner: {runoff_election.result().winner().name}")

    results.add(
        instant_runoff.result().winner().name,
        head_to_head.result().winner().name,
        runoff_election.result().winner().name,
    )


In [None]:
# Memo of CVRLoader instances keyed by the directory they were built from.
cvr_cache = {}
def load_cached_cvr(directory: str) -> CVRLoader:
    """Return the CVRLoader for ``directory``, constructing it on first request.

    Later calls with the same directory return the stored instance without
    reloading.
    """
    try:
        return cvr_cache[directory]
    except KeyError:
        print(f"Loading CVR from {directory}")
        loader = CVRLoader(directory)
        cvr_cache[directory] = loader
        return loader

# Memo of ballot lists keyed by directory.  Despite the name, entries are
# produced by load_xlsx, i.e. from .xlsx spreadsheets.
csv_cache = {}
def load_cached_csv(directory: str) -> list[Ballot]:
    """Return the ballots for ``directory``, calling load_xlsx on first request.

    Later calls with the same directory return the stored list without
    re-reading the spreadsheets.
    """
    try:
        return csv_cache[directory]
    except KeyError:
        print(f"Loading xlsx from {directory}")
        loaded = load_xlsx(directory)
        csv_cache[directory] = loaded
        return loaded

In [13]:
# Module-level settings read by analyze_Maine and analyze_Alaska and forwarded
# to analyze_election.
diversity_threshold = .035
diversity_depth = 2
# NOTE: analyze_Alaska reads this global directly, whereas analyze_Maine takes
# its own `debug` parameter (default False) and does not see this value.
debug = True

def analyze_Maine(results: Results, debug=False):
    """Analyze the three Maine House-2 data sets and add the outcomes to ``results``.

    Ballots come from load_cached_csv; the module-level ``diversity_threshold``
    and ``diversity_depth`` are forwarded to analyze_election together with
    this function's own ``debug`` argument.
    """
    # Swap in ("testData/",) here to run against the test data instead.
    maine_paths = (
        "MaineData/2018/House-2",
        "MaineData/2022/House-2",
        "MaineData/2024/House-2",
    )
    for data_path in maine_paths:
        loaded_ballots = load_cached_csv(data_path)
        print(f"analyzing {data_path} Loaded {len(loaded_ballots)} ballots")
        analyze_election(results, loaded_ballots, diversity_threshold, diversity_depth, debug)

def analyze_Alaska(results: Results):
    """Analyze every qualifying contest in each "CVR*" directory under AlaskaData.

    A contest qualifies when it has at least one ballot and more than two
    ranks.  The module-level ``diversity_threshold``, ``diversity_depth`` and
    ``debug`` values are forwarded to analyze_election.
    """
    base_dir = "AlaskaData"
    cvr_dirs = [entry for entry in os.listdir(base_dir) if entry.startswith("CVR")]
    for cvr_dir in cvr_dirs:
        cvr = load_cached_cvr(os.path.join(base_dir, cvr_dir))
        print(f"{cvr_dir}: Loaded {len(cvr.elections)} elections")
        # Collect the qualifying contests first, then analyze them in order.
        qualifying = [
            c for c in cvr.elections.values()
            if len(c.ballots) > 0 and c.number_of_ranks > 2
        ]
        for contest in qualifying:
            print(f"analyzing contest {contest.name}")
            analyze_election(results, contest.ballots, diversity_threshold, diversity_depth, debug)

In [None]:

# results = Results()
# analyze_Maine(results)
# analyze_Alaska(results)
# results.print()

In [28]:
# create a set of unique candidates in all of the ballots
def find_all_candidates(ballots: list[Ballot]) -> set[str]:
    """Return the distinct candidate names that appear on any of the ballots.

    Note that the result holds the names (``score.candidate.name``), not the
    Candidate objects themselves.
    """
    return {
        score.candidate.name
        for ballot in ballots
        for score in ballot.ordered_candidates
    }

In [52]:
def find_top_candidates(ballots: list[Ballot], top_n: int, pct: float) -> set:
    """Pick the strongest candidates by Borda count.

    Each candidate's Borda count is the sum of the scores they receive across
    all ballots.  The ``top_n`` highest counts are kept, then trailing
    candidates are discarded while the last one has less than ``pct`` (a
    fraction, e.g. ``0.10``) of the leader's count.  The surviving table is
    printed.

    Args:
        ballots: Ballots exposing ``ordered_candidates``.
        top_n: Maximum number of candidates to keep.
        pct: Minimum share of the leader's Borda count, as a fraction.

    Returns:
        The set of qualifying candidate names (empty when there are no
        candidates).
    """
    borda_counts = {name: 0 for name in find_all_candidates(ballots)}
    for ballot in ballots:
        for score in ballot.ordered_candidates:
            borda_counts[score.candidate.name] += score.score

    # Highest count first; ties are broken by name so the result does not
    # depend on set iteration order.
    top_candidates = sorted(borda_counts.items(), key=lambda item: (-item[1], item[0]))[:top_n]

    # Drop the last-place candidate while it has less than pct of the leader.
    while len(top_candidates) > 1:
        leader_score = top_candidates[0][1]
        if leader_score <= 0:
            # The ratio is undefined (or meaningless) without a positive leader.
            break
        last_place_pct = top_candidates[-1][1] / leader_score
        if last_place_pct >= pct:
            break
        # last_place_pct is a fraction; scale it so the '%' sign is truthful.
        print(f"Last place candidate has {last_place_pct * 100:.2f}% of the votes of the first place candidate, discarded.")
        top_candidates = top_candidates[:-1]

    print(f"Top {top_n} candidates with highest borda counts:")
    leader_score = top_candidates[0][1] if top_candidates else 0
    for candidate, score in top_candidates:
        share = score / leader_score * 100 if leader_score else 0.0
        print("%30s %9d %.2f%%" % (candidate, score, share))

    # Return the qualifying candidates' names.
    return {candidate for candidate, _ in top_candidates}

def compute_ballot_orders(ballots: list[Ballot], top_n: int, pct: float) -> dict:
    """Count how many ballots share each ordering of the top candidates.

    Each ballot is reduced to the names of the top candidates (as chosen by
    find_top_candidates) in the order the ballot lists them.  When a ballot
    mentions all of them, the final name is dropped as redundant.  The
    remaining names are joined with ';' to form the key.  Ballots that
    mention none of the top candidates are skipped.

    Returns:
        A dict mapping each ';'-joined ordering to its ballot count.
    """
    finalists = find_top_candidates(ballots, top_n, pct)
    finalist_count = len(finalists)

    tally = {}
    for ballot in ballots:
        ranked = []
        for entry in ballot.ordered_candidates:
            if entry.candidate.name in finalists:
                ranked.append(entry.candidate.name)

        # A complete ranking determines its own last place, so omit it.
        if len(ranked) == finalist_count:
            ranked = ranked[:-1]

        if not ranked:
            continue

        key = ";".join(ranked)
        if key in tally:
            tally[key] += 1
        else:
            tally[key] = 1
    return tally

def print_ballot_orders(ballot_orders: dict):
    """Print each ballot ordering with its vote count and share of all votes.

    Orderings are listed in descending order of their key string.
    """
    grand_total = sum(ballot_orders.values())
    for order in sorted(ballot_orders, reverse=True):
        votes = ballot_orders[order]
        percentage = (votes / grand_total) * 100
        print(f"{order:70} {votes:7d} {percentage:5.2f}%")

def single_peakedness_test(ballot_orders: dict):
    """Report which candidate is ranked last least often.

    Voter preferences are treated as single peaked if at least one candidate
    is rarely ranked last.  ``ballot_orders`` maps ';'-joined orderings (with
    the last place of a complete ranking already dropped, as produced by
    compute_ballot_orders) to vote counts.  An ordering that leaves exactly
    one candidate unmentioned puts that candidate in last place; any other
    ordering does not identify a last place and is counted as a "weak single
    peaked" ballot.

    The set of candidates is inferred from the keys, so a candidate who never
    appears in any key is not considered.

    Args:
        ballot_orders: Mapping of ';'-joined candidate orderings to vote counts.
    """
    candidates = set()
    for order in ballot_orders:
        candidates.update(order.split(";"))

    total_votes = sum(ballot_orders.values())
    if total_votes == 0:
        # Nothing to report, and the percentages below would divide by zero.
        print("Single peakedness test: no ballots to analyze")
        return

    # Start every candidate at zero so that a candidate nobody ranks last is
    # still eligible to be reported as the minimum.
    last_place_votes = {candidate: 0 for candidate in candidates}
    weak_single_peakedness_count = 0
    for candidate_order, votes in ballot_orders.items():
        unranked = candidates.difference(candidate_order.split(";"))
        if len(unranked) == 1:
            last_place_votes[next(iter(unranked))] += votes
        else:
            weak_single_peakedness_count += votes

    # Candidate ranked last the least often; ties are broken by name.
    min_candidate = min(sorted(last_place_votes), key=last_place_votes.get)
    min_votes = last_place_votes[min_candidate]

    print(f"Single peakedness test: {min_candidate:30s} " +
          f"is ranked last {min_votes:7d} " +
          f"times out of {total_votes:7d} ({min_votes / total_votes * 100:.2f}%) " +
          f"weak single peaked ballots: {weak_single_peakedness_count}/{weak_single_peakedness_count / total_votes * 100:.2f}%"
          )


def test_single_peakedness():
    """Run the single-peakedness report for Alaska contests that include "Begich".

    For each "CVR*" directory under AlaskaData, contests with fewer than three
    ranks are skipped.  For the remaining contests whose candidate names
    contain "Begich", the ballot orderings of the top 3 candidates (with a
    0.10 cutoff) are computed, printed and passed to single_peakedness_test.
    """
    base_dir = "AlaskaData"
    cvr_dirs = [entry for entry in os.listdir(base_dir) if entry.startswith("CVR")]
    for cvr_dir in cvr_dirs:
        cvr = load_cached_cvr(os.path.join(base_dir, cvr_dir))
        print(f"{cvr_dir}: Loaded {len(cvr.elections)} elections")

        for contest in cvr.elections.values():
            if contest.number_of_ranks < 3:
                continue
            names = find_all_candidates(contest.ballots)
            # Only contests where some candidate name contains "Begich".
            if "Begich" not in ",".join(names):
                continue
            orders = compute_ballot_orders(contest.ballots, 3, .10)
            print_ballot_orders(orders)
            single_peakedness_test(orders)

# Run the single-peakedness report; this reads the "CVR*" directories under
# "AlaskaData" and prints the tables captured in the cell output below.
test_single_peakedness()

CVR_Export_20241130154411: Loaded 73 elections
Top 3 candidates with highest borda counts:
                  Begich, Nick  20026168 100.00%
              Peltola, Mary S.  17796292 88.87%
              Howe, John Wayne   8494771 42.42%
Peltola, Mary S.;Howe, John Wayne                                        34635 10.43%
Peltola, Mary S.;Begich, Nick                                            13398  4.03%
Peltola, Mary S.                                                        107620 32.40%
Howe, John Wayne;Peltola, Mary S.                                         2813  0.85%
Howe, John Wayne;Begich, Nick                                             4977  1.50%
Howe, John Wayne                                                          6472  1.95%
Begich, Nick;Peltola, Mary S.                                             7074  2.13%
Begich, Nick;Howe, John Wayne                                            27767  8.36%
Begich, Nick                                                            1274