In [None]:
import pandas as pd
import numpy as np
import glob

In [None]:
df = pd.read_csv('aggregated_voting_NYCMayor.csv')
df.head()

Unnamed: 0,1st,2nd,3rd,4th,5th,count
0,217572,Null,Null,Null,Null,64274
1,217796,Null,Null,Null,Null,20566
2,219469,Null,Null,Null,Null,12771
3,219978,Null,Null,Null,Null,10258
4,217572,219469,Null,Null,Null,9833


In [None]:
df_clean = df[~df.eq("undervote").any(axis=1)]
df_clean = df[~df.eq("overvote").any(axis=1)]
df_clean = df[~df.eq("Null").any(axis=1)]

In [None]:
df_clean.to_csv('cleaned_MayorData.csv', index=False)

In [None]:
df_clean.head()

Unnamed: 0,1st,2nd,3rd,4th,5th,count
90,219469,219978,217605,218491,221183,868
97,219469,219978,218491,217605,221183,827
98,219978,219469,217605,221183,217796,811
109,219469,219978,217605,221183,218491,724
123,219469,219978,217605,218491,217796,656


In [None]:
df = pd.read_csv('cleaned_MayorData.csv')

In [None]:
first_column = df.columns[0]

number_counts = df[first_column].value_counts()

print(f"Number of unique values in the first column: {len(number_counts)}")
print(number_counts)

Number of unique values in the first column: 14
1st
217572      9140
219469      7784
217796      7277
219978      6177
217605      4205
218491      4070
221183      3129
218127      3032
221141      2071
217654      2021
218117      1160
218922      1146
221458       969
Write-in     328
Name: count, dtype: int64


In [42]:
ballots_df = pd.read_csv('cleaned_MayorData.csv')


In [43]:

# Convert each row into a tuple: (preferences list, weight)
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates (from the ranked preferences)
candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        candidates.add(candidate)
candidates = list(candidates)

def count_votes(ballots, candidates):
    """
    Count the votes for the current round using weighted ballots.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

round_num = 1
while True:
    # Tally the weighted votes for this round.
    vote_counts = count_votes(ballots, candidates)
    total_votes = sum(vote_counts.values())
    print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")

    # Check if any candidate has a majority (>50% of active votes)
    for candidate, count in vote_counts.items():
        if count > total_votes / 2:
            print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
            winner = candidate
            break
    else:
        # No candidate has a majority; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            break

        round_num += 1
        continue
    break

Round 1 vote counts: {'221183': 12217, '218922': 1400, '218117': 2290, '219978': 91940, '217654': 3647, '221458': 1124, '218127': 11863, '217796': 44783, '221141': 3901, '217572': 120901, '217605': 23057, '218491': 13480, 'Write-in': 356, '219469': 103825} (Total active votes: 434784)
Eliminated in round 1: ['Write-in']

Round 2 vote counts: {'221183': 12226, '218922': 1414, '218117': 2315, '219978': 91985, '217654': 3655, '221458': 1135, '218127': 11885, '217796': 44847, '221141': 3910, '217572': 120957, '217605': 23092, '218491': 13505, '219469': 103858} (Total active votes: 434784)
Eliminated in round 2: ['221458']

Round 3 vote counts: {'221183': 12304, '218922': 1479, '218117': 2354, '219978': 92053, '217654': 3697, '218127': 11984, '217796': 44964, '221141': 3931, '217572': 121230, '217605': 23149, '218491': 13585, '219469': 104054} (Total active votes: 434784)
Eliminated in round 3: ['218922']

Round 4 vote counts: {'221183': 12388, '218117': 2402, '219978': 92232, '217654': 373

In [35]:
#IRV SIMULATION
import random 
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the ranked preferences.
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    For each ballot, the vote is assigned to the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50%).
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run simulation with full ballots
# ------------------------------
print("Running IRV simulation with full ballots:")
winner_full = simulate_irv(ballots, original_candidates)

# ------------------------------
# Randomly eliminate an X percent of the ballots and simulate again.
# ------------------------------
elimination_percent = 0.1  # Change this value for a different percentage (e.g., 0.1 for 10%).
num_ballots = len(ballots)
num_to_remove = int(num_ballots * elimination_percent)

# Randomly select ballots to remove. The remaining ballots will be used for the simulation.
ballots_reduced = random.sample(ballots, num_ballots - num_to_remove)

print("\nRunning IRV simulation with reduced ballots (after randomly eliminating {:.0f}% of ballots):".format(elimination_percent * 100))
winner_reduced = simulate_irv(ballots_reduced, original_candidates)

# ------------------------------
# Compare the results.
# ------------------------------
print("\nComparison of Results:")
print("Winner with full ballots:", winner_full)
print("Winner with reduced ballots:", winner_reduced)

Running IRV simulation with full ballots:
Round 1 vote counts: {'221183': 12217, '218922': 1400, '218117': 2290, '219978': 91940, '217654': 3647, '221458': 1124, '218127': 11863, '217796': 44783, '221141': 3901, '217572': 120901, '217605': 23057, '218491': 13480, 'Write-in': 356, '219469': 103825} (Total active votes: 434784)
Eliminated in round 1: ['Write-in']

Round 2 vote counts: {'221183': 12226, '218922': 1414, '218117': 2315, '219978': 91985, '217654': 3655, '221458': 1135, '218127': 11885, '217796': 44847, '221141': 3910, '217572': 120957, '217605': 23092, '218491': 13505, '219469': 103858} (Total active votes: 434784)
Eliminated in round 2: ['221458']

Round 3 vote counts: {'221183': 12304, '218922': 1479, '218117': 2354, '219978': 92053, '217654': 3697, '218127': 11984, '217796': 44964, '221141': 3931, '217572': 121230, '217605': 23149, '218491': 13585, '219469': 104054} (Total active votes: 434784)
Eliminated in round 3: ['218922']

Round 4 vote counts: {'221183': 12388, '218

In [66]:
import random
import pandas as pd

# ------------------------------
# Load and clean ballots
# ------------------------------

ballots_df = pd.read_csv('aggregated_voting_NYCMayor.csv')

# Candidates to ignore
IGNORED_CANDIDATES = {"Null", "Write-in"}

# Clean ballots and remove ignored candidates
ballots = []
for row in ballots_df.values.tolist():
    ranked = [c for c in row[:-1] if c not in IGNORED_CANDIDATES]
    weight = row[-1]
    ballots.append((ranked, weight))

# Get list of unique valid candidates
original_candidates = list(set(
    c for ballot, _ in ballots for c in ballot if c not in IGNORED_CANDIDATES
))

# ------------------------------
# IRV
# ------------------------------

def count_votes_irv(ballots, candidates):
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break
    return vote_counts

def simulate_irv(ballots, original_candidates):
    candidates = list(original_candidates)
    elimination_order = []
    while True:
        vote_counts = count_votes_irv(ballots, candidates)
        total_votes = sum(vote_counts.values())

        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                if candidate not in elimination_order:
                    elimination_order.append(candidate)
                break

        min_votes = min(vote_counts.values())
        eliminated = [c for c, count in vote_counts.items() if count == min_votes]

        for c in eliminated:
            if c not in elimination_order:
                elimination_order.insert(0, c)
            if c in candidates:
                candidates.remove(c)

        if not candidates or len(vote_counts) == len(eliminated):
            break

    top_5 = elimination_order[::-1][:5]
    print(f"IRV Top 5: {top_5}")
    return top_5

# ------------------------------
# Copeland
# ------------------------------

def simulate_copeland(ballots, candidates):
    pairwise_wins = {c: 0 for c in candidates}
    for c1 in candidates:
        for c2 in candidates:
            if c1 == c2:
                continue
            c1_wins, c2_wins = 0, 0
            for ballot, weight in ballots:
                ranks = {name: i for i, name in enumerate(ballot) if name in candidates}
                if c1 in ranks and c2 in ranks:
                    if ranks[c1] < ranks[c2]:
                        c1_wins += weight
                    elif ranks[c2] < ranks[c1]:
                        c2_wins += weight
            if c1_wins > c2_wins:
                pairwise_wins[c1] += 1
            elif c2_wins > c1_wins:
                pairwise_wins[c1] -= 1

    sorted_scores = sorted(pairwise_wins.items(), key=lambda x: x[1], reverse=True)
    top_5 = [c for c, s in sorted_scores[:5]]
    print(f"Copeland Top 5: {top_5}")
    return top_5

# ------------------------------
# Borda
# ------------------------------

def simulate_borda(ballots, candidates):
    scores = {c: 0 for c in candidates}
    for ballot, weight in ballots:
        ranked = [c for c in ballot if c in candidates]
        length = len(ranked)
        for i, c in enumerate(ranked):
            scores[c] += weight * (length - i - 1)
    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    top_5 = [c for c, s in sorted_scores[:5]]
    print(f"Borda Top 5: {top_5}")
    return top_5

# ------------------------------
# Plurality
# ------------------------------

def simulate_plurality(ballots, candidates):
    scores = {c: 0 for c in candidates}
    for ballot, weight in ballots:
        for c in ballot:
            if c in candidates:
                scores[c] += weight
                break
    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    top_5 = [c for c, s in sorted_scores[:5]]
    print(f"Plurality Top 5: {top_5}")
    return top_5

# ------------------------------
# Plurality Veto
# ------------------------------

def simulate_plurality_veto(ballots, candidates):
    scores = {c: 0 for c in candidates}
    for ballot, weight in ballots:
        ranked = [c for c in ballot if c in candidates]
        if len(ranked) > 1:
            vetoed = ranked[-1]
            for c in ranked[:-1]:
                scores[c] += weight
        elif len(ranked) == 1:
            scores[ranked[0]] += weight
    sorted_scores = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    top_5 = [c for c, s in sorted_scores[:5]]
    print(f"Plurality Veto Top 5: {top_5}")
    return top_5

# ------------------------------
# Summary Function
# ------------------------------

def run_all_methods(ballots, candidates):
    print("\n===== Summary of Voting Methods =====")
    results = {
        "IRV": simulate_irv(ballots, candidates),
        "Copeland": simulate_copeland(ballots, candidates),
        "Borda": simulate_borda(ballots, candidates),
        "Plurality": simulate_plurality(ballots, candidates),
        "Plurality Veto": simulate_plurality_veto(ballots, candidates),
    }
    print("\n=== Final Summary ===")
    for method, top5 in results.items():
        print(f"{method} Top 5: {top5}")
    return results

# ------------------------------
# Run Full and Reduced Ballot Scenarios
# ------------------------------

print("Running election simulations with full ballots:")
full_results = run_all_methods(ballots, original_candidates)

elimination_percent = 0.1
num_ballots = len(ballots)
num_to_remove = int(num_ballots * elimination_percent)
ballots_reduced = random.sample(ballots, num_ballots - num_to_remove)

print("\nRunning election simulations with reduced ballots ({:.0f}% removed):".format(elimination_percent * 100))
reduced_results = run_all_methods(ballots_reduced, original_candidates)

# ------------------------------
# Compare Results
# ------------------------------

print("\n=== Comparison of Results ===")
for method in full_results:
    print(f"{method} - Full: {full_results[method]}, Reduced: {reduced_results[method]}")


Running election simulations with full ballots:

===== Summary of Voting Methods =====
IRV Top 5: ['217572', '221458', '218922', '218117', '221141']
Copeland Top 5: ['217572', '219469', '219978', '217796', '217605']
Borda Top 5: ['219469', '219978', '217572', '217796', '217605']
Plurality Top 5: ['217572', '219469', '219978', '217796', '217605']
Plurality Veto Top 5: ['217572', '219978', '219469', '217796', '217605']

=== Final Summary ===
IRV Top 5: ['217572', '221458', '218922', '218117', '221141']
Copeland Top 5: ['217572', '219469', '219978', '217796', '217605']
Borda Top 5: ['219469', '219978', '217572', '217796', '217605']
Plurality Top 5: ['217572', '219469', '219978', '217796', '217605']
Plurality Veto Top 5: ['217572', '219978', '219469', '217796', '217605']

Running election simulations with reduced ballots (10% removed):

===== Summary of Voting Methods =====
IRV Top 5: ['219978', '221458', '218922', '218117', '217654']
Copeland Top 5: ['217572', '219469', '219978', '217796'

In [None]:
import pandas as pd
import random  # Make sure to import the random module

# Convert each row into a tuple: (preferences list, weight)
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the ranked preferences.
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

# ------------------------------
# Define helper functions for IRV simulation
# ------------------------------
def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates, verbose=False):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    
    If verbose is True, prints detailed round-by-round information.
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        if verbose:
            print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50% of active votes)
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                if verbose:
                    print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        if verbose:
            print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            if verbose:
                print("Election resulted in a tie among the remaining candidates:")
                print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run multiple simulations and record the results
# ------------------------------
num_simulations = 1000  # Set this to the desired number of simulations.
elimination_percent = 0.9  # Percent (e.g., 0.1 for 10%) of ballots to eliminate for the reduced simulation.

results = []  # List to store each simulation's results.

for sim in range(num_simulations):
    print(f"\nRunning simulation {sim + 1}...")
    
    # Run simulation with full ballots.
    winner_full = simulate_irv(ballots, original_candidates, verbose=False)
    
    # For the reduced simulation, randomly eliminate the specified percent of ballots.
    num_ballots = len(ballots)
    num_to_remove = int(num_ballots * elimination_percent)
    ballots_reduced = random.sample(ballots, num_ballots - num_to_remove)
    winner_reduced = simulate_irv(ballots_reduced, original_candidates, verbose=False)
    
    # Record the winners.
    results.append({
        'simulation': sim + 1,
        'winner_full': winner_full,
        'winner_reduced': winner_reduced
    })

# ------------------------------
# Save the simulation results to a CSV file
# ------------------------------
results_df = pd.DataFrame(results)
results_df.to_csv('simulation_results_10.csv', index=False)
print("\nSimulation results saved to simulation_results_10.csv")


Running simulation 1...

Running simulation 2...

Running simulation 3...

Running simulation 4...

Running simulation 5...

Running simulation 6...

Running simulation 7...

Running simulation 8...

Running simulation 9...

Running simulation 10...

Running simulation 11...

Running simulation 12...

Running simulation 13...

Running simulation 14...

Running simulation 15...

Running simulation 16...

Running simulation 17...

Running simulation 18...

Running simulation 19...

Running simulation 20...

Running simulation 21...

Running simulation 22...

Running simulation 23...

Running simulation 24...

Running simulation 25...

Running simulation 26...

Running simulation 27...

Running simulation 28...

Running simulation 29...

Running simulation 30...

Running simulation 31...

Running simulation 32...

Running simulation 33...

Running simulation 34...

Running simulation 35...

Running simulation 36...

Running simulation 37...

Running simulation 38...

Running simulation 3

In [None]:
ballots = [(list(row[:3]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the first 3 ranked preferences.
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

# ------------------------------
# Define helper functions for IRV simulation
# ------------------------------
def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50% of active votes)
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run simulation with truncated ballots
# ------------------------------
print("Running IRV simulation with truncated ballots (top 3 preferences only):")
winner = simulate_irv(ballots, original_candidates)
print("\nWinner with truncated ballots:", winner)

Running IRV simulation with truncated ballots (top 3 preferences only):
Round 1 vote counts: {'218491': 13480, '217605': 23057, '219978': 91940, '217654': 3647, '217796': 44783, '218117': 2290, '221141': 3901, '218922': 1400, '221458': 1124, 'Write-in': 356, '218127': 11863, '217572': 120901, '219469': 103825, '221183': 12217} (Total active votes: 434784)
Eliminated in round 1: ['Write-in']

Round 2 vote counts: {'218491': 13505, '217605': 23092, '219978': 91985, '217654': 3655, '217796': 44847, '218117': 2315, '221141': 3910, '218922': 1414, '221458': 1135, '218127': 11885, '217572': 120957, '219469': 103858, '221183': 12226} (Total active votes: 434784)
Eliminated in round 2: ['221458']

Round 3 vote counts: {'218491': 13585, '217605': 23149, '219978': 92053, '217654': 3697, '217796': 44964, '218117': 2354, '221141': 3931, '218922': 1479, '218127': 11984, '217572': 121230, '219469': 104054, '221183': 12304} (Total active votes: 434784)
Eliminated in round 3: ['218922']

Round 4 vote 

In [25]:
import pandas as pd
import random

# ------------------------------
# Set the number of ranked candidates to consider (Change this value as needed)
# ------------------------------
num_ranked_candidates = 5  # Change this value to the desired number of ranked candidates

# ------------------------------
# Load and prepare the ballots data
# ------------------------------
ballots_df = pd.read_csv('exact_top5_ballots.csv')  # Replace with the actual file path

# Convert each row into a tuple: (limited candidate preferences, weight)
ballots = [(list(row[:num_ranked_candidates]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the selected number of ranked preferences
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

# ------------------------------
# Define helper functions for IRV simulation
# ------------------------------
def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50% of active votes)
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run simulation with predefined ranked candidates
# ------------------------------
print(f"Running IRV simulation with top {num_ranked_candidates} preferences only:")
winner = simulate_irv(ballots, original_candidates)
print(f"\nWinner with top {num_ranked_candidates} ranked candidates:", winner)

Running IRV simulation with top 5 preferences only:
Round 1 vote counts: {217796: 1311, 217572: 2962, 219978: 1981, 219469: 2842, 218491: 477} (Total active votes: 9573)
Eliminated in round 1: [218491]

Round 2 vote counts: {217796: 1360, 217572: 3059, 219978: 2111, 219469: 3043} (Total active votes: 9573)
Eliminated in round 2: [217796]

Round 3 vote counts: {217572: 3518, 219978: 2638, 219469: 3417} (Total active votes: 9573)
Eliminated in round 3: [219978]

Round 4 vote counts: {217572: 4589, 219469: 4984} (Total active votes: 9573)

Winner: 219469 with 4984 votes (majority of 9573 active votes)!

Winner with top 5 ranked candidates: 219469


In [None]:
import pandas as pd
ballots_df = pd.read_csv('top5_ballots_named')
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]


In [62]:
#single peaked data set 

import pandas as pd

# Candidate ID to name mapping
id_to_name = {
    "217572": "Adams (#1)",
    "217605": "Stringer (#5)",
    "219469": "Wiley (#3)",
    "219978": "Garcia (#4)",
    "217796": "Yang (#2)"
}

# Define the top 5 candidates as a set (IDs only)
top5_candidates = set(id_to_name.keys())

# Load the cleaned Mayor data CSV
ballots_df = pd.read_csv('cleaned_MayorData.csv')

# Convert each row into (preferences, weight)
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Filter ballots that have exactly the top 5 candidates (no more, no less)
exact_top5_ballots = [(prefs, weight) for (prefs, weight) in ballots if set(prefs) == top5_candidates]

# Calculate and print total weight of exact top 5 ballots
total_weight_top5 = sum(weight for _, weight in exact_top5_ballots)
print("Total weight of exact top 5 ballots:", total_weight_top5)
print("Number of ballots with exactly the top 5 candidates:", len(exact_top5_ballots))

def has_bad_adjacency(prefs):
    adams = "217572"     # 1
    garcia = "219978"    # 4
    yang = "217796"      # 2
    wiley = "219469"     # 3
    stringer = "217605"   # 5

    # --- Triplet rule first ---
   
    triplet_constraints = [
        {adams, yang, wiley},
        {yang, wiley, garcia},
        {wiley, garcia, stringer}
    ]

    top3 = prefs[:3]
    top3_set = set(top3)

    # First rule: top 3 must match a triplet (in any order)
    if top3_set not in triplet_constraints:
        return True

    # Triplet "middle" rule: middle candidate must not be ranked 3rd
    triplet_tuples = [
        (adams, yang, wiley),
        (yang, wiley, garcia),
        (wiley, garcia, stringer)
    ]
    for a, b, c in triplet_tuples:
        if {a, b, c} == top3_set:
            if prefs[2] == b:
                return True



    return False




# Apply filtering
filtered_ballots = [
    (prefs, weight)
    for (prefs, weight) in exact_top5_ballots
    if not has_bad_adjacency(prefs)
]

# Calculate and print total weight after filtering
total_weight_filtered = sum(weight for _, weight in filtered_ballots)
print("Total weight after removing bad adjacencies:", total_weight_filtered)
print("Number of ballots after removing bad adjacencies:", len(filtered_ballots))

# Prepare output data with names instead of IDs
data = []
for prefs, weight in filtered_ballots:
    row = {
        "Candidate_1": id_to_name[prefs[0]],
        "Candidate_2": id_to_name[prefs[1]],
        "Candidate_3": id_to_name[prefs[2]],
        "Candidate_4": id_to_name[prefs[3]],
        "Candidate_5": id_to_name[prefs[4]],
        "Weight": weight
    }
    data.append(row)

# Create a DataFrame and save to CSV
df_filtered = pd.DataFrame(data)
output_file = "filtered_top5_ballots.csv"
df_filtered.to_csv(output_file, index=False)
print(f"Filtered ballots saved to {output_file}")


Total weight of exact top 5 ballots: 17295
Number of ballots with exactly the top 5 candidates: 120
Total weight after removing bad adjacencies: 3896
Number of ballots after removing bad adjacencies: 24
Filtered ballots saved to filtered_top5_ballots.csv


In [60]:
#single peaked ballots, order Adams, Garica, Yang, Wiley, Morales
import pandas as pd

# Candidate ID to name mapping
id_to_name = {
    "217572": "Adams (#1)",
    "218491": "Morales (#5)",
    "219469": "Wiley (#4)",
    "219978": "Garcia (#2)",
    "217796": "Yang (#3)"
}

allowed_adjacent_pairs = {
    ("217572", "219978"),  # Adams - Garcia (1-2)
    ("219978", "217572"),  # Garcia - Adams (2-1)
    ("219978", "217796"),  # Garcia - Yang (2-3)
    ("217796", "219978"),  # Yang - Garcia (3-2)
    ("217796", "219469"),  # Yang - Wiley (3-4)
    ("219469", "217796"),  # Wiley - Yang (4-3)
    ("219469", "218491"),  # Wiley - Morales (4-5)
    ("218491", "219469"),  # Morales - Wiley (5-4)
}

id_to_rank = {
        "217572": 1,  # Adams
        "219978": 2,  # Garcia
        "217796": 3,  # Yang
        "219469": 4,  # Wiley
        "218491": 5   # Morales
     }  


top5_ids = set(id_to_name.keys())



# Load ballots from full data
df = pd.read_csv("aggregated_voting_NYCMayor.csv")

# Assumes last column is "Weight"
ranking_columns = df.columns[:-1]
weight_column = df.columns[-1]

# Convert to (prefs, weight)
ballots = []
for _, row in df.iterrows():
    
    raw_prefs = [
    str(x).strip() if pd.notna(x) and str(x).strip().lower() != 'null' else ''
    for x in row[ranking_columns]
    ]

    weight = row[weight_column]

    # Extract which of the top5 are ranked
    top5_ranked = [cand for cand in raw_prefs if cand in top5_ids]

    # Case 1: exactly one top5 candidate ranked, and all others are ''
    if len(top5_ranked) == 1 and raw_prefs[0] in top5_ids and all(p == '' for p in raw_prefs[1:]):
        prefs = [raw_prefs[0]] + [''] * 4
        ballots.append((prefs, weight))
    # Case 2: exactly two top5 candidates ranked, in an allowed adjacent pair
    elif (
        len(top5_ranked) == 2 and
        raw_prefs[0] in top5_ids and
        raw_prefs[1] in top5_ids and
        all(p == '' for p in raw_prefs[2:]) and
        (raw_prefs[0], raw_prefs[1]) in allowed_adjacent_pairs
    ):
        prefs = raw_prefs[:2] + [''] * 3
        ballots.append((prefs, weight))


    # Case 3: exactly the full top 5 candidates are present, no more, no less
    elif set(top5_ranked) == top5_ids and len(top5_ranked) == 5:
        prefs = [p for p in raw_prefs if p in top5_ids]  # exclude other candidates
        ballots.append((prefs, weight))

    # Case 4: exactly 3 top-5 candidates in positions 1-3, last two are blank/null, and it passes triplet rule
    elif (
        len(top5_ranked) == 3 and
        all(p in top5_ids for p in raw_prefs[:3]) and
        all(p == '' for p in raw_prefs[3:]) and
        not fails_triplet_rule(raw_prefs[:5])  # pass full prefs to rule
    ):
        prefs = raw_prefs[:3] + [''] * 2
        ballots.append((prefs, weight))

 
    # Case 5: 4 top-5s in positions 1–4, 5th is blank/null, and they form an ordered group of 4 consecutive ranks
    elif (
        len(top5_ranked) == 4 and
        all(p in top5_ids for p in raw_prefs[:4]) and
        raw_prefs[4] == ''
    ):
        # Convert candidate IDs to rank numbers
        ranks = sorted([id_to_rank[p] for p in raw_prefs[:4]])
        if all(ranks[i] + 1 == ranks[i + 1] for i in range(3)):  # check for consecutive
            prefs = raw_prefs[:4] + ['']
            ballots.append((prefs, weight))





# --- Triplet rule ---
def fails_triplet_rule(prefs):
    adams = "217572"
    garcia = "219978"
    yang = "217796"
    wiley = "219469"
    morales = "218491"

    triplet_sets = [
        {adams, garcia, yang},
        {garcia, yang, wiley},
        {yang, wiley, morales}
    ]
    triplet_tuples = [
        (adams, garcia, yang),
        (garcia, yang, wiley),
        (yang, wiley, morales)
    ]

    top5_in_prefs = [p for p in prefs if p in top5_ids]
    if len(top5_in_prefs) < 3:
        return False

    top3 = prefs[:3]
    if any(x == '' for x in top3):  # skip triplet rule for single-candidate ballots
        return False

    top3_set = set(top3)

    if top3_set not in triplet_sets:
        return True  # doesn't match any valid triplet

    for a, b, c in triplet_tuples:
        if {a, b, c} == top3_set:
            if prefs[2] == b:
                return True  # middle candidate is in 3rd place

    return False

# Apply triplet rule
filtered_ballots = [(prefs, weight) for prefs, weight in ballots if not fails_triplet_rule(prefs)]

# Totals after filtering
total_weight_filtered = sum(weight for _, weight in filtered_ballots)
print("Total weight after removing bad adjacencies:", total_weight_filtered)
print("Number of ballots after removing bad adjacencies:", len(filtered_ballots))

# Output formatting
output_data = []
for prefs, weight in filtered_ballots:
    row = {
        "Candidate_1": id_to_name.get(prefs[0], ''),
        "Candidate_2": id_to_name.get(prefs[1], ''),
        "Candidate_3": id_to_name.get(prefs[2], ''),
        "Candidate_4": id_to_name.get(prefs[3], ''),
        "Candidate_5": id_to_name.get(prefs[4], ''),
        "Weight": weight
    }
    output_data.append(row)

# Save to CSV
df_output = pd.DataFrame(output_data)
df_output.to_csv("triplet_filtered_ballots.csv", index=False)
print("Filtered ballots saved to triplet_filtered_ballots.csv")


Total weight after removing bad adjacencies: 150913
Number of ballots after removing bad adjacencies: 65
Filtered ballots saved to triplet_filtered_ballots.csv


In [64]:
#single peaked order adams, yang, wiley garica, stringer
import pandas as pd

# Candidate ID to name mapping
id_to_name = {
    "217572": "Adams (#1)",
    "217605": "Stringer (#5)",
    "219469": "Wiley (#3)",
    "219978": "Garcia (#4)",
    "217796": "Yang (#2)"
}

allowed_adjacent_pairs = {
    ("217572", "217796"),  # Adams - Yang (1-2)
    ("217796", "217572"),  # Yang - Adams (2-1)
    ("219469", "217796"),  # Wiley - Yang (2-3)
    ("217796", "219469"),  # Yang - Wiley (3-2)
    ("219978", "219469"),  # Garica - Wiley (3-4)
    ("219469", "219978"),  # Wiley - Garica (4-3)
    ("219978", "217605"),  # Garica - Stringer (4-5)
    ("217605", "219978"),  # Stringer - Garcia (5-4)
}

id_to_rank = {
        "217572": 1,  # Adams
        "219978": 4,  # Garcia
        "217796": 2,  # Yang
        "219469": 3,  # Wiley
        "217605": 5   # Stringer
     }  


top5_ids = set(id_to_name.keys())



# Load ballots from full data
df = pd.read_csv("aggregated_voting_NYCMayor.csv")

# Assumes last column is "Weight"
ranking_columns = df.columns[:-1]
weight_column = df.columns[-1]

# Convert to (prefs, weight)
ballots = []
for _, row in df.iterrows():
    
    raw_prefs = [
    str(x).strip() if pd.notna(x) and str(x).strip().lower() != 'null' else ''
    for x in row[ranking_columns]
    ]

    weight = row[weight_column]

    # Extract which of the top5 are ranked
    top5_ranked = [cand for cand in raw_prefs if cand in top5_ids]

    # Case 1: exactly one candidate ranked (top-5 or not), and all others are ''
    if sum(p != '' for p in raw_prefs) == 1:
        prefs = [p if p != '' else '' for p in raw_prefs]
        while len(prefs) < 5:
            prefs.append('')
        ballots.append((prefs, weight))


    # Case 2: exactly two top5 candidates ranked, in an allowed adjacent pair
    elif (
        len(top5_ranked) == 2 and
        raw_prefs[0] in top5_ids and
        raw_prefs[1] in top5_ids and
        all(p == '' for p in raw_prefs[2:]) and
        (raw_prefs[0], raw_prefs[1]) in allowed_adjacent_pairs
    ):
        prefs = raw_prefs[:2] + [''] * 3
        ballots.append((prefs, weight))


    # Case 3: exactly the full top 5 candidates are present, no more, no less
    elif set(top5_ranked) == top5_ids and len(top5_ranked) == 5:
        prefs = [p for p in raw_prefs if p in top5_ids]  # exclude other candidates
        ballots.append((prefs, weight))

    # Case 4: exactly 3 top-5 candidates in positions 1-3, last two are blank/null, and it passes triplet rule
    elif (
        len(top5_ranked) == 3 and
        all(p in top5_ids for p in raw_prefs[:3]) and
        all(p == '' for p in raw_prefs[3:]) and
        not fails_triplet_rule(raw_prefs[:5])  # pass full prefs to rule
    ):
        prefs = raw_prefs[:3] + [''] * 2
        ballots.append((prefs, weight))

 
    # Case 5: 4 top-5s in positions 1–4, 5th is blank/null, and they form an ordered group of 4 consecutive ranks
    elif (
        len(top5_ranked) == 4 and
        all(p in top5_ids for p in raw_prefs[:4]) and
        raw_prefs[4] == ''
    ):
        # Convert candidate IDs to rank numbers
        ranks = sorted([id_to_rank[p] for p in raw_prefs[:4]])
        if all(ranks[i] + 1 == ranks[i + 1] for i in range(3)):  # check for consecutive
            prefs = raw_prefs[:4] + ['']
            ballots.append((prefs, weight))





# --- Triplet rule ---
def fails_triplet_rule(prefs):
    adams = "217572"
    yang = "217796"
    wiley = "219469"
    garcia = "219978"
    stringer = "217605"

    triplet_sets = [
        {adams, yang, wiley},
        {yang, wiley, garcia},
        {wiley, garcia, stringer}
    ]
    triplet_tuples = [
        (adams, yang, wiley),
        (yang, wiley, garcia),
        (wiley, garcia, stringer)
    ]

    top5_in_prefs = [p for p in prefs if p in top5_ids]
    if len(top5_in_prefs) < 3:
        return False

    top3 = prefs[:3]
    if any(x == '' for x in top3):  # skip triplet rule for single-candidate ballots
        return False

    top3_set = set(top3)

    if top3_set not in triplet_sets:
        return True  # doesn't match any valid triplet

    for a, b, c in triplet_tuples:
        if {a, b, c} == top3_set:
            if prefs[2] == b:
                return True  # middle candidate is in 3rd place

    return False

# Apply triplet rule
filtered_ballots = [(prefs, weight) for prefs, weight in ballots if not fails_triplet_rule(prefs)]

# Totals after filtering
total_weight_filtered = sum(weight for _, weight in filtered_ballots)
print("Total weight after removing bad adjacencies:", total_weight_filtered)
print("Number of ballots after removing bad adjacencies:", len(filtered_ballots))

# Output formatting
output_data = []
for prefs, weight in filtered_ballots:
    row = {
        "Candidate_1": id_to_name.get(prefs[0], ''),
        "Candidate_2": id_to_name.get(prefs[1], ''),
        "Candidate_3": id_to_name.get(prefs[2], ''),
        "Candidate_4": id_to_name.get(prefs[3], ''),
        "Candidate_5": id_to_name.get(prefs[4], ''),
        "Weight": weight
    }
    output_data.append(row)

# Save to CSV
df_output = pd.DataFrame(output_data)
df_output.to_csv("triplet_filtered_ballots.csv", index=False)
print("Filtered ballots saved to triplet_filtered_ballots.csv")


Total weight after removing bad adjacencies: 183341
Number of ballots after removing bad adjacencies: 74
Filtered ballots saved to triplet_filtered_ballots.csv
