In [1]:
import pandas as pd
import numpy as np
import glob

In [2]:
df = pd.read_csv('aggregated_voting_NYCMayor.csv')
df.head()

Unnamed: 0,1st,2nd,3rd,4th,5th,count
0,217572,Null,Null,Null,Null,64274
1,217796,Null,Null,Null,Null,20566
2,219469,Null,Null,Null,Null,12771
3,219978,Null,Null,Null,Null,10258
4,217572,219469,Null,Null,Null,9833


In [3]:
df_clean = df[~df.eq("undervote").any(axis=1)]
df_clean = df[~df.eq("overvote").any(axis=1)]
df_clean = df[~df.eq("Null").any(axis=1)]

In [4]:
df_clean.to_csv('cleaned_MayorData.csv', index=False)

In [5]:
df_clean.head()

Unnamed: 0,1st,2nd,3rd,4th,5th,count
90,219469,219978,217605,218491,221183,868
97,219469,219978,218491,217605,221183,827
98,219978,219469,217605,221183,217796,811
109,219469,219978,217605,221183,218491,724
123,219469,219978,217605,218491,217796,656


In [6]:
df = pd.read_csv('cleaned_MayorData.csv')

In [7]:
first_column = df.columns[0]

number_counts = df[first_column].value_counts()

print(f"Number of unique values in the first column: {len(number_counts)}")
print(number_counts)

Number of unique values in the first column: 14
1st
217572      9140
219469      7784
217796      7277
219978      6177
217605      4205
218491      4070
221183      3129
218127      3032
221141      2071
217654      2021
218117      1160
218922      1146
221458       969
Write-in     328
Name: count, dtype: int64


In [8]:
ballots_df = pd.read_csv('cleaned_MayorData.csv')


In [9]:

# Convert each row into a tuple: (preferences list, weight)
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates (from the ranked preferences)
candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        candidates.add(candidate)
candidates = list(candidates)

def count_votes(ballots, candidates):
    """
    Count the votes for the current round using weighted ballots.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

round_num = 1
while True:
    # Tally the weighted votes for this round.
    vote_counts = count_votes(ballots, candidates)
    total_votes = sum(vote_counts.values())
    print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")

    # Check if any candidate has a majority (>50% of active votes)
    for candidate, count in vote_counts.items():
        if count > total_votes / 2:
            print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
            winner = candidate
            break
    else:
        # No candidate has a majority; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            break

        round_num += 1
        continue
    break

Round 1 vote counts: {'218491': 13480, '217605': 23057, '219978': 91940, '217796': 44783, '218117': 2290, '217654': 3647, '221141': 3901, '218922': 1400, '221458': 1124, 'Write-in': 356, '218127': 11863, '217572': 120901, '219469': 103825, '221183': 12217} (Total active votes: 434784)
Eliminated in round 1: ['Write-in']

Round 2 vote counts: {'218491': 13505, '217605': 23092, '219978': 91985, '217796': 44847, '218117': 2315, '217654': 3655, '221141': 3910, '218922': 1414, '221458': 1135, '218127': 11885, '217572': 120957, '219469': 103858, '221183': 12226} (Total active votes: 434784)
Eliminated in round 2: ['221458']

Round 3 vote counts: {'218491': 13585, '217605': 23149, '219978': 92053, '217796': 44964, '218117': 2354, '217654': 3697, '221141': 3931, '218922': 1479, '218127': 11984, '217572': 121230, '219469': 104054, '221183': 12304} (Total active votes: 434784)
Eliminated in round 3: ['218922']

Round 4 vote counts: {'218491': 13817, '217605': 23226, '219978': 92232, '217796': 45

In [11]:
import random 
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the ranked preferences.
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    For each ballot, the vote is assigned to the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50%).
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run simulation with full ballots
# ------------------------------
print("Running IRV simulation with full ballots:")
winner_full = simulate_irv(ballots, original_candidates)

# ------------------------------
# Randomly eliminate an X percent of the ballots and simulate again.
# ------------------------------
elimination_percent = 0.1  # Change this value for a different percentage (e.g., 0.1 for 10%).
num_ballots = len(ballots)
num_to_remove = int(num_ballots * elimination_percent)

# Randomly select ballots to remove. The remaining ballots will be used for the simulation.
ballots_reduced = random.sample(ballots, num_ballots - num_to_remove)

print("\nRunning IRV simulation with reduced ballots (after randomly eliminating {:.0f}% of ballots):".format(elimination_percent * 100))
winner_reduced = simulate_irv(ballots_reduced, original_candidates)

# ------------------------------
# Compare the results.
# ------------------------------
print("\nComparison of Results:")
print("Winner with full ballots:", winner_full)
print("Winner with reduced ballots:", winner_reduced)

Running IRV simulation with full ballots:
Round 1 vote counts: {'218491': 13480, '217605': 23057, '219978': 91940, '217796': 44783, '218117': 2290, '217654': 3647, '221141': 3901, '218922': 1400, '221458': 1124, 'Write-in': 356, '218127': 11863, '217572': 120901, '219469': 103825, '221183': 12217} (Total active votes: 434784)
Eliminated in round 1: ['Write-in']

Round 2 vote counts: {'218491': 13505, '217605': 23092, '219978': 91985, '217796': 44847, '218117': 2315, '217654': 3655, '221141': 3910, '218922': 1414, '221458': 1135, '218127': 11885, '217572': 120957, '219469': 103858, '221183': 12226} (Total active votes: 434784)
Eliminated in round 2: ['221458']

Round 3 vote counts: {'218491': 13585, '217605': 23149, '219978': 92053, '217796': 44964, '218117': 2354, '217654': 3697, '221141': 3931, '218922': 1479, '218127': 11984, '217572': 121230, '219469': 104054, '221183': 12304} (Total active votes: 434784)
Eliminated in round 3: ['218922']

Round 4 vote counts: {'218491': 13817, '217

In [12]:
import pandas as pd
import random  # Make sure to import the random module

# Convert each row into a tuple: (preferences list, weight)
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the ranked preferences.
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

# ------------------------------
# Define helper functions for IRV simulation
# ------------------------------
def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates, verbose=False):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    
    If verbose is True, prints detailed round-by-round information.
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        if verbose:
            print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50% of active votes)
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                if verbose:
                    print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        if verbose:
            print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            if verbose:
                print("Election resulted in a tie among the remaining candidates:")
                print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run multiple simulations and record the results
# ------------------------------
num_simulations = 1000  # Set this to the desired number of simulations.
elimination_percent = 0.9  # Percent (e.g., 0.1 for 10%) of ballots to eliminate for the reduced simulation.

results = []  # List to store each simulation's results.

for sim in range(num_simulations):
    print(f"\nRunning simulation {sim + 1}...")
    
    # Run simulation with full ballots.
    winner_full = simulate_irv(ballots, original_candidates, verbose=False)
    
    # For the reduced simulation, randomly eliminate the specified percent of ballots.
    num_ballots = len(ballots)
    num_to_remove = int(num_ballots * elimination_percent)
    ballots_reduced = random.sample(ballots, num_ballots - num_to_remove)
    winner_reduced = simulate_irv(ballots_reduced, original_candidates, verbose=False)
    
    # Record the winners.
    results.append({
        'simulation': sim + 1,
        'winner_full': winner_full,
        'winner_reduced': winner_reduced
    })

# ------------------------------
# Save the simulation results to a CSV file
# ------------------------------
results_df = pd.DataFrame(results)
results_df.to_csv('simulation_results_10.csv', index=False)
print("\nSimulation results saved to simulation_results_10.csv")


Running simulation 1...

Running simulation 2...

Running simulation 3...

Running simulation 4...

Running simulation 5...

Running simulation 6...

Running simulation 7...

Running simulation 8...

Running simulation 9...

Running simulation 10...

Running simulation 11...

Running simulation 12...

Running simulation 13...

Running simulation 14...

Running simulation 15...

Running simulation 16...

Running simulation 17...

Running simulation 18...

Running simulation 19...

Running simulation 20...

Running simulation 21...

Running simulation 22...

Running simulation 23...

Running simulation 24...

Running simulation 25...

Running simulation 26...

Running simulation 27...

Running simulation 28...

Running simulation 29...

Running simulation 30...

Running simulation 31...

Running simulation 32...

Running simulation 33...

Running simulation 34...

Running simulation 35...

Running simulation 36...

Running simulation 37...

Running simulation 38...

Running simulation 3

In [13]:
ballots = [(list(row[:3]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the first 3 ranked preferences.
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

# ------------------------------
# Define helper functions for IRV simulation
# ------------------------------
def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50% of active votes)
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run simulation with truncated ballots
# ------------------------------
print("Running IRV simulation with truncated ballots (top 3 preferences only):")
winner = simulate_irv(ballots, original_candidates)
print("\nWinner with truncated ballots:", winner)

Running IRV simulation with truncated ballots (top 3 preferences only):
Round 1 vote counts: {'218491': 13480, '217605': 23057, '219978': 91940, '217654': 3647, '217796': 44783, '218117': 2290, '221141': 3901, '218922': 1400, '221458': 1124, 'Write-in': 356, '218127': 11863, '217572': 120901, '219469': 103825, '221183': 12217} (Total active votes: 434784)
Eliminated in round 1: ['Write-in']

Round 2 vote counts: {'218491': 13505, '217605': 23092, '219978': 91985, '217654': 3655, '217796': 44847, '218117': 2315, '221141': 3910, '218922': 1414, '221458': 1135, '218127': 11885, '217572': 120957, '219469': 103858, '221183': 12226} (Total active votes: 434784)
Eliminated in round 2: ['221458']

Round 3 vote counts: {'218491': 13585, '217605': 23149, '219978': 92053, '217654': 3697, '217796': 44964, '218117': 2354, '221141': 3931, '218922': 1479, '218127': 11984, '217572': 121230, '219469': 104054, '221183': 12304} (Total active votes: 434784)
Eliminated in round 3: ['218922']

Round 4 vote 

In [18]:
import pandas as pd
import random

# ------------------------------
# Set the number of ranked candidates to consider (Change this value as needed)
# ------------------------------
num_ranked_candidates = 3  # Change this value to the desired number of ranked candidates

# ------------------------------
# Load and prepare the ballots data
# ------------------------------
ballots_df = pd.read_csv('exact_top5_ballots.csv')  # Replace with the actual file path

# Convert each row into a tuple: (limited candidate preferences, weight)
ballots = [(list(row[:num_ranked_candidates]), row[-1]) for row in ballots_df.values.tolist()]

# Identify all unique candidates from the selected number of ranked preferences
original_candidates = set()
for ballot, weight in ballots:
    for candidate in ballot:
        original_candidates.add(candidate)
original_candidates = list(original_candidates)

# ------------------------------
# Define helper functions for IRV simulation
# ------------------------------
def count_votes(ballots, candidates):
    """
    Count the weighted votes for the current round.
    Each ballot contributes its weight to the vote of the highest-ranked candidate
    that hasn't been eliminated.
    """
    vote_counts = {candidate: 0 for candidate in candidates}
    for ballot, weight in ballots:
        for candidate in ballot:
            if candidate in candidates:
                vote_counts[candidate] += weight
                break  # Only count the top remaining candidate.
    return vote_counts

def simulate_irv(ballots, original_candidates):
    """
    Simulate an IRV election on the given ballots.
    Returns the winning candidate (or None if there's a tie).
    """
    # Work on a copy of the candidate list.
    candidates = list(original_candidates)
    round_num = 1
    while True:
        vote_counts = count_votes(ballots, candidates)
        total_votes = sum(vote_counts.values())
        print(f"Round {round_num} vote counts: {vote_counts} (Total active votes: {total_votes})")
        
        # Check if any candidate has a majority (>50% of active votes)
        for candidate, count in vote_counts.items():
            if count > total_votes / 2:
                print(f"\nWinner: {candidate} with {count} votes (majority of {total_votes} active votes)!")
                return candidate
        
        # No majority found; eliminate candidate(s) with the fewest votes.
        min_votes = min(vote_counts.values())
        eliminated = [candidate for candidate, count in vote_counts.items() if count == min_votes]
        print(f"Eliminated in round {round_num}: {eliminated}\n")
        
        # Remove the eliminated candidate(s) from further consideration.
        for candidate in eliminated:
            candidates.remove(candidate)
        
        # If no candidates remain or all remaining candidates are tied, declare a tie.
        if not candidates or len(vote_counts) == len(eliminated):
            print("Election resulted in a tie among the remaining candidates:")
            print(vote_counts)
            return None
        
        round_num += 1

# ------------------------------
# Run simulation with predefined ranked candidates
# ------------------------------
print(f"Running IRV simulation with top {num_ranked_candidates} preferences only:")
winner = simulate_irv(ballots, original_candidates)
print(f"\nWinner with top {num_ranked_candidates} ranked candidates:", winner)

Running IRV simulation with top 3 preferences only:
Round 1 vote counts: {217796: 1311, 217572: 2962, 219978: 1981, 219469: 2842, 218491: 477} (Total active votes: 9573)
Eliminated in round 1: [218491]

Round 2 vote counts: {217796: 1360, 217572: 3059, 219978: 2111, 219469: 3043} (Total active votes: 9573)
Eliminated in round 2: [217796]

Round 3 vote counts: {217572: 3518, 219978: 2638, 219469: 3417} (Total active votes: 9573)
Eliminated in round 3: [219978]

Round 4 vote counts: {217572: 4469, 219469: 4820} (Total active votes: 9289)

Winner: 219469 with 4820 votes (majority of 9289 active votes)!

Winner with top 3 ranked candidates: 219469


In [None]:
import pandas as pd
ballots_df = pd.read_csv('top5_ballots_named')
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]


In [20]:
import pandas as pd

# Define the top 5 candidates as a set (use your actual candidate IDs)
top5_candidates = {"217572", "219469", "219978", "217796", "218491"}

# Define adjacency exclusions as unordered pairs of candidate IDs
# These are pairs of candidates that should not appear next to each other on a ballot
bad_adjacencies = [
    ("217572", "218491"),  # Candidate Adams and Morales
    ("217572", "219469"),  # Candidate Adams and Wiley
    ("219978", "218491"),  # Candidate Garica and Morales
]

# Convert to a set for quick lookups (treating as unordered pairs)
bad_adjacency_set = set(frozenset(pair) for pair in bad_adjacencies)

# Load the cleaned Mayor data CSV
ballots_df = pd.read_csv('cleaned_MayorData.csv')

# Convert each row into (preferences, weight)
ballots = [(list(row[:-1]), row[-1]) for row in ballots_df.values.tolist()]

# Filter ballots that have exactly the top 5 candidates (no more, no less)
exact_top5_ballots = [(prefs, weight) for (prefs, weight) in ballots if set(prefs) == top5_candidates]

# Calculate and print total weight of exact top 5 ballots
total_weight_top5 = sum(weight for _, weight in exact_top5_ballots)
print("Total weight of exact top 5 ballots:", total_weight_top5)

print("Number of ballots with exactly the top 5 candidates:", len(exact_top5_ballots))

# Function to check for bad adjacencies
def has_bad_adjacency(prefs, bad_pairs):
    for i in range(len(prefs) - 1):
        pair = frozenset((prefs[i], prefs[i + 1]))
        if pair in bad_pairs:
            return True
    return False

# Filter out ballots with any of the bad adjacencies
filtered_ballots = [
    (prefs, weight)
    for (prefs, weight) in exact_top5_ballots
    if not has_bad_adjacency(prefs, bad_adjacency_set)
]

# Calculate and print total weight of filtered ballots
total_weight_filtered = sum(weight for _, weight in filtered_ballots)
print("Total weight after removing bad adjacencies:", total_weight_filtered)

print("Number of ballots after removing bad adjacencies:", len(filtered_ballots))

# Prepare for output
data = []
for prefs, weight in filtered_ballots:
    row = {
        "Candidate_1": prefs[0],
        "Candidate_2": prefs[1],
        "Candidate_3": prefs[2],
        "Candidate_4": prefs[3],
        "Candidate_5": prefs[4],
        "Weight": weight
    }
    data.append(row)

# Create a DataFrame and save to CSV
df_filtered = pd.DataFrame(data)
output_file = "filtered_top5_ballots.csv"
df_filtered.to_csv(output_file, index=False)

print(f"Filtered ballots saved to {output_file}")


Total weight of exact top 5 ballots: 9573
Number of ballots with exactly the top 5 candidates: 120
Total weight after removing bad adjacencies: 1893
Number of ballots after removing bad adjacencies: 20
Filtered ballots saved to filtered_top5_ballots.csv
