The purpose of this notebook is to identify the frequency of conflicts between metrics that purport to measure the degree to which a plan favors one party or another.

In [8]:
from typing import List, Dict, Any, Set

import os
import pandas as pd
from collections import defaultdict

from rdametrics import states, chambers, ensembles

Load the scores dataframe

In [9]:
scores_path: str = "~/local/beta-ensembles/prepackaged/scores/scores.parquet"
scores_df = pd.read_parquet(os.path.expanduser(scores_path))

Helper code

In [10]:
ensembles = [e for e in ensembles if e not in ["A1", "A2", "A3", "A4", "Rev*"]]

partisan_advantage_metrics: List[str] = ["disproportionality", "efficiency_gap"]

symmetry_metrics: List[str] = [
    "geometric_seats_bias",
    "seats_bias",
    "votes_bias",
]

packing_cracking_metrics: List[str] = [
    "mean_median_average_district",
    "lopsided_outcomes",
    "declination",
]

partisan_metrics: List[str] = partisan_advantage_metrics + symmetry_metrics + packing_cracking_metrics

def same_sign(a, b):
    if a == 0.0 or b == 0.0:
        return True
    
    if pd.isna(a) or pd.isna(b):
        return True

    return a * b > 0

def same_signs(list1, list2):
    # Combine both lists and filter out zero values and NaN values
    all_values = list1 + list2
    non_zero_defined_values = [x for x in all_values if x != 0 and not pd.isna(x)]
    
    # If no valid values or only one valid value, return True
    if len(non_zero_defined_values) <= 1:
        return True
    
    # Check if all valid values have the same sign
    all_positive = all(x > 0 for x in non_zero_defined_values)
    all_negative = all(x < 0 for x in non_zero_defined_values)
    
    return all_positive or all_negative



Count of rows with undefined declination. Get examples.

In [11]:
total: int = 0
undefined_decl: int = 0
combos: Set = set()
examples: List = list()

for index, row in scores_df.iterrows():
    if row["ensemble"] not in ensembles:
        continue

    total += 1
    if pd.isna(row["declination"]):
        undefined_decl += 1

        if len(examples) < 10:
            combo = (row["state"], row["chamber"], row["ensemble"])
            # if row["state"] not in states:
            if combo not in combos:
                combos.add(combo) 
                examples.append(f"{combo} / {row['map']:09d}")

print(f"# of plans w/ undefined declination: {undefined_decl:,} of {total:,} ({undefined_decl/total:.2%})")
for example in examples:
    print(f" - {example}")

# of plans w/ undefined declination: 75,736 of 4,619,979 (1.64%)
 - ('NY', 'congress', 'A0') / 000025000
 - ('NY', 'congress', 'Pop-') / 000005000
 - ('NY', 'congress', 'Pop+') / 000002500
 - ('NY', 'congress', 'B') / 000007500
 - ('NY', 'congress', 'C') / 000005000
 - ('NY', 'congress', 'D') / 000005000
 - ('NY', 'congress', 'R25') / 000005000
 - ('NY', 'congress', 'R50') / 000002500
 - ('NY', 'congress', 'R75') / 000002500
 - ('NY', 'congress', 'R100') / 000002500


Setup counters for various kinds of conflicts.

In [12]:
by_state: Dict[str, Any] = dict()

for xx in states:
    by_state[xx] = {
        "Vf": None,
        "total": 0
    }

conflicts: Dict[str, Any] = dict()
categories: List[str] = ["with_proportionality", "within_symmetry", "within_packing_cracking", "between_symmetry_and_packing_cracking"]
subcategories: Dict[str, List[str]] = {
    "within_symmetry": ["all", "seats_vs_votes"],
    "within_packing_cracking": ["all"],
    "between_symmetry_and_packing_cracking": ["all"]
}

conflicts["with_proportionality"] = dict()
for sc in partisan_metrics[1:]:
    conflicts["with_proportionality"][sc] = dict()
    for xx in states:
        for chamber in chambers:
            for ensemble in ensembles:
                combo = (xx, chamber, ensemble)
                conflicts["with_proportionality"][sc][combo] = {
                    "count": 0,
                    "example": None,
                    #
                    "value": None,
                    "disproportionality": None,
                    "delta": None,
                }

for c in categories[1:]:
    conflicts[c] = dict()
    for sc in subcategories[c]:
        conflicts[c][sc] = dict()
        for xx in states:
            for chamber in chambers:
                for ensemble in ensembles:
                    combo = (xx, chamber, ensemble)
                    conflicts[c][sc][combo] = {
                        "count": 0,
                        "example": None,
                    }

In [15]:
(by_state, conflicts)

({'FL': {'Vf': None, 'total': 0},
  'IL': {'Vf': None, 'total': 0},
  'MI': {'Vf': None, 'total': 0},
  'NC': {'Vf': None, 'total': 0},
  'NY': {'Vf': None, 'total': 0},
  'OH': {'Vf': None, 'total': 0},
  'WI': {'Vf': None, 'total': 0}},
 {'with_proportionality': {'efficiency_gap': {('FL',
     'congress',
     'A0'): {'count': 0, 'example': None, 'value': None, 'disproportionality': None, 'delta': None},
    ('FL', 'congress', 'Pop-'): {'count': 0,
     'example': None,
     'value': None,
     'disproportionality': None,
     'delta': None},
    ('FL', 'congress', 'Pop+'): {'count': 0,
     'example': None,
     'value': None,
     'disproportionality': None,
     'delta': None},
    ('FL', 'congress', 'B'): {'count': 0,
     'example': None,
     'value': None,
     'disproportionality': None,
     'delta': None},
    ('FL', 'congress', 'C'): {'count': 0,
     'example': None,
     'value': None,
     'disproportionality': None,
     'delta': None},
    ('FL', 'congress', 'D'): {'c

Count instances where the scores conflict. Keep track by state / chamber / ensemble combination.

In [16]:
for index, row in scores_df.iterrows():
    if row["ensemble"] not in ensembles:
        continue

    xx, chamber, ensemble = (row["state"], row["chamber"], row["ensemble"])
    combo = (xx, chamber, ensemble)

    by_state[xx]["total"] += 1
    if by_state[xx]["Vf"] is None:
        by_state[xx]["Vf"] = row["estimated_vote_pct"]

    if not same_sign(row["seats_bias"], row["geometric_seats_bias"]) or not same_sign(row["votes_bias"], row["geometric_seats_bias"]):
        conflicts["within_symmetry"]["all"][combo]["count"] += 1
        if conflicts["within_symmetry"]["all"][combo]["example"] is None:
            conflicts["within_symmetry"]["all"][combo]["example"] = f"{combo} / {row['map']:09d}"
    if not same_sign(row["seats_bias"], row["votes_bias"]):
        conflicts["within_symmetry"]["seats_vs_votes"][combo]["count"] += 1
        if conflicts["within_symmetry"]["seats_vs_votes"][combo]["example"] is None:
            conflicts["within_symmetry"]["seats_vs_votes"][combo]["example"] = f"{combo} / {row['map']:09d}"

    if not same_sign(row["mean_median_average_district"], row["lopsided_outcomes"]) or not same_sign(row["mean_median_average_district"], row["declination"]) or not same_sign(row["lopsided_outcomes"], row["declination"]):  
        conflicts["within_packing_cracking"]["all"][combo]["count"] += 1
        if conflicts["within_packing_cracking"]["all"][combo]["example"] is None:
            conflicts["within_packing_cracking"]["all"][combo]["example"] = f"{combo} / {row['map']:09d}"

    if not same_signs([row["seats_bias"], row["votes_bias"], row["geometric_seats_bias"]], [row["mean_median_average_district"], row["lopsided_outcomes"], row["declination"]]):
        conflicts["between_symmetry_and_packing_cracking"]["all"][combo]["count"] += 1
        if conflicts["between_symmetry_and_packing_cracking"]["all"][combo]["example"] is None:
            conflicts["between_symmetry_and_packing_cracking"]["all"][combo]["example"] = f"{combo} / {row['map']:09d}"

    for i, sc in enumerate(partisan_metrics[1:]):
        if same_sign(row[sc], row["disproportionality"]):
            continue
        
        else:
            conflicts["with_proportionality"][sc][combo]["count"] += 1
            delta: float = abs(row[sc] - row["disproportionality"])

            if conflicts["with_proportionality"][sc][combo]["example"] is None or delta > conflicts["with_proportionality"][sc][combo]["delta"]:
                conflicts["with_proportionality"][sc][combo]["example"] = f"{combo} / {row['map']:09d}"
                conflicts["with_proportionality"][sc][combo]["value"] = row[sc]
                conflicts["with_proportionality"][sc][combo]["disproportionality"] = row["disproportionality"]
                conflicts["with_proportionality"][sc][combo]["delta"] = delta

Aggregate the results by state

In [17]:
conflicts_summary: Dict[str, Any] = dict()

for c in categories[1:]:
    conflicts_summary[c] = dict()
    for sc in subcategories[c]:
        conflicts_summary[c][sc] = dict()
        for xx in states:
            conflicts_summary[c][sc][xx] = {
                "count": 0,
                "conflict-rate": None,
                "example": None,
            }

conflicts_summary["with_proportionality"] = dict()
for sc in partisan_metrics[1:]:
    conflicts_summary["with_proportionality"][sc] = dict()
    for xx in states:
        conflicts_summary["with_proportionality"][sc][xx] = {
            "count": 0,
            "conflict-rate": None,
            "example": None,
            "value": None,
            "disproportionality": None,
            "delta": None,
        }

for sc in partisan_metrics[1:]:
    for combo, _data in conflicts["with_proportionality"][sc].items():
        xx, chamber, ensemble = combo
        conflicts_summary["with_proportionality"][sc][xx]["count"] += _data["count"]
        if conflicts_summary["with_proportionality"][sc][xx]["example"] is None or (_data["delta"] is not None and (_data["delta"] > conflicts_summary["with_proportionality"][sc][xx]["delta"])):
            conflicts_summary["with_proportionality"][sc][xx]["example"] = _data["example"]
            conflicts_summary["with_proportionality"][sc][xx]["value"] = _data["value"]
            conflicts_summary["with_proportionality"][sc][xx]["disproportionality"] = _data["disproportionality"]
            conflicts_summary["with_proportionality"][sc][xx]["delta"] = _data["delta"]

for sc in partisan_metrics[1:]:
    for xx in states:
        conflicts_summary["with_proportionality"][sc][xx]["conflict-rate"] = conflicts_summary["with_proportionality"][sc][xx]["count"] / by_state[xx]["total"]

for c in categories[1:]:
    for sc in subcategories[c]:
        for combo, _data in conflicts[c][sc].items():
            xx, chamber, ensemble = combo
            conflicts_summary[c][sc][xx]["count"] += _data["count"]
            if conflicts_summary[c][sc][xx]["example"] is None:
                conflicts_summary[c][sc][xx]["example"] = _data["example"]

for c in categories[1:]:
    for sc in subcategories[c]:
        for xx in states:
            conflicts_summary[c][sc][xx]["conflict-rate"] = conflicts_summary[c][sc][xx]["count"] / by_state[xx]["total"]


Format for reporting & plotting

In [18]:
report: Dict[str, Dict[Any, Any]] = dict()

def partisan_balance(Vf: float) -> float:
    balance: float = Vf - 0.5
    if Vf < 0.5:
        balance = -balance
    return balance

for sc, _data in conflicts_summary["with_proportionality"].items():
    report[sc] = dict()

    first_xx: str = list(_data.keys())[0]
    report[sc]["total"] = by_state[first_xx]["total"]

    conflicts_by_state = [(xx, partisan_balance(by_state[xx]['Vf']), _info['conflict-rate']) for xx, _info in _data.items()]
    conflicts_by_state.sort(key=lambda x: x[1])

    max_xx = max(_data.items(), key=lambda x: x[1]['delta'])[0]

    report[sc]["example"] = _data[max_xx]["example"]
    report[sc]["value"] = _data[max_xx]["value"]
    report[sc]["disproportionality"] = _data[max_xx]["disproportionality"]
    report[sc]["delta"] = _data[max_xx]["delta"]

    report[sc]['by-state'] = conflicts_by_state

# report


Find conflict coverage

In [19]:
coverage = dict()
for sc in partisan_metrics[1:]:
    coverage[sc] = {
        "count": 0,
        "total": 0,
        "combos": 0
    }

    for xx, _data in conflicts_summary["with_proportionality"][sc].items():
        coverage[sc]["total"] = by_state[xx]["total"]
        coverage[sc]["count"] += _data["count"]
        coverage[sc]["combos"] += 1

# coverage

Report the percentage of conflicts by metric and state.

NB. I'm not totally totals correctly here. Hand updated the output.

In [20]:
print("Conflicts With Proportionality")
print("==============================")
print()

precision: int = 4
for sc, _data in report.items():
    name: str = _data["example"]
    sample: float = _data["value"]
    disp: float = _data["disproportionality"]

    by_metric_conflicts: int = coverage[sc]["count"]
    total_plans: int = coverage[sc]["total"]
    combos: int = coverage[sc]["combos"]

    by_state_output = [(state, f"{round(val1, precision):.2%}", f"{round(val2, precision):.2%}") for state, val1, val2 in _data['by-state']]


    print(f"{sc}:")
    print(
        f"  {by_metric_conflicts:,} of {total_plans:,} plans conflict ({by_metric_conflicts / total_plans:.1%} across {combos} of 231 = 7 x 3 x 11 state, chamber, and ensemble combinations."
    )
    if sc != 'declination':
        print(f"  Example: Map ({name}) has {sample:.2%} vs. disproportionality {disp:.2%}.")
    else:
        print(f"  Example: Map ({name}) has {sample:.4f} degrees vs. disproportionality {disp:.2%}.")
    print("  State, Deviation from 50-50 Balance, Conflict Rate")
    print(f"  {by_state_output}")
    print()

print()
print(
    f"Where a 'conflict' is when the sign of the metric is the *opposite* of the sign for simple 'disproportionality'."
)

Conflicts With Proportionality

efficiency_gap:
  680,747 of 659,997 plans conflict (103.1% across 7 of 231 = 7 x 3 x 11 state, chamber, and ensemble combinations.
  Example: Map (('NY', 'upper', 'A0') / 007460000) has 0.50% vs. disproportionality -14.28%.
  State, Deviation from 50-50 Balance, Conflict Rate
  [('NC', '0.57%', '1.04%'), ('WI', '0.68%', '0.35%'), ('FL', '1.63%', '2.94%'), ('MI', '1.88%', '2.06%'), ('OH', '3.62%', '0.74%'), ('IL', '8.17%', '71.68%'), ('NY', '14.78%', '24.33%')]

geometric_seats_bias:
  1,545,812 of 659,997 plans conflict (234.2% across 7 of 231 = 7 x 3 x 11 state, chamber, and ensemble combinations.
  Example: Map (('NY', 'congress', 'D') / 031252500) has 1.09% vs. disproportionality -24.66%.
  State, Deviation from 50-50 Balance, Conflict Rate
  [('NC', '0.57%', '2.90%'), ('WI', '0.68%', '0.56%'), ('FL', '1.63%', '18.23%'), ('MI', '1.88%', '2.68%'), ('OH', '3.62%', '9.84%'), ('IL', '8.17%', '100.00%'), ('NY', '14.78%', '99.99%')]

seats_bias:
  1,510,95

Report conflicts by metric and state sorted by partisan balance

In [21]:
precision: int = 4
for sc, _data in report.items():
    x = [(state, round(val1, precision), round(val2, precision)) for state, val1, val2 in _data['by-state']]

    formatted_items = [f"({state}, {float(val1):>6.2%}, {float(val2):>7.2%})" for state, val1, val2 in x]
    print(f"{sc:>30}: {', '.join(formatted_items)}")



                efficiency_gap: (NC,  0.57%,   1.04%), (WI,  0.68%,   0.35%), (FL,  1.63%,   2.94%), (MI,  1.88%,   2.06%), (OH,  3.62%,   0.74%), (IL,  8.17%,  71.68%), (NY, 14.78%,  24.33%)
          geometric_seats_bias: (NC,  0.57%,   2.90%), (WI,  0.68%,   0.56%), (FL,  1.63%,  18.23%), (MI,  1.88%,   2.68%), (OH,  3.62%,   9.84%), (IL,  8.17%, 100.00%), (NY, 14.78%,  99.99%)
                    seats_bias: (NC,  0.57%,   2.86%), (WI,  0.68%,   0.56%), (FL,  1.63%,  14.48%), (MI,  1.88%,   2.67%), (OH,  3.62%,   8.53%), (IL,  8.17%,  99.98%), (NY, 14.78%,  99.86%)
                    votes_bias: (NC,  0.57%,   2.82%), (WI,  0.68%,   0.55%), (FL,  1.63%,  14.35%), (MI,  1.88%,   2.67%), (OH,  3.62%,   8.47%), (IL,  8.17%,  99.98%), (NY, 14.78%,  99.85%)
  mean_median_average_district: (NC,  0.57%,   9.00%), (WI,  0.68%,   0.55%), (FL,  1.63%,   5.97%), (MI,  1.88%,   2.45%), (OH,  3.62%,  19.44%), (IL,  8.17%,  99.92%), (NY, 14.78%, 100.00%)
             lopsided_outcomes: (NC,  0.

Report other conflicts

In [23]:
report2: Dict[str, Dict[Any, Any]] = dict()

for c in categories[1:]:
    report2[c] = dict()
    for sc, _data in conflicts_summary[c].items():
        report2[c][sc] = dict()

        first_xx: str = list(_data.keys())[0]
        report2[c][sc]["total"] = by_state[first_xx]["total"]

        conflicts_by_state = [(xx, partisan_balance(by_state[xx]['Vf']), _info['conflict-rate']) for xx, _info in _data.items()]
        conflicts_by_state.sort(key=lambda x: x[1])

        report2[c][sc]["example"] = _data[max_xx]["example"]
        report2[c][sc]['by-state'] = conflicts_by_state

In [24]:
titles: Dict[str, str] = {
    "with_proportionality": "N/A", 
    "within_symmetry": "Conflicts Within Symmetry",
    "within_packing_cracking": "Conflicts Within Packing & Cracking",
    "between_symmetry_and_packing_cracking": "Conflicts Between Symmetry & Packing & Cracking"
}

for c in categories[1:]:
    print(titles[c])
    print("=" * len(titles[c]))

    for sc, _data in report2[c].items():
        by_state_output = [(state, f"{round(val1, precision):.2%}", f"{round(val2, precision):.2%}") for state, val1, val2 in _data['by-state']]

        print(f"Subcategory: {sc}")
        print(f"Example: {_data['example']}")
        # print(f"Total: {_data['total']}")
        print("State, Deviation from 50-50 Balance, Conflict Rate")
        print(f"{by_state_output}")
        print()

    print()


Conflicts Within Symmetry
Subcategory: all
Example: ('NY', 'congress', 'A0') / 014170000
State, Deviation from 50-50 Balance, Conflict Rate
[('NC', '0.57%', '0.03%'), ('WI', '0.68%', '0.01%'), ('FL', '1.63%', '3.60%'), ('MI', '1.88%', '0.01%'), ('OH', '3.62%', '2.73%'), ('IL', '8.17%', '0.02%'), ('NY', '14.78%', '0.14%')]

Subcategory: seats_vs_votes
Example: None
State, Deviation from 50-50 Balance, Conflict Rate
[('NC', '0.57%', '0.00%'), ('WI', '0.68%', '0.00%'), ('FL', '1.63%', '0.00%'), ('MI', '1.88%', '0.00%'), ('OH', '3.62%', '0.00%'), ('IL', '8.17%', '0.00%'), ('NY', '14.78%', '0.00%')]


Conflicts Within Packing & Cracking
Subcategory: all
Example: ('NY', 'congress', 'C') / 036385000
State, Deviation from 50-50 Balance, Conflict Rate
[('NC', '0.57%', '11.79%'), ('WI', '0.68%', '0.47%'), ('FL', '1.63%', '18.74%'), ('MI', '1.88%', '0.32%'), ('OH', '3.62%', '70.08%'), ('IL', '8.17%', '4.98%'), ('NY', '14.78%', '0.02%')]


Conflicts Between Symmetry & Packing & Cracking
Subcategor