The purpose of this notebook is to identify the frequency of conflicts between metrics that purport to measure the degree to which a plan favors one party or another.

In [43]:
from typing import List, Dict, Any, Set

import os
import pandas as pd
# from collections import defaultdict

# from rdapy import DISTRICTS_BY_STATE
from rdametrics import states, chambers, ensembles

Load the scores dataframe

In [44]:
# Scores file location; the leading "~" is expanded to the user's home directory at read time.
scores_path: str = "~/local/beta-ensembles/prepackaged/scores/scores.parquet"
scores_df = pd.read_parquet(path=os.path.expanduser(scores_path))

Helper code

In [45]:
# Keep every imported ensemble except A1-A4 and Rev*.
ensembles = list(
    filter(lambda e: e not in ("A1", "A2", "A3", "A4", "Rev*"), ensembles)
)

# Two named families of metrics; both are also members of `partisan_metrics` below.
symmetry_metrics: List[str] = ["geometric_seats_bias", "seats_bias", "votes_bias"]

packing_cracking_metrics: List[str] = [
    "mean_median_average_district",
    "lopsided_outcomes",
    "declination",
]

# Full list of metrics. "disproportionality" comes first because later cells
# iterate `partisan_metrics[1:]` and compare each of those against it.
partisan_metrics: List[str] = [
    "disproportionality",
    "efficiency_gap",
    *symmetry_metrics,
    *packing_cracking_metrics,
]

def same_sign(a, b):
    """Tell whether two values agree in sign.

    A zero on either side counts as agreement with anything. Otherwise the
    values agree exactly when their product is positive.
    """
    either_is_zero = a == 0.0 or b == 0.0
    return True if either_is_zero else a * b > 0

Count instances where the scores conflict. Keep track by state / chamber / ensemble combination.

In [46]:
# Tally sign conflicts across every scored plan in the retained ensembles.
#   total     -- number of plans examined
#   by_state  -- per state: the first estimated vote share seen ("Vf") and counts
#                of plans whose related metrics disagree with one another in sign
#   by_metric -- per metric and (state, chamber, ensemble) combination: the number
#                of plans whose sign differs from disproportionality, plus the
#                conflicting plan with the largest absolute gap
total: int = 0

# Every metric after the first ("disproportionality") is compared against it.
compared_metrics: List[str] = partisan_metrics[1:]

by_state: Dict[str, Any] = {
    xx: {
        "Vf": None,
        "seats_vs_votes_bias": 0,
        "geometric_seats_bias_vs_others": 0,
        "packing_cracking_metrics": 0,
        # NOTE(review): nothing in this cell increments this counter, so it
        # stays 0 in every report -- confirm the intended definition and add it.
        "symmetry_vs_packing_cracking": 0,
    }
    for xx in states
}

by_metric: Dict[str, Dict[Any, Any]] = {
    m: {
        (xx, chamber, ensemble): {
            "conflicts": 0,
            "example": None,
            "value": None,
            "disproportionality": None,
            "delta": None,
        }
        for xx in states
        for chamber in chambers
        for ensemble in ensembles
    }
    for m in compared_metrics
}

# Set membership keeps the per-row ensemble filter constant-time inside the big loop.
included_ensembles = set(ensembles)

for _index, row in scores_df.iterrows():
    ensemble = row["ensemble"]
    if ensemble not in included_ensembles:
        continue

    xx, chamber = row["state"], row["chamber"]
    combo = (xx, chamber, ensemble)
    state_counts = by_state[xx]

    total += 1
    if state_counts["Vf"] is None:
        # The first plan seen for a state supplies its vote share.
        state_counts["Vf"] = row["estimated_vote_pct"]

    # Do the three symmetry metrics agree with one another?
    seats, votes, geometric = row["seats_bias"], row["votes_bias"], row["geometric_seats_bias"]
    if not same_sign(seats, votes):
        state_counts["seats_vs_votes_bias"] += 1
    if not (same_sign(seats, geometric) and same_sign(votes, geometric)):
        state_counts["geometric_seats_bias_vs_others"] += 1

    # Do the three packing & cracking metrics agree with one another?
    mean_median = row["mean_median_average_district"]
    lopsided = row["lopsided_outcomes"]
    declination = row["declination"]
    if not (
        same_sign(mean_median, lopsided)
        and same_sign(mean_median, declination)
        and same_sign(lopsided, declination)
    ):
        state_counts["packing_cracking_metrics"] += 1

    # Compare each metric with disproportionality and remember the widest gap.
    disproportionality = row["disproportionality"]
    for m in compared_metrics:
        value = row[m]
        if same_sign(value, disproportionality):
            continue

        entry = by_metric[m][combo]
        entry["conflicts"] += 1
        delta: float = abs(value - disproportionality)

        if entry["example"] is None or delta > entry["delta"]:
            entry["example"] = f"{combo} / {row['map']:09d}"
            entry["value"] = value
            entry["disproportionality"] = disproportionality
            entry["delta"] = delta

(total, by_state, by_metric)

(4619979,
 {'FL': {'Vf': 0.4837,
   'seats_vs_votes_bias': 0,
   'geometric_seats_bias_vs_others': 23783,
   'packing_cracking_metrics': 123699,
   'symmetry_vs_packing_cracking': 0},
  'IL': {'Vf': 0.5817,
   'seats_vs_votes_bias': 0,
   'geometric_seats_bias_vs_others': 101,
   'packing_cracking_metrics': 32868,
   'symmetry_vs_packing_cracking': 0},
  'MI': {'Vf': 0.5188,
   'seats_vs_votes_bias': 0,
   'geometric_seats_bias_vs_others': 64,
   'packing_cracking_metrics': 2125,
   'symmetry_vs_packing_cracking': 0},
  'NC': {'Vf': 0.4943,
   'seats_vs_votes_bias': 0,
   'geometric_seats_bias_vs_others': 191,
   'packing_cracking_metrics': 77822,
   'symmetry_vs_packing_cracking': 0},
  'NY': {'Vf': 0.6478,
   'seats_vs_votes_bias': 0,
   'geometric_seats_bias_vs_others': 948,
   'packing_cracking_metrics': 75853,
   'symmetry_vs_packing_cracking': 0},
  'OH': {'Vf': 0.4638,
   'seats_vs_votes_bias': 0,
   'geometric_seats_bias_vs_others': 18048,
   'packing_cracking_metrics': 462549,

Aggregate the results by state

In [48]:
# Roll the per-combination tallies up to one record per metric and state.
by_metric_and_state: Dict[str, Dict[Any, Any]] = {
    m: {
        xx: {
            "conflicts": 0,
            "conflict-rate": None,
            "example": None,
            "value": None,
            "disproportionality": None,
            "delta": None,
        }
        for xx in states
    }
    for m in partisan_metrics[1:]
}

# Pass 1: sum the conflicts and keep, per state, the example with the widest gap.
for m, combos_for_metric in by_metric.items():
    for combo, _data in combos_for_metric.items():
        state_record = by_metric_and_state[m][combo[0]]
        state_record["conflicts"] += _data["conflicts"]

        nothing_recorded_yet = state_record["example"] is None
        if nothing_recorded_yet or (
            _data["delta"] and _data["delta"] > state_record["delta"]
        ):
            for field in ("example", "value", "disproportionality", "delta"):
                state_record[field] = _data[field]

# Pass 2: turn the counts into rates, once for each state that has combinations.
# The denominator is the overall plan count `total`, not the state's own count.
# NOTE(review): confirm that an overall denominator is what "conflict-rate" should mean.
for m, combos_for_metric in by_metric.items():
    for xx in {combo[0] for combo in combos_for_metric}:
        state_record = by_metric_and_state[m][xx]
        state_record["conflict-rate"] = state_record["conflicts"] / total


by_metric_and_state


{'efficiency_gap': {'FL': {'conflicts': 19389,
   'conflict-rate': 0.004196772322991079,
   'example': "('FL', 'congress', 'A0') / 000797500",
   'value': -0.0042,
   'disproportionality': 0.0122,
   'delta': 0.0164},
  'IL': {'conflicts': 473093,
   'conflict-rate': 0.10240154771266276,
   'example': "('IL', 'congress', 'A0') / 000010000",
   'value': 0.0152,
   'disproportionality': -0.0665,
   'delta': 0.08170000000000001},
  'MI': {'conflicts': 13610,
   'conflict-rate': 0.002945900836345793,
   'example': "('MI', 'congress', 'A0') / 000012500",
   'value': 0.0183,
   'disproportionality': -0.0006,
   'delta': 0.0189},
  'NC': {'conflicts': 6854,
   'conflict-rate': 0.0014835565269885427,
   'example': "('NC', 'congress', 'A0') / 000242500",
   'value': -0.0023,
   'disproportionality': 0.0034,
   'delta': 0.0057},
  'NY': {'conflicts': 160598,
   'conflict-rate': 0.03476162986888036,
   'example': "('NY', 'upper', 'A0') / 007460000",
   'value': 0.005,
   'disproportionality': -0.

Format for reporting & plotting

In [49]:
report: Dict[str, Dict[Any, Any]] = {}


def partisan_balance(Vf: float) -> float:
    """Return how far a vote share lies from an even 50-50 split.

    The result is the absolute distance |Vf - 0.5|, so shares on either side
    of one half yield the same non-negative value.
    """
    return abs(Vf - 0.5)

# Build one report entry per metric: the overall plan count, the most extreme
# conflicting example across all states, and a per-state list of
# (state, distance of the state's vote share from 50-50, conflict rate)
# ordered from the most balanced state to the least balanced.
for m, _data in by_metric_and_state.items():
    conflicts_by_state = sorted(
        (
            (state, partisan_balance(by_state[state]["Vf"]), info["conflict-rate"])
            for state, info in _data.items()
        ),
        key=lambda item: item[1],
    )

    # A state with no conflicts for this metric has delta None, which cannot be
    # ordered against floats. Only states that recorded an example compete, and
    # `default=None` covers the case where no state recorded one.
    candidates = [info for info in _data.values() if info["delta"] is not None]
    widest = max(candidates, key=lambda info: info["delta"], default=None)

    report[m] = {
        "total": total,
        "example": None if widest is None else widest["example"],
        "value": None if widest is None else widest["value"],
        "disproportionality": None if widest is None else widest["disproportionality"],
        "delta": None if widest is None else widest["delta"],
        "by-state": conflicts_by_state,
    }

report


{'efficiency_gap': {'total': 4619979,
  'example': "('NY', 'upper', 'A0') / 007460000",
  'value': 0.005,
  'disproportionality': -0.1428,
  'delta': 0.14780000000000001,
  'by-state': [('NC', 0.005699999999999983, 0.0014835565269885427),
   ('WI', 0.006800000000000028, 0.0005028161383417544),
   ('FL', 0.01629999999999998, 0.004196772322991079),
   ('MI', 0.01880000000000004, 0.002945900836345793),
   ('OH', 0.03620000000000001, 0.001056281857558227),
   ('IL', 0.0817, 0.10240154771266276),
   ('NY', 0.14780000000000004, 0.03476162986888036)]},
 'geometric_seats_bias': {'total': 4619979,
  'example': "('NY', 'congress', 'D') / 031252500",
  'value': 0.0109,
  'disproportionality': -0.2466,
  'delta': 0.2575,
  'by-state': [('NC', 0.005699999999999983, 0.004144391132513806),
   ('WI', 0.006800000000000028, 0.0008058478187887867),
   ('FL', 0.01629999999999998, 0.02604578938562275),
   ('MI', 0.01880000000000004, 0.0038272901240460184),
   ('OH', 0.03620000000000001, 0.01406283448474549

Find conflict coverage

In [50]:
# Per metric: how many plans conflict with disproportionality, out of how many
# plans overall, and in how many (state, chamber, ensemble) combinations at
# least one conflict occurs.
coverage = dict()
for m in partisan_metrics[1:]:
    per_combo = list(by_metric[m].values())
    coverage[m] = {
        "conflicts": sum(data["conflicts"] for data in per_combo),
        "total": total,
        # Every combination is pre-initialised, so counting all of them would
        # always give the grid size; count only those that saw a conflict.
        "combos": sum(1 for data in per_combo if data["conflicts"] > 0),
    }

coverage

{'efficiency_gap': {'conflicts': 680747, 'total': 4619979, 'combos': 231},
 'geometric_seats_bias': {'conflicts': 1545812,
  'total': 4619979,
  'combos': 231},
 'seats_bias': {'conflicts': 1510956, 'total': 4619979, 'combos': 231},
 'votes_bias': {'conflicts': 1509410, 'total': 4619979, 'combos': 231},
 'mean_median_average_district': {'conflicts': 1566306,
  'total': 4619979,
  'combos': 231},
 'lopsided_outcomes': {'conflicts': 2042083, 'total': 4619979, 'combos': 231},
 'declination': {'conflicts': 1385292, 'total': 4619979, 'combos': 231}}

Report the percentage of conflicts by metric and state

In [51]:
print("Partisan Conflicts Summary")
print("==========================")

print()

precision: int = 4

# Size of the state x chamber x ensemble grid, derived from the lists in use so
# the sentence below stays correct if any of them changes.
n_states, n_chambers, n_ensembles = len(states), len(chambers), len(ensembles)
n_combos: int = n_states * n_chambers * n_ensembles

for m, _data in report.items():
    name: str = _data["example"]
    sample: float = _data["value"]
    disp: float = _data["disproportionality"]

    by_metric_conflicts: int = coverage[m]["conflicts"]
    total_plans: int = coverage[m]["total"]
    combos: int = coverage[m]["combos"]

    # Round first, then render both the balance and the rate as percentages.
    by_state_output = [
        (state, f"{round(val1, precision):.2%}", f"{round(val2, precision):.2%}")
        for state, val1, val2 in _data['by-state']
    ]

    print(f"{m}:")
    print(
        f"  {by_metric_conflicts:,} of {total_plans:,} plans conflict ({by_metric_conflicts / total_plans:.1%}) "
        f"across {combos} of {n_combos} = {n_states} x {n_chambers} x {n_ensembles} state, chamber, and ensemble combinations."
    )
    if name is None:
        # No example was recorded for this metric, so there is nothing to format.
        print("  Example: none (no conflicting plans).")
    elif m != 'declination':
        print(f"  Example: Map ({name}) has {sample:.2%} vs. disproportionality {disp:.2%}.")
    else:
        # Declination is printed as a plain number labelled degrees, not a percentage.
        print(f"  Example: Map ({name}) has {sample:.4f} degrees vs. disproportionality {disp:.2%}.")
    print("  State, Deviation from 50-50 Balance, Conflict Rate")
    print(f"  {by_state_output}")
    print()

print()
print(
    "Where a 'conflict' is when the sign of the metric is the *opposite* of the sign for simple 'disproportionality'."
)

Partisan Conflicts Summary

efficiency_gap:
  680,747 of 4,619,979 plans conflict (14.7% across 231 of 231 = 7 x 3 x 11 state, chamber, and ensemble combinations.
  Example: Map (('NY', 'upper', 'A0') / 007460000) has 0.50% vs. disproportionality -14.28%.
  State, Deviation from 50-50 Balance, Conflict Rate
  [('NC', '0.57%', '0.15%'), ('WI', '0.68%', '0.05%'), ('FL', '1.63%', '0.42%'), ('MI', '1.88%', '0.29%'), ('OH', '3.62%', '0.11%'), ('IL', '8.17%', '10.24%'), ('NY', '14.78%', '3.48%')]

geometric_seats_bias:
  1,545,812 of 4,619,979 plans conflict (33.5% across 231 of 231 = 7 x 3 x 11 state, chamber, and ensemble combinations.
  Example: Map (('NY', 'congress', 'D') / 031252500) has 1.09% vs. disproportionality -24.66%.
  State, Deviation from 50-50 Balance, Conflict Rate
  [('NC', '0.57%', '0.41%'), ('WI', '0.68%', '0.08%'), ('FL', '1.63%', '2.60%'), ('MI', '1.88%', '0.38%'), ('OH', '3.62%', '1.41%'), ('IL', '8.17%', '14.29%'), ('NY', '14.78%', '14.28%')]

seats_bias:
  1,510,956

In [52]:
# One line per metric: (state, distance from 50-50, conflict rate) for each state.
precision: int = 4
for m, _data in report.items():
    cells = []
    for state, val1, val2 in _data['by-state']:
        balance = round(val1, precision)
        rate = round(val2, precision)
        cells.append(f"({state}, {float(balance):>6.2%}, {float(rate):>7.2%})")

    print(f"{m:>30}: {', '.join(cells)}")



                efficiency_gap: (NC,  0.57%,   0.15%), (WI,  0.68%,   0.05%), (FL,  1.63%,   0.42%), (MI,  1.88%,   0.29%), (OH,  3.62%,   0.11%), (IL,  8.17%,  10.24%), (NY, 14.78%,   3.48%)
          geometric_seats_bias: (NC,  0.57%,   0.41%), (WI,  0.68%,   0.08%), (FL,  1.63%,   2.60%), (MI,  1.88%,   0.38%), (OH,  3.62%,   1.41%), (IL,  8.17%,  14.29%), (NY, 14.78%,  14.28%)
                    seats_bias: (NC,  0.57%,   0.41%), (WI,  0.68%,   0.08%), (FL,  1.63%,   2.07%), (MI,  1.88%,   0.38%), (OH,  3.62%,   1.22%), (IL,  8.17%,  14.28%), (NY, 14.78%,  14.27%)
                    votes_bias: (NC,  0.57%,   0.40%), (WI,  0.68%,   0.08%), (FL,  1.63%,   2.05%), (MI,  1.88%,   0.38%), (OH,  3.62%,   1.21%), (IL,  8.17%,  14.28%), (NY, 14.78%,  14.26%)
  mean_median_average_district: (NC,  0.57%,   1.29%), (WI,  0.68%,   0.08%), (FL,  1.63%,   0.85%), (MI,  1.88%,   0.35%), (OH,  3.62%,   2.78%), (IL,  8.17%,  14.27%), (NY, 14.78%,  14.29%)
             lopsided_outcomes: (NC,  0.

Report other conflicts

In [53]:
# States ordered by vote share; a state whose share was never recorded sorts as 0.
sorted_states = sorted(by_state.keys(), key=lambda state: by_state[state]["Vf"] or 0)

print("xx: Vf | seat vs. vote bias | geometric seat bias vs. other | packing & cracking metrics | symmetry vs. packing & cracking ")
for xx in sorted_states:
    counts = by_state[xx]
    Vf = counts["Vf"]
    # The sort key above tolerates a missing vote share, so the printout must too.
    vf_text: str = f"{Vf:>6.2%}" if Vf is not None else f"{'n/a':>6}"

    # Each count is shown as a share of the overall plan count `total`.
    a: float = counts["seats_vs_votes_bias"] / total
    b: float = counts["geometric_seats_bias_vs_others"] / total
    c: float = counts["packing_cracking_metrics"] / total
    # NOTE(review): no visible cell increments "symmetry_vs_packing_cracking",
    # so this column prints 0.00% -- confirm whether the tally is still missing.
    d: float = counts["symmetry_vs_packing_cracking"] / total

    print(f"{xx}: {vf_text} | {a:>6.2%} | {b:>6.2%} | {c:>6.2%} | {d:>6.2%}")


xx: Vf | seat vs. vote bias | geometric seat bias vs. other | packing & cracking metrics | symmetry vs. packing & cracking 
OH: 46.38% |  0.00% |  0.39% | 10.01% |  0.00%
FL: 48.37% |  0.00% |  0.51% |  2.68% |  0.00%
NC: 49.43% |  0.00% |  0.00% |  1.68% |  0.00%
WI: 50.68% |  0.00% |  0.00% |  0.07% |  0.00%
MI: 51.88% |  0.00% |  0.00% |  0.05% |  0.00%
IL: 58.17% |  0.00% |  0.00% |  0.71% |  0.00%
NY: 64.78% |  0.00% |  0.02% |  1.64% |  0.00%
