The purpose of this notebook is to identify the frequency of conflicts between metrics that purport to measure the degree to which a plan favors one party or another.

In [None]:
from typing import List, Dict, Any, Set

import os
import pandas as pd
from collections import defaultdict

from rdametrics import states, chambers, ensembles

Load the scores dataframe

In [None]:
# Location of the scores table; the leading "~" is expanded to the user's home directory.
scores_path: str = "~/local/beta-ensembles/prepackaged/scores/scores.parquet"
scores_df = pd.read_parquet(path=os.path.expanduser(scores_path))

Helper code

In [None]:
# from typing import Union
import pandas as pd
# import numpy as np

# Keep only the ensembles that are not on this exclusion list.
ensembles = list(
    filter(lambda name: name not in ("A1", "A2", "A3", "A4", "Rev*"), ensembles)
)

# Metric names grouped by category (category -> list of metric names).
partisan_bias = dict(
    partisan_advantage=["disproportionality", "efficiency_gap"],
    symmetry=["geometric_seats_bias", "seats_bias", "votes_bias"],
    packing_cracking=["mean_median_average_district", "lopsided_outcomes", "declination"],
)

# Category names, in the order they are declared above.
categories: List[str] = [*partisan_bias]

def same_sign(a: float | int | None, b: float | int | None) -> bool:
    if a == 0.0 or b == 0.0:
        return True
    
    if pd.isna(a) or pd.isna(b):
        return True

    return a * b > 0

# def same_sign(a, b):
#     if a == 0.0 or b == 0.0:
#         return True
    
#     if pd.isna(a) or pd.isna(b):
#         return True

#     return a * b > 0

# def same_signs(list1, list2):
#     # Combine both lists and filter out zero values and NaN values
#     all_values = list1 + list2
#     non_zero_defined_values = [x for x in all_values if x != 0 and not pd.isna(x)]
    
#     # If no valid values or only one valid value, return True
#     if len(non_zero_defined_values) <= 1:
#         return True
    
#     # Check if all valid values have the same sign
#     all_positive = all(x > 0 for x in non_zero_defined_values)
#     all_negative = all(x < 0 for x in non_zero_defined_values)
    
#     return all_positive or all_negative



Set up counters for the various kinds of conflicts.

In [None]:
from itertools import combinations
import copy

# Per-state tally: "Vf" starts unset and "total" starts at zero.
by_state: Dict[str, Any] = dict()
for xx in states:
    by_state[xx] = dict(Vf=None, total=0)

# Blank record for one compared pair. It is deep-copied for every pair so that
# each copy owns its own "combos" set.
ledger = dict(count=0, combos=set(), example=None, value1=None, value2=None)

# conflicts[category][(metric_a, metric_b)] -> record, for every pair of metrics
# that belong to the same category.
conflicts = dict()
for c in categories:
    pairs = combinations(partisan_bias[c], 2)
    conflicts[c] = {metric_pair: copy.deepcopy(ledger) for metric_pair in pairs}

# conflicts["cross_category"][(category_a, category_b)] -> record, for every pair
# of categories.
pairs = combinations(categories, 2)
conflicts["cross_category"] = dict()
for p in pairs:
    conflicts["cross_category"][p] = copy.deepcopy(ledger)

conflicts

Count instances where the scores conflict. Keep track by state / chamber / ensemble combination.

In [None]:
# Tally sign disagreements between metrics of the same category, one scores row at a time.
# The counters are module-level and are only added to here, so re-running this cell
# without re-running the counter setup cell adds to the existing counts.
for index, row in scores_df.iterrows():
    # Rows whose ensemble is not in the retained list are skipped entirely.
    if row["ensemble"] not in ensembles:
        continue

    xx, chamber, ensemble = (row["state"], row["chamber"], row["ensemble"])
    combo = (xx, chamber, ensemble)

    by_state[xx]["total"] += 1
    # The vote share is taken from the first counted row of each state.
    if by_state[xx]["Vf"] is None:
        by_state[xx]["Vf"] = row["estimated_vote_pct"]

    # Check consistency within each category

    for c in categories:
        # The loop variable is named `entry`, not `ledger`, so the blank `ledger`
        # template that the setup cell deep-copies is not rebound to a live record.
        for (m1, m2), entry in conflicts[c].items():
            # Missing scores become None, which same_sign() treats as "no conflict".
            v1 = float(row[m1]) if pd.notna(row[m1]) else None
            v2 = float(row[m2]) if pd.notna(row[m2]) else None
            if same_sign(v1, v2):
                continue

            entry["count"] += 1
            entry["combos"].add(combo)
            # Keep the first conflicting row as the illustrative example.
            if entry["example"] is None:
                entry["example"] = combo
                entry["value1"] = v1
                entry["value2"] = v2

    # Compare consistency across categories
    # NOTE(review): nothing is implemented here yet; the conflicts["cross_category"]
    # records are never updated by this loop.

Aggregate the results by state

In [None]:
# Roll the per-(state, chamber, ensemble) conflict records up to one record per state.
# NOTE(review): `subcategories` and `partisan_metrics` are not defined in any cell visible
# in this notebook, and the visible setup cell creates no conflicts["with_proportionality"]
# entry. This cell appears to expect a different layout of `conflicts`
# (conflicts[c][sc][combo] -> record) -- confirm before relying on it after a kernel restart.
conflicts_summary: Dict[str, Any] = dict()

# Empty per-state records for every category except the first one.
for c in categories[1:]:
    conflicts_summary[c] = dict()
    for sc in subcategories[c]:
        conflicts_summary[c][sc] = dict()
        for xx in states:
            conflicts_summary[c][sc][xx] = {
                "count": 0,
                "conflict-rate": None,
                "example": None,
            }

# Empty per-state records for every entry of partisan_metrics except the first one.
conflicts_summary["with_proportionality"] = dict()
for sc in partisan_metrics[1:]:
    conflicts_summary["with_proportionality"][sc] = dict()
    for xx in states:
        conflicts_summary["with_proportionality"][sc][xx] = {
            "count": 0,
            "conflict-rate": None,
            "example": None,
            "value": None,
            "disproportionality": None,
            "delta": None,
        }

# Sum the counts per state and keep the example with the largest "delta" seen so far.
# NOTE(review): if the first record stored for a state has delta=None, a later record
# with a non-None delta is compared against None on the long line below, which
# raises TypeError -- verify against the data.
for sc in partisan_metrics[1:]:
    for combo, _data in conflicts["with_proportionality"][sc].items():
        xx, chamber, ensemble = combo
        conflicts_summary["with_proportionality"][sc][xx]["count"] += _data["count"]
        if conflicts_summary["with_proportionality"][sc][xx]["example"] is None or (_data["delta"] is not None and (_data["delta"] > conflicts_summary["with_proportionality"][sc][xx]["delta"])):
            conflicts_summary["with_proportionality"][sc][xx]["example"] = _data["example"]
            conflicts_summary["with_proportionality"][sc][xx]["value"] = _data["value"]
            conflicts_summary["with_proportionality"][sc][xx]["disproportionality"] = _data["disproportionality"]
            conflicts_summary["with_proportionality"][sc][xx]["delta"] = _data["delta"]

# Conflict rate = conflicts counted for the state / rows counted for the state.
# A state whose by_state total is 0 makes this division raise ZeroDivisionError.
for sc in partisan_metrics[1:]:
    for xx in states:
        conflicts_summary["with_proportionality"][sc][xx]["conflict-rate"] = conflicts_summary["with_proportionality"][sc][xx]["count"] / by_state[xx]["total"]

# Same roll-up for the remaining categories; the first example found per state is kept.
for c in categories[1:]:
    for sc in subcategories[c]:
        for combo, _data in conflicts[c][sc].items():
            xx, chamber, ensemble = combo
            conflicts_summary[c][sc][xx]["count"] += _data["count"]
            if conflicts_summary[c][sc][xx]["example"] is None:
                conflicts_summary[c][sc][xx]["example"] = _data["example"]

for c in categories[1:]:
    for sc in subcategories[c]:
        for xx in states:
            conflicts_summary[c][sc][xx]["conflict-rate"] = conflicts_summary[c][sc][xx]["count"] / by_state[xx]["total"]


In [None]:
conflicts_summary

Format for reporting & plotting

In [None]:
# Report entries; filled in by the loop that follows.
report: Dict[str, Dict[Any, Any]] = {}


def partisan_balance(Vf: float) -> float:
    """Return the distance of `Vf` from 0.5 as a non-negative number."""
    return abs(Vf - 0.5)

# Build one report entry per key of conflicts_summary["with_proportionality"].
for sc, _data in conflicts_summary["with_proportionality"].items():
    report[sc] = dict()

    # NOTE(review): "total" is the row count of the first state only -- confirm that is intended.
    first_xx: str = list(_data.keys())[0]
    report[sc]["total"] = by_state[first_xx]["total"]

    # (state, distance of its vote share from 0.5, conflict rate), most balanced state first.
    conflicts_by_state = [(xx, partisan_balance(by_state[xx]['Vf']), _info['conflict-rate']) for xx, _info in _data.items()]
    conflicts_by_state.sort(key=lambda x: x[1])

    # Pick the state with the largest delta as the headline example. States without
    # any recorded example keep delta=None, and None cannot be ordered against
    # numbers, so they are left out of the comparison. If no state has a delta,
    # fall back to the first state (whose example fields are then all None).
    states_with_delta = [xx for xx, _info in _data.items() if _info["delta"] is not None]
    max_xx = max(states_with_delta, key=lambda xx: _data[xx]["delta"], default=first_xx)

    report[sc]["example"] = _data[max_xx]["example"]
    report[sc]["value"] = _data[max_xx]["value"]
    report[sc]["disproportionality"] = _data[max_xx]["disproportionality"]
    report[sc]["delta"] = _data[max_xx]["delta"]

    report[sc]['by-state'] = conflicts_by_state

report


Find conflict coverage

In [None]:
# Conflict totals per metric, taken over every state.
# NOTE(review): `partisan_metrics` and conflicts["with_proportionality"] are not
# defined in the cells visible in this notebook -- confirm where they come from.
coverage = dict()
total_sum = sum(by_state[xx]["total"] for xx in states)

for sc in partisan_metrics[1:]:
    records = list(conflicts["with_proportionality"][sc].values())
    coverage[sc] = {
        # Conflicts summed over every (state, chamber, ensemble) record.
        "count": sum(_data["count"] for _data in records),
        # Set unconditionally: previously this was assigned only while iterating the
        # records, so a metric with no records kept total=0 and any later division
        # by it failed.
        "total": total_sum,
        # Number of records with at least one conflict.
        "combos": sum(1 for _data in records if _data["count"] > 0),
    }

coverage

Report the percentage of conflicts by metric and state.

In [None]:
# Print, for each report entry, the overall conflict share, one example, and the per-state rates.
print("Conflicts With Proportionality")
print("==============================")
print()

precision: int = 4
for sc, _data in report.items():
    name: str = _data["example"]
    sample: float = _data["value"]
    disp: float = _data["disproportionality"]

    by_metric_conflicts: int = coverage[sc]["count"]
    total_plans: int = coverage[sc]["total"]
    combos: int = coverage[sc]["combos"]
    # Guard against an empty total so the report does not stop with ZeroDivisionError.
    conflict_share: float = by_metric_conflicts / total_plans if total_plans else 0.0

    by_state_output = [(state, f"{round(val1, precision):.2%}", f"{round(val2, precision):.2%}") for state, val1, val2 in _data['by-state']]


    print(f"{sc}:")
    # The percentage is now enclosed in a balanced pair of parentheses.
    # NOTE(review): "231 = 7 x 3 x 11" is hard-coded rather than derived from
    # states / chambers / ensembles -- confirm it matches the retained ensembles.
    print(
        f"  {by_metric_conflicts:,} of {total_plans:,} plans conflict ({conflict_share:.1%}) across {combos} of 231 = 7 x 3 x 11 state, chamber, and ensemble combinations."
    )
    # 'declination' is printed with a fixed-point format and a "degrees" label; the
    # other metrics are printed as percentages.
    if sc != 'declination':
        print(f"  Example: Map ({name}) has {sample:.2%} vs. disproportionality {disp:.2%}.")
    else:
        print(f"  Example: Map ({name}) has {sample:.4f} degrees vs. disproportionality {disp:.2%}.")
    print("  State, Deviation from 50-50 Balance, Conflict Rate")
    print(f"  {by_state_output}")
    print()

print()
print(
    "Where a 'conflict' is when the sign of the metric is the *opposite* of the sign for simple 'disproportionality'."
)

Report conflicts by metric and state sorted by partisan balance

In [None]:
# One line per report entry: every state with its rounded balance and conflict rate.
precision: int = 4
for sc, _data in report.items():
    cells = []
    for state, balance, rate in _data['by-state']:
        balance_text = f"{float(round(balance, precision)):>6.2%}"
        rate_text = f"{float(round(rate, precision)):>7.2%}"
        cells.append(f"({state}, {balance_text}, {rate_text})")

    print(f"{sc:>30}: " + ", ".join(cells))



Report other conflicts

In [None]:
# Per-category, per-subcategory report entries for every category except the first.
report2: Dict[str, Dict[Any, Any]] = dict()

for c in categories[1:]:
    report2[c] = dict()
    for sc, _data in conflicts_summary[c].items():
        report2[c][sc] = dict()

        # NOTE(review): "total" is the row count of the first state only -- confirm that is intended.
        first_xx: str = list(_data.keys())[0]
        report2[c][sc]["total"] = by_state[first_xx]["total"]

        # (state, distance of its vote share from 0.5, conflict rate), most balanced state first.
        conflicts_by_state = [(xx, partisan_balance(by_state[xx]['Vf']), _info['conflict-rate']) for xx, _info in _data.items()]
        conflicts_by_state.sort(key=lambda x: x[1])

        # Choose the example from this subcategory's own records: the state with the
        # most conflicts (the first such state on ties). The example used to be read
        # through `max_xx`, a variable left over from a loop in a different cell and
        # unrelated to the subcategory being reported.
        example_xx = max(_data, key=lambda xx: _data[xx]["count"])

        report2[c][sc]["example"] = _data[example_xx]["example"]
        report2[c][sc]['by-state'] = conflicts_by_state

In [None]:
# Section headings for the printed report, looked up by category name.
# NOTE(review): these keys do not match the category names built in the helper cell
# visible in this notebook ("partisan_advantage", "symmetry", "packing_cracking"), so
# titles[c] would raise KeyError for them -- confirm which naming is current.
titles: Dict[str, str] = {
    "with_proportionality": "N/A", 
    "within_symmetry": "Conflicts Within Symmetry",
    "within_packing_cracking": "Conflicts Within Packing & Cracking",
    "between_symmetry_and_packing_cracking": "Conflicts Between Symmetry & Packing & Cracking"
}

# Print one underlined section per category (the first category is skipped), listing
# each subcategory's example and per-state conflict rates.
for c in categories[1:]:
    print(titles[c])
    print("=" * len(titles[c]))

    for sc, _data in report2[c].items():
        # `precision` is not set in this cell; it is the value assigned in an earlier cell.
        by_state_output = [(state, f"{round(val1, precision):.2%}", f"{round(val2, precision):.2%}") for state, val1, val2 in _data['by-state']]

        print(f"Subcategory: {sc}")
        print(f"Example: {_data['example']}")
        # print(f"Total: {_data['total']}")
        print("State, Deviation from 50-50 Balance, Conflict Rate")
        print(f"{by_state_output}")
        print()

    print()
