The purpose of this notebook is to identify the frequency of conflicts between metrics that purport to measure the degree to which a plan favors one party or another.

In [1]:
from typing import List, Dict, Any, Set

import os
import pandas as pd
from collections import defaultdict

from rdametrics import states, chambers, ensembles

Load the scores dataframe

In [2]:
# The scores file is addressed relative to the user's home directory, so "~" is
# expanded to a real path before the parquet file is read.
scores_path: str = "~/local/beta-ensembles/prepackaged/scores/scores.parquet"
expanded_scores_path = os.path.expanduser(scores_path)
scores_df = pd.read_parquet(expanded_scores_path)

Helper code

In [6]:
import pandas as pd

# Ensembles left out of this analysis; every other imported ensemble is kept.
excluded_ensembles = ["A1", "A2", "A3", "A4", "Rev*"]
ensembles = [name for name in ensembles if name not in excluded_ensembles]

# Metrics grouped by category. Insertion order is significant: it fixes the
# order of `metrics` (and therefore of anything printed per metric) below.
partisan_bias = {
    "partisan_advantage": ["disproportionality", "efficiency_gap"],
    "partisan_symmetry": ["seats_bias", "votes_bias", "geometric_seats_bias"],
    "packing_cracking": ["mean_median_average_district", "lopsided_outcomes", "declination"],
}

# Flat list of every metric, category by category.
metrics: List[str] = []
for category_metrics in partisan_bias.values():
    metrics.extend(category_metrics)

# Category names, in the order they were declared.
categories: List[str] = [*partisan_bias]

def same_sign(a, b):
    """Return True unless `a` and `b` have strictly opposite signs.

    A missing value (anything `pd.isna` reports as missing) or an exact zero on
    either side is treated as "no conflict", so the result is True.

    Args:
        a: First score (int or float), or a missing value.
        b: Second score (int or float), or a missing value.

    Returns:
        True when either value is missing or zero, or when both values are
        positive or both are negative; False when one is positive and the
        other negative.

    Raises:
        AssertionError: If a non-missing, non-zero comparison is reached with
            an argument that is not an int or float.
    """
    # Missing values are tested first: comparing pd.NA with a number yields
    # pd.NA, whose truth value is ambiguous and raises TypeError.
    if pd.isna(a) or pd.isna(b):
        return True

    if a == 0.0 or b == 0.0:
        return True

    assert isinstance(a, (int, float)) and isinstance(b, (int, float)), f"Unexpected types: {type(a)}, {type(b)}"

    # Compare the signs directly instead of testing `a * b > 0`: the product of
    # two very small same-sign values underflows to 0.0 and would be misread
    # as a conflict.
    return (a > 0) == (b > 0)

def same_signs(values):
    """Return True when every usable value in `values` has the same sign.

    Zeros and missing values (anything `pd.isna` reports as missing) are
    ignored. With zero or one usable value there is nothing to disagree, so
    the result is True.

    Args:
        values: Iterable of numeric scores, possibly containing zeros or
            missing values.

    Returns:
        True if the usable values are all positive or all negative (or there
        are fewer than two of them); False otherwise.
    """
    # `pd.isna` is evaluated first so that pd.NA is dropped before `x != 0`
    # runs; that comparison yields pd.NA, whose truth value raises TypeError.
    usable = [x for x in values if not pd.isna(x) and x != 0]

    if len(usable) <= 1:
        return True

    # Every usable value is non-zero, so agreeing with the first value's sign
    # is equivalent to "all positive or all negative".
    first_is_positive = usable[0] > 0
    return all((x > 0) == first_is_positive for x in usable)

# def same_signs(list1, list2):
#     # Combine both lists and filter out zero values and NaN values
#     all_values = list1 + list2
#     non_zero_defined_values = [x for x in all_values if x != 0 and not pd.isna(x)]
    
#     # If no valid values or only one valid value, return True
#     if len(non_zero_defined_values) <= 1:
#         return True
    
#     # Check if all valid values have the same sign
#     all_positive = all(x > 0 for x in non_zero_defined_values)
#     all_negative = all(x < 0 for x in non_zero_defined_values)
    
#     return all_positive or all_negative

def partisan_balance(Vf: float) -> float:
    """Return the unsigned distance between `Vf` and 0.5.

    Args:
        Vf: A vote fraction.

    Returns:
        `Vf - 0.5` when `Vf` is at least 0.5, otherwise `0.5 - Vf`.
    """
    return abs(Vf - 0.5)

def metric_pair(string_tuple):
    """Return the items of `string_tuple` as a tuple in sorted order.

    Sorting makes the result independent of argument order, so (a, b) and
    (b, a) produce the same dictionary key.
    """
    ordered = list(string_tuple)
    ordered.sort()
    return tuple(ordered)

Set up counters for the various kinds of conflicts.

In [10]:
from itertools import combinations
import copy
# `defaultdict` is provided by `collections`. The `typing` module documents only
# the generic alias `DefaultDict`, so importing the lowercase name from `typing`
# depends on an undocumented detail of that module.
from collections import defaultdict

# Number of plans counted by the scoring loop.
total_plans: int = 0

# Per-state bookkeeping: the vote fraction recorded for the state ("Vf") and
# the number of plans counted for it ("total").
by_state: Dict[str, Any] = {xx: {"Vf": None, "total": 0} for xx in states}

# Template for one conflict ledger; each category gets its own deep copy so the
# nested dict and set are never shared between ledgers.
_ledger = {
    "counts": {xx: 0 for xx in states},
    "combos": set(),
    "example": None,
    "values": None,
    "delta": None,
}

# One ledger per category, plus one per unordered pair of categories.
cross_categories = list(combinations(categories, 2))
conflicts = {c: copy.deepcopy(_ledger) for c in categories + cross_categories}

# Conflict count per sorted (metric, metric) pair; missing pairs read as 0.
pairwise_metric_conflicts: Dict[tuple, int] = defaultdict(int)

# conflicts

Count instances where the scores conflict. Keep track of state / chamber / ensemble combinations.

In [None]:
# Reset the accumulators this cell fills, so that re-running the cell recounts
# from zero instead of adding a second pass on top of the first.
total_plans = 0
pairwise_metric_conflicts.clear()
for _state_entry in by_state.values():
    _state_entry["Vf"] = None
    _state_entry["total"] = 0

for _index, row in scores_df.iterrows():
    # Only plans from the retained ensembles are counted.
    if row["ensemble"] not in ensembles:
        continue

    xx, chamber, ensemble = (row["state"], row["chamber"], row["ensemble"])
    # `combo`, `plan` and `consistent` are consumed only by the disabled checks
    # at the bottom of this loop; they are kept so those can be re-enabled.
    combo = (xx, chamber, ensemble)
    plan = f"{row['map']:09}"

    total_plans += 1

    by_state[xx]["total"] += 1
    # The vote fraction is taken from the first counted row of each state.
    if by_state[xx]["Vf"] is None:
        by_state[xx]["Vf"] = row["estimated_vote_pct"]

    consistent = {
        "partisan_advantage": True,
        "packing_cracking": True,
        "partisan_symmetry": True
    }

    # Check pairwise metric conflicts: every unordered pair of metrics whose
    # values disagree in sign on this plan adds one to that pair's count.

    for m1, m2 in combinations(metrics, 2):
        v1 = row[m1]
        v2 = row[m2]
        if not same_sign(v1, v2):
            pairwise_metric_conflicts[metric_pair((m1, m2))] += 1

    # # Check consistency within each category

    # for c in categories:
    #     values = [row[m] for m in partisan_bias[c]]
    #     delta = abs(max(values) - min(values))
    #     if not same_signs(values):
    #         consistent[c] = False
    #         conflicts[c]["counts"][xx] += 1
    #         conflicts[c]["combos"].add(combo)
    #         if conflicts[c]["example"] is None or delta > conflicts[c]["delta"]:
    #             conflicts[c]["example"] = (combo, plan)
    #             conflicts[c]["values"] = values
    #             conflicts[c]["delta"] = delta

    # # Compare consistency across categories
    
    # for cat1, cat2 in cross_categories:
    #     if consistent[cat1] and consistent[cat2]:
    #         v1 = row[partisan_bias[cat1][0]]
    #         v2 = row[partisan_bias[cat2][0]]
    #         if not same_sign(v1, v2):
    #             conflicts[(cat1, cat2)]["counts"][xx] += 1
    #             conflicts[(cat1, cat2)]["combos"].add(combo)
    #             if conflicts[(cat1, cat2)]["example"] is None:
    #                 conflicts[(cat1, cat2)]["example"] = (combo, plan)

# (by_state, conflicts)
# pairwise_metric_conflicts

Build the pairwise conflict-rate matrix and print it as CSV (one row per metric).

In [24]:
# Print the conflict-rate matrix as comma-separated text: first a line of metric
# names, then one line per metric holding its name followed by its conflict
# rate against every metric (the diagonal is fixed at "0.00%").
print(",".join(metrics))
for row_metric in metrics:
    cells = [row_metric]
    for col_metric in metrics:
        if row_metric != col_metric:
            # Share of counted plans on which the two metrics disagree in sign.
            rate: float = pairwise_metric_conflicts[metric_pair((row_metric, col_metric))] / total_plans
            cells.append(f"{rate:.2%}")
        else:
            cells.append("0.00%")
    print(",".join(cells))


disproportionality,efficiency_gap,seats_bias,votes_bias,geometric_seats_bias,mean_median_average_district,lopsided_outcomes,declination
disproportionality,0.00%,14.73%,32.70%,32.67%,33.46%,33.90%,44.20%,28.35%
efficiency_gap,14.73%,0.00%,17.93%,17.90%,18.65%,19.56%,29.39%,14.25%
seats_bias,32.70%,17.93%,0.00%,0.00%,0.93%,4.59%,12.09%,3.54%
votes_bias,32.67%,17.90%,0.00%,0.00%,0.90%,4.56%,12.06%,3.50%
geometric_seats_bias,33.46%,18.65%,0.93%,0.90%,0.00%,5.21%,11.87%,4.00%
mean_median_average_district,33.90%,19.56%,4.59%,4.56%,5.21%,0.00%,11.28%,4.86%
lopsided_outcomes,44.20%,29.39%,12.09%,12.06%,11.87%,11.28%,0.00%,14.22%
declination,28.35%,14.25%,3.54%,3.50%,4.00%,4.86%,14.22%,0.00%


Print the pairwise conflicts

Aggregate the results into a reportable format.

In [None]:
# Summary per conflict ledger, keyed exactly like `conflicts`.
report = dict()

for c, ledger in conflicts.items():
    # One (state, deviation from 50-50, conflict rate) triple per state.
    # States with no counted plans are skipped: they have no recorded vote
    # fraction and a zero denominator, so no rate can be computed for them.
    diffs_by_state = [
        (xx, partisan_balance(by_state[xx]["Vf"]), count / by_state[xx]["total"])
        for xx, count in ledger["counts"].items()
        if by_state[xx]["total"] > 0
    ]
    # Order states from the most balanced to the least balanced.
    diffs_by_state.sort(key=lambda x: x[1])

    # Same triples with both numbers rendered as percentages.
    diffs = [(xx, f"{Vf:.2%}", f"{rate:.2%}") for xx, Vf, rate in diffs_by_state]

    # A combo is (state, chamber, ensemble); keep just the distinct states.
    states_with_conflicts: Set[str] = {combo[0] for combo in ledger["combos"]}

    report[c] = {
        "conflict-rate": diffs,
        "states": states_with_conflicts,
        "combos": len(ledger["combos"]),
        "example": ledger["example"],
        "values": ledger["values"],
        "delta": ledger["delta"],
    }

# report

Generate the report.

In [None]:
# Print one section per report entry: the states and combos with conflicts, the
# per-state conflict rates, and the recorded example when there is one.
for _category, _ledger in report.items():
    print(f"Category: {_category}")
    print(f"  {len(_ledger['states'])} states ({_ledger['states']}) with conflicts / {_ledger['combos']} conflicting combos")
    print("  State, Deviation from 50-50 Balance, Conflict Rate")
    # Single quotes inside the replacement field: reusing the f-string's own
    # double quotes there is a syntax error on Python versions before 3.12.
    print(f"  {_ledger['conflict-rate']}")
    if _ledger["example"] is not None:
        print(f"  Example conflict: {_ledger['example']}")
        print(f"  Values = {_ledger['values']}  (delta = {_ledger['delta']})")
    print()