This notebook shows you how to use the scores dataframe.

First, load the .parquet file into a pandas dataframe.

In [None]:
import pandas as pd

from helpers import load_scores 

# Location of the prepackaged scores parquet file on the local machine.
# NOTE(review): the path starts with "~" -- confirm that load_scores expands the home directory.
scores_path: str = "~/local/beta-ensembles/prepackaged/scores/scores.parquet"
# load_scores is imported from the project's helpers module; the result is used as a pandas dataframe.
scores_df: pd.DataFrame = load_scores(scores_path)
# Last expression in the cell, so the notebook renders the first five rows.
scores_df.head()

Unnamed: 0,map,estimated_vote_pct,pr_deviation,estimated_seats,fptp_seats,disproportionality,efficiency_gap_wasted_votes,efficiency_gap_statewide,efficiency_gap,seats_bias,...,proportional_coalitions,minority,county_splitting,district_splitting,counties_split,county_splits,splitting,state,chamber,ensemble
0,2500,0.4837,0.0762,11.8678,11,0.0598,0.1075,0.0744,0.0435,0.0299,...,13,68,1.5858,1.5834,35,64,2,FL,congress,A0
1,5000,0.4837,0.0827,11.6842,11,0.0664,0.0973,0.0744,0.05,0.0415,...,13,75,1.6487,1.6488,38,71,0,FL,congress,A0
2,7500,0.4837,0.0646,12.1917,12,0.0482,0.0673,0.0387,0.0319,0.0232,...,13,70,1.602,1.6233,39,70,0,FL,congress,A0
3,10000,0.4837,0.0647,12.1873,12,0.0484,0.0745,0.0387,0.032,0.0271,...,13,71,1.6085,1.5958,33,67,0,FL,congress,A0
4,12500,0.4837,0.0408,12.8575,13,0.0245,0.0399,0.003,0.0081,0.0042,...,13,74,1.5676,1.5361,34,60,8,FL,congress,A0


Then you can work with the dataframe in several different ways.

You can iterate over the rows of the dataframe, checking the values of the columns.

In [4]:
# Preview the first 12 rows of the scores dataframe, one line per row.
# head(12) bounds the loop by position rather than by index label, so the preview
# still prints 12 rows when the labels are not 0, 1, 2, ... (for example, on a
# filtered subset of the dataframe whose labels start in the millions).
# The label is bound to `_label` rather than `_`, because IPython uses `_` for the
# last cell output.
for _label, row in scores_df.head(12).iterrows():
    print(f"{row['state']} / {row['chamber']} / {row['ensemble']} -- {row['estimated_seats']:.2f} seats")

FL / congress / A0 -- 11.87 seats
FL / congress / A0 -- 11.68 seats
FL / congress / A0 -- 12.19 seats
FL / congress / A0 -- 12.19 seats
FL / congress / A0 -- 12.86 seats
FL / congress / A0 -- 12.55 seats
FL / congress / A0 -- 12.17 seats
FL / congress / A0 -- 11.96 seats
FL / congress / A0 -- 12.85 seats
FL / congress / A0 -- 12.57 seats
FL / congress / A0 -- 11.56 seats
FL / congress / A0 -- 11.74 seats


You can get the values for one metric for one state, chamber, and ensemble combination.

In [5]:
import numpy as np

from helpers import arr_from_scores

# Pick one dataset -- state (xx), chamber, and ensemble -- plus the metric column to extract.
xx: str = "NC"
chamber: str = "congress"
ensemble: str = "A0"
metric: str = "estimated_seats"

# arr_from_scores is imported from the project's helpers module; the result is used as a NumPy array.
# Presumably it holds one value of the metric per map in the selected ensemble -- verify against helpers.
arr: np.ndarray = arr_from_scores(xx, chamber, ensemble, metric, scores_df)
# Report which dataset was selected and the shape of the extracted array.
print(f"{xx} / {chamber} / {ensemble} -- {arr.shape}")
# Last expression in the cell, so the notebook displays the array itself.
arr

NC / congress / A0 -- (20000,)


array([5.6421, 6.7154, 6.0959, ..., 6.8823, 7.2029, 6.3551])

You can subset the scores dataframe for a specific state, chamber, and ensemble combination.

In [6]:
from helpers import df_from_scores

# Same state / chamber / ensemble selection as in the previous example.
xx: str = "NC"
chamber: str = "congress"
ensemble: str = "A0"

# df_from_scores is imported from the project's helpers module; the result is used as a pandas dataframe
# holding the rows for the selected combination.
# The displayed rows keep large index labels (2699989, ...), so the index does not start at 0.
df: pd.DataFrame = df_from_scores(xx, chamber, ensemble, scores_df)
# Last expression in the cell, so the notebook renders the first five rows.
df.head()

Unnamed: 0,map,estimated_vote_pct,pr_deviation,estimated_seats,fptp_seats,disproportionality,efficiency_gap_wasted_votes,efficiency_gap_statewide,efficiency_gap,seats_bias,...,proportional_coalitions,minority,county_splitting,district_splitting,counties_split,county_splits,splitting,state,chamber,ensemble
2699989,2500,0.4943,0.097,5.6421,5,0.0913,0.1351,0.1315,0.0857,0.0797,...,5,62,1.3703,2.3252,45,65,0,NC,congress,A0
2699990,5000,0.4943,0.0203,6.7154,6,0.0147,0.0554,0.0601,0.009,-0.0013,...,5,62,1.3505,2.4011,53,70,2,NC,congress,A0
2699991,7500,0.4943,0.0646,6.0959,6,0.0589,0.0564,0.0601,0.0532,0.0379,...,5,56,1.3649,2.327,47,63,0,NC,congress,A0
2699992,10000,0.4943,0.0512,6.2831,7,0.0455,-0.0077,-0.0113,0.0399,0.0258,...,5,56,1.3772,2.2562,42,60,0,NC,congress,A0
2699993,12500,0.4943,0.0353,6.5055,7,0.0297,-0.0056,-0.0113,0.024,0.0105,...,5,61,1.35,2.3725,50,67,2,NC,congress,A0


You can loop over the datasets in the dataframe, using predefined constants.

In [7]:
from constants import states, chambers, ensembles, metrics, aggregates

# Show each predefined list imported from the constants module, one labelled line apiece.
for label, values in (
    ("States:", states),
    ("Chambers:", chambers),
    ("Ensembles:", ensembles),
    ("Metrics:", metrics),
    ("Aggregates:", aggregates),
):
    print(label, values)

States: ['FL', 'IL', 'MI', 'NC', 'NY', 'OH', 'WI']
Chambers: ['congress', 'upper', 'lower']
Ensembles: ['A0', 'A1', 'A2', 'A3', 'A4', 'Pop-', 'Pop+', 'B', 'C', 'D', 'Rev*', 'Rev', 'R25', 'R50', 'R75', 'R100']
Metrics: ['population_deviation', 'estimated_vote_pct', 'estimated_seats', 'pr_deviation', 'disproportionality', 'fptp_seats', 'efficiency_gap_wasted_votes', 'efficiency_gap_statewide', 'efficiency_gap', 'seats_bias', 'votes_bias', 'geometric_seats_bias', 'declination', 'mean_median_statewide', 'mean_median_average_district', 'turnout_bias', 'lopsided_outcomes', 'proportionality', 'competitive_district_count', 'competitive_districts', 'average_margin', 'responsiveness', 'responsive_districts', 'overall_responsiveness', 'competitiveness', 'mmd_black', 'mmd_hispanic', 'mmd_coalition', 'opportunity_districts', 'proportional_opportunities', 'coalition_districts', 'proportional_coalitions', 'minority', 'cut_score', 'reock', 'polsby_popper', 'population_compactness', 'compactness', 'cou