In [1]:
import os
import glob
import json
import pandas as pd

In [39]:
# District levels handled in this run; each entry must be a key of
# level_to_enacted_data below.
levels = (
    'HD',
)

# Directory that holds the enacted-plan data files for each district level.
level_to_enacted_data = dict(
    CD='enacted_race_data/cd113_vap_with_race/',
    SD='enacted_race_data/leg2012_upper_vap_with_race/',
    HD='enacted_race_data/leg2012_lower_vap_with_race/',
)

In [40]:
# Postal codes in the order (and with the 1-based positions) used when the
# per-state statistics are serialized.
state_order = str.split(
    'AL AZ AR CA DE FL GA IL LA MD MS NV NJ NM NY NC SC TN TX VA'
)

In [41]:
# Threshold compared against the enacted data's 'black_pct' column; it also
# selects the 'bvap_over_<N>' entry read from each ensemble result file.
# Presumably a percentage of Black voting-age population -- TODO confirm units.
bvap_over = 40

In [42]:
# Build per-state summary statistics by pairing every ensemble result file in
# results/ with the enacted-plan CSV for the same state and district level.
#
# NOTE: `stats` is keyed by postal code only, so when `levels` contains more
# than one level, a later level's entry replaces the earlier one for a state.
stats = {}
ensemble_key = f'bvap_over_{bvap_over}'
for level in levels:
    for filename in glob.glob(f'results/*__{level}.jld.json'):
        # The text before the first underscore of the file name is the postal
        # code. basename() is used so this does not depend on the path
        # separator that glob returns.
        postal = os.path.basename(filename).split('_')[0]

        # Locate the enacted-plan CSV for this state. glob returns matches in
        # arbitrary order, so sort to make the choice deterministic, and fail
        # with a readable message instead of a bare IndexError when nothing
        # matches.
        # NOTE(review): '*{postal}*' matches any file name containing these
        # letters anywhere -- confirm it cannot pick up an unrelated file.
        enacted_pattern = os.path.join(level_to_enacted_data[level], f'*{postal}*')
        enacted_matches = sorted(glob.glob(enacted_pattern))
        if not enacted_matches:
            raise FileNotFoundError(
                f'No enacted data file matches {enacted_pattern!r} '
                f'(needed for {filename!r})'
            )
        enacted_filename = enacted_matches[0]
        enacted_df = pd.read_csv(enacted_filename)

        # Rows with district 'ZZ' are excluded from both per-district counts so
        # that the over-threshold count can never include a row that is not
        # counted in n_districts. ('ZZ' is presumably a placeholder for area
        # not assigned to any district -- TODO confirm.)
        districts_df = enacted_df[enacted_df['district'] != 'ZZ']
        n_districts = len(districts_df)
        enacted_maj_black = len(districts_df[districts_df['black_pct'] > bvap_over])

        # The statewide share is computed over all rows, 'ZZ' included.
        state_bvap = round(100 * enacted_df['black'].sum() / enacted_df['total'].sum(), 3)

        # Only the summary for the configured threshold is needed from the
        # ensemble file; read it and close the file right away.
        with open(filename) as f:
            ensemble_summary = json.load(f)[ensemble_key]

        stats[postal] = {
            'bvap': state_bvap,
            'n_districts': n_districts,
            'enacted_maj_black': enacted_maj_black,
            'ensemble_maj_black_min': ensemble_summary['min'],
            'ensemble_maj_black_max': ensemble_summary['max'],
            'ensemble_maj_black_median': ensemble_summary['median']
        }

In [43]:
# List the postal codes for which statistics were collected.
stats.keys()

dict_keys(['DE', 'LA', 'NY', 'NC', 'VA', 'SC', 'CA', 'MS', 'GA', 'AR', 'IL', 'NM', 'TN', 'AL', 'NV', 'TX', 'NJ', 'FL', 'AZ'])

In [44]:
# Serialize the per-state statistics, in state_order, as comma-terminated
# 'position/postal/bvap/n/enacted/min/max/median' entries. A state missing
# from `stats` is skipped but still uses up its position number.
stat_fields = (
    'bvap',
    'n_districts',
    'enacted_maj_black',
    'ensemble_maj_black_min',
    'ensemble_maj_black_max',
    'ensemble_maj_black_median',
)
entries = []
for idx, postal in enumerate(state_order):
    if postal in stats:
        bvap, n, enac, ens_min, ens_max, ens_med = (
            stats[postal][field] for field in stat_fields
        )
        entries.append(
            f'{idx + 1}/{postal}/{bvap}/{n}/{enac}/{ens_min}/{ens_max}/{ens_med},'
        )
out_data = ''.join(entries)

In [45]:
# Show the serialized string; every entry, including the last, ends with a comma.
out_data

'1/AL/25.129/105/26/14/28/21.0,2/AZ/3.836/30/0/0/0/0.0,3/AR/14.31/100/13/5/14/10.0,4/CA/5.776/80/0/0/1/0.0,5/DE/19.9/41/6/1/7/3.0,6/FL/14.436/120/13/1/10/5.0,7/GA/29.147/180/52/33/52/43.0,8/IL/13.769/119/18/8/17/12.0,9/LA/30.048/106/30/16/37/26.0,11/MS/34.701/122/43/33/52/43.0,12/NV/7.789/42/0/0/0/0.0,13/NJ/13.134/40/2/1/7/4.0,14/NM/2.014/70/0/0/0/0.0,15/NY/15.231/150/20/8/18/13.0,16/NC/20.644/120/25/4/19/12.0,17/SC/26.502/124/36/18/35/26.0,18/TN/15.577/99/13/7/15/11.0,19/TX/11.619/150/12/0/7/3.0,20/VA/18.737/100/12/4/15/10.0,'

In [47]:
# Inspect the full per-state statistics mapping.
# NOTE(review): the saved output under this cell does not match the run shown
# in the earlier cells (it reports n_districts of 13 for NC where the out_data
# output shows 120, and it contains MD, which the earlier key listing lacks).
# It looks like it was produced from a different kernel state -- re-run the
# notebook top to bottom before relying on it.
stats

{'NC': {'bvap': 20.644,
  'n_districts': 13,
  'enacted_maj_black': 1,
  'ensemble_maj_black_min': 0,
  'ensemble_maj_black_max': 0,
  'ensemble_maj_black_median': 0.0},
 'FL': {'bvap': 14.436,
  'n_districts': 27,
  'enacted_maj_black': 1,
  'ensemble_maj_black_min': 0,
  'ensemble_maj_black_max': 0,
  'ensemble_maj_black_median': 0.0},
 'LA': {'bvap': 30.048,
  'n_districts': 6,
  'enacted_maj_black': 1,
  'ensemble_maj_black_min': 0,
  'ensemble_maj_black_max': 1,
  'ensemble_maj_black_median': 0.0},
 'MD': {'bvap': 28.414,
  'n_districts': 8,
  'enacted_maj_black': 2,
  'ensemble_maj_black_min': 0,
  'ensemble_maj_black_max': 2,
  'ensemble_maj_black_median': 1.0},
 'GA': {'bvap': 29.147,
  'n_districts': 14,
  'enacted_maj_black': 3,
  'ensemble_maj_black_min': 0,
  'ensemble_maj_black_max': 3,
  'ensemble_maj_black_median': 1.0},
 'MS': {'bvap': 34.701,
  'n_districts': 4,
  'enacted_maj_black': 1,
  'ensemble_maj_black_min': 0,
  'ensemble_maj_black_max': 1,
  'ensemble_maj_blac