# Minority Rule data preparation
Joining Census Bureau demographic data with election data for future mapping.

In [262]:
import pandas as pd

## Counties
### County Demographics
Source: [2020 Census redistricting data](https://www.census.gov/programs-surveys/decennial-census/about/rdo/summary-files.html)


In [263]:
abbr_df = pd.read_csv('abbr.tsv', delimiter='\t', usecols=['State', 'Code'], index_col='Code')
abbr_lookup = abbr_df.to_dict()['State']
abbr_lookup

{'AL': 'Alabama',
 'AK': 'Alaska',
 'AZ': 'Arizona',
 'AR': 'Arkansas',
 'CA': 'California',
 'CO': 'Colorado',
 'CT': 'Connecticut',
 'DE': 'Delaware',
 'DC': 'District of Columbia',
 'FL': 'Florida',
 'GA': 'Georgia',
 'HI': 'Hawaii',
 'ID': 'Idaho',
 'IL': 'Illinois',
 'IN': 'Indiana',
 'IA': 'Iowa',
 'KS': 'Kansas',
 'KY': 'Kentucky',
 'LA': 'Louisiana',
 'ME': 'Maine',
 'MD': 'Maryland',
 'MA': 'Massachusetts',
 'MI': 'Michigan',
 'MN': 'Minnesota',
 'MS': 'Mississippi',
 'MO': 'Missouri',
 'MT': 'Montana',
 'NE': 'Nebraska',
 'NV': 'Nevada',
 'NH': 'New Hampshire',
 'NJ': 'New Jersey',
 'NM': 'New Mexico',
 'NY': 'New York',
 'NC': 'North Carolina',
 'ND': 'North Dakota',
 'OH': 'Ohio',
 'OK': 'Oklahoma',
 'OR': 'Oregon',
 'PA': 'Pennsylvania',
 'RI': 'Rhode Island',
 'SC': 'South Carolina',
 'SD': 'South Dakota',
 'TN': 'Tennessee',
 'TX': 'Texas',
 'UT': 'Utah',
 'VT': 'Vermont',
 'VA': 'Virginia',
 'WA': 'Washington',
 'WV': 'West Virginia',
 'WI': 'Wisconsin',
 'WY': 'Wyoming

In [264]:
# FIPS codes and county names to add FIPS code to demog data later on
# https://www.nrcs.usda.gov/wps/portal/nrcs/detail/national/home/?cid=nrcs143_013697

fips_df = pd.read_csv('county_fips.csv', dtype={'FIPS': str})
fips_df['state_name'] = fips_df['State'].map(abbr_lookup)
fips_df['full_name'] = fips_df['Name'] + ', ' + fips_df['state_name']
fips_df = fips_df.set_index('full_name')
# fips_df.to_csv('county_fips.csv')
fips_df

Unnamed: 0_level_0,FIPS,Name,State,state_name
full_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"Autauga, Alabama",01001,Autauga,AL,Alabama
"Baldwin, Alabama",01003,Baldwin,AL,Alabama
"Barbour, Alabama",01005,Barbour,AL,Alabama
"Bibb, Alabama",01007,Bibb,AL,Alabama
"Blount, Alabama",01009,Blount,AL,Alabama
...,...,...,...,...
,72151,Yabucoa,PR,
,72153,Yauco,PR,
,78010,St. Croix,VI,
,78020,St. John,VI,


In [265]:
# census 2020 redistricting data
county_demog_df = pd.read_csv(
    'county-demog.csv',
                              index_col='Label (Grouping)',
    skip_blank_lines=True,
    thousands=','
).transpose()
county_demog_df.index.name = 'county'
# remove county and parish to standardize across states
county_demog_df.index = county_demog_df.index.map(lambda x: x.replace(' County,',',').replace(' Parish,',','))
# combine demographic data with FIPS codes
county_demog_df = county_demog_df.join(fips_df).set_index('FIPS')
county_demog_df

Unnamed: 0_level_0,Total:,Population of one race:,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,Population of two or more races:,Population of two races:,...,White; Black or African American; American Indian and Alaska Native; Asian; Some Other Race,White; Black or African American; American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander; Some Other Race,White; Black or African American; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,White; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Population of six races:,White; Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Name,State,state_name
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
45001,24295,23408,16877,6205,44,73,11,198,887,847,...,0,0,0,0,0,0,0,Abbeville,SC,South Carolina
22001,57576,55603,44480,9989,152,163,2,817,1973,1846,...,0,0,0,0,0,0,0,Acadia,LA,Louisiana
51001,33413,31655,20261,8670,297,255,4,2168,1758,1658,...,5,1,0,0,0,0,0,Accomack,VA,Virginia
16001,494967,453110,410263,8058,3274,13929,1318,16268,41857,39497,...,10,0,0,0,1,13,13,Ada,ID,Idaho
19001,7496,7262,7149,46,19,23,2,23,234,219,...,0,0,0,0,0,0,0,Adair,IA,Iowa
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
04027,203881,152053,90318,4099,3522,2587,283,51244,51828,50722,...,18,0,2,0,0,1,1,Yuma,AZ,Arizona
08125,9988,8937,7437,22,73,34,6,1365,1051,1018,...,0,0,0,0,0,0,0,Yuma,CO,Colorado
48505,13889,7570,5421,18,67,16,0,2048,6319,6278,...,1,0,1,0,0,0,0,Zapata,TX,Texas
48507,9670,6344,4598,89,74,19,1,1563,3326,3306,...,0,0,1,0,0,0,0,Zavala,TX,Texas


### County 2020 Presidential Election Results
Source: [County Presidential Election Returns 2000-2020](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ)

Dataset:
```
MIT Election Data and Science Lab, 2018, "County Presidential Election Returns 2000-2020",
 https://doi.org/10.7910/DVN/VOQCHQ, Harvard Dataverse, V10,
 UNF:6:pVAMya52q7VM1Pl7EZMW0Q== [fileUNF]
```

File:
```
MIT Election Data and Science Lab, 2018, "County Presidential Election Returns 2000-2020",
 https://doi.org/10.7910/DVN/VOQCHQ, Harvard Dataverse, V10;
 countypres_2000-2020.tab [fileName], UNF:6:pVAMya52q7VM1Pl7EZMW0Q== [fileUNF]
```

In [266]:
county_pres_df = pd.read_csv(
    'countypres_2000-2020.tab',
    index_col=['county_fips'],
    delimiter='\t',
    dtype={'county_fips': str, 'version': str }
)

# limit to major parties for now
county_pres_df = county_pres_df[
    (county_pres_df['year'] == 2020) &
    (county_pres_df['party'].isin(('DEMOCRAT', 'REPUBLICAN')))
    ]

total_series = county_pres_df['totalvotes'].groupby(county_pres_df.index).max()

county_pres_df = county_pres_df.pivot_table(
    index=county_pres_df.index,
    columns='party',
    values='candidatevotes'
)
county_pres_df['TOTAL'] = total_series

county_pres_df['dem_percent'] = 100 * county_pres_df['DEMOCRAT'] / county_pres_df['TOTAL']
county_pres_df['rep_percent'] = 100 * county_pres_df['REPUBLICAN'] / county_pres_df['TOTAL']
county_pres_df['dem_lead'] = 100 * county_pres_df['DEMOCRAT'] - county_pres_df['REPUBLICAN']
county_pres_df['dem_percent_lead'] = county_pres_df['dem_percent'] - county_pres_df['rep_percent']

county_pres_df.to_csv('county_results.csv')
county_pres_df

party,DEMOCRAT,REPUBLICAN,TOTAL,dem_percent,rep_percent,dem_lead,dem_percent_lead
county_fips,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
01001,7503.0,19838.0,27770,27.018365,71.436802,730462.0,-44.418437
01003,24578.0,83544.0,109679,22.409030,76.171373,2374256.0,-53.762343
01005,4816.0,5622.0,10518,45.788173,53.451226,475978.0,-7.663054
01007,1986.0,7525.0,9595,20.698280,78.426264,191075.0,-57.727983
01009,2640.0,24711.0,27588,9.569378,89.571553,239289.0,-80.002175
...,...,...,...,...,...,...,...
56037,3823.0,12229.0,16698,22.894957,73.236316,370071.0,-50.341358
56039,9848.0,4341.0,14787,66.599040,29.356868,980459.0,37.242172
56041,1591.0,7496.0,9459,16.819960,79.247278,151604.0,-62.427318
56043,651.0,3245.0,4032,16.145833,80.481151,61855.0,-64.335317


### Combine to one table

In [267]:
county_full_df = county_pres_df.join(county_demog_df)
county_full_df.index.name = 'FIPS'
county_full_df.to_csv('county-combined.csv')
county_full_df

Unnamed: 0_level_0,DEMOCRAT,REPUBLICAN,TOTAL,dem_percent,rep_percent,dem_lead,dem_percent_lead,Total:,Population of one race:,White alone,...,White; Black or African American; American Indian and Alaska Native; Asian; Some Other Race,White; Black or African American; American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander; Some Other Race,White; Black or African American; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,White; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Population of six races:,White; Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Name,State,state_name
FIPS,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
01001,7503.0,19838.0,27770,27.018365,71.436802,730462.0,-44.418437,58805.0,55648.0,42160.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Autauga,AL,Alabama
01003,24578.0,83544.0,109679,22.409030,76.171373,2374256.0,-53.762343,231767.0,216743.0,189399.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Baldwin,AL,Alabama
01005,4816.0,5622.0,10518,45.788173,53.451226,475978.0,-7.663054,25223.0,24523.0,11317.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Barbour,AL,Alabama
01007,1986.0,7525.0,9595,20.698280,78.426264,191075.0,-57.727983,22293.0,21534.0,16555.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Bibb,AL,Alabama
01009,2640.0,24711.0,27588,9.569378,89.571553,239289.0,-80.002175,59134.0,55478.0,50663.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Blount,AL,Alabama
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56037,3823.0,12229.0,16698,22.894957,73.236316,370071.0,-50.341358,42272.0,38061.0,34389.0,...,0.0,0.0,0.0,2.0,0.0,0.0,0.0,Sweetwater,WY,Wyoming
56039,9848.0,4341.0,14787,66.599040,29.356868,980459.0,37.242172,23331.0,21179.0,19202.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Teton,WY,Wyoming
56041,1591.0,7496.0,9459,16.819960,79.247278,151604.0,-62.427318,20450.0,19269.0,18007.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,Uinta,WY,Wyoming
56043,651.0,3245.0,4032,16.145833,80.481151,61855.0,-64.335317,7685.0,7113.0,6600.0,...,0.0,0.0,0.0,0.0,0.0,2.0,2.0,Washakie,WY,Wyoming


## States
### State Demographics
Source: [2020 Census redistricting data](https://www.census.gov/programs-surveys/decennial-census/about/rdo/summary-files.html)



In [268]:
state_demog_df = pd.read_csv(
    'state-demog.csv',
    index_col='Label (Grouping)',
    skip_blank_lines=True,
    thousands=','
).transpose()
state_demog_df.index.name = 'state'
state_demog_df.index = state_demog_df.index.str.upper()
state_demog_df

Label (Grouping),Total:,Population of one race:,White alone,Black or African American alone,American Indian and Alaska Native alone,Asian alone,Native Hawaiian and Other Pacific Islander alone,Some Other Race alone,Population of two or more races:,Population of two races:,...,American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Population of five races:,White; Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander,White; Black or African American; American Indian and Alaska Native; Asian; Some Other Race,White; Black or African American; American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander; Some Other Race,White; Black or African American; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,White; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Population of six races:,White; Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALABAMA,5024279,4767326,3220452,1296162,33625,76660,2984,137443,256953,243473,...,4,187,89,78,13,0,5,2,9,9
ALASKA,733391,643867,435392,21898,111575,44032,12698,18272,89524,81221,...,11,97,44,29,4,4,14,2,13,13
ARIZONA,7151502,6154696,4322337,339150,319512,257430,16397,899870,996806,948897,...,18,440,174,215,12,7,28,4,70,70
ARKANSAS,3011524,2797949,2114512,453783,27177,51839,14533,136105,213575,203299,...,0,118,60,38,2,15,2,1,12,12
CALIFORNIA,39538223,33777988,16296122,2237044,631016,6085947,157263,8370596,5760235,5380042,...,109,4107,1345,2002,236,208,284,32,450,450
COLORADO,5773714,5066044,4082927,234828,74129,199827,10287,464046,707670,665645,...,10,423,195,152,30,10,31,5,48,48
CONNECTICUT,3605944,3273040,2395128,388675,16051,172455,1598,299133,332904,313228,...,1,185,76,87,8,2,11,1,25,25
DELAWARE,989948,913430,597763,218899,5148,42699,412,48509,76518,71461,...,4,42,11,19,0,12,0,0,5,5
DISTRICT OF COLUMBIA,689545,633468,273194,285810,3193,33545,432,37294,56077,51147,...,0,51,20,18,0,4,4,5,4,4
FLORIDA,21538187,17986115,12422961,3246381,94795,643682,14014,1564282,3552072,3428042,...,23,1263,353,719,90,40,25,36,132,132


## State 2020 Presidential Election Results
Source: [U.S. President 1976–2020](https://dataverse.harvard.edu/file.xhtml?fileId=4299753&version=6.0)

Dataset
```
 MIT Election Data and Science Lab, 2017, "U.S. President 1976–2020",
  https://doi.org/10.7910/DVN/42MVDX, Harvard Dataverse, V6,
  UNF:6:4KoNz9KgTkXy0ZBxJ9ZkOw== [fileUNF]
```

File
```
 MIT Election Data and Science Lab, 2017, "U.S. President 1976–2020",
  https://doi.org/10.7910/DVN/42MVDX, Harvard Dataverse, V6;
  1976-2020-president.tab [fileName], UNF:6:4KoNz9KgTkXy0ZBxJ9ZkOw== [fileUNF]
```


In [269]:
state_elect_df = pd.read_csv('1976-2020-president.tab', delimiter='\t', index_col='state')
state_elect_df = state_elect_df[
    (state_elect_df['year'] == 2020) & (state_elect_df['party_simplified'].isin(['DEMOCRAT', 'REPUBLICAN']))]

total_series = state_elect_df['totalvotes'].groupby(state_elect_df.index).max()

state_elect_df = state_elect_df.pivot_table(index=state_elect_df.index, columns='party_simplified', values='candidatevotes')
state_elect_df['TOTAL'] = total_series
state_elect_df['dem_percent'] = 100 * state_elect_df['DEMOCRAT'] / state_elect_df['TOTAL']
state_elect_df['rep_percent'] = 100 * state_elect_df['REPUBLICAN'] / state_elect_df['TOTAL']
state_elect_df['dem_lead'] = 100 * state_elect_df['DEMOCRAT'] - state_elect_df['REPUBLICAN']
state_elect_df['dem_percent_lead'] = state_elect_df['dem_percent'] - state_elect_df['rep_percent']
state_elect_df.to_csv('state_results.csv')
state_elect_df

party_simplified,DEMOCRAT,REPUBLICAN,TOTAL,dem_percent,rep_percent,dem_lead,dem_percent_lead
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
ALABAMA,849624,1441170,2323282,36.56999,62.031643,83521230,-25.461653
ALASKA,153778,189951,359530,42.771952,52.833143,15187849,-10.061191
ARIZONA,1672143,1661686,3387326,49.364691,49.055981,165552614,0.30871
ARKANSAS,423932,760647,1219069,34.775062,62.39573,41632553,-27.620668
CALIFORNIA,11110250,6006429,17500881,63.483947,34.320724,1105018571,29.163223
COLORADO,1804352,1364607,3279980,55.011067,41.604126,179070593,13.406942
CONNECTICUT,1080831,714717,1823857,59.260732,39.187118,107368383,20.073613
DELAWARE,296268,200603,504346,58.743006,39.774877,29426197,18.968129
DISTRICT OF COLUMBIA,317323,18586,344356,92.149694,5.397321,31713714,86.752373
FLORIDA,5297045,5668731,11067456,47.861451,51.21982,524035769,-3.358369


### Combine to one table

In [270]:
state_full_df = state_elect_df.join(state_demog_df)
state_full_df.index.name = 'state'
state_full_df.to_csv('state-combined.csv')
state_full_df

Unnamed: 0_level_0,DEMOCRAT,REPUBLICAN,TOTAL,dem_percent,rep_percent,dem_lead,dem_percent_lead,Total:,Population of one race:,White alone,...,American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Population of five races:,White; Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander,White; Black or African American; American Indian and Alaska Native; Asian; Some Other Race,White; Black or African American; American Indian and Alaska Native; Native Hawaiian and Other Pacific Islander; Some Other Race,White; Black or African American; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,White; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race,Population of six races:,White; Black or African American; American Indian and Alaska Native; Asian; Native Hawaiian and Other Pacific Islander; Some Other Race
state,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ALABAMA,849624,1441170,2323282,36.56999,62.031643,83521230,-25.461653,5024279,4767326,3220452,...,4,187,89,78,13,0,5,2,9,9
ALASKA,153778,189951,359530,42.771952,52.833143,15187849,-10.061191,733391,643867,435392,...,11,97,44,29,4,4,14,2,13,13
ARIZONA,1672143,1661686,3387326,49.364691,49.055981,165552614,0.30871,7151502,6154696,4322337,...,18,440,174,215,12,7,28,4,70,70
ARKANSAS,423932,760647,1219069,34.775062,62.39573,41632553,-27.620668,3011524,2797949,2114512,...,0,118,60,38,2,15,2,1,12,12
CALIFORNIA,11110250,6006429,17500881,63.483947,34.320724,1105018571,29.163223,39538223,33777988,16296122,...,109,4107,1345,2002,236,208,284,32,450,450
COLORADO,1804352,1364607,3279980,55.011067,41.604126,179070593,13.406942,5773714,5066044,4082927,...,10,423,195,152,30,10,31,5,48,48
CONNECTICUT,1080831,714717,1823857,59.260732,39.187118,107368383,20.073613,3605944,3273040,2395128,...,1,185,76,87,8,2,11,1,25,25
DELAWARE,296268,200603,504346,58.743006,39.774877,29426197,18.968129,989948,913430,597763,...,4,42,11,19,0,12,0,0,5,5
DISTRICT OF COLUMBIA,317323,18586,344356,92.149694,5.397321,31713714,86.752373,689545,633468,273194,...,0,51,20,18,0,4,4,5,4,4
FLORIDA,5297045,5668731,11067456,47.861451,51.21982,524035769,-3.358369,21538187,17986115,12422961,...,23,1263,353,719,90,40,25,36,132,132
