In [None]:
# Midterm Q: How does a segregation score help determine if a congressional district is racially gerrymandered? How can we visualize this? 
# Data from Sal ty Sal.

# We would like the user to be able to select from VTDs and create their own legislative district in the state of Virginia. They would receive stats on the population, compactness, and S+/- score of their district.
import os
from pathlib import Path

import pandas as pd

This link takes you to a preliminary interface where users can select their districts:

https://wm-gis.maps.arcgis.com/home/item.html?id=1938f79f1a9b4bac9c50fff09d17b662


In [None]:
# The following is an example of calculating S+/- scores

# Root of the GerryGainMCMC checkout that holds the input files. It defaults
# to the original author's location; set the GERRYGAIN_DIR environment
# variable to run this notebook on another machine without editing the cell.
DATA_DIR = Path(os.environ.get('GERRYGAIN_DIR', '/Users/rebeccawagner/Documents/GitHub/GerryGainMCMC'))

# Get the column names, because they were not copied over from the original
# data to the state data csv. Only the header row is read (nrows=0).
# The list does not include block_geoid_end, but that's not a necessary column.
header = pd.read_csv(DATA_DIR / 'local_environments_US.csv', index_col=0, nrows=0).columns.tolist()

# Get the data for local environments of VA census blocks.
# For some states with FIPS codes that have leading 0s, we need the dtype
# parameter. Not here, but it is left in for the future.
# Reset the index to overwrite block_geoid_end with regular indices.
VA_data = pd.read_csv(DATA_DIR / 'state_environs' / 'VA_LE', names=header, dtype={'block_geoid_start': str}).reset_index(drop=True)

In [None]:
# Preview the loaded VA local-environment data
VA_data

In [None]:
# Only these columns are needed to create an S+/- score:
    # block_geoid_start = census block
    # rn_total_sl_dist = population of block's local environment
    # rn_nh_black_sl_dist = black population of block's local environment
    # total = population of block
    # nh_black = black population of block

# Summarize the frame's columns so we can check the ones listed above
VA_data.info()

In [None]:
# Subset the columns we need.
# .copy() makes this an independent frame, so derived columns can be added to
# it later without pandas raising SettingWithCopyWarning.
VA_data_score = VA_data[['block_geoid_start', 'rn_total_sl_dist', 'rn_nh_black_sl_dist', 'total', 'nh_black']].copy()
VA_data_score

In [None]:
# Create the percentages we need.
# assign() builds a new frame instead of writing columns into the existing one,
# so this cell does not raise SettingWithCopyWarning and gives the same result
# every time it is re-run.
VA_data_score = VA_data_score.assign(
    # Percentage of racial group m (black) in the local environment of the
    # census block: running total black / running total
    pct_m_le=lambda df: df['rn_nh_black_sl_dist'] / df['rn_total_sl_dist'] * 100,
    # Weight this percent by the number of people in the census block:
    # pct_m_le * total
    weighted_pct_m_le=lambda df: df['pct_m_le'] * df['total'],
    # Percent of racial group m (black) in each block
    pct_black_block=lambda df: df['nh_black'] / df['total'] * 100,
)

VA_data_score

In [None]:
# At this point every quantity needed for an S+/- score is available per census
# block, so a score can be built for any combination of blocks. We want scores
# per VTD, which means knowing which blocks belong to each VTD and aggregating.

# Toy example: take the first 20 census blocks in VA.
toy_example = VA_data_score.iloc[:20]

# Pretend blocks 1-10 sit in VTD 1 and blocks 11-20 in VTD 2, and record that
# in a reference column placed right after the block id.
example_VTDs = [vtd for vtd in (1, 2) for _ in range(10)]
toy_example.insert(loc=1, column='VTD', value=example_VTDs)

toy_example

In [None]:
def _vtd_stats(blocks, vtd_id):
    """Aggregate the block-level inputs of the S+/- score for one VTD.

    Parameters
    ----------
    blocks : DataFrame with 'VTD', 'total', 'nh_black' and
        'weighted_pct_m_le' columns, one row per census block.
    vtd_id : value of the 'VTD' column identifying the VTD to aggregate.

    Returns
    -------
    tuple of (total population, total black population,
    summed weighted black percent of the local environments,
    percent of group m (black) in the VTD).
    """
    in_vtd = blocks['VTD'] == vtd_id
    total = blocks.loc[in_vtd, 'total'].sum()
    black_total = blocks.loc[in_vtd, 'nh_black'].sum()
    weighted_pct_m_le = blocks.loc[in_vtd, 'weighted_pct_m_le'].sum()
    pct_black = (black_total / total) * 100
    return total, black_total, weighted_pct_m_le, pct_black


# VTD 1: total population, total black population, total weighted black
# population in its local environments, and percent of group m (black)
(VTD_1_total,
 VTD_1_black_total,
 VTD_1_weighted_pct_m_le,
 VTD_1_pct_black_block) = _vtd_stats(toy_example, 1)

# VTD 2: the same four statistics
(VTD_2_total,
 VTD_2_black_total,
 VTD_2_weighted_pct_m_le,
 VTD_2_pct_black_block) = _vtd_stats(toy_example, 2)

In [None]:
# S+/- for VTD 1 on its own:
# take the summed weighted percent of group m in the local environments,
# divide it by the total population of the district, and subtract that from
# the percentage of people in the district who are members of group m.
VTD_1_mean_pct_m_le = VTD_1_weighted_pct_m_le / VTD_1_total
VTD_1_score = VTD_1_pct_black_block - VTD_1_mean_pct_m_le

# S+/- for VTD 2 on its own, computed the same way
VTD_2_mean_pct_m_le = VTD_2_weighted_pct_m_le / VTD_2_total
VTD_2_score = VTD_2_pct_black_block - VTD_2_mean_pct_m_le

(VTD_1_score, VTD_2_score)

In [None]:
# Suppose both VTDs are selected together. What is the S+/- score of the
# resulting district?

# The district-level sums are just the per-VTD sums added up:
# total population, total black population, and total weighted black
# population in the local environments.
district_total = VTD_1_total + VTD_2_total
district_black_total = VTD_1_black_total + VTD_2_black_total
district_weighted_pct_m_le = VTD_1_weighted_pct_m_le + VTD_2_weighted_pct_m_le

# Percent of group m (black) in the district
district_pct_black_block = (district_black_total / district_total) * 100

# Calculate S+/-: district percent minus the population-weighted mean
# local-environment percent
district_mean_pct_m_le = district_weighted_pct_m_le / district_total
district_score = district_pct_black_block - district_mean_pct_m_le

district_score

In [None]:
# So, to calculate S+/- scores in real time as a user selects VTDs, we will need to know these three stats for each VTD:
    # the total population of the VTD
    # the total minority population of the VTD
    # the total (summed) weighted percent of minority population in local environments
# From there, we sum each of these stats across the selected VTDs and calculate the minority percent in the district.
# Finally, subtract (district weighted percent / district total) from the district's minority percent to get the S+/- score.