In [1]:
import pandas as pd
import os
import geopandas as gpd

Now that I've assigned a PVI-score to all the State Legislative Districts' Upper chambers, it's time to set about mapping to create a chloropleth map.

First I downloaded all the shapefiles for State Legislative Districts from the Census Bureau [here](https://www2.census.gov/geo/tiger/TIGER2023/SLDU/)

The shapefiles had to be downloaded individually state-by-state and unzipped. The naming conventions followed the Federal Information Processing System (FIPS) Codes for States and Counties, so "tl_2023_01_sldu" corresponds to Alabama (01). In each folder were 7 files, but I want to start off by exploring the shapefile, which has the extension .shp to see how to relate my PVI scores Dataframe to the shapefile to merge those scores on.

In [2]:
# Path to the shapefile
shapefile_path = 'data/sldu_shapefiles/original/tl_2023_01_sldu/tl_2023_01_sldu.shp'

# Load the shapefile
gdf = gpd.read_file(shapefile_path)

# Display the first few rows of the GeoDataFrame
print(gdf.head())


  STATEFP SLDUST  GEOID         GEOIDFQ                  NAMELSAD LSAD   LSY  \
0      01    004  01004  610U800US01004   State Senate District 4   LU  2022   
1      01    026  01026  610U800US01026  State Senate District 26   LU  2022   
2      01    005  01005  610U800US01005   State Senate District 5   LU  2022   
3      01    027  01027  610U800US01027  State Senate District 27   LU  2022   
4      01    028  01028  610U800US01028  State Senate District 28   LU  2022   

   MTFCC FUNCSTAT       ALAND     AWATER     INTPTLAT      INTPTLON  \
0  G5210        N  5413280608  104587341  +34.1280167  -087.3603043   
1  G5210        N   733846256   24874680  +32.3151157  -086.3030980   
2  G5210        N  7446353264  104880595  +33.6831584  -087.5989849   
3  G5210        N  2832119021   97219299  +32.7206526  -085.6135364   
4  G5210        N  8420147207   98988580  +31.9939325  -085.4502090   

                                            geometry  
0  POLYGON ((-88.20722 34.05840, -88.

Great, so first I can immediately see that GEOIDFQ may correspond directly to my previous Dataframe's 'GEOID' column. Let's load the PVI dataframe

In [3]:
pvi_df = pd.read_csv('data/sldu_pvi_adjusted.csv')
pvi_df.head(50)

Unnamed: 0.1,Unnamed: 0,GEOID,PVI,Formatted_District,state_fips,state,Senate Pred Dem Adjusted,Senate Pred GOP Adjusted
0,0,610U800US0200A,-9.685082,"State Senate District A (2022), Alaska",2,AK,0,1
1,1,610U800US0200B,6.186881,"State Senate District B (2022), Alaska",2,AK,1,0
2,2,610U800US0200C,-3.191946,"State Senate District C (2022), Alaska",2,AK,0,1
3,3,610U800US0200D,-22.635338,"State Senate District D (2022), Alaska",2,AK,0,1
4,4,610U800US0200E,4.823058,"State Senate District E (2022), Alaska",2,AK,1,0
5,5,610U800US0200F,1.52409,"State Senate District F (2022), Alaska",2,AK,1,0
6,6,610U800US0200G,0.068112,"State Senate District G (2022), Alaska",2,AK,1,0
7,7,610U800US0200H,-2.752143,"State Senate District H (2022), Alaska",2,AK,0,1
8,8,610U800US0200I,14.371795,"State Senate District I (2022), Alaska",2,AK,1,0
9,9,610U800US0200J,20.299703,"State Senate District J (2022), Alaska",2,AK,1,0


In [4]:
pvi_df[pvi_df['Formatted_District'] == 'AL-Sen-04']


Unnamed: 0.1,Unnamed: 0,GEOID,PVI,Formatted_District,state_fips,state,Senate Pred Dem Adjusted,Senate Pred GOP Adjusted
23,23,610U800US01004,-31.60966,AL-Sen-04,1,AL,0,1


Ok, I confirmed the GEOID in my PVI Datframe, pvi_df, is an exact match to the GEOIDFQ in the GeoDataFrame, gdf. Time to attach the PVI scores

In [5]:
# filter and rename columns for merging, I don't need all the demographic info anymore
pvi_filtered = pvi_df[['GEOID', 'Formatted_District', 'PVI']]
pvi_filtered = pvi_filtered.rename(columns={'PVI': 'pred_PVI',
                                           'Formatted_District': 'district'}) #ESRI column names can only have 10 characters

# Merge the filtered DataFrame info to the gdf
merged_gdf = gdf.merge(pvi_filtered, left_on='GEOIDFQ', right_on='GEOID', how='left')


print(merged_gdf.head())


  STATEFP SLDUST GEOID_x         GEOIDFQ                  NAMELSAD LSAD   LSY  \
0      01    004   01004  610U800US01004   State Senate District 4   LU  2022   
1      01    026   01026  610U800US01026  State Senate District 26   LU  2022   
2      01    005   01005  610U800US01005   State Senate District 5   LU  2022   
3      01    027   01027  610U800US01027  State Senate District 27   LU  2022   
4      01    028   01028  610U800US01028  State Senate District 28   LU  2022   

   MTFCC FUNCSTAT       ALAND     AWATER     INTPTLAT      INTPTLON  \
0  G5210        N  5413280608  104587341  +34.1280167  -087.3603043   
1  G5210        N   733846256   24874680  +32.3151157  -086.3030980   
2  G5210        N  7446353264  104880595  +33.6831584  -087.5989849   
3  G5210        N  2832119021   97219299  +32.7206526  -085.6135364   
4  G5210        N  8420147207   98988580  +31.9939325  -085.4502090   

                                            geometry         GEOID_y  \
0  POLYGON ((-

In [6]:
# Define the path for the new shapefile
output_path = 'data/sldu_shapefiles/processed/AL_SLDU/AL_SLDU_PVI.shp'

# Save the merged GeoDataFrame as a new shapefile
merged_gdf.to_file(output_path, driver='ESRI Shapefile')


### Add my NC-specific things

In [7]:
nc_pvi = pd.read_csv('data/nc_pvi.csv')

Ok, so now we can create a system based on our above steps to iterate over all the other state codes. First, let's map the FIPS codes to the states

In [8]:
fips_to_state = {
    '01': 'AL', '02': 'AK', '04': 'AZ', '05': 'AR', '06': 'CA',
    '08': 'CO', '09': 'CT', '10': 'DE', '11': 'DC', '12': 'FL',
    '13': 'GA', '15': 'HI', '16': 'ID', '17': 'IL', '18': 'IN',
    '19': 'IA', '20': 'KS', '21': 'KY', '22': 'LA', '23': 'ME',
    '24': 'MD', '25': 'MA', '26': 'MI', '27': 'MN', '28': 'MS',
    '29': 'MO', '30': 'MT', '31': 'NE', '32': 'NV', '33': 'NH',
    '34': 'NJ', '35': 'NM', '36': 'NY', '37': 'NC', '38': 'ND',
    '39': 'OH', '40': 'OK', '41': 'OR', '42': 'PA', '44': 'RI',
    '45': 'SC', '46': 'SD', '47': 'TN', '48': 'TX', '49': 'UT',
    '50': 'VT', '51': 'VA', '53': 'WA', '54': 'WV', '55': 'WI',
    '56': 'WY', '72': 'PR'  # Including Puerto Rico
}


This following code automates the process of merging the predicted PVI sccores with the shapefiles for each state's legislative upper chambers (SLDU). Steps:
1. Iterate over each state using a dictionary map above of FIPS codes to state abbreviations.
2. Use the FIPS codes to identify file paths for input shapefiles and output directories based on these codes.
3. Check if the output folder exists and creating it if not
4. Load the shapefiles and PVI data, filtering and renaming columns to be less than 10-characters according to ESRI column naming requirements
5. Merge the PVI and District onto the geographic data of the shapefiles
6. Save the merged data as new shapefiles in state-specific folders.


In [9]:
# Directory where all the shapefiles are stored
base_input_path = 'data/sldu_shapefiles/original/'
base_output_path = 'data/sldu_shapefiles/processed/'

# Iterate over each state in the FIPS code dictionary
for fips, state_abbr in fips_to_state.items():
    input_path = f"{base_input_path}tl_2023_{fips}_sldu/tl_2023_{fips}_sldu.shp"
    output_dir = f"{base_output_path}{state_abbr}_SLDU"
    output_path = f"{output_dir}/{state_abbr}_SLDU_PVI.shp"

    # Check if the output directory exists, create if not
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Load the shapefile
    gdf = gpd.read_file(input_path)
    
    # Prepare PVI data (filtering and renaming)
    pvi_filtered = pvi_df[pvi_df['GEOID'].apply(lambda x: x.split('US')[-1][:2] == fips)] #the FIPS code is after the 'US' in the GEOID
    pvi_filtered = pvi_filtered[['GEOID', 'Formatted_District', 'PVI']].rename(columns={
        'PVI': 'pred_PVI',  # Ensure this is within 10 characters
        'Formatted_District': 'district'  # Ensure this is within 10 characters
    })
    
    # Merge the GeoDataFrame with the PVI DataFrame
    merged_gdf = gdf.merge(pvi_filtered, left_on='GEOIDFQ', right_on='GEOID', how='left')
    
    # Save the merged GeoDataFrame as a new shapefile
    merged_gdf.to_file(output_path, driver='ESRI Shapefile')
    
    print(f"Processed and saved shapefile for {state_abbr}")


Processed and saved shapefile for AL
Processed and saved shapefile for AK
Processed and saved shapefile for AZ
Processed and saved shapefile for AR
Processed and saved shapefile for CA
Processed and saved shapefile for CO
Processed and saved shapefile for CT
Processed and saved shapefile for DE
Processed and saved shapefile for DC
Processed and saved shapefile for FL
Processed and saved shapefile for GA
Processed and saved shapefile for HI
Processed and saved shapefile for ID
Processed and saved shapefile for IL
Processed and saved shapefile for IN
Processed and saved shapefile for IA
Processed and saved shapefile for KS
Processed and saved shapefile for KY
Processed and saved shapefile for LA
Processed and saved shapefile for ME
Processed and saved shapefile for MD
Processed and saved shapefile for MA
Processed and saved shapefile for MI
Processed and saved shapefile for MN
Processed and saved shapefile for MS
Processed and saved shapefile for MO
Processed and saved shapefile for MT
P

## Add my NC-specific things to that shapefile

In [10]:
nc_pvi = pd.read_csv('data/nc_pvi.csv')

In [11]:
nc_pvi

Unnamed: 0.1,Unnamed: 0,GEOID,district,pred_PVI,District_Number,pred_PVI_shifted,predicted_party,Party,District,Member,Counties Represented,competitiveness
0,0,610U800US37001,NC-Sen-01,-19.280423,1,-13.030423,Republican,R,1,Norman W. Sanderson,"Carteret, Chowan, ...",Solid R
1,1,610U800US37002,NC-Sen-02,-10.85444,2,-4.60444,Republican,R,2,Jim Perry,"Beaufort, Craven, ...",Tilt R
2,2,610U800US37003,NC-Sen-03,-4.840893,3,1.409107,Democratic,R,3,Bobby Hanig,"Bertie, Camden, ...",Toss Up
3,3,610U800US37004,NC-Sen-04,-6.718213,4,-0.468213,Republican,R,4,Buck Newton,"Greene, Wayne, ...",Toss Up
4,4,610U800US37005,NC-Sen-05,6.734699,5,12.984699,Democratic,D,5,Kandie D. Smith,"Edgecombe, Pitt",Solid D
5,5,610U800US37006,NC-Sen-06,-19.080914,6,-12.830914,Republican,R,6,Michael A. Lazzara,Onslow,Solid R
6,6,610U800US37007,NC-Sen-07,-2.557613,7,3.692387,Democratic,R,7,Michael V. Lee,New Hanover,Tilt D
7,7,610U800US37008,NC-Sen-08,-15.61487,8,-9.36487,Republican,R,8,Bill Rabon,"Brunswick, Columbus, ...",Likely R
8,8,610U800US37009,NC-Sen-09,-15.626561,9,-9.376561,Republican,R,9,Brent Jackson,"Bladen, Duplin, ...",Likely R
9,9,610U800US37010,NC-Sen-10,-19.624622,10,-13.374622,Republican,R,10,Benton G. Sawrey,Johnston,Solid R


In [12]:
# Path to the shapefile
shapefile_path_nc = 'data/sldu_shapefiles/processed/NC_SLDU/NC_SLDU_PVI.shp'

# Load the shapefile
gdf_nc = gpd.read_file(shapefile_path_nc)

# Display the first few rows of the GeoDataFrame
print(gdf_nc.head())



  STATEFP SLDUST GEOID_x         GEOIDFQ                  NAMELSAD LSAD   LSY  \
0      37    001   37001  610U800US37001   State Senate District 1   LU  2022   
1      37    045   37045  610U800US37045  State Senate District 45   LU  2022   
2      37    002   37002  610U800US37002   State Senate District 2   LU  2022   
3      37    046   37046  610U800US37046  State Senate District 46   LU  2022   
4      37    003   37003  610U800US37003   State Senate District 3   LU  2022   

   MTFCC FUNCSTAT        ALAND      AWATER     INTPTLAT      INTPTLON  \
0  G5210        N   7337196471  8634304882  +35.4708260  -076.2280820   
1  G5210        N   1458893938    44980017  +35.7207490  -081.2800714   
2  G5210        N   5020420966   515666201  +35.3239689  -077.0768755   
3  G5210        N   3275977742    36637476  +35.7001279  -082.0008389   
4  G5210        N  11479387894  1634070383  +36.2159332  -076.9498283   

          GEOID_y   district   pred_PVI  \
0  610U800US37001  NC-Sen-01 -1

In [13]:
filtered_nc_pvi = nc_pvi[["GEOID","pred_PVI_shifted","district","Party", "predicted_party","Member","Counties Represented"]]


# Merge the GeoDataFrame with the NC DataFrame
merged_gdf_nc = gdf_nc.merge(filtered_nc_pvi, left_on='GEOIDFQ', right_on='GEOID', how='left')

In [14]:
# Save the merged GeoDataFrame as a new shapefile
merged_gdf_nc.to_file('data/sldu_shapefiles/processed/NC_SLDU/NC_SLDU_PVI_shifted.shp', driver='ESRI Shapefile')

  merged_gdf_nc.to_file('data/sldu_shapefiles/processed/NC_SLDU/NC_SLDU_PVI_shifted.shp', driver='ESRI Shapefile')


In [15]:
merged_gdf_nc

Unnamed: 0,STATEFP,SLDUST,GEOID_x,GEOIDFQ,NAMELSAD,LSAD,LSY,MTFCC,FUNCSTAT,ALAND,...,district_x,pred_PVI,geometry,GEOID,pred_PVI_shifted,district_y,Party,predicted_party,Member,Counties Represented
0,37,1,37001,610U800US37001,State Senate District 1,LU,2022,G5210,N,7337196471,...,NC-Sen-01,-13.030423,"POLYGON ((-77.16777 34.79100, -77.16772 34.791...",610U800US37001,-13.030423,NC-Sen-01,R,Republican,Norman W. Sanderson,"Carteret, Chowan, ..."
1,37,45,37045,610U800US37045,State Senate District 45,LU,2022,G5210,N,1458893938,...,NC-Sen-45,-19.548338,"POLYGON ((-81.59180 35.81549, -81.59177 35.815...",610U800US37045,-19.548338,NC-Sen-45,R,Republican,Dean Proctor,"Caldwell, Catawba"
2,37,2,37002,610U800US37002,State Senate District 2,LU,2022,G5210,N,5020420966,...,NC-Sen-02,-4.60444,"POLYGON ((-77.83420 35.17889, -77.83415 35.179...",610U800US37002,-4.60444,NC-Sen-02,R,Republican,Jim Perry,"Beaufort, Craven, ..."
3,37,46,37046,610U800US37046,State Senate District 46,LU,2022,G5210,N,3275977742,...,NC-Sen-46,-14.051126,"POLYGON ((-82.60950 35.75606, -82.60941 35.756...",610U800US37046,-14.051126,NC-Sen-46,R,Republican,Warren Daniel,"Buncombe, Burke, ..."
4,37,3,37003,610U800US37003,State Senate District 3,LU,2022,G5210,N,11479387894,...,NC-Sen-03,1.409107,"POLYGON ((-78.32399 36.54382, -78.31796 36.543...",610U800US37003,1.409107,NC-Sen-03,R,Democratic,Bobby Hanig,"Bertie, Camden, ..."
5,37,47,37047,610U800US37047,State Senate District 47,LU,2022,G5210,N,6959482859,...,NC-Sen-47,-14.044515,"POLYGON ((-83.09943 35.77510, -83.09904 35.775...",610U800US37047,-14.044515,NC-Sen-47,R,Republican,Ralph Hise,"Alleghany, Ashe, ..."
6,37,4,37004,610U800US37004,State Senate District 4,LU,2022,G5210,N,3077503001,...,NC-Sen-04,-0.468213,"POLYGON ((-78.30642 35.28784, -78.30624 35.288...",610U800US37004,-0.468213,NC-Sen-04,R,Republican,Buck Newton,"Greene, Wayne, ..."
7,37,48,37048,610U800US37048,State Senate District 48,LU,2022,G5210,N,3046010622,...,NC-Sen-48,-16.101541,"POLYGON ((-82.74514 35.42297, -82.74480 35.423...",610U800US37048,-16.101541,NC-Sen-48,R,Republican,Timothy D. Moffitt,"Henderson, Polk, ..."
8,37,5,37005,610U800US37005,State Senate District 5,LU,2022,G5210,N,2998694072,...,NC-Sen-05,12.984699,"POLYGON ((-77.82844 35.86721, -77.82834 35.867...",610U800US37005,12.984699,NC-Sen-05,D,Democratic,Kandie D. Smith,"Edgecombe, Pitt"
9,37,49,37049,610U800US37049,State Senate District 49,LU,2022,G5210,N,874947340,...,NC-Sen-49,2.165452,"POLYGON ((-82.88811 35.67301, -82.88796 35.673...",610U800US37049,2.165452,NC-Sen-49,D,Democratic,Julie Mayfield,Buncombe


In [16]:
# Path to the shapefile
shapefile_path = 'data/sldu_shapefiles/processed/TX_SLDU/TX_SLDU_PVI.shp'

# Load the shapefile
gdf = gpd.read_file(shapefile_path)

# Display the first few rows of the GeoDataFrame
print(gdf.head())

  STATEFP SLDUST GEOID_x         GEOIDFQ                  NAMELSAD LSAD   LSY  \
0      48    013   48013  610U800US48013  State Senate District 13   LU  2022   
1      48    014   48014  610U800US48014  State Senate District 14   LU  2022   
2      48    015   48015  610U800US48015  State Senate District 15   LU  2022   
3      48    016   48016  610U800US48016  State Senate District 16   LU  2022   
4      48    017   48017  610U800US48017  State Senate District 17   LU  2022   

   MTFCC FUNCSTAT        ALAND      AWATER     INTPTLAT      INTPTLON  \
0  G5210        N    617613393     8635514  +29.6505395  -095.4493287   
1  G5210        N   1333734713    13709274  +30.3276222  -097.6766210   
2  G5210        N    652950020    20167237  +29.9314444  -095.4725906   
3  G5210        N    666191013    14954059  +32.8011437  -096.7930861   
4  G5210        N  14869099752  2140958283  +29.1816806  -096.0594832   

          GEOID_y   district   pred_PVI  \
0  610U800US48013  TX-Sen-13  2

In [17]:
max_abs_pvi = max(abs(overall_min_pvi), abs(overall_max_pvi))
color_scale_min = -max_abs_pvi
color_scale_max = max_abs_pvi


NameError: name 'overall_min_pvi' is not defined

In [None]:
max_abs_pvi

In [None]:
# Path to the shapefile
shapefile_path = 'data/tl_2023_37_tract/tl_2023_37_tract.shp'

# Load the shapefile
gdf_tracts = gpd.read_file(shapefile_path)

# Display the first few rows of the GeoDataFrame
print(gdf_tracts.head())


## Randolph County is 37-151 

In [None]:
randolph_gdf = gdf_tracts[gdf_tracts['COUNTYFP']== '151']

In [None]:
randolph_gdf.head(34)