In [5]:
import IPython
import os
import csv 
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import t

from matplotlib.colors import TwoSlopeNorm

In [6]:
## Lookup tables and helper objects

# Working directory the notebook was launched from; data paths are joined onto it.
current_directory = os.getcwd()

# States of interest: USPS abbreviation -> integer state FIPS code.
# Insertion order is the order in which the states are processed downstream.
state_fips = dict(AK=2, AZ=4, KS=20, NE=31, NH=33, PA=42, WI=55)

# USPS abbreviation -> full state name for all 50 states
# (alphabetical by state name, same ordering as before).
state_abbreviations = {
    "AL": "Alabama", "AK": "Alaska", "AZ": "Arizona", "AR": "Arkansas",
    "CA": "California", "CO": "Colorado", "CT": "Connecticut", "DE": "Delaware",
    "FL": "Florida", "GA": "Georgia", "HI": "Hawaii", "ID": "Idaho",
    "IL": "Illinois", "IN": "Indiana", "IA": "Iowa", "KS": "Kansas",
    "KY": "Kentucky", "LA": "Louisiana", "ME": "Maine", "MD": "Maryland",
    "MA": "Massachusetts", "MI": "Michigan", "MN": "Minnesota", "MS": "Mississippi",
    "MO": "Missouri", "MT": "Montana", "NE": "Nebraska", "NV": "Nevada",
    "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico", "NY": "New York",
    "NC": "North Carolina", "ND": "North Dakota", "OH": "Ohio", "OK": "Oklahoma",
    "OR": "Oregon", "PA": "Pennsylvania", "RI": "Rhode Island", "SC": "South Carolina",
    "SD": "South Dakota", "TN": "Tennessee", "TX": "Texas", "UT": "Utah",
    "VT": "Vermont", "VA": "Virginia", "WA": "Washington", "WV": "West Virginia",
    "WI": "Wisconsin", "WY": "Wyoming",
}

In [7]:
## Global Parameters

DEG_F = 2    # degrees of freedom handed to scipy.stats.t
SIGMA = 3.5  # divisor applied to the margin (in percentage points) before evaluating the t pdf
SELECTED_COLUMNS = ['state_name', 'state', 'congress', 's_upper', 's_lower', 'district_name', 'race_type', 'margins', 'voter_power']
current_directory = os.getcwd()
data_frames = []

## Exceptions: states where only a subset of districts receives a non-zero voter power
## (presumably the seats on the ballot this cycle -- TODO confirm against the election calendar).
ALASKA_FOCUS = ['B','D','F','H','J','L','N','P','R','T']
ODD_ONLY_STATES = ('NE', 'PA')   # only odd-numbered district IDs
EVEN_ONLY_STATES = ('WI',)       # only even-numbered district IDs

# Loop-invariant: one frozen Student-t distribution serves every state.
t_dist = t(DEG_F)


def _focus_mask(df, state_abbreviation):
    """Return a boolean Series marking the districts that receive voter power.

    Parameters
    ----------
    df : DataFrame with a 'Label' column (used for AK) and an 'ID' column
        convertible to int (used for the odd/even states).
    state_abbreviation : two-letter state code being processed.

    Returns
    -------
    Boolean Series aligned with ``df.index``; all True for states without
    an exception rule.
    """
    if state_abbreviation == 'AK':
        return df['Label'].isin(ALASKA_FOCUS)
    if state_abbreviation in ODD_ONLY_STATES:
        return df['ID'].astype(int) % 2 == 1
    if state_abbreviation in EVEN_ONLY_STATES:
        return df['ID'].astype(int) % 2 == 0
    return pd.Series(True, index=df.index)


## Data processing
for state_abbreviation, fips_code in state_fips.items():
    subdirectory = f'State Legislature Senate/{state_abbreviation}_State_Senate'
    directory_path = os.path.join(current_directory, subdirectory, 'district-data.csv')
    df = gpd.read_file(directory_path)

    # Discard the first record BEFORE any calculation (presumably a summary /
    # unassigned row -- TODO confirm against the export format). Previously it
    # was dropped only after normalization, so a row that never reaches the
    # output could still determine the per-state maximum.
    df = df.iloc[1:].reset_index(drop=True)

    # Labeling
    df['state_name'] = state_abbreviations[state_abbreviation]
    df['state'] = fips_code
    df['congress'] = np.nan
    df['s_upper'] = df['Label']
    df['s_lower'] = np.nan
    df['district_name'] = df['Label'].apply(lambda x: f'State Senate District {x}')
    df['race_type'] = 'State Leg (Upper)'

    # Margin = (Dem - Rep) / Total, expressed in percentage points.
    dem_votes = df['E_16-20_COMP_Dem'].astype(int)
    rep_votes = df['E_16-20_COMP_Rep'].astype(int)
    total_votes = df['E_16-20_COMP_Total'].astype(int)
    df['margins'] = ((dem_votes - rep_votes) / total_votes) * 100

    # Voter power: t pdf of the scaled margin for in-focus districts, 0 elsewhere.
    # Building the column as floats in a single step avoids seeding it with an
    # int sentinel and then upcasting through .loc assignments.
    in_focus = _focus_mask(df, state_abbreviation)
    df['voter_power'] = np.where(in_focus, t_dist.pdf(df['margins'] / SIGMA), 0.0)

    # Rescale so the strongest district in the state scores 100.
    peak_power = df['voter_power'].max()
    if peak_power > 0:
        df['voter_power'] = ((df['voter_power'] / peak_power) * 100).round()
    else:
        # No district with positive power (or no rows at all): report 0
        # explicitly instead of propagating 0/0 = NaN into the output.
        df['voter_power'] = 0.0

    # Keep only the output columns
    df = df[SELECTED_COLUMNS]

    data_frames.append(df)

# Save
df_all = pd.concat(data_frames, ignore_index=True)
output_path = os.path.join(current_directory, 'csv outputs/state_senate_all.csv')
# Create the output folder on a fresh checkout so to_csv does not fail.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df_all.to_csv(output_path, index=False)

In [8]:
# Show the combined frame produced by pd.concat over the per-state frames;
# as the cell's last expression it is rendered with the notebook's rich display.
df_all

Unnamed: 0,state_name,state,congress,s_upper,s_lower,district_name,race_type,margins,voter_power
0,Alaska,2,,A,,State Senate District A,State Leg (Upper),-4.576876,0.0
1,Alaska,2,,B,,State Senate District B,State Leg (Upper),23.335151,1.0
2,Alaska,2,,C,,State Senate District C,State Leg (Upper),-8.844834,0.0
3,Alaska,2,,D,,State Senate District D,State Leg (Upper),-45.804795,0.0
4,Alaska,2,,E,,State Senate District E,State Leg (Upper),-6.495364,0.0
...,...,...,...,...,...,...,...,...,...
241,Wisconsin,55,,29,,State Senate District 29,State Leg (Upper),-15.994597,0.0
242,Wisconsin,55,,30,,State Senate District 30,State Leg (Upper),1.552123,100.0
243,Wisconsin,55,,31,,State Senate District 31,State Leg (Upper),6.239827,0.0
244,Wisconsin,55,,32,,State Senate District 32,State Leg (Upper),8.893885,13.0
