In [1]:
import IPython
import os
import csv 
import geopandas as gpd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import t

from matplotlib.colors import TwoSlopeNorm

In [65]:
## States of interest

# Postal abbreviation -> numeric state FIPS code.
# Insertion order is significant: anything iterating this mapping visits the
# states in exactly this order.
state_fips = dict(
    [
        ('AK', 2),
        ('AZ', 4),
        ('KS', 20),
        ('MI', 26),
        ('MN', 27),
        ('NV', 32),
        ('PA', 42),
        ('WI', 55),
    ]
)

## Helper functions/objects

# Working directory of the kernel; relative data paths are resolved against it.
current_directory = os.getcwd()

# Two-letter postal abbreviation -> full state name for the 50 states.
state_abbreviations = {
    "AL": "Alabama", "AK": "Alaska", "AZ": "Arizona", "AR": "Arkansas",
    "CA": "California", "CO": "Colorado", "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "ID": "Idaho", "IL": "Illinois", "IN": "Indiana", "IA": "Iowa",
    "KS": "Kansas", "KY": "Kentucky",
    "LA": "Louisiana",
    "ME": "Maine", "MD": "Maryland", "MA": "Massachusetts", "MI": "Michigan",
    "MN": "Minnesota", "MS": "Mississippi", "MO": "Missouri", "MT": "Montana",
    "NE": "Nebraska", "NV": "Nevada", "NH": "New Hampshire", "NJ": "New Jersey",
    "NM": "New Mexico", "NY": "New York", "NC": "North Carolina", "ND": "North Dakota",
    "OH": "Ohio", "OK": "Oklahoma", "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina", "SD": "South Dakota",
    "TN": "Tennessee", "TX": "Texas",
    "UT": "Utah",
    "VT": "Vermont", "VA": "Virginia",
    "WA": "Washington", "WV": "West Virginia", "WI": "Wisconsin", "WY": "Wyoming",
}

In [149]:
## Global Parameters

# Base directory for all relative input/output paths, and the accumulator that
# receives one processed frame per state.
current_directory = os.getcwd()
data_frames = []

# Voter-power curve settings: DEG_F is handed to scipy.stats.t as its
# degrees of freedom, and SIGMA divides the margins before the pdf is evaluated.
DEG_F = 2
SIGMA = 3.5

# Columns (and their order) retained in every per-state frame.
SELECTED_COLUMNS = [
    'state_name',
    'state',
    'congress',
    's_upper',
    's_lower',
    'district_name',
    'race_type',
    'margins',
    'voter_power',
]

## Data processing
# For every state in `state_fips`, read its `district-data.csv`, attach the
# labelling columns, compute the Dem-minus-Rep margin (in percentage points)
# and a 0-100 "voter power" score, and append the trimmed frame to
# `data_frames`.

# The distribution depends only on DEG_F, so build it once instead of once
# per state.
t_dist = t(DEG_F)

for state_abbreviation in state_fips:
    subdirectory = f'State Legislature House/{state_abbreviation}_State_House'
    directory_path = os.path.join(current_directory, subdirectory, 'district-data.csv')
    df = gpd.read_file(directory_path)

    # Discard the first data row *before* computing anything. Previously the
    # row was removed only after voter power had been normalised, so a row
    # that never reaches the output could still set the 100-point maximum.
    # NOTE(review): presumably this is a non-district (summary/unassigned)
    # row in the export — confirm against the source files.
    df = df.iloc[1:].reset_index(drop=True)

    # Labeling
    df['state_name'] = state_abbreviations[state_abbreviation]
    df['state'] = state_fips[state_abbreviation]
    df['congress'] = np.nan
    df['s_upper'] = np.nan
    df['s_lower'] = df['ID']
    df['district_name'] = df['ID'].apply(lambda x: f'State House District {x}')
    df['race_type'] = 'State Leg (Lower)'

    # Margins: (Dem - Rep) / Total, expressed in percentage points.
    dem_votes = df['E_16-20_COMP_Dem'].astype(int)
    rep_votes = df['E_16-20_COMP_Rep'].astype(int)
    total_votes = df['E_16-20_COMP_Total'].astype(int)
    df['margins'] = ((dem_votes - rep_votes) / total_votes) * 100

    # Voter power: t-density of the scaled margin, rescaled so that the most
    # competitive retained district in this state scores 100, then rounded to
    # a whole number.
    density = pd.Series(t_dist.pdf(df['margins'] / SIGMA), index=df.index)
    df['voter_power'] = ((density / density.max()) * 100).round()

    # Keep only the output columns, in their canonical order.
    df = df[SELECTED_COLUMNS]

    data_frames.append(df)

# Save
# Stack every per-state frame into one table and write it out as CSV.
df_all = pd.concat(data_frames, ignore_index=True)

# Create the output folder if needed: `to_csv` does not create missing parent
# directories and would otherwise fail after all the processing has finished.
output_dir = os.path.join(current_directory, 'csv outputs')
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, 'state_house_all.csv')
df_all.to_csv(output_path, index=False)