In [1]:
import pandas as pd
import requests
import io
from scipy.special import logit

In [2]:
# Build `asian_df`: the Asian fraction of each state's population, indexed by
# the `state_po` code taken from state_info.csv.
df = pd.read_csv('./2022-race-demographics.csv')
data = df[['States', 'Race', 'Population']].rename(
    columns={'States': 'state', 'Race': 'race', 'Population': 'population'}
)

# Per-state population summed over every race row present in the file.
all_totals = (
    data.groupby(['state'])[['population']]
    .sum()
    .rename(columns={'population': 'total'})
)

# Keep only the 'Asian' rows, then divide by the state total; the division
# aligns both operands on the `state` index.
data = (
    data.loc[data['race'] == 'Asian']
    .set_index('state')
    .drop(columns='race')
)
data = (
    data.assign(asian=data['population'] / all_totals['total'])
    .drop(columns='population')
)

# State lookup table; also reused by the unemployment cell further down.
state_df = (
    pd.read_csv('../source_data/state_info.csv')
    .set_index('state')
)

# Align the two tables on the state index, then swap that index for the
# `state_po` code (renamed to `state`).
asian_df = (
    pd.concat([data, state_df], ignore_index=False, axis=1)
    .reset_index()
    .drop(columns='state')
    .rename(columns={'state_po': 'state'})
    .loc[:, ['state', 'asian']]
    .set_index('state')
)

In [3]:
# Build `bachelor_df`: the 2022 `bachelor_higher_pct` value per state.
df = pd.read_csv('../processed_data/2006-2022-education.csv')
df = df[['year', 'state', 'bachelor_higher_pct']]

# Only the 2022 rows are kept; `year` is dropped once it has served as filter.
bachelor_df = (
    df.loc[df['year'] == 2022]
    .set_index('state')
    .drop(columns='year')
)

In [4]:
# Build `unemployment_df`, indexed by state code, with two columns:
#   unemployment_pct   - mean of the 2024 observations of FRED series <STATE>URN,
#                        divided by 100
#   unemployment_pct_1 - that value minus the 2023 value from the processed file
df = (
    state_df[['state_po']]
    .rename(columns={'state_po': 'state'})
    .set_index('state')
)

for state in df.index:
    series_id = f'{state}URN'
    r = requests.get(
        f'https://fred.stlouisfed.org/graph/fredgraph.csv?id={series_id}',
        timeout=30,  # never hang the whole notebook on one stalled request
    )
    # Fail loudly on an HTTP error instead of parsing an error page as CSV.
    r.raise_for_status()
    temp_df = pd.read_csv(io.BytesIO(r.content))

    # Select the date column by position rather than by the 'DATE' label so a
    # header rename upstream cannot break the filter.
    # NOTE(review): assumes the date is the first column of the CSV - confirm.
    date_col = temp_df.columns[0]
    temp_df = temp_df[temp_df[date_col].str.startswith('2024')]
    df.loc[state, 'unemployment_pct'] = temp_df[series_id].mean()

# The fetched values are divided by 100 before being compared with the file.
df.loc[:, 'unemployment_pct'] = df['unemployment_pct'] / 100

# NOTE(review): the subtraction below assumes this file's `unemployment_pct`
# is on the same scale as the rescaled FRED values - confirm.
prev_df = (
    pd.read_csv('../processed_data/1976-2023-unemployment.csv')
    [['year', 'state', 'unemployment_pct']]
    .rename(columns={'unemployment_pct': 'prev_unemployment_pct'})
)

prev_df = (
    prev_df[prev_df['year'] == 2023]
    .set_index('state')
    .drop(columns='year')
)

# Align current and previous-year values on the state index and take the change.
df = pd.concat([df, prev_df], ignore_index=False, axis=1)
df.loc[:, 'unemployment_pct_1'] = df['unemployment_pct'] - df['prev_unemployment_pct']
unemployment_df = df.drop(columns='prev_unemployment_pct')

In [5]:
# Build `evangelical_df`: a `white_evangelist_pct` fraction per state, taken
# from the PRRI states-data endpoint (category=religion1, year=2023).
r = requests.get(
    'https://ava.prri.org/ajx_map.statesdata?category=religion1&sc=1&year=2023&topic=religious',
    timeout=30,  # never hang the notebook on a stalled request
)
# Fail loudly on an HTTP error instead of failing later inside `.json()`.
r.raise_for_status()

# The first entry of `states` is skipped.
# NOTE(review): presumably a non-state aggregate row - confirm.
df = pd.DataFrame(r.json()['states'][1:]).drop(columns='sort')

# Drop the first three characters of each state identifier.
# NOTE(review): presumably a fixed prefix in front of the state code - confirm.
df.loc[:, 'state'] = df['state'].str[3:]
df.loc[:, 'white_evangelist_pct'] = df['percent'].astype('float') / 100

evangelical_df = (
    df.drop(columns='percent')
    .set_index('state')
)
# DC is set by hand to a fixed 0.01.
evangelical_df.loc['DC', 'white_evangelist_pct'] = 0.01

In [6]:
# Build `pres_df`: `rep_pct` from the presidential two-party file for 2020,
# 2016 and 2012, stored per state as result_pres_4 / _8 / _12.
df = pd.read_csv('../processed_data/1976-2020-president-two-party.csv')
df = df[['year', 'state', 'rep_pct']].set_index('state')

# Start from the 2020 rows ...
pres_df = (
    df.loc[df['year'] == 2020, ['rep_pct']]
    .rename(columns={'rep_pct': 'result_pres_4'})
)

# ... then attach the two earlier elections, aligned on the state index.
for column, year in {'result_pres_8': 2016, 'result_pres_12': 2012}.items():
    pres_df.loc[:, column] = df.loc[df['year'] == year, 'rep_pct']

In [7]:
# Build `house_df`: `rep_pct` from the House two-party file for 2022 and 2016,
# stored per state as result_house_2 / result_house_8.
df = pd.read_csv('../processed_data/1976-2022-house-two-party-vote-by-state.csv')
df = df[['year', 'state', 'rep_pct']].set_index('state')

house_df = (
    df.loc[df['year'] == 2022, ['rep_pct']]
    .rename(columns={'rep_pct': 'result_house_2'})
)
house_df.loc[:, 'result_house_8'] = df.loc[df['year'] == 2016, 'rep_pct']

# DC gets a fixed 0.5 in both House columns.
house_df.loc['DC', ['result_house_2', 'result_house_8']] = 0.5

In [8]:
# Assemble the final table: join every per-state frame side by side on the
# state index, add hand-entered rows for the Maine and Nebraska congressional
# districts, logit-transform the share columns, and write the result to CSV.
all_df = pd.concat(
    [
        house_df,
        pres_df,
        evangelical_df,
        unemployment_df,
        asian_df,
        bachelor_df,
    ],
    ignore_index=False,
    axis=1,
)

# The district values below are listed in exactly this column order. Naming the
# columns explicitly (instead of selecting every column containing 'result')
# keeps each value tied to its column even if the concat order above changes.
result_cols = [
    'result_house_2',
    'result_house_8',
    'result_pres_4',
    'result_pres_8',
    'result_pres_12',
]
assert set(result_cols) <= set(all_df.columns), "missing expected result columns"

# Data found here: https://docs.google.com/spreadsheets/d/1zLNAuRqPauss00HDz4XbTH2HqsCzMe0pR8QmD1K8jk8/edit?gid=0#gid=0
# and here: https://www.dailykos.com/story/2012/11/19/1163009/-Daily-Kos-Elections-presidential-results-by-congressional-district-for-the-2012-2008-elections
district_results = {
    'ME-01': [0.37036410938180486,
              0.4196957525215817,
              37.2 / (60 + 37.2),
              39.3 / (39.3 + 54.1),
              38.2 / (38.2 + 59.6)],
    # NOTE(review): result_house_2 here is identical to the ME-01 value above,
    # which looks like a copy-paste slip - confirm against the source data.
    'ME-02': [0.37036410938180486,
              0.4958910178697673,
              51.6 / (51.6 + 45.5),
              51.4 / (51.4 + 41.1),
              44.4 / (53 + 44.4)],
    'NE-01': [0.5791051464163287,
              0.6945263835923261,
              54.3 / (54.3 + 43.3),
              57.5 / (57.5 + 36.2),
              57.5 / (57.5 + 40.8)],
    'NE-02': [0.5133412311477651,
              0.5062152812665968,
              45.8 / (45.8 + 52.2),
              48.2 / (48.2 + 46),
              52.8 / (52.8 + 45.7)],
    # result_house_8 is entered as 0.9999 rather than 1, which keeps the logit
    # below finite.
    'NE-03': [0.8321447845193123,
              0.9999,
              74.9 / (74.9 + 23.1),
              74.9 / (74.9 + 20),
              70.2 / (70.2 + 27.8)],
}

# Each assignment adds a new row; the non-result columns stay NaN for districts.
for district, values in district_results.items():
    all_df.loc[district, result_cols] = values

# Every column except `unemployment_pct_1` is mapped through the logit.
logit_cols = all_df.columns.drop('unemployment_pct_1')
all_df.loc[:, logit_cols] = logit(all_df[logit_cols])

all_df = all_df.sort_index()
all_df.to_csv('./2024-data.csv')
