In [3]:
import re
import pandas as pd
import numpy as np
import altair as alt
from vega_datasets import data
from pathlib import Path
from datetime import date
from ETLForElectionAndVaccinationData import *

# Switch Altair to its 'json' data transformer: chart data goes through
# intermediate JSON files, which speeds things up.
alt.data_transformers.enable('json')

# --- Colour palette (global constants) -------------------------------------
# One hex colour per segment name.
TO_OTHER = "#556B2F"
TO_DEMOCRAT = "#11A3D6"
TO_REPUBLICAN = "#8C1616"
STAYED_DEMOCRAT = '#0015BC'
STAYED_REPUBLICAN = '#FF0000'
STAYED_OTHER = '#B4D3B2'

# Segment name -> hex colour.
segment_color_dict = dict(
    TO_OTHER=TO_OTHER,
    TO_DEMOCRAT=TO_DEMOCRAT,
    TO_REPUBLICAN=TO_REPUBLICAN,
    STAYED_DEMOCRAT=STAYED_DEMOCRAT,
    STAYED_REPUBLICAN=STAYED_REPUBLICAN,
    STAYED_OTHER=STAYED_OTHER,
)

# Hex colour -> human-readable label (same order as the palette above).
color_segment_dict = dict(
    zip(
        (TO_OTHER, TO_DEMOCRAT, TO_REPUBLICAN,
         STAYED_DEMOCRAT, STAYED_REPUBLICAN, STAYED_OTHER),
        ("To other", "To Democrat", "To Republican",
         "Stayed Democrat", "Stayed Republican", "Stayed Other"),
    )
)

# Folder holding the input data files, relative to the notebook.
DataFolder = Path("../DataForPresidentialElectionsAndCovid/")


In [6]:
# Rows whose COUNTYFP is at or above this value are dropped from both the
# unemployment and the COVID data. Alaska (02xxx) rows are retained, so this is
# not a "mainland only" filter.
# NOTE(review): presumably meant to exclude territories with higher FIPS codes -- confirm.
_MAX_STATE_COUNTYFP = 57000


def _readCountyUnemployment():
    """
    Read the county-level unemployment workbook and tidy it.

    Returns: DataFrame with one row per county and month and the columns
             unemployment_rate, COUNTYFP, CTYNAME, state, month.
    """
    raw_df = pd.read_excel(
        DataFolder / r"laucntycur14.xlsx",
        names=["LAUS_code", "state_FIPS", "county_FIPS",
               "county_name_and_state_abbreviation", "Period",
               "labor_force", "employed", "unemployed", "unemployment_rate"],
        header=5,
        skipfooter=3,
    )

    # COUNTYFP = state FIPS followed by the zero-padded 3-digit county FIPS,
    # as an integer (e.g. state 1, county 1 -> 1001).
    raw_df["COUNTYFP"] = raw_df["state_FIPS"].astype(int) * 1000 + raw_df["county_FIPS"].astype(int)

    # .copy() so the column assignments below act on an independent frame
    # instead of a filtered slice (avoids SettingWithCopyWarning).
    unemployment_df = raw_df.loc[raw_df["COUNTYFP"] < _MAX_STATE_COUNTYFP].copy()

    # Split "<county name>, <2-letter state>" into its two parts. Names that do
    # not follow the pattern (e.g. "District of Columbia") are used unchanged
    # for both CTYNAME and state.
    area_names = unemployment_df["county_name_and_state_abbreviation"]
    name_parts = area_names.str.extract(r"^(?P<CTYNAME>.*),\s(?P<state>[A-Z]{2})$")
    unemployment_df["CTYNAME"] = name_parts["CTYNAME"].fillna(area_names)
    unemployment_df["state"] = name_parts["state"].fillna(area_names)

    # The most recent period carries a trailing " p" marker; strip it so that
    # every period parses as "%b-%y" (e.g. "Apr-20").
    period = unemployment_df["Period"].str.replace(r" p$", "", regex=True)
    unemployment_df["month"] = pd.to_datetime(period, format="%b-%y")

    return unemployment_df[["unemployment_rate", "COUNTYFP", "CTYNAME", "state", "month"]]


def _monthlyCovidPer100K():
    """
    Aggregate the daily COVID figures from getCasesRollingAveragePer100K()
    to one row per month (first-of-month timestamp) and COUNTYFP.
    """
    covid_df = getCasesRollingAveragePer100K()
    covid_df = covid_df.loc[covid_df["COUNTYFP"] < _MAX_STATE_COUNTYFP].copy()

    # Truncate each date to the first day of its month.
    covid_df["month"] = covid_df["date"].dt.to_period("M").dt.to_timestamp(how="start")

    # NOTE(review): the daily values are SUMMED per month and county, although
    # the original comment spoke of an average -- confirm which one is intended.
    return (
        covid_df.drop(columns=["date"])
        .groupby(["month", "COUNTYFP"], as_index=False)
        .sum()
    )


def getUnemploymentRate():
    """
    THIS FUNCTION reads the county-level monthly unemployment rate from the
    laucntycur14.xlsx workbook in DataFolder and left-joins onto it the monthly
    COVID figures and the 2020 election winner of each county.

    Functions called: getCasesRollingAveragePer100K(), getElectionData()

    Input: None
    Returns: Dataframe with the following set of columns.
             Note: Granularity = COUNTYFP and month.

        unemployment_rate
        COUNTYFP               (state FIPS followed by 3-digit county FIPS, as integer)
        CTYNAME                (county name as given in the workbook)
        state                  (2-letter abbreviation; "District of Columbia" for DC)
        month                  (first day of the month)
        <COVID columns>        (every remaining column of getCasesRollingAveragePer100K(),
                                summed per month and county)
        party_winner_2020

    Both joins are left joins on the unemployment rows, so counties/months with
    no COVID record, or counties whose COUNTYFP is absent from the election data,
    keep their row with NaN in the corresponding columns.
    """
    unemployment_df = _readCountyUnemployment()
    covid_df = _monthlyCovidPer100K()
    unemployment_covid_df = pd.merge(unemployment_df, covid_df, how="left", on=["month", "COUNTYFP"])

    election_df = getElectionData()[["COUNTYFP", "party_winner_2020"]]
    return pd.merge(unemployment_covid_df, election_df, how="left", on="COUNTYFP")

In [7]:
# Keep the complete merge result under its own name so the rows with missing
# values remain available, and derive the NaN-free frame from it without
# mutating anything in place (re-running this cell gives the same result).
unemployment_covid_raw_df = getUnemploymentRate()
unemployment_covid_df = unemployment_covid_raw_df.dropna()
unemployment_covid_df

Unnamed: 0,unemployment_rate,COUNTYFP,CTYNAME,state,month,cases_avg_per_100k,deaths_avg_per_100k,party_winner_2020
0,10.9,1001,Autauga County,AL,2020-04-01,64.41,7.56,REPUBLICAN
1,14.5,1003,Baldwin County,AL,2020-04-01,68.61,1.35,REPUBLICAN
2,9.3,1005,Barbour County,AL,2020-04-01,142.38,1.16,REPUBLICAN
3,17.1,1007,Bibb County,AL,2020-04-01,200.16,0.00,REPUBLICAN
4,9.1,1009,Blount County,AL,2020-04-01,51.42,0.00,REPUBLICAN
...,...,...,...,...,...,...,...,...
43969,6.7,56037,Sweetwater County,WY,2021-05-01,686.55,4.76,REPUBLICAN
43970,5.9,56039,Teton County,WY,2021-05-01,277.79,0.00,DEMOCRAT
43971,6.1,56041,Uinta County,WY,2021-05-01,498.66,4.97,REPUBLICAN
43972,4.7,56043,Washakie County,WY,2021-05-01,318.38,0.00,REPUBLICAN


In [9]:
# Percentage of missing values per column.
unemployment_covid_df.isna().mean() * 100

unemployment_rate      0.000000
COUNTYFP               0.000000
CTYNAME                0.000000
state                  0.000000
month                  0.000000
cases_avg_per_100k     1.701005
deaths_avg_per_100k    1.701005
party_winner_2020      0.891436
dtype: float64

In [10]:
# Rows with no 2020 election winner; reset_index() keeps the original row
# labels in an "index" column.
unemployment_covid_df.loc[lambda frame: frame["party_winner_2020"].isna()].reset_index()

Unnamed: 0,index,unemployment_rate,COUNTYFP,CTYNAME,state,month,cases_avg_per_100k,deaths_avg_per_100k,party_winner_2020
0,70,12.1,2050,Bethel Census Area,AK,2020-04-01,5.46,0.00,
1,71,13.3,2060,Bristol Bay Borough,AK,2020-04-01,,,
2,72,25.5,2068,Denali Borough,AK,2020-04-01,,,
3,73,9.8,2070,Dillingham Census Area,AK,2020-04-01,,,
4,74,10,2090,Fairbanks North Star Borough,AK,2020-04-01,58.89,2.10,
...,...,...,...,...,...,...,...,...,...
387,40926,6.5,2275,Wrangell Borough/city,AK,2021-05-01,142.75,0.00,
388,40927,7.7,2282,Yakutat Borough/city,AK,2021-05-01,,,
389,40928,12.3,2290,Yukon-Koyukuk Census Area,AK,2021-05-01,152.88,0.00,
390,41057,8.3,6077,San Joaquin County,CA,2021-05-01,206.45,9.81,


In [33]:
# Alaska rows with no 2020 election winner, restricted to the month of the
# frame's first row. .iloc[0] is positional, so the lookup keeps working when
# row label 0 is no longer present (e.g. after rows were dropped).
unemployment_covid_df[
    unemployment_covid_df["party_winner_2020"].isnull()
    & (unemployment_covid_df["state"] == "AK")
    & (unemployment_covid_df["month"] == unemployment_covid_df["month"].iloc[0])
].reset_index()

Unnamed: 0,index,unemployment_rate,COUNTYFP,CTYNAME,state,month,cases_avg_per_100k,deaths_avg_per_100k,party_winner_2020
0,40903,12.8,2050,Bethel Census Area,AK,2021-05-01,165.33,7.02,
1,40904,5.9,2060,Bristol Bay Borough,AK,2021-05-01,,,
2,40905,12.3,2068,Denali Borough,AK,2021-05-01,463.23,0.0,
3,40906,8.9,2070,Dillingham Census Area,AK,2021-05-01,26.19,0.0,
4,40907,5.1,2090,Fairbanks North Star Borough,AK,2021-05-01,580.02,6.53,
5,40908,10.8,2100,Haines Borough,AK,2021-05-01,45.18,0.0,
6,40909,10.7,2105,Hoonah-Angoon Census Area,AK,2021-05-01,,,
7,40910,5.1,2110,Juneau Borough/city,AK,2021-05-01,217.5,0.0,
8,40911,7.7,2122,Kenai Peninsula Borough,AK,2021-05-01,222.67,0.0,
9,40912,7.7,2130,Ketchikan Gateway Borough,AK,2021-05-01,1563.08,0.0,


In [4]:
# Load the election results and look up a single county by its COUNTYFP code.
election_df = getElectionData()
election_df.loc[election_df["COUNTYFP"].eq(51800)]

Unnamed: 0,state,state_po,CTYNAME,COUNTYFP,party_winner_2020,totalvotes_2020,fractionalvotes_2020,party_winner_2016,totalvotes_2016,fractionalvotes_2016
2950,VIRGINIA,VA,SUFFOLK CITY,51800,DEMOCRAT,49642.0,0.577656,DEMOCRAT,43240.0,0.53839


In [5]:
# Display the election results frame.
election_df

Unnamed: 0,state,state_po,CTYNAME,COUNTYFP,party_winner_2020,totalvotes_2020,fractionalvotes_2020,party_winner_2016,totalvotes_2016,fractionalvotes_2016
0,ALABAMA,AL,AUTAUGA,1001,REPUBLICAN,27770.0,0.714368,REPUBLICAN,24973.0,0.727666
1,ALABAMA,AL,BALDWIN,1003,REPUBLICAN,109679.0,0.761714,REPUBLICAN,95215.0,0.765457
2,ALABAMA,AL,BARBOUR,1005,REPUBLICAN,10518.0,0.534512,REPUBLICAN,10469.0,0.520967
3,ALABAMA,AL,BIBB,1007,REPUBLICAN,9595.0,0.784263,REPUBLICAN,8819.0,0.764032
4,ALABAMA,AL,BLOUNT,1009,REPUBLICAN,27588.0,0.895716,REPUBLICAN,25588.0,0.893348
...,...,...,...,...,...,...,...,...,...,...
3148,WYOMING,WY,SWEETWATER,56037,REPUBLICAN,16698.0,0.732363,REPUBLICAN,17130.0,0.709515
3149,WYOMING,WY,TETON,56039,DEMOCRAT,14787.0,0.665990,DEMOCRAT,12627.0,0.579235
3150,WYOMING,WY,UINTA,56041,REPUBLICAN,9459.0,0.792473,REPUBLICAN,8470.0,0.726564
3151,WYOMING,WY,WASHAKIE,56043,REPUBLICAN,4032.0,0.804812,REPUBLICAN,3814.0,0.763241


In [25]:
# Election rows for Alaska (state_po == "AK").
election_df.loc[election_df["state_po"].eq("AK")]

Unnamed: 0,state,state_po,CTYNAME,COUNTYFP,party_winner_2020,totalvotes_2020,fractionalvotes_2020,party_winner_2016,totalvotes_2016,fractionalvotes_2016
67,ALASKA,AK,DISTRICT 1,2001,REPUBLICAN,7314.0,0.480038,REPUBLICAN,6638.0,0.47906
68,ALASKA,AK,DISTRICT 10,2010,REPUBLICAN,11205.0,0.721196,REPUBLICAN,9040.0,0.691925
69,ALASKA,AK,DISTRICT 11,2011,REPUBLICAN,10628.0,0.66767,REPUBLICAN,9689.0,0.665084
70,ALASKA,AK,DISTRICT 12,2012,REPUBLICAN,11238.0,0.702349,REPUBLICAN,9543.0,0.694645
71,ALASKA,AK,DISTRICT 13,2013,REPUBLICAN,7713.0,0.603138,REPUBLICAN,6533.0,0.616562
72,ALASKA,AK,DISTRICT 14,2014,REPUBLICAN,11443.0,0.586734,REPUBLICAN,10420.0,0.573704
73,ALASKA,AK,DISTRICT 15,2015,REPUBLICAN,5559.0,0.480482,REPUBLICAN,4982.0,0.506825
74,ALASKA,AK,DISTRICT 16,2016,DEMOCRAT,8118.0,0.526484,DEMOCRAT,7436.0,0.44298
75,ALASKA,AK,DISTRICT 17,2017,DEMOCRAT,7239.0,0.57135,DEMOCRAT,6788.0,0.484679
76,ALASKA,AK,DISTRICT 18,2018,DEMOCRAT,7720.0,0.606347,DEMOCRAT,7402.0,0.528101
