In [26]:
import pandas as pd
import numpy as np
import altair as alt
from vega_datasets import data
from pathlib import Path
import re

from ETLForElectionAndVaccinationData import *

# Enable Altair's 'json' data transformer: it uses intermediate json files
# to speed things up.
alt.data_transformers.enable('json')

import plotly.graph_objs as go
import plotly.express as px

# Theme settings
import plotly.io as plt_io

## Color global variables
# Hex color per segment; the names mirror the labels in color_segment_dict.
TO_OTHER = "#556B2F"
TO_DEMOCRAT = "#11A3D6"
TO_REPUBLICAN = "#8C1616"
STAYED_DEMOCRAT = "#0015BC"
STAYED_REPUBLICAN = "#FF0000"
STAYED_OTHER = "#B4D3B2"

# Segment key -> hex color (insertion order kept as originally declared).
segment_color_dict = dict(
    TO_OTHER=TO_OTHER,
    TO_DEMOCRAT=TO_DEMOCRAT,
    TO_REPUBLICAN=TO_REPUBLICAN,
    STAYED_DEMOCRAT=STAYED_DEMOCRAT,
    STAYED_REPUBLICAN=STAYED_REPUBLICAN,
    STAYED_OTHER=STAYED_OTHER,
)

# Hex color -> human-readable segment label (reverse lookup).
color_segment_dict = dict([
    (TO_OTHER, "To other"),
    (TO_DEMOCRAT, "To Democrat"),
    (TO_REPUBLICAN, "To Republican"),
    (STAYED_DEMOCRAT, "Stayed Democrat"),
    (STAYED_REPUBLICAN, "Stayed Republican"),
    (STAYED_OTHER, "Stayed Other"),
])

# Folder holding the input data files, relative to the notebook's location.
DataFolder = Path("../DataForPresidentialElectionsAndCovid/")


In [51]:
def getUnemploymentRate():
    """
        THIS FUNCTION reads the county level unemployment rate from the 2020 dataset published by the BLS.

        Functions called: None

        Input: None (reads DataFolder / "unemployment/laucnty20.xlsx")
        Returns: Dataframe unemployment_df with the following set of columns.
                 Note: Granularity = one row per county.

            state_FIPS             (state FIPS number, as read from the sheet)
            county_FIPS            (county FIPS number, as read from the sheet)
            unemployment_rate
            CTYNAME                (full county name)
            state                  (2-letter abbreviation; the "District of Columbia"
                                    row keeps "District of Columbia" in both
                                    CTYNAME and state)

        Raises: ValueError if an area name is neither "<county>, <XX>" nor
                "District of Columbia".
    """
    column_names = ["LAUS_code", "state_FIPS", "county_FIPS",
                    "county_name_and_state_abbreviation", "year", "empty_col",
                    "labor_force", "employed", "unemployed", "unemployment_rate"]
    unused_columns = ["LAUS_code", "county_name_and_state_abbreviation", "year",
                      "empty_col", "labor_force", "employed", "unemployed"]
    dc_name = "District of Columbia"

    # The Puerto Rico rows all have NaN values and must not end up in the result.
    # NOTE(review): nrows=3141 appears to be what stops the read before those
    # rows -- confirm the row count if the source file is ever replaced.
    unemployment_df = pd.read_excel(DataFolder / r"unemployment/laucnty20.xlsx",
                                    names=column_names,
                                    header=5,
                                    nrows=3141)

    # Split "<county name>, <XX>" into its two parts in one vectorized pass;
    # names that do not fit the pattern come back as NaN in both columns.
    area_names = unemployment_df["county_name_and_state_abbreviation"]
    name_parts = area_names.str.extract(r"^(?P<CTYNAME>.*),\s(?P<state>[A-Z]{2})$")

    # "District of Columbia" carries no ", XX" suffix, so it is used verbatim
    # for both the county name and the state.
    is_dc = area_names == dc_name
    name_parts.loc[is_dc, ["CTYNAME", "state"]] = dc_name

    # Fail loudly (and readably) instead of letting unparsed names through.
    unparsed = area_names[name_parts["CTYNAME"].isna()]
    if not unparsed.empty:
        raise ValueError(
            "Could not split county/state for %d row(s); first values: %r"
            % (len(unparsed), unparsed.unique()[:5].tolist())
        )

    # Drop the helper columns first so CTYNAME and state are appended last.
    return unemployment_df.drop(columns=unused_columns).assign(
        CTYNAME=name_parts["CTYNAME"],
        state=name_parts["state"],
    )

In [52]:
# Preview the county-level unemployment table returned by the loader.
getUnemploymentRate()

Unnamed: 0,state_FIPS,county_FIPS,unemployment_rate,CTYNAME,state
0,1,1,4.9,Autauga County,AL
1,1,3,5.6,Baldwin County,AL
2,1,5,7.0,Barbour County,AL
3,1,7,6.6,Bibb County,AL
4,1,9,4.1,Blount County,AL
...,...,...,...,...,...
3136,56,37,7.4,Sweetwater County,WY
3137,56,39,6.0,Teton County,WY
3138,56,41,6.3,Uinta County,WY
3139,56,43,5.3,Washakie County,WY


In [53]:
# Pattern for area names of the form "<county name>, <XX>", where XX is a
# two-letter upper-case state abbreviation.
extract_names_regex = r"^(?P<CTYNAME>.*),\s(?P<state>[A-Z]{2})$"


def extract_county_names(x):
    """Return the county part of an area name such as "Autauga County, AL".

    "District of Columbia" has no ", XX" suffix and is returned unchanged,
    matching the handling inside getUnemploymentRate.

    Raises:
        ValueError: if x is any other string that does not match
            extract_names_regex.
    """
    if x == "District of Columbia":
        return "District of Columbia"
    match = re.search(extract_names_regex, x)
    if match is None:
        # Explicit error instead of "'NoneType' object has no attribute 'group'".
        raise ValueError("Area name %r is not of the form '<county>, <XX>'" % (x,))
    return match.group("CTYNAME")

In [54]:
# Quick check: the text before ", AA" should come back as the county name.
extract_county_names("fdsafsda, AA")

'fdsafsda'