## Covid data
    Source :  https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv
              Estimates of mask use
              https://github.com/nytimes/covid-19-data/tree/master/mask-use
              
              
              Note about the data
                      The data in these files is a different version of the data in our main U.S. cases and deaths files.
                      Instead of cumulative totals, each file contains the daily number of new cases and deaths, the seven-day 
                      rolling average and the seven-day rolling average per 100,000 residents.
                      
                      Confirmed Cases
            Confirmed cases are counts of individuals whose coronavirus infections were confirmed by a laboratory test and reported by a federal, state, territorial or local government agency. Only tests that detect viral RNA in a sample are considered confirmatory. These are often called molecular or RT-PCR tests.
              
              
              Another dataset : https://dataverse.harvard.edu/file.xhtml?fileId=4593425&version=54.1
              
              
              Since the first reported coronavirus case in Washington State on Jan. 21, 2020, The Times has tracked cases of coronavirus in real time as they were identified after testing. Because of the widespread shortage of testing, however, the data is necessarily limited in the picture it presents of the outbreak.
              
              
            This data is for cumulative cases
            
            # We can join the latest NYTimes data with 2020 population data from the census
            df = pd.read_csv('https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv')
            
            
To create a stroke-dash line:
https://github.com/altair-viz/altair/issues/2116

In [1]:
import pandas as pd
import numpy as np


## Color global variables
# One hex color per county segment: "TO_*" means the 2020 winner differs from
# the 2016 winner, "STAYED_*" means the winner was the same in both years.
TO_OTHER = "#556B2F"
TO_DEMOCRAT = "#11A3D6"
TO_REPUBLICAN = "#8C1616"
STAYED_DEMOCRAT = "#0015BC"
STAYED_REPUBLICAN = "#FF0000"
STAYED_OTHER = "#B4D3B2"


# Segment key -> hex color.
segment_color_dict = dict(
    TO_OTHER=TO_OTHER,
    TO_DEMOCRAT=TO_DEMOCRAT,
    TO_REPUBLICAN=TO_REPUBLICAN,
    STAYED_DEMOCRAT=STAYED_DEMOCRAT,
    STAYED_REPUBLICAN=STAYED_REPUBLICAN,
    STAYED_OTHER=STAYED_OTHER,
)


# Hex color -> display label for that segment.
color_segment_dict = dict(
    [
        (TO_OTHER, "To other"),
        (TO_DEMOCRAT, "To Democrat"),
        (TO_REPUBLICAN, "To Republican"),
        (STAYED_DEMOCRAT, "Stayed Democrat"),
        (STAYED_REPUBLICAN, "Stayed Republican"),
        (STAYED_OTHER, "Stayed Other"),
    ]
)

In [2]:
def getCasesRollingAveragePer100K(
    csv_source=r"../DataForPresidentialElectionsAndCovid/Dataset 7 Covid/june 26 _rolling_average_us-counties.csv",
):
    """
        This function returns a dataframe with rolling average of cases

        Parameters
        ----------
        csv_source : anything ``pd.read_csv`` accepts (path, URL or file-like object).
            Defaults to the local June 26 snapshot. To get the latest data, pass
            "https://raw.githubusercontent.com/nytimes/covid-19-data/master/rolling-averages/us-counties.csv".
            The file must contain the columns
            date, geoid, county, state, cases_avg_per_100k, deaths_avg_per_100k.

        Returns
        -------
        DataFrame sorted by state, county and date (the original row labels
        are kept, the index is not reset).

        Columns : date cases_avg_per_100k deaths_avg_per_100k  COUNTYFP

    """
    case_rolling_df = pd.read_csv(csv_source)
    case_rolling_df['date'] = pd.to_datetime(case_rolling_df['date'])
    case_rolling_df = case_rolling_df.sort_values(by=['state', 'county', 'date'])

    case_rolling_df = case_rolling_df[
        ['date', 'geoid', 'cases_avg_per_100k', 'deaths_avg_per_100k']
    ].copy()

    # geoid carries a 4-character prefix (presumably "USA-") in front of the
    # numeric FIPS code; strip it and store the code as an integer so it can
    # be joined against the integer COUNTYFP of the election data.
    case_rolling_df['COUNTYFP'] = case_rolling_df['geoid'].str.slice(4).astype(int)

    return case_rolling_df.drop(columns=['geoid'])


In [3]:
def _county_winners_for_year(election_df, year):
    """Return the winning-party rows of ``year`` with year-suffixed column names."""
    is_winner = election_df['fractionalvotes'] == election_df['maxfractionalvotes']
    winners_df = election_df[is_winner & (election_df['year'] == year)].copy()
    winners_df = winners_df.rename(columns={
        'totalvotes': f'totalvotes_{year}',
        'fractionalvotes': f'fractionalvotes_{year}',
        'party': f'party_winner_{year}',
    })
    return winners_df.drop(columns=['year', 'maxfractionalvotes', 'candidatevotes'])


def getElectionData(csv_source=r"../DataForPresidentialElectionsAndCovid/countypres_2000-2020.csv"):
    """
        This function return a dataframe  with this set of columns:

        state   state_po
        CTYNAME COUNTYFP
        party_winner_2020 totalvotes_2020 fractionalvotes_2020
        party_winner_2016 totalvotes_2016 fractionalvotes_2016

        Parameters
        ----------
        csv_source : anything ``pd.read_csv`` accepts (path, URL or file-like object).
            Defaults to the local county-level presidential results file.

        Granularity = COUNTYFP. Only counties present in both 2016 and 2020
        (same state, state_po, CTYNAME and COUNTYFP) are returned. If several
        parties tie for the highest vote share in a county, each tied party
        yields its own row.
    """
    election_df = pd.read_csv(csv_source)
    election_df = election_df[election_df['year'] >= 2016].copy()

    election_df = election_df.rename(columns={'county_fips': 'COUNTYFP', 'county_name': 'CTYNAME'})
    election_df.loc[election_df['CTYNAME'] == 'DISTRICT OF COLUMBIA', 'COUNTYFP'] = 11001.0

    election_df = election_df.drop(columns=['office', 'mode', 'version', 'candidate'])
    # Drop rows that are precincts and do not have a county FIPS code
    election_df = election_df.dropna(subset=['COUNTYFP'])

    # Collapse every party other than the two major ones into "OTHER"
    is_major_party = election_df['party'].isin(['DEMOCRAT', 'REPUBLICAN'])
    election_df['party'] = np.where(is_major_party, election_df['party'], "OTHER")
    election_df['COUNTYFP'] = election_df['COUNTYFP'].astype(int)

    # Sum the votes per county and party (candidate and mode were dropped
    # above, so a party can span several rows). String aggregation names are
    # used because passing the builtins sum/max is deprecated in recent pandas.
    county_party_keys = ['year', 'state', 'state_po', 'CTYNAME', 'COUNTYFP', 'party', 'totalvotes']
    election_df = (
        election_df
        .groupby(county_party_keys)
        .agg(candidatevotes=('candidatevotes', 'sum'))
        .reset_index()
    )

    election_df['fractionalvotes'] = election_df['candidatevotes'] / election_df['totalvotes']

    # get the party that won in each county
    county_keys = ['year', 'state', 'state_po', 'CTYNAME', 'COUNTYFP', 'totalvotes']
    election_df['maxfractionalvotes'] = (
        election_df.groupby(county_keys)['fractionalvotes'].transform('max')
    )

    election_2020_winners_df = _county_winners_for_year(election_df, 2020)
    election_2016_winners_df = _county_winners_for_year(election_df, 2016)

    election_winners_df = election_2020_winners_df.merge(
        election_2016_winners_df,
        how='inner',
        on=['state', 'state_po', 'CTYNAME', 'COUNTYFP'],
    )

    return election_winners_df

In [4]:
def getElectionSegmentsData(segment_color_dict = segment_color_dict):
    """
        This function return a dataframe  with this set of columns:

        state   state_po
        CTYNAME COUNTYFP
        party_winner_2020 totalvotes_2020 fractionalvotes_2020
        party_winner_2016 totalvotes_2016 fractionalvotes_2016
        changecolor

        Parameters
        ----------
        segment_color_dict : mapping with the keys TO_OTHER, TO_DEMOCRAT,
            TO_REPUBLICAN, STAYED_DEMOCRAT, STAYED_REPUBLICAN and STAYED_OTHER;
            each value is the color stored in ``changecolor`` for that segment.

        Granularity = COUNTYFP
        Change color shows Segments = TO_OTHER TO_DEMOCRAT TO_REPUBLICAN STAYED_DEMOCRAT STAYED_REPUBLICAN STAYED_OTHER
    """

    election_winners_df = getElectionData()

    winner_2020 = election_winners_df['party_winner_2020']
    stayed = winner_2020 == election_winners_df['party_winner_2016']
    is_republican = winner_2020 == 'REPUBLICAN'
    is_democrat = winner_2020 == 'DEMOCRAT'

    # Color when the 2020 winner is the same party as the 2016 winner
    stayed_color = np.where(
        is_republican,
        segment_color_dict['STAYED_REPUBLICAN'],
        np.where(is_democrat, segment_color_dict['STAYED_DEMOCRAT'], segment_color_dict['STAYED_OTHER']),
    )
    # Color when the winner changed, keyed by the party the county moved to
    changed_color = np.where(
        is_republican,
        segment_color_dict['TO_REPUBLICAN'],
        np.where(is_democrat, segment_color_dict['TO_DEMOCRAT'], segment_color_dict['TO_OTHER']),
    )

    # Assign the plain array so values are matched to rows by position; a
    # pd.Series wrapper would be re-aligned on a fresh 0..n-1 index and give
    # wrong or missing colors whenever the frame's index is not the default.
    election_winners_df['changecolor'] = np.where(stayed, stayed_color, changed_color)

    return election_winners_df

In [5]:
def getRollingCaseAverageSegmentLevel():
    """
        Average the per-county rolling case rates within each election segment.

        Only rows dated before 2021-01-01 are used. Counties are matched to
        their segment color (changecolor) on COUNTYFP, then cases_avg_per_100k
        is averaged per date and segment.

        Columns : date changecolor cases_avg_per_100k segmentname

    """
    cutoff_date = pd.to_datetime('2021-01-01')

    all_cases_df = getCasesRollingAveragePer100K()
    cases_before_cutoff_df = all_cases_df[all_cases_df['date'] < cutoff_date].copy()

    segment_lookup_df = getElectionSegmentsData()[['state', 'COUNTYFP', 'changecolor']]
    cases_with_segment_df = cases_before_cutoff_df.merge(segment_lookup_df, how='inner', on='COUNTYFP')

    segment_daily_df = (
        cases_with_segment_df
        .groupby(['date', 'changecolor'])
        .agg(cases_avg_per_100k=('cases_avg_per_100k', 'mean'))
        .reset_index()
    )
    # Translate the color back into the human-readable segment label
    segment_daily_df['segmentname'] = segment_daily_df['changecolor'].map(color_segment_dict)
    return segment_daily_df

In [6]:
## To find out if the message by leadership had any effect on the people who changed affiliation, we also plot 
## by county
def getRollingCaseAverageCountyLevel():
    """
        Rolling case rates per county, restricted to counties whose winning
        party changed to Republican or to Democrat.

        Cases are matched to the election segments on COUNTYFP and
        cases_avg_per_100k is averaged per date and county; changecolor is
        the first segment color found for that date and county.

    """
    cases_df = getCasesRollingAveragePer100K()
    segments_df = getElectionSegmentsData()

    joined_df = cases_df.merge(
        segments_df[['state', 'COUNTYFP', 'changecolor']],
        how='inner',
        on='COUNTYFP',
    )

    # Keep only the counties that moved to one of the two major parties
    changed_colors = [TO_REPUBLICAN, TO_DEMOCRAT]
    changed_df = joined_df[joined_df['changecolor'].isin(changed_colors)].copy()

    per_county = changed_df.groupby(['date', 'COUNTYFP'])
    county_daily_df = per_county.agg(
        cases_avg_per_100k=('cases_avg_per_100k', 'mean'),
        changecolor=('changecolor', 'first'),
    )
    return county_daily_df.reset_index()

In [7]:
def getStateLevelElectionData2020(
    csv_source=r'../DataForPresidentialElectionsAndCovid/Dataset 1 Population numbers from Dataverse/1976-2020-president.csv',
):
    """
        This function returns 2020 election winning party per state

        Parameters
        ----------
        csv_source : anything ``pd.read_csv`` accepts (path, URL or file-like object).
            Defaults to the local state-level presidential results file.

        Returns
        -------
        DataFrame with the rows of 2020 whose vote share is the highest in
        their state (ties keep every tied row).

        Columns : state state_po state_fips candidatevotes totalvotes
                  party_simplified fractionalvotes
    """
    state_election_df = pd.read_csv(csv_source)
    state_election_df = state_election_df[state_election_df['year'] == 2020].copy()
    state_election_df = state_election_df.drop(
        columns=['state_cen', 'state_ic', 'office', 'candidate', 'writein', 'version', 'notes', 'party_detailed']
    )

    state_election_df['fractionalvotes'] = state_election_df['candidatevotes'] / state_election_df['totalvotes']

    # get the highest vote share in each state; the string 'max' is used
    # because passing the builtin max to transform is deprecated in recent pandas
    state_keys = ['year', 'state', 'state_po', 'state_fips', 'totalvotes']
    state_election_df['maxfractionalvotes'] = (
        state_election_df.groupby(state_keys)['fractionalvotes'].transform('max')
    )
    is_winner = state_election_df['fractionalvotes'] == state_election_df['maxfractionalvotes']
    state_election_df = state_election_df[is_winner].copy()

    return state_election_df.drop(columns=['maxfractionalvotes', 'year'])
    

In [8]:
import altair as alt
from vega_datasets import data

# Switch Altair to its 'json' data transformer, which uses intermediate json
# files to speed things up
alt.data_transformers.enable('json')

def getBaseChart(case_rolling_df):
    """
      Build the shared encoding for the rolling-average charts: date on the X axis
      (fixed to the calendar year 2020) and cases_avg_per_100k on the Y axis.
      No mark is set here; the caller picks it (line, point, text, ...).
    """
    date_domain = ["2020-01-01", "2020-12-31"]

    x_encoding = alt.X(
        "date:T",
        timeUnit='yearmonthdate',
        scale=alt.Scale(domain=date_domain),
        axis=alt.Axis(title=None, labelAngle=0),
    )
    y_encoding = alt.Y(
        "cases_avg_per_100k:Q",
        axis=alt.Axis(title="Cases (rolling mean per 100K)"),
    )

    encoded_chart = alt.Chart(case_rolling_df).encode(x=x_encoding, y=y_encoding)
    return encoded_chart.properties(width=600, height=400)

def getSelection():
    """
      Create the segment selection and the color encoding that depends on it.
      The selection is a multi-selection over segmentname and changecolor, named "Segment".
      Selected segments are drawn in their own changecolor, everything else in light grey.
      Returns the selection first and the conditional color second.
    """
    segment_selection = alt.selection_multi(
        fields=["segmentname", 'changecolor'],
        name="Segment",
    )

    # scale=None makes the chart use the hex value stored in changecolor directly
    color_when_selected = alt.Color("changecolor:N", scale=None, legend=None)
    color_when_unselected = alt.value("lightgrey")
    conditional_color = alt.condition(
        segment_selection, color_when_selected, color_when_unselected
    )

    return segment_selection, conditional_color
    
def createChart(case_rolling_df):
    """
      Build the line chart of rolling cases per segment and its clickable legend.
      The line chart starts from the "base" encoding and is drawn with mark_line; its color
      follows the segment selection, and the "To Democrat" / "To Republican" segments are dashed.
      The legend is a column of rectangles, one per segmentname, bound to the same selection.
      Returns base, the legend chart, the line chart and the selection, in that order.
    """

    radio_select, change_color_condition = getSelection()

    legend_y = alt.Y(
        "segmentname:N",
        axis=alt.Axis(title="Pick affiliation", titleFontSize=15),
    )
    make_selector = (
        alt.Chart(case_rolling_df)
        .mark_rect()
        .encode(y=legend_y, color=change_color_condition)
        .add_selection(radio_select)
    )

    base = getBaseChart(case_rolling_df)

    changed_party = (alt.datum.segmentname == "To Democrat") | (alt.datum.segmentname == "To Republican")
    dash_style = alt.condition(
        changed_party,
        alt.value([3, 5]),  # dashed line: 3 pixels dash + 5 pixels space
        alt.value([0]),  # solid line
    )

    segment_lines = base.mark_line(strokeWidth=2).add_selection(radio_select)
    segment_lines = segment_lines.encode(color=change_color_condition, strokeDash=dash_style)
    highlight_segment = segment_lines.properties(title="Rolling Average Cases Per 100K")

    return base, make_selector, highlight_segment, radio_select

def createTooltip(base, radio_select, source=None):
    """
      Build the hover layers that sit on top of the line chart.

      Parameters
        base         : the encoded chart returned by getBaseChart / createChart
        radio_select : the segment selection; points and labels are filtered by it
        source       : data for the hover layers. Defaults to the data of `base`, so the
                       function no longer depends on a module-level dataframe.

      Returns four charts, in this order
        selectors    : invisible points that capture the date nearest to the mouse
        rules        : a dashed vertical rule at that date
        points       : points on the lines, visible only at that date
        tooltip_text : the cases_avg_per_100k value printed next to the point
    """
    if source is None:
        source = base.data

    # Create a selection that chooses the nearest point & selects based on x-value
    nearest = alt.selection(
        type="single", nearest=True, on="mouseover", fields=["date"], empty="none"
    )

    # Transparent selectors across the chart. This is what tells us
    # the x-value of the cursor
    selectors = (
        alt.Chart(source)
        .mark_point()
        .encode(
            x="date:T",
            opacity=alt.value(0),
        )
        .add_selection(nearest)
    )

    # Draw points on the line, and highlight based on selection
    points = base.mark_point(size=5, dy=-10).encode(
        opacity=alt.condition(nearest, alt.value(1), alt.value(0))
    ).transform_filter(radio_select)

    # Draw text labels near the points, and highlight based on selection
    tooltip_text = base.mark_text(
        align="left",
        dx=-60,
        dy=-15,
        fontSize=15,
        lineBreak="\n",
    ).encode(
        text=alt.condition(
            nearest,
            alt.Text("cases_avg_per_100k:Q", format=".2f"),
            alt.value(" "),
        ),
    ).transform_filter(radio_select)

    # Draw a rule at the location of the selection
    rules = (
        alt.Chart(source)
        .mark_rule(color="darkgrey", strokeWidth=2, strokeDash=[5, 4])
        .encode(
            x="date:T",
        )
        .transform_filter(nearest)
    )
    return selectors, rules, points, tooltip_text





# Segment-level rolling averages: one row per date and segment color
case_rolling_df = getRollingCaseAverageSegmentLevel()
# NOTE(review): segment_list is not referenced again in the visible code
segment_list = case_rolling_df.changecolor.unique()

# Line chart, clickable legend and the selection that links them
base, make_selector, highlight_segment, radio_select  = createChart(case_rolling_df)

# Hover layers: invisible selectors, vertical rule, points and value labels
selectors, rules, points, tooltip_text  = createTooltip(base, radio_select)



### Bring all the layers together with layering and concatenation
### (layered line chart on the left, legend on the right)
( alt.layer(
    highlight_segment, selectors, points,rules, tooltip_text ) | make_selector 
)
#.configure(background='#EFF2F4')



## TBD: to confirm
An Associated Press analysis reveals that in 376 counties with the highest number of new cases per capita, the overwhelming majority—93% of those counties—went for Trump, a rate above other less severely hit areas.

https://fortune.com/2020/11/06/trump-voters-covid-cases-red-counties-2020-election-results/

Many Places Hard Hit By COVID-19 Leaned More Toward Trump In 2020 Than 2016
Support for President Trump increased in 2020 in many of the U.S. counties that lost lives at the highest rate to COVID-19, according to an NPR analysis.

Of the 100 counties with the highest COVID-19 death rates per capita, 68 had a higher proportion of votes cast for Trump this cycle than they did in 2016. This includes both Republican-leaning counties and counties that supported Joe Biden.
https://www.npr.org/sections/health-shots/2020/11/06/930897912/many-places-hard-hit-by-covid-19-leaned-more-toward-trump-in-2020-than-2016


In [9]:
# Load the per-county rolling averages (cases and deaths per 100K, one row per county and date)
cases_rolling_df= getCasesRollingAveragePer100K()


In [10]:
#cases_rolling_df = cases_rolling_df[(cases_rolling_df['date']>=pd.to_datetime('2020-01-01')) & (cases_rolling_df['date']<=pd.to_datetime('2020-12-31'))]
# Average deaths_avg_per_100k per county over every date in the data
# (the 2020-only date filter above is commented out); counties with no value get 0
cases_rolling_df = cases_rolling_df.groupby('COUNTYFP')['deaths_avg_per_100k'].mean().fillna(0).reset_index()
# Select the 400 counties with the highest average death rate
# NOTE(review): the variable name says "top_100" but the slice keeps 400 rows - confirm which is intended
cases_rolling_df = cases_rolling_df.sort_values( ['deaths_avg_per_100k'], ascending= False)
deaths_top_100_rolling_df = cases_rolling_df[:400].copy()

election_df = getElectionSegmentsData()
# Left merge with an indicator, then keep only 'both': counties without election data are dropped
merged_df = deaths_top_100_rolling_df.merge(election_df, how='left', on= 'COUNTYFP', indicator=True)
merged_df = merged_df[merged_df['_merge'] == 'both'].copy()
# Percentage-point difference between the 2020 winner's vote share and the 2016 winner's
# vote share (the two winners can be different parties)
merged_df['pct_increase'] = merged_df['fractionalvotes_2020'] -  merged_df['fractionalvotes_2016']
merged_df['pct_increase'] = merged_df['pct_increase'] * 100

# Human-readable segment label for the dropdown filter of the chart
merged_df['segmentname'] =  merged_df['changecolor'].map(color_segment_dict)


In [11]:
def createPercentPOintChangeAvgDeathsChart():
    """
        Scatter plot of average deaths per 100K against the percentage-point change in
        the winner's vote share, one circle per county of the module-level merged_df.

        A dropdown bound to segmentname highlights one segment in its changecolor and
        greys out the rest. Two dashed rules (x = 0 and y = 2) and a text note are
        layered on top of the scatter plot.
    """
    segment_options = merged_df['segmentname'].unique().tolist()
    input_dropdown = alt.binding_select(options=segment_options, name='Affiliation: ')
    selection = alt.selection_single(fields=['segmentname'], bind=input_dropdown, name='Affiliation: ')

    # Selected segment keeps its own color, everything else is light gray
    circle_color = alt.condition(
        selection,
        alt.Color('changecolor:N', scale=None, legend=None),
        alt.value('lightgray'),
    )
    hover_fields = [
        alt.Tooltip("CTYNAME:N", title="County Name:"),
        alt.Tooltip("state:N", title="State Name:"),
        alt.Tooltip("pct_increase:N", title="Percent Point Increase:", format=".2f"),
    ]

    scatter = alt.Chart(
        merged_df,
        title="Covid deaths in 2020 versus Percentage point difference in votes",
    ).mark_circle()
    scatter = scatter.encode(
        x=alt.X("pct_increase:Q", title="Percent point change"),
        y=alt.Y("deaths_avg_per_100k:Q", title="Average deaths per 100K"),
        color=circle_color,
        tooltip=hover_fields,
    )
    perc_point_deaths_chart = scatter.properties(height=300, width=800).add_selection(selection)

    # Dashed reference rules: vertical at zero change, horizontal at 2 deaths per 100K
    vertical_rule = alt.Chart(pd.DataFrame({'x': [0]})).mark_rule(strokeDash=[2, 5]).encode(x='x')
    horizontal_rule = alt.Chart(pd.DataFrame({'y': [2]})).mark_rule(strokeDash=[2, 5]).encode(y='y')

    # Single text note placed just above the horizontal rule
    note_rows = [[8, 2.3, "Counties above this line\nhad the highest COVID-19 death rates"]]
    note_df = pd.DataFrame(note_rows, columns=['x', 'y', 'note'])
    more_deaths_text = alt.Chart(note_df).mark_text(
        align='left',
        baseline='middle',
        fontSize=10,
        dx=7,
    ).encode(
        x="x:Q",
        y="y:Q",
        text=alt.Text('note:N'),
    )

    layered = perc_point_deaths_chart + vertical_rule + horizontal_rule + more_deaths_text
    return layered
#.configure(background='#EFF2F4')
createPercentPOintChangeAvgDeathsChart()

### Covid vaccination data
    Source : https://covid.cdc.gov/covid-data-tracker/#vaccinations
    Looking into Covid vaccination percentages by party affiliation, as measured by the 2020 Presidential election
    Data as of: June 26, 2021
    Source:  #State level election results
            https://dataverse.harvard.edu/file.xhtml?fileId=4299753&version=6.0
    

In [12]:
def createStateVaccinationData():
    """
        Build one frame per state with vaccination numbers and the 2020 election winner.

        Steps
          1. read the CDC vaccination file and keep the "at least one dose" count and percent
          2. derive 'Total population' as count * 100 / percent
          3. match state names against the census population file to get the state code (STATEFP)
          4. join the 2020 state-level election winners on the state code
          5. turn 'Percent with one dose' into a fraction (0-1) for charting
    """
    entity_col = 'State/Territory/Federal Entity'
    dose_count_col = 'People with at least One Dose by State of Residence'
    dose_percent_col = 'Percent of Total Pop with at least One Dose by State of Residence'

    raw_vaccination_df = pd.read_csv(
        r'../DataForPresidentialElectionsAndCovid/Dataset 7 Covid/covid19_vaccinations_in_the_united_states.csv',
        skiprows=2,
    )
    # At least one dose is used since taking that one dose shows openness to taking the vaccine
    vaccination_df = raw_vaccination_df[[entity_col, dose_count_col, dose_percent_col]].copy()
    # Total population implied by the data: people with one dose divided by their share
    vaccination_df['Total population'] = (vaccination_df[dose_count_col] * 100) / vaccination_df[dose_percent_col]

    # Read the census population estimates CSV from local disk
    population_df = pd.read_csv(
        r"../DataForPresidentialElectionsAndCovid/Dataset 3 Population Estimate through 2020/County Data Till 2020 co-est2020-alldata.csv",
        encoding='latin-1',
    )
    # Rows whose SUMLEV is not 50 (presumably the state-level summary rows - verify against the file)
    not_county_level = population_df['SUMLEV'] != 50
    state_pop_df = population_df[not_county_level].copy()[['STATE', 'STNAME', 'POPESTIMATE2020']]

    with_state_code_df = vaccination_df.merge(
        state_pop_df, how="inner", left_on=entity_col, right_on='STNAME'
    )
    kept_columns = ['STATE', 'STNAME', dose_count_col, dose_percent_col, 'Total population']
    with_state_code_df = with_state_code_df[kept_columns].copy()
    with_state_code_df = with_state_code_df.rename(
        columns={'STATE': 'STATEFP', dose_percent_col: 'Percent with one dose'}
    )

    state_election_df = getStateLevelElectionData2020()
    result_df = with_state_code_df.merge(
        state_election_df, how="inner", left_on='STATEFP', right_on='state_fips'
    )

    # for charting purposes
    result_df['Percent with one dose'] = result_df['Percent with one dose'] / 100

    return result_df



In [13]:
def createStateVaccinationChart():
    """
        Chart the share of each state's population with at least one vaccine dose,
        colored by the party that won the state in 2020.

        The result is two charts stacked vertically:
          * a big chart zoomed to 30%-80% on the X axis, with the state abbreviation
            printed on each point and a dashed rule at 50%
          * a small overview chart over 0%-100%, with a rectangle marking the
            30%-80% range shown in the big chart

        Point size and vertical position both follow 'Total population'.
    """
    source = createStateVaccinationData()

    # Spread the points vertically by population: smallest state at 0.5, largest at 1.5.
    # When every row has the same population the range is zero; put all points at 0.5
    # instead of dividing by zero (which would give NaN positions).
    population = source['Total population']
    min_value = population.min()
    value_range = population.max() - min_value
    if value_range:
        source['y_center'] = ((population - min_value) / value_range) + .5
    else:
        source['y_center'] = .5

    # NOTE(review): the title says "age 18 and older" while the data column is
    # 'Percent of Total Pop with at least One Dose ...' - confirm which population is meant.
    big_chart = alt.Chart(
        source,
        title=["Percentage of state’s population age 18 and older that has received",
               "at least one dose of a COVID-19 vaccine as of June 26th"]
    ).mark_point(
        filled=True,
        opacity=1,
    ).encode(
        x=alt.X(
            'Percent with one dose:Q',
            axis=alt.Axis(title=None, format='%', orient='top', values=[0.3, .4, .5, .6, .7, .8]),
            scale=alt.Scale(domain=[.30, .80])),
        y=alt.Y("y_center:Q", axis=None),
        color=alt.Color(
            "party_simplified:N",
            scale=alt.Scale(
                domain=["DEMOCRAT", "REPUBLICAN"],
                range=["#237ABD", "#CD2128"])
        ),
        tooltip=[alt.Tooltip('state_po:N', title="State: ")],
        size=alt.Size(
            "Total population:Q",
            scale=alt.Scale(range=[100, 3000]),
            legend=None)
    ).properties(
        width=700,
        height=400
    )

    # Dashed vertical rule at 50% vaccinated
    big_chart_line = alt.Chart(pd.DataFrame({'x': [.5]})).mark_rule(strokeDash=[10, 10]).encode(x='x')

    # State abbreviation printed in white on top of each point
    big_chart_text = alt.Chart(
        source
    ).mark_text(
        align='left',
        baseline='middle',
        dx=-3,
        fontSize=8,
        fontWeight='bold',
        color="white"
    ).encode(
        x=alt.X('Percent with one dose:Q'),
        y=alt.Y("y_center:Q"),
        text='state_po'
    )

    # Overview chart over the full 0-100% range (title typo "wih" fixed)
    small_chart = alt.Chart(
        source,
        title="Percentage of people vaccinated with one dose"
    ).mark_point(
        filled=True,
        opacity=1
    ).encode(
        x=alt.X(
            'Percent with one dose:Q',
            axis=alt.Axis(format='%', orient='top', values=[0, .2, .4, .6, .8, 1]),
            scale=alt.Scale(domain=[0, 1]),
            title=None),
        y=alt.Y("y_center:Q", axis=None),
        color=alt.Color(
            "party_simplified:N",
            legend=alt.Legend(title="Presidential election choice"),
            scale=alt.Scale(
                domain=["DEMOCRAT", "REPUBLICAN"],
                range=["#237ABD", "#CD2128"])
        ),
        size=alt.Size(
            "Total population:Q",
            scale=alt.Scale(range=[50, 100]),
            legend=None)
    ).properties(
        width=400,
        height=50
    )

    # Add a rectangle around the data: it covers the 30%-80% range of the big chart
    box = pd.DataFrame({'x1': [.3], 'x2': [0.8], 'y1': [0], 'y2': [1.5]})

    rect = alt.Chart(box).mark_rect(fill='white', stroke='black', opacity=.3).encode(
        alt.X('x1'),
        alt.Y('y1'),
        x2='x2',
        y2='y2'
    )

    full_x_chart = (small_chart + rect)

    # Each chart keeps its own x, y and size scales so the zoomed and the
    # overview chart do not force a shared axis range on each other
    final_chart = (
        (big_chart + big_chart_text + big_chart_line) & full_x_chart
    ).resolve_scale(
        x='independent', y='independent', size='independent'
    ).configure_title(
        fontSize=15
    ).configure_axis(
        labelColor='#a9a9a9'
    )

    return final_chart

In [14]:
# Build the state vaccination chart; as the last expression it is what this cell displays
createStateVaccinationChart()