# Vaccinations Administered by County

In [1]:
# Dependencies and Setup
import hvplot.pandas
import pandas as pd
import numpy as np
import requests
import json
from scipy import stats
from api_keys import geoapify_key
from api_keys import google_api_key

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load vaccine data into DataFrame
vaccine_df = pd.read_csv("../Keyana/COVID-19_Vaccinations_in_the_United_States_County.csv")
vaccine_df.head()

Unnamed: 0,Recip_County,Administered_Dose1_Recip_5Plus,Administered_Dose1_Recip_12Plus,Administered_Dose1_Recip_18Plus,Administered_Dose1_Recip_65Plus,Series_Complete_5Plus,Series_Complete_12Plus,Series_Complete_18Plus,Series_Complete_65Plus
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0
4,Saginaw County,,,,,103054.0,101354.0,95769.0,32377.0


In [3]:
# Rename columns
vaccine_df = vaccine_df.rename(columns = {'Recip_County': 'County',
        'Administered_Dose1_Recip_5Plus': 'Administered Dose 1 (5 Plus)',
        'Administered_Dose1_Recip_12Plus': 'Administered Dose 1 (12 Plus)',
        'Administered_Dose1_Recip_18Plus': 'Administered Dose 1 (18 Plus)',
        'Administered_Dose1_Recip_65Plus': 'Administered Dose 1 (65 Plus)',
        'Series_Complete_5Plus': 'Administered Series (5 Plus)',
        'Series_Complete_12Plus': 'Administered Series (12 Plus)',
        'Series_Complete_18Plus': 'Administered Series (18 Plus)',
        'Series_Complete_65Plus': 'Administered Series (65 Plus)'})

vaccine_df

Unnamed: 0,County,Administered Dose 1 (5 Plus),Administered Dose 1 (12 Plus),Administered Dose 1 (18 Plus),Administered Dose 1 (65 Plus),Administered Series (5 Plus),Administered Series (12 Plus),Administered Series (18 Plus),Administered Series (65 Plus)
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0
4,Saginaw County,,,,,103054.0,101354.0,95769.0,32377.0
...,...,...,...,...,...,...,...,...,...
1962776,Adams County,,0.0,0.0,0.0,,0.0,0.0,0.0
1962777,Harrison County,,0.0,0.0,0.0,,0.0,0.0,0.0
1962778,Matagorda County,,0.0,0.0,0.0,,0.0,0.0,0.0
1962779,Pleasants County,,0.0,0.0,0.0,,0.0,0.0,0.0


In [4]:
# Add column for dose 1 total number of vaccines
vaccine_df['Total Dose 1 Vaccines Administered'] = vaccine_df['Administered Dose 1 (5 Plus)'] + vaccine_df['Administered Dose 1 (12 Plus)'] + vaccine_df['Administered Dose 1 (18 Plus)'] + vaccine_df['Administered Dose 1 (65 Plus)']
vaccine_df.head()

Unnamed: 0,County,Administered Dose 1 (5 Plus),Administered Dose 1 (12 Plus),Administered Dose 1 (18 Plus),Administered Dose 1 (65 Plus),Administered Series (5 Plus),Administered Series (12 Plus),Administered Series (18 Plus),Administered Series (65 Plus),Total Dose 1 Vaccines Administered
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0,37077.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0,10439.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0,4260007.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0,37897.0
4,Saginaw County,,,,,103054.0,101354.0,95769.0,32377.0,


In [5]:
# Add column for series total number of vaccines
vaccine_df['Total Series Vaccines Administered'] = vaccine_df['Administered Series (5 Plus)'] + vaccine_df['Administered Series (12 Plus)'] + vaccine_df['Administered Series (18 Plus)'] + vaccine_df['Administered Series (65 Plus)']
vaccine_df

Unnamed: 0,County,Administered Dose 1 (5 Plus),Administered Dose 1 (12 Plus),Administered Dose 1 (18 Plus),Administered Dose 1 (65 Plus),Administered Series (5 Plus),Administered Series (12 Plus),Administered Series (18 Plus),Administered Series (65 Plus),Total Dose 1 Vaccines Administered,Total Series Vaccines Administered
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0,37077.0,34562.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0,10439.0,9839.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0,4260007.0,3612062.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0,37897.0,32622.0
4,Saginaw County,,,,,103054.0,101354.0,95769.0,32377.0,,332554.0
...,...,...,...,...,...,...,...,...,...,...,...
1962776,Adams County,,0.0,0.0,0.0,,0.0,0.0,0.0,,
1962777,Harrison County,,0.0,0.0,0.0,,0.0,0.0,0.0,,
1962778,Matagorda County,,0.0,0.0,0.0,,0.0,0.0,0.0,,
1962779,Pleasants County,,0.0,0.0,0.0,,0.0,0.0,0.0,,


In [6]:
# Add column for total number of vaccines 
vaccine_df['Total Vaccines Administered'] = vaccine_df['Total Dose 1 Vaccines Administered'] + vaccine_df['Total Series Vaccines Administered']
vaccine_df

Unnamed: 0,County,Administered Dose 1 (5 Plus),Administered Dose 1 (12 Plus),Administered Dose 1 (18 Plus),Administered Dose 1 (65 Plus),Administered Series (5 Plus),Administered Series (12 Plus),Administered Series (18 Plus),Administered Series (65 Plus),Total Dose 1 Vaccines Administered,Total Series Vaccines Administered,Total Vaccines Administered
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0,37077.0,34562.0,71639.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0,10439.0,9839.0,20278.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0,4260007.0,3612062.0,7872069.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0,37897.0,32622.0,70519.0
4,Saginaw County,,,,,103054.0,101354.0,95769.0,32377.0,,332554.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
1962776,Adams County,,0.0,0.0,0.0,,0.0,0.0,0.0,,,
1962777,Harrison County,,0.0,0.0,0.0,,0.0,0.0,0.0,,,
1962778,Matagorda County,,0.0,0.0,0.0,,0.0,0.0,0.0,,,
1962779,Pleasants County,,0.0,0.0,0.0,,0.0,0.0,0.0,,,


In [7]:
# Check for duplicates
duplicates = vaccine_df['County'].duplicated
print(duplicates)

<bound method Series.duplicated of 0           Washburn County
1             Taylor County
2             Nassau County
3           Lampasas County
4            Saginaw County
                 ...       
1962776        Adams County
1962777     Harrison County
1962778    Matagorda County
1962779    Pleasants County
1962780     Houghton County
Name: County, Length: 1962781, dtype: object>


In [8]:
# Remove duplicates
vaccine_df = vaccine_df.drop_duplicates(subset=['County'], keep='first')
vaccine_df

Unnamed: 0,County,Administered Dose 1 (5 Plus),Administered Dose 1 (12 Plus),Administered Dose 1 (18 Plus),Administered Dose 1 (65 Plus),Administered Series (5 Plus),Administered Series (12 Plus),Administered Series (18 Plus),Administered Series (65 Plus),Total Dose 1 Vaccines Administered,Total Series Vaccines Administered,Total Vaccines Administered
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0,37077.0,34562.0,71639.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0,10439.0,9839.0,20278.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0,4260007.0,3612062.0,7872069.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0,37897.0,32622.0,70519.0
4,Saginaw County,,,,,103054.0,101354.0,95769.0,32377.0,,332554.0,
...,...,...,...,...,...,...,...,...,...,...,...,...
3276,Swift County,5893.0,5636.0,5277.0,1955.0,5537.0,5299.0,4964.0,1867.0,18761.0,17667.0,36428.0
3277,Stokes County,29136.0,28648.0,27464.0,10293.0,23030.0,22666.0,21725.0,8347.0,95541.0,75768.0,171309.0
3278,Ozark County,3760.0,3723.0,3635.0,1706.0,3055.0,3033.0,2970.0,1402.0,12824.0,10460.0,23284.0
3281,Churchill County,16164.0,15826.0,14980.0,4504.0,13979.0,13710.0,12974.0,3909.0,51474.0,44572.0,96046.0


In [9]:
# Drop NaN
vaccine_df.dropna()

Unnamed: 0,County,Administered Dose 1 (5 Plus),Administered Dose 1 (12 Plus),Administered Dose 1 (18 Plus),Administered Dose 1 (65 Plus),Administered Series (5 Plus),Administered Series (12 Plus),Administered Series (18 Plus),Administered Series (65 Plus),Total Dose 1 Vaccines Administered,Total Series Vaccines Administered,Total Vaccines Administered
0,Washburn County,11097.0,10863.0,10368.0,4749.0,10311.0,10105.0,9663.0,4483.0,37077.0,34562.0,71639.0
1,Taylor County,3145.0,3079.0,2966.0,1249.0,2949.0,2887.0,2792.0,1211.0,10439.0,9839.0,20278.0
2,Nassau County,1384503.0,1329779.0,1232671.0,313054.0,1174924.0,1127179.0,1041792.0,268167.0,4260007.0,3612062.0,7872069.0
3,Lampasas County,11660.0,11468.0,10849.0,3920.0,9999.0,9845.0,9330.0,3448.0,37897.0,32622.0,70519.0
5,Echols County,1572.0,1537.0,1458.0,366.0,1273.0,1245.0,1182.0,319.0,4933.0,4019.0,8952.0
...,...,...,...,...,...,...,...,...,...,...,...,...
3276,Swift County,5893.0,5636.0,5277.0,1955.0,5537.0,5299.0,4964.0,1867.0,18761.0,17667.0,36428.0
3277,Stokes County,29136.0,28648.0,27464.0,10293.0,23030.0,22666.0,21725.0,8347.0,95541.0,75768.0,171309.0
3278,Ozark County,3760.0,3723.0,3635.0,1706.0,3055.0,3033.0,2970.0,1402.0,12824.0,10460.0,23284.0
3281,Churchill County,16164.0,15826.0,14980.0,4504.0,13979.0,13710.0,12974.0,3909.0,51474.0,44572.0,96046.0


In [10]:
# API and response
county = vaccine_df['County']
url_county = f"https://maps.googleapis.com/maps/api/geocode/json?address={county}&key={google_api_key}"
response_county = requests.get(url_county).json()
print(json.dumps(response_county, indent=4, sort_keys=True))

{
    "results": [
        {
            "address_components": [
                {
                    "long_name": "Lampasas County",
                    "short_name": "Lampasas County",
                    "types": [
                        "administrative_area_level_2",
                        "political"
                    ]
                },
                {
                    "long_name": "Texas",
                    "short_name": "TX",
                    "types": [
                        "administrative_area_level_1",
                        "political"
                    ]
                },
                {
                    "long_name": "United States",
                    "short_name": "US",
                    "types": [
                        "country",
                        "political"
                    ]
                }
            ],
            "formatted_address": "Lampasas County, TX, USA",
            "geometry": {
                "bounds": {
      

In [11]:
# List to hold lat, lon and county data
county_list = []
lat = []
lon = []


# Find latitude and longitude for each county
countys = vaccine_df['County']
for county in countys:
    try:
        url_county = f"https://maps.googleapis.com/maps/api/geocode/json?address={county}&key={google_api_key}"
        response = requests.get(url_county).json()
        county_list.append(response['results'][0]['address_components'][0]['long_name'])
        lat.append(response['results'][0]['geometry']['location']['lat'])
        lon.append(response['results'][0]['geometry']['location']['lng'])
    except (KeyError, IndexError):
        pass
print(lat)
print(lon)
print(county_list)

[45.9657186, 32.2455088, 40.6546145, 31.2737682, 43.4044253, 30.7503289, 42.2073905, 42.9663458, 45.4995277, 45.4122013, 30.1805306, 35.80127969999999, 43.4369244, 32.7545792, 47.3416805, 44.6834072, 30.2297347, 36.0730279, 41.168563, 38.0684692, 38.86115789999999, 36.5023236, 41.8243831, 44.850452, 35.1268552, 48.11341179999999, 39.2587106, 45.1881488, 39.3176186, 36.2322429, 35.6078586, 42.2790746, 29.9943564, 18.2854476, 40.4465288, 48.8787167, 29.2299682, 39.5423418, 40.6519679, 35.4637028, 37.76078010000001, 40.0966287, 45.3607574, 39.0986811, 37.9335368, 40.9123374, 34.8580405, 42.54405029999999, 34.1996254, 18.0777392, 38.845412, 39.4241422, 37.4337342, 36.3860217, 34.008713, 37.394731, 36.2195399, 38.8413778, 40.4111363, 39.440463, 40.1111311, 38.9914998, 43.9466791, 44.366621, 33.9196567, 40.675971, 38.029655, 36.1449988, 44.45616740000001, 40.4531318, 26.069985, 40.2989226, 31.712238, 38.6762327, 43.2371798, 46.2061318, 48.4241961, 38.2310851, 34.6935359, 40.0010204, 34.88400

In [12]:
# Put data in dict
data = {'County': county_list, 
        'Lat': lat, 
       'Lon': lon} 
# Create DataFrame for mapping
county_vaccine_df = pd.DataFrame(data)

# Add total vaccines administered column
county_vaccine_df["Total Vaccines Administered"] = vaccine_df['Total Vaccines Administered']

# Show DataFrame
county_vaccine_df

Unnamed: 0,County,Lat,Lon,Total Vaccines Administered
0,Washburn County,45.965719,-91.814895,71639.0
1,Taylor County,32.245509,-99.812494,20278.0
2,Nassau County,40.654615,-73.559413,7872069.0
3,Lampasas County,31.273768,-98.221298,70519.0
4,Saginaw County,43.404425,-84.016742,
...,...,...,...,...
1951,Swift County,45.231565,-95.719604,
1952,Stokes County,36.424440,-80.232131,
1953,Ozark County,36.672006,-92.381362,
1954,Churchill County,39.662668,-118.486396,


In [13]:
# Drop NaN
county_vaccine_df.dropna()

Unnamed: 0,County,Lat,Lon,Total Vaccines Administered
0,Washburn County,45.965719,-91.814895,71639.0
1,Taylor County,32.245509,-99.812494,20278.0
2,Nassau County,40.654615,-73.559413,7872069.0
3,Lampasas County,31.273768,-98.221298,70519.0
5,Echols County,30.750329,-82.950156,8952.0
...,...,...,...,...
1943,Winn Parish,31.919612,-92.658640,17888601.0
1947,LaGrange County,41.628870,-85.396277,167286.0
1948,Hamblen County,36.205433,-83.293409,41779.0
1950,Dundy County,40.130612,-101.615777,66873.0


In [36]:
# Configure map
map_plot_1 = county_vaccine_df.hvplot.points(
    "Lon",
    "Lat",
    geo = True,
    tiles = "EsriImagery",
    frame_width = 600,
    frame_height = 400,
    size = "Total Vaccines Administrated",
    scale = 0.8, 
    color = "County"
)

# Display the map plot
map_plot_1



In [35]:
# Statistical summary for total 1 dose
mean_dose1 = vaccine_df['Total Dose 1 Vaccines Administered'].mean()
median_dose1 = vaccine_df['Total Dose 1 Vaccines Administered'].median()
variance_dose1 = vaccine_df['Total Dose 1 Vaccines Administered'].var()
std_dev_dose1 = vaccine_df['Total Dose 1 Vaccines Administered'].std()
sem_dose1 = vaccine_df['Total Dose 1 Vaccines Administered'].sem()

summary_dose1_df = pd.DataFrame({"Mean Total Dose 1 Administered": mean_dose1,
                           "Median Total Dose 1 Administered": median_dose1,
                                 "Total Dose 1 Administered Variance": variance_dose1,
                                 "Total Dose 1 Administered Std. Dev": std_dev_dose1,
                                 "Total Dose 1 Administered Std. Err.": sem_dose1}, index=[0])
summary_dose1_df

Unnamed: 0,Mean Total Dose 1 Administered,Median Total Dose 1 Administered,Total Dose 1 Administered Variance,Total Dose 1 Administered Std. Dev,Total Dose 1 Administered Std. Err.
0,266619.243346,51301.0,906666200000.0,952190.199239,22192.018858


In [34]:
# Statistical summary for series
mean_series = vaccine_df['Total Series Vaccines Administered'].mean()
median_series = vaccine_df['Total Series Vaccines Administered'].median()
variance_series = vaccine_df['Total Series Vaccines Administered'].var()
std_dev_series = vaccine_df['Total Series Vaccines Administered'].std()
sem_series = vaccine_df['Total Series Vaccines Administered'].sem()
summary_series_df = pd.DataFrame({"Mean Total Series Vaccines Administered": mean_series,
                                  "Median Total Series Vaccines Administered": median_series, 
                                 "Total Series Administrated Variance": variance_series,
                                 "Total Series Administered Std. Dev.": std_dev_series, 
                                 "Total Series Administered Std. Err.": sem_series}, index=[0])
summary_series_df

Unnamed: 0,Mean Total Series Vaccine Distribution,Median Total Series Vaccine Distribution,Total Series Administrated Variance,Total Series Administered Std. Dev.,Total Series Administered Std. Err.
0,224434.29702,44283.0,650755300000.0,806694.063922,18286.788552
