In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mlt
import seaborn as sns

import json
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly import tools
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.express as px
import plotly.io as pio

# Switch plotly into offline notebook mode so figures can be rendered inside
# the notebook. NOTE(review): `connected=True` presumably loads plotly.js from
# the network instead of embedding it -- confirm against the plotly docs.
init_notebook_mode(connected=True)
import warnings
# Suppress every Python warning for the remainder of the session.
# NOTE(review): this also hides pandas/matplotlib deprecation and
# chained-assignment warnings that could point at real problems below.
warnings.filterwarnings("ignore")

In [None]:
# Input files, named once so the paths are easy to spot and change.
VACCINATIONS_CSV = "../input/us-vaccination-progress/us_state_vaccinations.csv"
STATE_POPULATION_CSV = "../input/state-population-estimates/csvData.csv"

# `data` holds the vaccination observations; `state_pop` holds the population
# table used later to turn counts into percentages.
data = pd.read_csv(VACCINATIONS_CSV)
state_pop = pd.read_csv(STATE_POPULATION_CSV)

# Column names, dtypes and non-null counts of the vaccination table.
data.info()

### Description
* `location`: name of the state or federal entity.
* `date`: date of the observation.
* `total_vaccinations`: total number of doses administered. Each dose is counted individually, so this figure may not equal the total number of people vaccinated, depending on the specific dose regime (e.g. people receive multiple doses). If a person receives one dose of the vaccine, this metric goes up by 1. If they receive a second dose, it goes up by 1 again.
* `total_vaccinations_per_hundred`: `total_vaccinations` per 100 people in the total population of the state.
* `daily_vaccinations_raw`: daily change in the total number of doses administered. It is only calculated for consecutive days. This is a raw measure provided for data checks and transparency, but we strongly recommend that any analysis on daily vaccination rates be conducted using `daily_vaccinations` instead.
* `daily_vaccinations`: new doses administered per day (7-day smoothed). For locations that don't report data on a daily basis, we assume that doses changed equally on a daily basis over any periods in which no data was reported. This produces a complete series of daily figures, which is then averaged over a rolling 7-day window. An example of how we perform this calculation can be found [here](https://github.com/owid/covid-19-data/issues/333#issuecomment-763015298).
* `daily_vaccinations_per_million`: `daily_vaccinations` per 1,000,000 people in the total population of the state.
* `people_vaccinated`: total number of people who received at least one vaccine dose. If a person receives the first dose of a 2-dose vaccine, this metric goes up by 1. If they receive the second dose, the metric stays the same.
* `people_vaccinated_per_hundred`: `people_vaccinated` per 100 people in the total population of the state.
* `people_fully_vaccinated`: total number of people who received all doses prescribed by the vaccination protocol. If a person receives the first dose of a 2-dose vaccine, this metric stays the same. If they receive the second dose, the metric goes up by 1.
* `people_fully_vaccinated_per_hundred`: `people_fully_vaccinated` per 100 people in the total population of the state.
* `total_distributed`: cumulative counts of COVID-19 vaccine doses recorded as shipped in CDC's Vaccine Tracking System.
* `total_distributed_per_hundred`: cumulative counts of COVID-19 vaccine doses recorded as shipped in CDC's Vaccine Tracking System per 100 people in the total population of the state.
* `share_doses_used`: share of vaccination doses administered among those recorded as shipped in CDC's Vaccine Tracking System.

<h1 style='background:#ffa500; border:0; color:black'><center>Overall Vaccine Progress</center></h1>  

<center><a href='https://www.census.gov/popclock/'>Approximately 330 million people live in the US</a></center> 

In [None]:
# Rows for the 'United States' location only. `reset_index()` keeps the old
# row labels as an extra `index` column, exactly as before.
us = data.loc[data.location == 'United States'].copy(deep=True).reset_index()
us['date'] = pd.to_datetime(us['date'])

# Values reused for positioning and labelling the two annotations.
one_dose_peak = max(us.people_vaccinated.dropna())
fully_vaccinated_values = us.people_fully_vaccinated.dropna()
fully_vaccinated_last = fully_vaccinated_values.iloc[-1]
fully_vaccinated_peak = max(fully_vaccinated_values)

fig, ax = plt.subplots(figsize=(15, 12))
for column in ('people_vaccinated', 'people_fully_vaccinated'):
    sns.lineplot(data=us, x='date', y=column, ax=ax)

# The labels are anchored a fixed number of rows before the end of the series
# (22 and 16 rows back), so the text starts left of the final data point.
ax.annotate(
    text='People Vaccinated (One Dose): {:,d}'.format(int(one_dose_peak)),
    xy=[us.date.iloc[-22], one_dose_peak],
    weight='bold',
)
# NOTE(review): this label is placed at the last non-null value but prints the
# maximum; the two differ if the cumulative series was ever revised downward.
ax.annotate(
    text='Fully Vaccinated: {:,d}'.format(int(fully_vaccinated_peak)),
    xy=[us.date.iloc[-16], fully_vaccinated_last],
    weight='bold',
)

ax.set_ylabel('Number of People', weight='bold')
ax.set_xlabel('Dates', weight='bold')
ax.set_title('People Vaccinated/Fully Vaccinated Since January 2021', weight='bold')
# Show plain integers on the y-axis instead of scientific notation.
ax.ticklabel_format(axis='y', style='plain')

plt.show()

In [None]:
# Approximate US population used as the denominator for both percentages.
US_POPULATION = 330000000

# Share with at least one dose, based on the maximum of the series.
one_dose_share = round((max(us.people_vaccinated.dropna()) / US_POPULATION) * 100, 2)
print('The US has vaccinated approximately {}% of its population with at least 1 dose'.format(
    str(one_dose_share)
))

# Share fully vaccinated, based on the last non-null value of the series.
fully_vaccinated_share = round(
    (us.people_fully_vaccinated.dropna().iloc[-1]) / US_POPULATION * 100, 2
)
print('The US has fully vaccinated approximately {}% of its population'.format(
    str(fully_vaccinated_share)
))

<h1 style='background:#ffa500; border:0; color:black'><center>State Vaccination Progress</center></h1>  
<center>
    <ul>
        <li>Which states are leading in vaccines administered?</li>
        <li>Which states are leading in vaccines administered based on percent population?</li>
    </ul>
</center>

In [None]:
# Locations in the vaccination table that are not US states; rows for these
# are removed. 'District of Columbia' is not listed, so it is kept.
NON_STATE_LOCATIONS = [
    'American Samoa', 'Bureau of Prisons', 'Dept of Defense',
    'Federated States of Micronesia', 'Guam', 'Indian Health Svc',
    'Long Term Care', 'Marshall Islands', 'Northern Mariana Islands',
    'Puerto Rico', 'Republic of Palau', 'Veterans Health', 'United States',
    'Virgin Islands',
]

# One membership test replaces the chain of per-name inequality filters.
states = data.loc[~data.location.isin(NON_STATE_LOCATIONS)].copy(deep=True)
state_list = states.location.unique()

# Snapshot of every row whose date equals the date found in the last row.
# NOTE(review): this assumes the last row carries the most recent date and
# that every state reports on it -- confirm against the data ordering.
last_row_date = states.date.iloc[-1]
latest = states[states.date == last_row_date].reset_index(drop=True)

In [None]:
# Two-letter postal codes keyed by the location name found in the data.
# Looking codes up by name, instead of pairing a hard-coded list with the rows
# by position, keeps each value attached to the right state regardless of row
# order (the old list had "DC" before "DE", the reverse of name order).
# "New York State" is listed next to "New York" because the vaccination file
# presumably uses that spelling -- TODO confirm against `latest.location`.
STATE_CODE_BY_NAME = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "District of Columbia": "DC", "Florida": "FL", "Georgia": "GA", "Hawaii": "HI",
    "Idaho": "ID", "Illinois": "IL", "Indiana": "IN", "Iowa": "IA",
    "Kansas": "KS", "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME",
    "Maryland": "MD", "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN",
    "Mississippi": "MS", "Missouri": "MO", "Montana": "MT", "Nebraska": "NE",
    "Nevada": "NV", "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM",
    "New York": "NY", "New York State": "NY", "North Carolina": "NC",
    "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK", "Oregon": "OR",
    "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
}

# One code per row of `latest`, in the same row order, so later cells can keep
# passing `state_codes` alongside values computed from `latest`.
state_codes = latest['location'].map(STATE_CODE_BY_NAME)

# Fail loudly rather than drawing a map with shifted or missing states.
unmapped_locations = latest.loc[state_codes.isna(), 'location'].unique()
if len(unmapped_locations) > 0:
    raise ValueError(
        'No state code known for location(s): {}'.format(', '.join(map(str, unmapped_locations)))
    )

# Choropleth of the absolute number of people with at least one dose.
fig = go.Figure(data = go.Choropleth(
            locations = state_codes,
            z = latest['people_vaccinated'],
            locationmode = 'USA-states',
            colorscale = 'Blues',
            colorbar_title = "Total People Vaccinated"
))
fig.update_layout(
    title_text = 'Total Vaccinated with One Dose',
    geo_scope='usa',
)
fig.show()

In [None]:
# Ten locations with the highest `people_vaccinated` in the latest snapshot,
# with presentation-friendly column names.
top_10 = (
    latest[['location', 'people_vaccinated']]
    .sort_values(by='people_vaccinated', ascending=False)
    .head(10)
    .rename(columns={'location': 'State', 'people_vaccinated': 'People Vaccinated'})
)
# Render the counts as strings with thousands separators for display.
top_10['People Vaccinated'] = top_10['People Vaccinated'].map(
    lambda count: '{:,d}'.format(int(count))
)
# Last expression of the cell: shown as the table, `top_10` itself is unchanged.
top_10.set_index('State')

In [None]:
# Population column with the row labelled 39 removed and the index renumbered,
# so it lines up with `latest` row by row.
# NOTE(review): presumably row 39 is a non-state entry and the remaining rows
# are in the same order as `latest` -- verify against the population file.
pop = state_pop.Pop.drop(39).reset_index(drop=True)
percentage = (latest.people_vaccinated / pop * 100).round(2)

# Same map as the absolute-count choropleth, but coloured by share of the
# population on a fixed 0-100 scale.
percentage_trace = go.Choropleth(
    locations=state_codes,
    z=percentage,
    zmin=0,
    zmax=100,
    locationmode='USA-states',
    colorscale='mygbm',
    colorbar_title="% Population",
)
fig = go.Figure(data=percentage_trace)
fig.update_layout(
    title_text='Percentage of People Vaccinated with One Dose by State',
    geo_scope='usa',
)
fig.show()

In [None]:
# Assemble a state / population / percentage table from the three row-aligned
# series: each series becomes one row, then the frame is transposed so they
# become columns. NOTE(review): despite the variable name, `percentage` is
# computed from `people_vaccinated`, not `people_fully_vaccinated`.
table_rows = [latest.location, pop, percentage]
row_labels = ['State', 'Population', 'Percent Vaccinated']
top_10_fully_vaccinated = (
    pd.DataFrame(table_rows, index=row_labels)
    .T
    .set_index(keys='State', drop=True)
)

# Show populations with thousands separators.
top_10_fully_vaccinated.Population = top_10_fully_vaccinated.Population.apply(
    lambda people: '{:,d}'.format(int(people))
)

# Ten states with the highest percentage, highest first.
top_10_fully_vaccinated.sort_values(by='Percent Vaccinated', ascending=False).head(10)