In [102]:
# import the necessary libraries
import pandas as pd
import plotly
import plotly.express as px
import json
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
import cufflinks
import urllib.request
from urllib.request import urlopen

In [103]:
# download the daily US state COVID-19 vaccination csv from the OWID covid-19-data GitHub repository
# and store it in the working directory
url_daily = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/us_state_vaccinations.csv'
filename_daily = 'dailyvaccinations.csv'
urllib.request.urlretrieve(url=url_daily, filename=filename_daily)

('dailyvaccinations.csv', <http.client.HTTPMessage at 0x2434bb3a748>)

In [104]:
# load the cumulative and the daily vaccination csv files into one dataframe each
# NOTE(review): 'covidvaccinations.csv' is not written by any cell shown here - it has to exist in the working directory already
vacc, daily_vacc = (
    pd.read_csv(csv_name)
    for csv_name in ('covidvaccinations.csv', 'dailyvaccinations.csv')
)

In [105]:
# preview the first rows of the cumulative vaccination dataframe
vacc.head()

Unnamed: 0,State/Territory/Federal Entity,Total Doses Delivered,Doses Delivered per 100K,18+ Doses Delivered per 100K,Total Doses Administered by State where Administered,Doses Administered per 100k by State where Administered,18+ Doses Administered by State where Administered,18+ Doses Administered per 100K by State where Administered,People with at least One Dose by State of Residence,Percent of Total Pop with at least One Dose by State of Residence,...,Percent of 65+ Pop with at least One Dose by State of Residence,People 65+ Fully Vaccinated by State of Residence,Percent of 65+ Pop Fully Vaccinated by State of Residence,People 65+ Fully Vaccinated_Moderna_Resident,People 65+ Fully Vaccinated_Pfizer_Resident,People 65+ Fully Vaccinated_Janssen_Resident,People 65+ Fully Vaccinated_Unknown 2-dose Manuf_Resident,65+ Doses Administered by State where Administered,Doses Administered per 100k of 65+ pop by State where Administered,Doses Delivered per 100k of 65+ pop
0,Alaska,769205,105148.0,139459.0,560662,76641.0,547959,99347.0,307711,42.1,...,78.0,66456,72.6,38153,26763,1530,10,134680,147050,839853
1,Alabama,4130550,84242.0,108275.0,2617405,53382.0,2603796,68254.0,1599130,32.6,...,72.9,501562,59.0,264621,213398,23504,39,1076330,126651,486040
2,Arkansas,2644650,87635.0,114109.0,1803795,59772.0,1785108,77022.0,1077072,35.7,...,73.3,307810,58.8,177109,121916,8710,75,683131,130398,504818
3,American Samoa,54030,97021.0,124387.0,38743,69570.0,37454,86226.0,21732,39.0,...,25.9,2163,22.8,867,1290,6,0,4548,48041,570719
4,Arizona,6652295,91394.0,117980.0,5123812,70394.0,5055669,89664.0,2981885,41.0,...,79.2,840638,64.2,411844,402881,25249,664,1927824,147316,508339


In [106]:
# check dimensions of the cumulative dataset as (rows, columns)
vacc.shape

(63, 51)

In [107]:
# check list of unique locations to see which entries are states and which are territories or federal entities
vacc['State/Territory/Federal Entity'].unique()

array(['Alaska', 'Alabama', 'Arkansas', 'American Samoa', 'Arizona',
       'Bureau of Prisons', 'California', 'Colorado', 'Connecticut',
       'District of Columbia', 'Dept of Defense', 'Delaware', 'Florida',
       'Federated States of Micronesia', 'Georgia', 'Guam', 'Hawaii',
       'Iowa', 'Idaho', 'Indian Health Svc', 'Illinois', 'Indiana',
       'Kansas', 'Kentucky', 'Louisiana', 'Massachusetts', 'Maryland',
       'Maine', 'Marshall Islands', 'Michigan', 'Minnesota', 'Missouri',
       'Northern Mariana Islands', 'Mississippi', 'Montana',
       'North Carolina', 'North Dakota', 'Nebraska', 'New Hampshire',
       'New Jersey', 'New Mexico', 'Nevada', 'New York State', 'Ohio',
       'Oklahoma', 'Oregon', 'Pennsylvania', 'Puerto Rico',
       'Rhode Island', 'Republic of Palau', 'South Carolina',
       'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Virginia',
       'Veterans Health', 'Virgin Islands', 'Vermont', 'Washington',
       'Wisconsin', 'West Virginia', 'Wyoming'], 

In [108]:
# count the distinct locations in the cumulative dataset (a missing value would be counted as one location)
vacc['State/Territory/Federal Entity'].nunique(dropna=False)

63

In [109]:
# create list of nonstate names (territories and federal entities) and remove those entries from the dataset
nonstates = ['American Samoa', 'Bureau of Prisons', 'Dept of Defense', 'Federated States of Micronesia', 'Guam', 'Indian Health Svc', 'Marshall Islands', 'Northern Mariana Islands', 'Republic of Palau', 'Veterans Health', 'Virgin Islands']
# one vectorized membership test replaces the per-name filtering loop;
# .copy() yields an independent frame for the column additions made in later cells
vacc = vacc[~vacc['State/Territory/Federal Entity'].isin(nonstates)].copy()

In [110]:
# count the locations left after filtering: the 50 states plus DC and PR
vacc['State/Territory/Federal Entity'].nunique(dropna=False)

52

In [111]:
# scatter plot on log-log axes comparing total doses delivered with total doses administered, one colored point per state
fig = px.scatter(
    vacc,
    x='Total Doses Delivered',
    y='Total Doses Administered by State where Administered',
    color='State/Territory/Federal Entity',
    hover_name='State/Territory/Federal Entity',
    log_x=True,
    log_y=True,
    height=700,
    title='COVID-19 Vaccine Doses Delivered vs Administered',
    color_discrete_sequence=px.colors.qualitative.Vivid,
)
fig.update_traces(textposition='top center')
fig.show()

In [112]:
# scatter plot on log-log axes comparing doses delivered with doses administered per 100,000, one colored point per state
fig = px.scatter(
    vacc,
    x='Doses Delivered per 100K',
    y='Doses Administered per 100k by State where Administered',
    color='State/Territory/Federal Entity',
    hover_name='State/Territory/Federal Entity',
    log_x=True,
    log_y=True,
    height=700,
    title='COVID-19 Vaccine Doses Delivered vs Administered per 100,000',
    color_discrete_sequence=px.colors.qualitative.Vivid,
)
fig.update_traces(textposition='top center')
fig.show()

In [113]:
# horizontal bar plot of the percentage of population fully vaccinated, one colored bar per state, ordered by value
fig = px.bar(
    vacc.sort_values(by='Percent of Total Pop Fully Vaccinated by State of Residence'),
    x='Percent of Total Pop Fully Vaccinated by State of Residence',
    y="State/Territory/Federal Entity",
    color='State/Territory/Federal Entity',
    orientation='h',
    width=1000,
    height=900,
)
fig.update_layout(
    title='COVID-19 Percent of Population Fully Vaccinated by State',
    xaxis_title="",
    yaxis_title="",
    yaxis_categoryorder='total ascending',
    uniformtext_minsize=6,
    uniformtext_mode='hide',
)
fig.show()

In [115]:
# horizontal bar plot of vaccine doses administered per 100,000, one colored bar per state, ordered by value
fig = px.bar(
    vacc.sort_values(by='Doses Administered per 100k by State where Administered'),
    x='Doses Administered per 100k by State where Administered',
    y="State/Territory/Federal Entity",
    color='State/Territory/Federal Entity',
    orientation='h',
    width=1000,
    height=900,
)
fig.update_layout(
    title='COVID-19 Vaccinations Administered per 100,000 by State',
    xaxis_title="",
    yaxis_title="",
    yaxis_categoryorder='total ascending',
    uniformtext_minsize=6,
    uniformtext_mode='hide',
)
fig.show()

In [76]:
# create a dictionary mapping location names (as spelled in the datasets) to their two-letter postal abbreviations
# keys follow the dataset spelling, e.g. 'New York State' rather than 'New York'
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    # DC and Puerto Rico are kept in the filtered datasets, so they need an abbreviation too;
    # without these entries .map() leaves NaN for them
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York State': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    # NOTE(review): 'PR' may not be drawn by the 'USA-states' choropleth location mode - verify
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

In [77]:
# add a 'State Abbr' column by looking each location name up in the abbreviation dictionary
# (names missing from the dictionary become NaN)
vacc = vacc.assign(**{'State Abbr': vacc['State/Territory/Federal Entity'].map(us_state_abbrev)})

In [78]:
# choropleth map of the percentage of population fully vaccinated; plotly.express locates each state by its abbreviation
fig = px.choropleth(
    vacc,
    locations='State Abbr',
    locationmode='USA-states',
    scope='usa',
    color='Percent of Total Pop Fully Vaccinated by State of Residence',
    color_continuous_scale='inferno',
    range_color=(20, 40),
    labels={'Percent of Total Pop Fully Vaccinated by State of Residence':'COVID-19 Percent of Population Fully Vaccinated'},
    hover_name='State/Territory/Federal Entity',
    hover_data={'Percent of Total Pop Fully Vaccinated by State of Residence':True},
)
# remove the margins around the map
fig.update_layout(margin={'r':0,'t':0,'l':0,'b':0})
fig.show()

In [79]:
# choropleth map of the vaccine doses administered per 100,000; plotly.express locates each state by its abbreviation
fig = px.choropleth(
    vacc,
    locations='State Abbr',
    locationmode='USA-states',
    scope='usa',
    color='Doses Administered per 100k by State where Administered',
    color_continuous_scale='inferno',
    range_color=(50000, 90000),
    labels={'Doses Administered per 100k by State where Administered':'COVID-19 Vaccine Doses Administered per 100,000'},
    hover_name='State/Territory/Federal Entity',
    hover_data={'Doses Administered per 100k by State where Administered':True},
)
# remove the margins around the map
fig.update_layout(margin={'r':0,'t':0,'l':0,'b':0})
fig.show()

In [80]:
# treemap with one tile per state, sized by the total number of doses administered there
fig = px.treemap(
    vacc,
    path=['State/Territory/Federal Entity'],
    values='Total Doses Administered by State where Administered',
    color='State/Territory/Federal Entity',
    title='COVID-19 Vaccinations Administered by State',
    width=1000, height=700,
)
fig.show()

In [81]:
# initiate offline mode for plotly and cufflinks to create interactive plots
# (needed before the DataFrame.iplot calls further down)
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [82]:
# preview the first rows of the daily vaccination dataframe
daily_vacc.head()

Unnamed: 0,date,location,total_vaccinations,total_distributed,people_vaccinated,people_fully_vaccinated_per_hundred,total_vaccinations_per_hundred,people_fully_vaccinated,people_vaccinated_per_hundred,distributed_per_hundred,daily_vaccinations_raw,daily_vaccinations,daily_vaccinations_per_million,share_doses_used
0,2021-01-12,Alabama,78134.0,377025.0,70861.0,0.15,1.59,7270.0,1.45,7.69,,,,0.207
1,2021-01-13,Alabama,84040.0,378975.0,74792.0,0.19,1.71,9245.0,1.53,7.73,5906.0,5906.0,1205.0,0.222
2,2021-01-14,Alabama,92300.0,435350.0,80480.0,,1.88,,1.64,8.88,8260.0,7083.0,1445.0,0.212
3,2021-01-15,Alabama,100567.0,444650.0,86956.0,0.28,2.05,13488.0,1.77,9.07,8267.0,7478.0,1525.0,0.226
4,2021-01-16,Alabama,,,,,,,,,7557.0,7498.0,1529.0,


In [99]:
# check dimensions of the daily dataset as (rows, columns)
daily_vacc.shape

(7238, 14)

In [83]:
# count the distinct locations in the daily dataset (a missing value would be counted as one location)
daily_vacc['location'].nunique(dropna=False)

65

In [84]:
# check list of unique locations to see which entries are not states (territories, federal entities, aggregates)
daily_vacc['location'].unique()

array(['Alabama', 'Alaska', 'American Samoa', 'Arizona', 'Arkansas',
       'Bureau of Prisons', 'California', 'Colorado', 'Connecticut',
       'Delaware', 'Dept of Defense', 'District of Columbia',
       'Federated States of Micronesia', 'Florida', 'Georgia', 'Guam',
       'Hawaii', 'Idaho', 'Illinois', 'Indian Health Svc', 'Indiana',
       'Iowa', 'Kansas', 'Kentucky', 'Long Term Care', 'Louisiana',
       'Maine', 'Marshall Islands', 'Maryland', 'Massachusetts',
       'Michigan', 'Minnesota', 'Mississippi', 'Missouri', 'Montana',
       'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
       'New York State', 'North Carolina', 'North Dakota',
       'Northern Mariana Islands', 'Ohio', 'Oklahoma', 'Oregon',
       'Pennsylvania', 'Puerto Rico', 'Republic of Palau', 'Rhode Island',
       'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
       'United States', 'Utah', 'Vermont', 'Veterans Health',
       'Virgin Islands', 'Virginia', 'Washington', 'West V

In [85]:
# create list of nonstate names (territories, federal entities and aggregate rows) and remove those entries from the dataset
nonstates = ['American Samoa', 'Bureau of Prisons', 'Dept of Defense', 'Federated States of Micronesia', 'Guam', 'Indian Health Svc', 'Long Term Care', 'Marshall Islands', 'Northern Mariana Islands', 'Republic of Palau', 'United States', 'Veterans Health', 'Virgin Islands']
# one vectorized membership test replaces the per-name filtering loop;
# .copy() yields an independent frame for the column changes made in later cells
daily_vacc = daily_vacc[~daily_vacc['location'].isin(nonstates)].copy()

In [86]:
# parse the date column into pandas datetime values
daily_vacc = daily_vacc.assign(date=pd.to_datetime(daily_vacc['date']))

In [87]:
# create new dataframe grouped by date and state with raw and weekly averaged vaccination counts
# the columns are selected with a list (double brackets): selecting several groupby columns with a
# bare tuple of names was deprecated in pandas 1.0 and raises in pandas 2.0+
# note: sum() turns missing counts into 0
daily_vacc = (
    daily_vacc
    .groupby(['date', 'location'])[['daily_vaccinations_raw', 'daily_vaccinations']]
    .sum()
    .reset_index()
)

In [88]:
# add a 'State Abbr' column by looking each location name up in the abbreviation dictionary
# (names missing from the dictionary become NaN)
daily_vacc = daily_vacc.assign(**{'State Abbr': daily_vacc['location'].map(us_state_abbrev)})

In [89]:
# animated choropleth map of the daily number of COVID-19 vaccinations given per state, with one animation frame per date;
# plotly.express locates each state by its abbreviation
fig = px.choropleth(
    daily_vacc,
    locations='State Abbr',
    locationmode='USA-states',
    scope='usa',
    color='daily_vaccinations_raw',
    color_continuous_scale='inferno',
    range_color=(0, 100000),
    labels={'daily_vaccinations_raw':'Daily COVID-19 Vaccinations Administered'},
    hover_name='location',
    hover_data={'daily_vaccinations_raw':True},
    # dates are formatted as strings to serve as the frame labels
    animation_frame=daily_vacc['date'].dt.strftime('%Y-%m-%d'),
)
# remove the margins around the map
fig.update_layout(margin={'r':0,'t':0,'l':0,'b':0})
fig.show()

In [90]:
# interactive bubble plot: one bubble per state and date, sized and colored by the raw daily vaccination count
# only rows with a positive count are plotted; rows are ordered by location name, descending
temp = daily_vacc.loc[daily_vacc['daily_vaccinations_raw'] > 0].sort_values('location', ascending=False)
fig = px.scatter(
    temp,
    x='date',
    y='location',
    size='daily_vaccinations_raw',
    color='daily_vaccinations_raw',
    color_continuous_scale='inferno',
    width=1000,
    height=900,
    title='Daily COVID-19 Vaccinations Administered by State',
)
# label every state on the y axis and hide the color bar
fig.update_layout(yaxis=dict(dtick=1), coloraxis_showscale=False)
fig.show()

In [91]:
# create new dataframe of combined US daily values grouped by date with raw and weekly averaged vaccination counts
# the columns are selected with a list (double brackets): selecting several groupby columns with a
# bare tuple of names was deprecated in pandas 1.0 and raises in pandas 2.0+
comb_vacc = (
    daily_vacc
    .groupby('date')[['daily_vaccinations_raw', 'daily_vaccinations']]
    .sum()
    .reset_index()
)

In [92]:
# interactive line plot of the combined daily raw vaccinations and the weekly averaged vaccinations over time
comb_vacc.iplot(
    kind='lines',
    x='date',
    y=['daily_vaccinations_raw', 'daily_vaccinations'],
    title='United States Daily COVID-19 Vaccinations Administered Raw and Weekly Average',
)

In [93]:
# interactive bar plot of the combined daily raw vaccinations over time
comb_vacc.iplot(
    kind='bar',
    x='date',
    y='daily_vaccinations_raw',
    title='United States Daily COVID-19 Vaccinations Administered',
)