In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found beneath it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#import other packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns

#import the plotly packages for geospatial data visualization
import plotly.offline as py
from plotly import tools
import plotly.graph_objs as go
import plotly.express as px

In [None]:
# Location of the state-level vaccination CSV inside the Kaggle input directory.
path = '/kaggle/input/usa-covid19-vaccinations/us_state_vaccinations.csv'

# Read the whole file into the DataFrame that the following cells work on.
df = pd.read_csv(filepath_or_buffer=path)

In [None]:
# Preview the first five rows (five is the default for head()).
df.head()

In [None]:
# Print a summary of the DataFrame: columns, non-null counts and dtypes.
df.info()

In [None]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

In [None]:
# List the distinct values that appear in the location column.
df['location'].unique()

The `location` column does not contain only state names; for example, "United States" is also included.

In [None]:
# Convert the date column from text to pandas datetimes.
from datetime import datetime

df['date'] = pd.to_datetime(df['date'])

# Report the earliest and the most recent date present in the data.
print("start date: ", df['date'].min())
print("latest date: ", df['date'].max())

# Count the calendar days from the earliest to the latest date, both ends included.
dates_range_length = len(pd.date_range(start=df['date'].min(), end=df['date'].max()))
print("The date range equals ", dates_range_length, " days.")

The first day of vaccination in the data was December 20th, 2020. The latest date is April 13th, 2021 (the day before this analysis was written). The date range therefore covers 115 days of vaccination.

In [None]:
# Derive the calendar month number from the date column and store it as its own column.
df['month'] = df['date'].dt.month
# Show which month numbers occur in the data.
df['month'].unique()

In [None]:
#Date by Week Number
# Series.dt.week was deprecated in pandas 1.1 and removed in pandas 2.0;
# dt.isocalendar().week yields the same ISO-8601 week number.
# isocalendar() returns a nullable UInt32 column, so cast back to int64 to keep the
# dtype the old accessor produced (assumes no missing dates -- TODO confirm).
df['week'] = df['date'].dt.isocalendar().week.astype('int64')


In [None]:
# Lookup table from the location names used in the data to two-letter postal codes.
state_code = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York State': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

# Translate each location into its code; locations that are not keys of the
# table (e.g. "United States") end up as NaN in the new column.
df['State Code'] = df['location'].map(state_code)

In [None]:
# Show the first three rows of the DataFrame.
df.head(n=3)

In [None]:
# Number of rows whose State Code is missing.
df['State Code'].isna().sum()

In [None]:
# Store the calendar year of each record in its own column.
df['year'] = df['date'].dt.year
# Keep only the records dated in 2020 ...
df_vaccinations_2020 = df.loc[df['year'] == 2020]
# ... and list which locations they belong to.
df_vaccinations_2020['location'].unique()

In 2020, the only location included was United States.

In [None]:
#Month
# Sum the daily vaccinations per state code and month, ordered by month and then state code.
df_month = (
    df.groupby(['State Code', 'month'])['daily_vaccinations']
    .sum()
    .reset_index()
    .sort_values(by=['month', 'State Code'])
)
# Swap month numbers 1-4 for readable labels; any other month value is left as it is.
df_month['month'] = df_month['month'].replace(
    {1: "January-2021", 2: "February-2021", 3: "March-2021", 4: "April-2021"}
)
df_month.head()

In [None]:
#Week
# Sum the daily vaccinations per state code and week number, ordered by week and then state code.
df_week = (
    df.groupby(['State Code', 'week'])['daily_vaccinations']
    .sum()
    .reset_index()
    .sort_values(by=['week', 'State Code'])
)
df_week.head()

In [None]:
#Daily corona vaccinations on monthly basis
# Animated US-state choropleth of df_month: colour shows daily_vaccinations, one frame per month value.
fig1 = px.choropleth(
    df_month,
    locations='State Code',
    color='daily_vaccinations',
    animation_frame='month',
    locationmode='USA-states',
    scope='usa',
    color_continuous_scale='YlOrRd',
    title="Vaccinations in the USA on monthly basis",
    labels={'daily_vaccinations': 'Number of Vaccinations'},
)

fig1.show()

In [None]:
#Daily corona vaccinations on weekly basis
# Animated US-state choropleth of df_week: colour shows daily_vaccinations, one frame per week value.
fig2 = px.choropleth(
    df_week,
    locations='State Code',
    color='daily_vaccinations',
    animation_frame='week',
    locationmode='USA-states',
    scope='usa',
    color_continuous_scale='YlOrRd',
    title="Vaccinations in the USA on weekly basis",
    labels={'daily_vaccinations': 'Number of Vaccinations'},
)

fig2.show()

Now, let's focus on the people who received a vaccine.

In [None]:
# Take the snapshot from the most recent date in the data rather than a hard-coded
# day, so the result stays current whenever the dataset is refreshed.
# Only the two cumulative people counts and the state code are kept; rows missing
# any of them (including locations without a state code) are dropped.
df_vaccinated = (
    df.loc[
        df['date'] == df['date'].max(),
        ['people_vaccinated', 'people_fully_vaccinated', 'State Code'],
    ]
    .dropna(axis=0)
)

In [None]:
# Show the last five rows of the snapshot (five is the default for tail()).
df_vaccinated.tail(n=5)

In [None]:
# Number of snapshot rows whose State Code is missing.
df_vaccinated['State Code'].isna().sum()


In [None]:
#choropleth map people vaccinated
# US-state choropleth of df_vaccinated coloured by the people_vaccinated column.
fig3 = px.choropleth(
    df_vaccinated,
    locations='State Code',
    color='people_vaccinated',
    locationmode='USA-states',
    scope='usa',
    color_continuous_scale='YlOrRd',
    title="Total People Vaccinated by US State",
    labels={'people_vaccinated': 'Total Number of Vaccinations'},
)

fig3.show()

In [None]:
#choropleth map people fully vaccinated
# US-state choropleth of df_vaccinated coloured by the people_fully_vaccinated column.
fig4 = px.choropleth(
    df_vaccinated,
    locations='State Code',
    color='people_fully_vaccinated',
    locationmode='USA-states',
    scope='usa',
    color_continuous_scale='YlOrRd',
    title="Total People Fully Vaccinated by US State",
    labels={'people_fully_vaccinated': 'Number of People Fully Vaccinated'},
)

fig4.show()

**Vaccinations in California in Week 14**

In [None]:
#vaccinations in California State in the previous week
# Select the rows with state code CA and week number 14, keeping only the columns of interest.
is_ca_week_14 = (df['State Code'] == 'CA') & (df['week'] == 14)
df_california = df.loc[
    is_ca_week_14,
    ['date', 'location', 'week', 'State Code', 'daily_vaccinations'],
]
df_california

In [None]:
#Average daily vaccinations in California
# Mean of daily_vaccinations over every row whose location is California.
average_CA = df.loc[df['location'] == "California", 'daily_vaccinations'].mean()
print("The average daily number of vaccinations in California equals ", average_CA)

In [None]:
# The bar heights start with the overall California average.
y = [average_CA]
# First seven daily_vaccinations values of the week-14 California rows, as a plain list.
daily_vaccination = df_california['daily_vaccinations'].iloc[:7].tolist()

#add vaccinations in week 14 to list
y += daily_vaccination
print(y)

In [None]:
#Vaccinations in California in Week 14
# Bar labels: the overall average first, then one bar per weekday.
# NOTE(review): assumes y holds the average followed by seven daily values in
# Monday-to-Sunday order -- confirm that week 14 has a row for every day.
x = ['Average','Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
# Green for the average bar, red for the daily bars. The loop variable has its own
# name so it no longer shadows the list x.
clrs = ['green' if label == 'Average' else 'red' for label in x]

fig, ax = plt.subplots(1, 1, figsize=(8,6))
# seaborn >= 0.12 accepts the data vectors only as keyword arguments; passing them
# positionally raises a TypeError. ax=ax draws on the axes created above.
sns.barplot(x=x, y=y, palette=clrs, ax=ax)

ax.set_xlabel('Day in Week 14') # add x-label to the plot
ax.set_ylabel('Number of Vaccins') # add y-label to the plot
ax.set_title('Daily Vaccinations in California in Week 14', fontsize=20)

plt.show()

**This is the end of this analysis.**