In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import datetime as dt

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# os.walk descends recursively; each file name is joined onto its directory
# so the printed value is the full path of the file.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In this notebook we're going to find out
* the top 10 states that distributed the most vaccines in total
* the top 10 states that distributed the most vaccines per capita

## Exploring the Data

In [None]:
# Read the state-level vaccination CSV and keep a preview of its first rows.
us_vaccinations = pd.read_csv(
    '../input/usa-covid19-vaccinations/us_state_vaccinations.csv'
)
first_five = us_vaccinations.head(5)
first_five

In [None]:
# Print a summary of the frame (columns, non-null counts, dtypes).
us_vaccinations.info()

The `date` column is of `object` type, let's convert it to `datetime` datatype.

In [None]:
# Parse the `date` column into datetime values, producing a new frame
# rather than assigning into the existing one.
us_vaccinations = us_vaccinations.assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

Let's find out the missing values in the dataset.

In [None]:
# Number of missing entries in each column.
us_vaccinations.isna().sum()

In [None]:
# Replace every missing value with 0; the name is rebound to the filled
# frame instead of mutating the existing one in place.
us_vaccinations = us_vaccinations.fillna(0)

Let's find the unique states listed in the dataset.

In [None]:
# Distinct values of the `location` column, in order of first appearance.
pd.unique(us_vaccinations['location'])

Here we can see a few entries that are not states; let's drop them.

In [None]:
# Values of `location` that do not correspond to a state.
drop_locs = ['Bureau of Prisons', 'Dept of Defense', 'Indian Health Svc',
             'Long Term Care', 'United States', 'Veterans Health']
# Keep only the rows whose location is not in the exclusion list; the
# surviving rows retain their original index labels.
is_state_row = ~us_vaccinations['location'].isin(drop_locs)
us_vaccinations = us_vaccinations.loc[is_state_row].copy()
us_vaccinations.shape

Our task is to look at the vaccines distributed over the past seven days, so let's first work out which dates that covers.

In [None]:
# The window is the most recent date in the data plus the six days before it,
# i.e. seven calendar dates in total.
day_seven = us_vaccinations['date'].max()
day_one = day_seven - pd.Timedelta(days=6)
print('day_one:', day_one, ', day_seven:', day_seven)

### Top 10 states that distributed the most vaccines total

In [None]:
# State-wise vaccine distribution over the last seven days.
#
# The previous version summed `total_distributed` over every date and never
# applied the day_one..day_seven window computed earlier. Per the dataset
# documentation `total_distributed` is a cumulative running total, so summing
# its daily rows counts the same doses once per reporting day. Instead we
# restrict to the window and take the net increase of the running total
# (last reported value minus first reported value inside the window).
#
# Rows where the value is 0 or missing are ignored: after the earlier
# fillna(0) step a 0 only marks "not reported" and would otherwise be taken
# as the baseline, inflating the increase.
last_week_totals = us_vaccinations.loc[
    us_vaccinations['date'].between(day_one, day_seven)
    & (us_vaccinations['total_distributed'] > 0),
    ['location', 'date', 'total_distributed'],
].sort_values('date')  # chronological order so first()/last() are earliest/latest
totals_by_state = last_week_totals.groupby('location')['total_distributed']
total_distributions_sorted = (
    totals_by_state.last() - totals_by_state.first()
).sort_values(ascending=False)
top_10_total_distributors = total_distributions_sorted.head(10)
top_10_total_distributors

In [None]:
# Last ten entries of the ranking, i.e. the low end of the descending sort.
bottom_10_distributors = total_distributions_sorted.iloc[-10:]
bottom_10_distributors

### Top 10 states that distributed the most vaccines per capita

In [None]:
# Per-capita vaccine distribution over the last seven days.
#
# The previous version summed `distributed_per_hundred` over every date and
# never applied the day_one..day_seven window. Per the dataset documentation
# the column is a cumulative figure (doses distributed per 100 people to
# date), so summing its daily rows is not a meaningful quantity. Instead we
# restrict to the window and take the net increase (last reported value minus
# first reported value inside the window).
#
# Rows where the value is 0 or missing are ignored: after the earlier
# fillna(0) step a 0 only marks "not reported" and would distort the baseline.
last_week_per_hundred = us_vaccinations.loc[
    us_vaccinations['date'].between(day_one, day_seven)
    & (us_vaccinations['distributed_per_hundred'] > 0),
    ['location', 'date', 'distributed_per_hundred'],
].sort_values('date')  # chronological order so first()/last() are earliest/latest
per_hundred_by_state = last_week_per_hundred.groupby('location')['distributed_per_hundred']
vac_per_capita = per_hundred_by_state.last() - per_hundred_by_state.first()
vac_per_capita_sorted = vac_per_capita.sort_values(ascending=False)
top_10_per_capita_distributors = vac_per_capita_sorted.head(10)
top_10_per_capita_distributors

Hence, *California* is the state that has distributed the most vaccines in total in the last week.

*Northern Mariana Islands* is the location (a U.S. territory rather than a state) that has distributed the most vaccines per capita in the last week.
