### Q9. (10 marks) For every state, find the date on which the entire population will get at least one dose of vaccination. Assume the same rate of vaccination as in the week ending on 14th Aug, 2021. (Do not treat children separately, and assume the same rate of vaccination.) Output them in the following manner: stateid, populationleft, rateofvaccination, date. Call this output file complete-vaccination.csv and the script/program to generate this complete-vaccination-generator.sh.

# 1. Importing the necessary libraries

In [1]:
import json
import numpy as np
import pandas as pd
import datetime
from dateutil import relativedelta  # used for handling dates and doing relative arithmetic

# 2. Load the cleaned vaccination data (done in Q1)

In [2]:
# Load the cleaned cowin_vaccine_data_districtwise.csv; every column is read as text first
cowin_vaccine_raw = pd.read_csv('./dataset/cowin_vaccine_data_districtwise_clean.csv', dtype='string')


def _vaccine_column_to_numeric(column):
    """Return `column` parsed as numbers, or `column` unchanged if parsing fails.

    Explicit replacement for `pd.to_numeric(..., errors='ignore')`, which newer
    pandas releases deprecate: a column holding any non-numeric text is kept as is.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# convert number values columns (position 4 onwards) to numeric
# A new frame is assembled instead of assigning through `.iloc[:, 4:] = ...`, so the
# dtype change does not depend on how pandas handles writing numbers into string columns.
cowin_vaccine_data_districtwise = pd.concat(
    [
        cowin_vaccine_raw.iloc[:, :4],
        cowin_vaccine_raw.iloc[:, 4:].apply(_vaccine_column_to_numeric),
    ],
    axis=1,
)
print('The datatypes of columns containing numeric values has been changed from string to numeric')

The datatypes of columns containing numeric values has been changed from string to numeric


# 3. Load the cleaned census data (done in Q1)

In [3]:
# Load the census_data_clean.csv; every column is read as text first
census_data_raw = pd.read_csv('./dataset/census_data_clean.csv', dtype='string')
print(census_data_raw.head())


def _census_column_to_numeric(column):
    """Return `column` parsed as numbers, or `column` unchanged if parsing fails.

    Explicit replacement for `pd.to_numeric(..., errors='ignore')`, which newer
    pandas releases deprecate: a column holding any non-numeric text is kept as is.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Convert the columns containing numbers (position 3 onwards) to numeric datatype
# A new frame is assembled instead of assigning through `.iloc[:, 3:] = ...`, so the
# dtype change does not depend on how pandas handles writing numbers into string columns.
census_data = pd.concat(
    [
        census_data_raw.iloc[:, :3],
        census_data_raw.iloc[:, 3:].apply(_census_column_to_numeric),
    ],
    axis=1,
)
print('The datatypes of columns containing numeric values has been changed from string to numeric')

  State     Level               Name       TOT_P      TOT_M      TOT_F
0    00     India              India  1210854977  623270258  587584719
1    01     STATE  Jammu and Kashmir    12541302    6640662    5900640
2    01  DISTRICT            kupwara      870354     474190     396164
3    01  DISTRICT             budgam      753745     398041     355704
4    01  DISTRICT                leh      133487      78971      54516
The datatypes of columns containing numeric values has been changed from string to numeric


# 4. Find the common states between census data and vaccine data

In [4]:
# Vaccine side: distinct non-null state names, lower-cased so the match is case-insensitive
vaccine_state_column = cowin_vaccine_data_districtwise['State']
distinct_vaccine_states = vaccine_state_column.dropna().unique()
state_names_from_vaccine_data = [name.lower() for name in distinct_vaccine_states]
print('Number of unique state in vaccine data =', len(state_names_from_vaccine_data))

# Census side: only rows at STATE level carry a state name in 'Name'
is_state_row = census_data['Level'] == 'STATE'
distinct_census_states = census_data.loc[is_state_row, 'Name'].dropna().unique()
state_names_from_census_data = [name.lower() for name in distinct_census_states]
print('Number of unique state in census data =', len(state_names_from_census_data))

# States present in both sources (set of lower-cased names)
common_states_vaccine_and_census = set(state_names_from_census_data) & set(state_names_from_vaccine_data)
print('There are', len(common_states_vaccine_and_census), 'states common between the vaccine data and census data')

Number of unique state in vaccine data = 36
Number of unique state in census data = 35
There are 35 states common between the vaccine data and census data


# 5. Prepare complete-vaccination.csv

In [7]:
# Build complete-vaccination.csv: for every state, the population still without a first
# dose on 14/08/2021, the daily first-dose rate of the week ending that day, and the
# projected date on which the remaining population is covered at that rate.

# date format used in the column names of the vaccination dataframe
DATE_FORMAT = '%d/%m/%Y'
DAYS_IN_LAST_WEEK = 7

# define start_date and end_date
# the vaccination data starts from 16 January 2021
start_date = datetime.datetime.strptime('16/01/2021', DATE_FORMAT)
end_date = datetime.datetime.strptime('14/08/2021', DATE_FORMAT)

# define dates for last week
last_week_start_date = datetime.datetime.strptime('8/08/2021', DATE_FORMAT)
last_week_end_date = datetime.datetime.strptime('14/08/2021', DATE_FORMAT)
last_week_day_before_start_date = last_week_start_date - datetime.timedelta(days=1)

# keep the end date as a datetime for the projection before the names below become strings
projection_base_date = end_date

# change the date format to match the format in dataframe
start_date = start_date.strftime(DATE_FORMAT)
end_date = end_date.strftime(DATE_FORMAT)
last_week_start_date = last_week_start_date.strftime(DATE_FORMAT)
last_week_end_date = last_week_end_date.strftime(DATE_FORMAT)
last_week_day_before_start_date = last_week_day_before_start_date.strftime(DATE_FORMAT)

# cumulative first-dose columns: at the end of the data, and on the day before the last week
first_dose_end_column = end_date + '-' + 'First Dose Administered'
first_dose_baseline_column = last_week_day_before_start_date + '-' + 'First Dose Administered'

# One record per state; the dataframe is built once after the loop instead of being
# grown row by row. Iterating in sorted order makes the row order reproducible
# (plain set iteration order can differ between kernel sessions).
state_records = []
for state in sorted(common_states_vaccine_and_census):

    # find the vaccination data (one row per district) for this state
    state_data = cowin_vaccine_data_districtwise[cowin_vaccine_data_districtwise['State'].str.lower() == state]

    # find population data for this state
    population_data = census_data[(census_data['Level'] == 'STATE') & (census_data['Name'].str.lower() == state)]

    # find the state_code for this state
    state_code = state_data.iloc[0]['State_Code']

    # cumulative first doses over all districts, at the end date and just before the last week
    first_doses_at_end = sum(state_data[first_dose_end_column])
    first_doses_before_last_week = sum(state_data[first_dose_baseline_column])

    # calculate the rate of vaccination per day in the last week
    rate = (first_doses_at_end - first_doses_before_last_week) / DAYS_IN_LAST_WEEK

    # calculate the population of this state
    total_population = population_data['TOT_P'].values[0]

    # population left is total population - number of people already administered first dose
    population_left = total_population - first_doses_at_end

    if rate > 0:
        # days required to vaccinate the remaining population with the first dose
        # NOTE(review): int() truncates a fractional day count, and a negative
        # population_left yields a date before the end date — confirm both are intended.
        days_required = int(population_left / rate)
        complete_vaccination_date = projection_base_date + datetime.timedelta(days=days_required)
    else:
        # no first doses were recorded in the last week: the projection is undefined,
        # so store a missing date instead of failing on the division
        complete_vaccination_date = pd.NaT

    state_records.append({
        'stateid': state_code,
        'populationleft': population_left,
        'rateofvaccination': rate,
        'date': complete_vaccination_date,
    })

complete_vaccination = pd.DataFrame(
    state_records, columns=['stateid', 'populationleft', 'rateofvaccination', 'date'])

# dump data to csv file
complete_vaccination = complete_vaccination.sort_values('stateid')
complete_vaccination.to_csv('./output/complete-vaccination.csv', index=False)
complete_vaccination.head()

Unnamed: 0,stateid,populationleft,rateofvaccination,date
34,AN,147872,2526.857143,2021-10-11
20,AP,30870658,148431.142857,2022-03-09
24,AR,682750,1850.571429,2022-08-17
17,AS,19775523,177262.285714,2021-12-03
4,BR,78932074,317408.428571,2022-04-19


--------------------------------------------------------------------------------- END of Q9 ---------------------------------------------------------------------------------------------