### Q7. (10 marks) For each state, district and overall, find the following ratios: total number of Covishield vaccinated persons (either 1 or 2 doses) to total number of Covaxin vaccinated persons (same). Output them in the following manner: districtid, vaccineratio. Call this output file vaccine-type-ratio.csv and the script/program to generate this vaccine-type-ratio-generator.sh. Sort the output by the ratio.

# 1. Import necessary libraries

In [1]:
import json
import numpy as np
import pandas as pd
import datetime
from dateutil import relativedelta  # used for handling dates and doing relative arithmetic

# 2. Load the cleaned vaccination data (done in Q1)

In [2]:
# Load the cowin_vaccine_data_districtwise.csv
cowin_vaccine_data_districtwise = pd.read_csv('./dataset/cowin_vaccine_data_districtwise_clean.csv', dtype='string')
cowin_vaccine_data_districtwise.head()

# convert number values columns to numeric
cowin_vaccine_data_districtwise.iloc[:, 4:] = cowin_vaccine_data_districtwise.iloc[:, 4:].apply(
                                                                    pd.to_numeric, errors='ignore')
print('The datatypes of columns containing numeric values has been changed from string to numeric')

The datatypes of columns containing numeric values has been changed from string to numeric


# 3. Find the unique district names in the vaccine data

In [3]:
# Find the unique district names in the vaccine data
district_names_from_vaccine_data = cowin_vaccine_data_districtwise['District'].dropna().unique()
district_names_from_vaccine_data = [district_name.lower() for district_name in district_names_from_vaccine_data]
print('Number of unique districts in vaccine data =', len(district_names_from_vaccine_data))

Number of unique districts in vaccine data = 714


# 4. Prepare district-vaccine-type-ratio.csv
The vaccination data starts from 16 January 2021

In [4]:
# Prepare a file for storing vaccine type ratio for each district
district_vaccine_type_ratio = pd.DataFrame(columns=['districtid', 'vaccineratio'])

for district in list(district_names_from_vaccine_data):
    
    # find the vaccination data for this district
    district_data = cowin_vaccine_data_districtwise[cowin_vaccine_data_districtwise['District'].str.lower() == district]
    
    # find the district_key for this district
    district_key = district_data['District_Key'].values[0]

    # define start_date and end_date
    # the vaccination data starts from 16 January 2021
    start_date = datetime.datetime.strptime('16/01/2021', '%d/%m/%Y')
    end_date = datetime.datetime.strptime('14/08/2021', '%d/%m/%Y')
    
    # change the date format to match the format in dataframe
    start_date = start_date.strftime('%d/%m/%Y')
    end_date = end_date.strftime('%d/%m/%Y')

    # calculate total covaxin and covishield doses administered
    # the value on the last day gives total since the data is cumulative
    covaxin_doses = district_data[end_date + '-' + 'Covaxin (Doses Administered)'].values[0]
    covishield_doses = district_data[end_date + '-' + 'CoviShield (Doses Administered)'].values[0]

    # put 'NaN' where division by zero occurs
    if(covaxin_doses == 0):
        vaccine_type_ratio = float('NaN')
    else:
        vaccine_type_ratio = covishield_doses/covaxin_doses

    # append data to our file
    district_vaccine_type_ratio.loc[-1] = [district_key, vaccine_type_ratio]
    district_vaccine_type_ratio.index += 1

# dump data to csv files
district_vaccine_type_ratio = district_vaccine_type_ratio.sort_values('vaccineratio')
district_vaccine_type_ratio.to_csv('./output/district-vaccine-type-ratio.csv', index=False)
district_vaccine_type_ratio.head()

Unnamed: 0,districtid,vaccineratio
261,OR_Khordha,0.231339
671,AS_Barpeta,0.817545
614,BR_Patna,0.930361
355,MH_Bhandara,1.182007
644,AS_Sonitpur,1.393974


# 5. Find the unique states in vaccine data

In [5]:
# Find the unique state names in the vaccine data
state_names_from_vaccine_data = cowin_vaccine_data_districtwise['State'].dropna().unique()
state_names_from_vaccine_data = [state_name.lower() for state_name in state_names_from_vaccine_data]
print('Number of unique state in vaccine data =', len(state_names_from_vaccine_data))

Number of unique state in vaccine data = 36


# 6. Prepare state-vaccine-type-ratio.csv
The vaccination data starts from 16 January 2021

In [6]:
# Prepare a file for storing vaccine type ratio for each state
state_vaccine_type_ratio = pd.DataFrame(columns=['stateid', 'vaccineratio'])

for state in list(state_names_from_vaccine_data):
    
    # find the vaccination data for this state
    state_data = cowin_vaccine_data_districtwise[cowin_vaccine_data_districtwise['State'].str.lower() == state]
    
    # find the state_code for this state
    state_code = state_data['State_Code'].values[0]
    
    # define start_date and end_date based on our time period of analysis
    start_date = datetime.datetime.strptime('10/01/2021', '%d/%m/%Y')
    end_date = datetime.datetime.strptime('14/08/2021', '%d/%m/%Y')
    
    # change the date format to match the format in dataframe
    start_date = start_date.strftime('%d/%m/%Y')
    end_date = end_date.strftime('%d/%m/%Y')

    # calculate total covaxin and covishield doses administered
    # the value on the last day gives total since the data is cumulative
    covaxin_doses = sum(state_data[end_date + '-' + 'Covaxin (Doses Administered)'])
    covishield_doses = sum(state_data[end_date + '-' + 'CoviShield (Doses Administered)'])

    # put 'NaN' where division by zero occurs
    if(covaxin_doses == 0):
        vaccine_type_ratio = float('NaN')
    else:
        vaccine_type_ratio = covishield_doses/covaxin_doses

    # append data to our file
    state_vaccine_type_ratio.loc[-1] = [state_code, vaccine_type_ratio]
    state_vaccine_type_ratio.index += 1

# dump data to csv files
state_vaccine_type_ratio = state_vaccine_type_ratio.sort_values('vaccineratio')
state_vaccine_type_ratio.to_csv('./output/state-vaccine-type-ratio.csv', index=False)
state_vaccine_type_ratio.head()

Unnamed: 0,stateid,vaccineratio
0,DL,3.222403
5,TG,3.93607
22,JH,5.095742
34,AP,5.129828
6,TN,5.532841


# 7. Prepare overall-vaccine-type-ratio.csv
The vaccination data starts from 16 January 2021

In [7]:
# Prepare a file for storing vaccine type ratio for India (overall)
overall_vaccine_type_ratio = pd.DataFrame(columns=['overallid', 'vaccineratio'])

# define start_date and end_date based on our time period of analysis
start_date = datetime.datetime.strptime('10/01/2021', '%d/%m/%Y')
end_date = datetime.datetime.strptime('14/08/2021', '%d/%m/%Y')

# change the date format to match the format in dataframe
start_date = start_date.strftime('%d/%m/%Y')
end_date = end_date.strftime('%d/%m/%Y')

# calculate total covaxin and covishield doses administered
# the value on the last day gives total since the data is cumulative
covaxin_doses = sum(cowin_vaccine_data_districtwise[end_date + '-' + 'Covaxin (Doses Administered)'])
covishield_doses = sum(cowin_vaccine_data_districtwise[end_date + '-' + 'CoviShield (Doses Administered)'])

# put 'NaN' where division by zero occurs
if(covaxin_doses == 0):
    vaccine_type_ratio = float('NaN')
else:
    vaccine_type_ratio = covishield_doses/covaxin_doses

# append data to our file
overall_vaccine_type_ratio.loc[-1] = ['India', vaccine_type_ratio]
overall_vaccine_type_ratio.index += 1

# dump data to csv files
overall_vaccine_type_ratio.to_csv('./output/overall-vaccine-type-ratio.csv', index=False)
overall_vaccine_type_ratio.head()

Unnamed: 0,overallid,vaccineratio
0,India,7.009187


--------------------------------------------------------------------------------- END of Q7 ---------------------------------------------------------------------------------------------