Use the covid_vaccine_statewise.csv dataset and perform the
following analytics.
#
A. Describe the dataset.
B. Number of Males vaccinated
C. Number of Females vaccinated

In [1]:
import pandas as pd

In [4]:
# Read the state-wise vaccination records into a DataFrame.
# The file 'Covid_Vaccine_Statewise.csv' must be in the current working directory.
df = pd.read_csv('Covid_Vaccine_Statewise.csv')

# Sanity check: a confirmation message followed by the first five rows.
print("Data loaded successfully.", df.head(), sep="\n")

Data loaded successfully.
   Updated On  State  Total Doses Administered  Sessions   Sites   \
0  16/01/2021  India                   48276.0    3455.0   2957.0   
1  17/01/2021  India                   58604.0    8532.0   4954.0   
2  18/01/2021  India                   99449.0   13611.0   6583.0   
3  19/01/2021  India                  195525.0   17855.0   7951.0   
4  20/01/2021  India                  251280.0   25472.0  10504.0   

   First Dose Administered  Second Dose Administered  \
0                  48276.0                       0.0   
1                  58604.0                       0.0   
2                  99449.0                       0.0   
3                 195525.0                       0.0   
4                 251280.0                       0.0   

   Male (Doses Administered)  Female (Doses Administered)  \
0                        NaN                          NaN   
1                        NaN                          NaN   
2                        NaN           

In [5]:
# A. Describe the dataset
# Structural overview: column names, non-null counts and dtypes.
# DataFrame.info() writes its report directly to stdout and returns None, so it
# is called on its own -- wrapping it in print() emits a stray "None" line.
print("--- Dataset Information ---")
df.info()

# Summary statistics (count, mean, std, min, quartiles, max). For a frame with
# mixed dtypes, describe() reports on the numeric columns only by default.
print("\n--- Statistical Description ---")
print(df.describe())

--- Dataset Information ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7845 entries, 0 to 7844
Data columns (total 24 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   Updated On                           7845 non-null   object 
 1   State                                7845 non-null   object 
 2   Total Doses Administered             7621 non-null   float64
 3   Sessions                             7621 non-null   float64
 4    Sites                               7621 non-null   float64
 5   First Dose Administered              7621 non-null   float64
 6   Second Dose Administered             7621 non-null   float64
 7   Male (Doses Administered)            7461 non-null   float64
 8   Female (Doses Administered)          7461 non-null   float64
 9   Transgender (Doses Administered)     7461 non-null   float64
 10   Covaxin (Doses Administered)        7621 non-null   float64
 11  Co

In [6]:
# B. Number of Males vaccinated
# The per-day figures are treated as running (cumulative) totals, so the largest
# value recorded for a state is taken as that state's overall count.
# NOTE: 'India' (the national aggregate) is itself a value in the 'State'
# column, so it also appears as one of the rows in this table.
male_vaccinated_statewise = (
    df.groupby('State')['Male (Doses Administered)']
    .max()
    .rename('Total Males Vaccinated')  # clearer column label for display
    .reset_index()
)

print("--- Number of Males Vaccinated (State-wise) ---")
print(male_vaccinated_statewise)

# Nationwide figure: taken from the rows whose 'State' is 'India' instead of
# summing the individual states (which would need that aggregate excluded to
# avoid double counting).
india_total_males = df.loc[df['State'] == 'India', 'Male (Doses Administered)'].max()
print(f"\nTotal Males Vaccinated in India: {india_total_males}")

--- Number of Males Vaccinated (State-wise) ---
                                       State  Total Males Vaccinated
0                Andaman and Nicobar Islands                165554.0
1                             Andhra Pradesh              10852932.0
2                          Arunachal Pradesh                486874.0
3                                      Assam               6739027.0
4                                      Bihar              14926420.0
5                                 Chandigarh                524263.0
6                               Chhattisgarh               5916437.0
7   Dadra and Nagar Haveli and Daman and Diu                444656.0
8                                      Delhi               6228216.0
9                                        Goa                748770.0
10                                   Gujarat              20266401.0
11                                   Haryana               7206601.0
12                          Himachal Pradesh           

In [7]:
# C. Number of Females vaccinated
# The per-day figures are treated as running (cumulative) totals, so the largest
# value recorded for a state is taken as that state's overall count.
# NOTE: 'India' (the national aggregate) is itself a value in the 'State'
# column, so it also appears as one of the rows in this table.
female_vaccinated_statewise = (
    df.groupby('State')['Female (Doses Administered)']
    .max()
    .rename('Total Females Vaccinated')  # clearer column label for display
    .reset_index()
)

print("--- Number of Females Vaccinated (State-wise) ---")
print(female_vaccinated_statewise)

# Nationwide figure: taken from the rows whose 'State' is 'India'.
india_total_females = df.loc[df['State'] == 'India', 'Female (Doses Administered)'].max()
print(f"\nTotal Females Vaccinated in India: {india_total_females}")

--- Number of Females Vaccinated (State-wise) ---
                                       State  Total Females Vaccinated
0                Andaman and Nicobar Islands                  145049.0
1                             Andhra Pradesh                12986129.0
2                          Arunachal Pradesh                  391993.0
3                                      Assam                 5962985.0
4                                      Bihar                12902990.0
5                                 Chandigarh                  399424.0
6                               Chhattisgarh                 5851349.0
7   Dadra and Nagar Haveli and Daman and Diu                  220429.0
8                                      Delhi                 4605508.0
9                                        Goa                  648014.0
10                                   Gujarat                16880326.0
11                                   Haryana                 5801370.0
12                         