# **Coronavirus Pandemic in India Analysis**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### **Data Cleaning**

In [None]:
# Load the state-wise COVID-19 case records (path is relative to the notebook's working directory).
dfc = pd.read_csv(filepath_or_buffer="Data/COVID19_India.csv")

In [None]:
# Load the state-wise vaccination records (path is relative to the notebook's working directory).
dfv = pd.read_csv(filepath_or_buffer="Data/COVID_Vaccine_Statewise.csv")

In [None]:
# Print dfc's columns, non-null counts and dtypes to spot missing data before cleaning.
dfc.info()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for dfc's numeric columns.
dfc.describe()

In [None]:
# Drop the columns this analysis never uses. The result is reassigned (instead of
# using inplace=True) and errors='ignore' is passed so the cell can be re-run
# safely: the in-place version raised KeyError on a second execution because the
# columns had already been removed.
dfc = dfc.drop(
    columns=['Sno', 'Time', 'ConfirmedIndianNational', 'ConfirmedForeignNational'],
    errors='ignore',
)

In [None]:
# Parse the 'Date' strings (YYYY-MM-DD) into datetime values so they sort and plot chronologically.
dfc = dfc.assign(Date=lambda frame: pd.to_datetime(frame['Date'], format='%Y-%m-%d'))

In [None]:
# Rename the 'Updated On' column to 'Vaccine Date'.
dfv = dfv.rename(columns={'Updated On': 'Vaccine Date'})

In [None]:
# Print dfv's columns, non-null counts and dtypes to spot missing data before cleaning.
dfv.info()

In [None]:
# Count the missing values in each column of dfv.
dfv.isna().sum()

In [None]:
# Copy of dfv without the columns listed below. Rows are untouched, so any rows
# with State == 'India' are still present in `vaccination`.
vaccination = dfv.drop(
    columns=[
        'Sputnik V (Doses Administered)',
        'AEFI',
        '18-44 Years (Doses Administered)',
        '45-60 Years (Doses Administered)',
        '60+ Years (Doses Administered)',
    ]
)

In [None]:
# Keep only the rows for individual states/UTs by removing rows whose State is 'India'.
# .copy() makes `vaccine` an independent frame, so later modifications of it do not
# trigger pandas' SettingWithCopyWarning.
vaccine = dfv[dfv.State != 'India'].copy()

In [None]:
# Shorten the column name used by the vaccination plots. The result is reassigned
# rather than renamed in place: an in-place change on a frame obtained by filtering
# dfv can emit SettingWithCopyWarning.
vaccine = vaccine.rename(columns={'Total Individuals Vaccinated': 'Total'})

In [None]:
# Active cases = confirmed cases minus those who have recovered or died.
dfc['Active'] = dfc['Confirmed'].sub(dfc['Cured'].add(dfc['Deaths']))

In [None]:
# Per-state maximum of the Confirmed, Deaths and Cured columns.
# The string 'max' is used instead of the Python builtin `max`: passing the builtin
# as aggfunc is deprecated and emits a FutureWarning on pandas >= 2.1.
statewise = pd.pivot_table(
    dfc,
    values=['Confirmed', 'Deaths', 'Cured'],
    index='State/UnionTerritory',
    aggfunc='max',
)

In [None]:
# Recovery rate: cured cases as a percentage of confirmed cases.
statewise['Recovery Rate'] = statewise['Cured'].mul(100).div(statewise['Confirmed'])

In [None]:
# Mortality rate: deaths as a percentage of confirmed cases.
statewise['Mortality Rate'] = statewise['Deaths'].mul(100).div(statewise['Confirmed'])

In [None]:
# Order the states from most to fewest confirmed cases.
statewise = statewise.sort_values('Confirmed', ascending=False)

In [None]:
# Display the table with each column shaded on a grayscale gradient by value.
statewise.style.background_gradient('gray')

### **Exploratory Data Analysis**

#### **Top 15 States (COVID-19 Active Cases)**

In [None]:
# Highest 'Active' value ever recorded for each state/UT, largest first.
# The two columns are selected *before* aggregating so max() is not computed over
# every other column of dfc; the resulting frame is the same.
# Note: 'Date' is the maximum date per state, computed independently of 'Active' --
# it is not the date on which the peak occurred.
top_active = (
    dfc.groupby(by='State/UnionTerritory')[['Active', 'Date']]
    .max()
    .sort_values(by=['Active'], ascending=False)
    .reset_index()
)

label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

# Explicit figure/axes so every call below targets this plot rather than pyplot's
# implicit "current" axes.
fig, ax = plt.subplots(figsize=(27, 7), dpi=500)
sns.barplot(
    x='State/UnionTerritory',
    y='Active',
    data=top_active.iloc[:15],  # top 15 states only
    palette='Dark2',
    linewidth=1,
    edgecolor='black',
    ax=ax,
)

ax.set_xlabel('States', fontdict=label_font)
ax.set_ylabel('Active Cases', fontdict=label_font)
ax.set_title("Top 15 States (COVID-19 Active Cases)", fontdict=title_font)

plt.show()

**_From the analysis above, we can infer that Maharashtra recorded the highest peak of active COVID-19 cases, followed by Karnataka, Kerala, and Tamil Nadu, in that order._**

#### **Top 15 States (COVID-19 Most Deaths)**

In [None]:
# Highest 'Deaths' value recorded for each state/UT, largest first.
# The two columns are selected *before* aggregating so max() is not computed over
# every other column of dfc; the resulting frame is the same.
# Note: 'Date' is the maximum date per state, computed independently of 'Deaths'.
top_deaths = (
    dfc.groupby(by='State/UnionTerritory')[['Deaths', 'Date']]
    .max()
    .sort_values(by=['Deaths'], ascending=False)
    .reset_index()
)

label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

# Explicit figure/axes so every call below targets this plot rather than pyplot's
# implicit "current" axes.
fig, ax = plt.subplots(figsize=(27, 7), dpi=500)
sns.barplot(
    x='State/UnionTerritory',
    y='Deaths',
    data=top_deaths.iloc[:15],  # top 15 states only
    palette='tab10',
    linewidth=1,
    edgecolor='black',
    ax=ax,
)

ax.set_xlabel('States', fontdict=label_font)
ax.set_ylabel('Deaths', fontdict=label_font)
ax.set_title("Top 15 States (COVID-19 Most Deaths)", fontdict=title_font)

plt.show()

**_According to the analysis above, Maharashtra has the most COVID-19 deaths, followed by Karnataka, Tamil Nadu, and New Delhi, in that order._**

#### **COVID-19 Growth**

In [None]:
# Active cases over time for a hand-picked set of states, one line per state.
growth_states = ['Maharashtra', 'Karnataka', 'Kerala', 'Tamil Nadu', 'Uttar Pradesh', 'Rajasthan', 'Andhra Pradesh']
growth_rows = dfc[dfc['State/UnionTerritory'].isin(growth_states)]

growth_label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 13, 'fontweight': 'bold'}
growth_title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 18, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize=(20, 7), dpi=500)
sns.lineplot(
    data=growth_rows,
    x='Date',
    y='Active',
    hue='State/UnionTerritory',
    linewidth=2,
    ax=ax,
)

ax.set_xlabel('Date', fontdict=growth_label_font)
ax.set_ylabel('Active Cases', fontdict=growth_label_font)
ax.set_title("COVID-19 Growth", fontdict=growth_title_font)

plt.show()

**_The analysis above shows how the number of active COVID-19 cases in seven of the most affected states changed over time._**

#### **Gender (COVID-19 Vaccination)**

In [None]:
# `vaccination` was built from dfv without removing the rows whose State is 'India'.
# Those rows appear to be nation-wide aggregates (the state-wise `vaccine` frame
# excludes them), so summing them together with the state rows would count the same
# people twice. Restrict the totals to the individual states/UTs.
# NOTE(review): the per-date figures look like running totals -- confirm whether the
# latest value per state should be used here instead of a sum over all dates.
state_rows = vaccination[vaccination['State'] != 'India']

male = state_rows['Male(Individuals Vaccinated)'].sum()
female = state_rows['Female(Individuals Vaccinated)'].sum()

label = ['Male', 'Female']

# Two-slice pie showing each gender's share, annotated with percentages.
plt.pie(
    [male, female],
    labels=label,
    autopct='%.2f%%',
    textprops={"font": "Arial", "size": "large", "weight": "bold"},
    wedgeprops={"edgecolor": "white", "linewidth": 5, 'antialiased': True},
)
plt.title('Gender (COVID-19 Vaccination)', fontdict={'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 17, 'fontweight': 'bold'})

plt.show()

**_From the analysis above, we infer that more males than females were vaccinated during the COVID-19 vaccination drive._**

#### **Top 15 States (COVID-19 Most Vaccinations)**

In [None]:
# Sum 'Total' per state and keep the 15 states with the largest sums.
# NOTE(review): if 'Total' is a running (cumulative) figure per date, summing over
# dates overstates the counts -- confirm whether max() per state is intended.
vaccine_max = (
    vaccine.groupby('State')['Total']
    .sum()
    .to_frame('Total')
    .sort_values('Total', ascending=False)
    .head(15)
)

most_label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 15, 'fontweight': 'bold'}
most_title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 20, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize=(27, 7), dpi=500)
sns.barplot(
    x=vaccine_max.index,
    y=vaccine_max['Total'],
    data=vaccine_max,  # already limited to 15 rows above
    palette='Set1',
    linewidth=1,
    edgecolor='black',
    ax=ax,
)

ax.set_xlabel('States', fontdict=most_label_font)
ax.set_ylabel('Vaccination', fontdict=most_label_font)
ax.set_title("Top 15 States (COVID-19 Most Vaccinations)", fontdict=most_title_font)

plt.show()

**_The analysis above shows that Maharashtra had the most vaccinations, followed by Uttar Pradesh, Rajasthan, and Gujarat._**

#### **Top 10 States (COVID-19 Least Vaccinations)**

In [None]:
# Sum 'Total' per state and keep the 10 states with the smallest sums.
# NOTE(review): if 'Total' is a running (cumulative) figure per date, summing over
# dates overstates the counts -- confirm whether max() per state is intended.
vaccine_min = (
    vaccine.groupby('State')['Total']
    .sum()
    .to_frame('Total')
    .sort_values('Total', ascending=True)
    .head(10)
)

least_label_font = {'fontname': 'Arial', 'color': 'black', 'fontsize': 17, 'fontweight': 'bold'}
least_title_font = {'fontname': 'Times New Roman', 'color': 'black', 'fontsize': 22, 'fontweight': 'bold'}

fig, ax = plt.subplots(figsize=(30, 10), dpi=700)
sns.barplot(
    x=vaccine_min.index,
    y=vaccine_min['Total'],
    data=vaccine_min,  # already limited to 10 rows above
    palette='Set2',
    linewidth=1,
    edgecolor='black',
    ax=ax,
)

ax.set_xlabel('States', fontdict=least_label_font)
ax.set_ylabel('Vaccination', fontdict=least_label_font)
ax.set_title("Top 10 States (COVID-19 Least Vaccinations)", fontdict=least_title_font)

plt.show()

**_The analysis above shows that Lakshadweep had the fewest vaccinations, followed by Andaman and Nicobar Islands, Ladakh, and Dadra and Nagar Haveli and Daman and Diu._**

#### **Conclusion**

**_This analysis of the COVID-19 pandemic in India shows that Maharashtra recorded the highest number of active cases, the highest number of deaths, and the highest number of vaccinations. Uttar Pradesh had the second-highest number of vaccinations, while Karnataka had the second-highest number of active cases. Lakshadweep and the Andaman and Nicobar Islands were the two Union Territories with the fewest vaccinations, in that order. We also saw how active case counts changed over time, and that more men than women were vaccinated._**