# 🇮🇳 COVID-19 Cases in India – EDA Project

Company / Internship Level Data Analysis

## 1. Import Libraries

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Start from matplotlib's "default" style sheet so the seaborn theme applied
# below is layered on a known baseline rather than on leftover kernel state.
plt.style.use("default")
# Apply seaborn's global theme with the "whitegrid" axes style; this must come
# after the matplotlib reset above, otherwise the reset would undo it.
sns.set_theme(style="whitegrid")


## 2. Load Dataset

In [None]:

# Read the CSV (path is resolved relative to the notebook's working directory)
# and preview the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer="Covid_cases_in_India.csv")
df.head(n=5)


## 3. Basic Data Understanding

In [None]:

# (rows, columns) of the frame as loaded, before any cleaning.
df.shape


In [None]:

# Per-column summary (names, non-null counts, dtypes) to spot missing values
# or columns that were parsed with an unexpected type.
df.info()


### Observation
The dataset contains state-wise COVID-19 information with confirmed, active, recovered, and death cases.

## 4. Data Cleaning

In [None]:

# Target schema, in the positional order the raw CSV is expected to provide
# once the serial-number column has been removed.
clean_columns = ["State", "Total_Confirmed", "Active", "Recovered", "Deaths"]

# errors="ignore" keeps this cell safe to re-run: on a second execution the
# "S. No." column is already gone and a plain drop() would raise KeyError.
df = df.drop(columns=["S. No."], errors="ignore")

# Rename positionally only when the frame has not been renamed yet. Later cells
# append derived columns (Recovery_Rate, Death_Rate), so unconditionally
# assigning five names on a re-run would raise a length-mismatch ValueError.
# On a first run the assignment still fails loudly if the CSV does not have
# exactly five remaining columns.
if list(df.columns[: len(clean_columns)]) != clean_columns:
    df.columns = clean_columns
df


## 5. Top 10 States by Confirmed Cases

In [None]:

# Order states by cumulative confirmed cases (largest first) and keep the top ten.
top_states = df.sort_values(by="Total_Confirmed", ascending=False).iloc[:10]

# Horizontal bars: one bar per state, length = confirmed cases.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=top_states, x="Total_Confirmed", y="State", ax=ax)
ax.set_title("Top 10 States by Total Confirmed COVID-19 Cases")
plt.show()


## 6. Active vs Recovered vs Deaths

In [None]:

# Index by state so each x-axis tick is a state, then draw one bar per
# case category (active / recovered / deaths) side by side.
case_counts = df.set_index("State")[["Active", "Recovered", "Deaths"]]
ax = case_counts.plot.bar(figsize=(14, 6))
ax.set_title("COVID-19 Case Distribution by State")
ax.set_ylabel("Number of Cases")
plt.show()


## 7. Recovery Rate Analysis

In [None]:

# Recovered cases as a percentage of total confirmed cases, stored on df.
df["Recovery_Rate"] = df["Recovered"].div(df["Total_Confirmed"]).mul(100)

# Plot states from highest to lowest recovery rate.
by_recovery = df.sort_values(by="Recovery_Rate", ascending=False)
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=by_recovery, x="Recovery_Rate", y="State", ax=ax)
ax.set_title("Recovery Rate (%) by State")
plt.show()


## 8. Death Percentage Analysis

In [None]:

# Deaths as a percentage of total confirmed cases, stored on df.
df["Death_Rate"] = df["Deaths"].div(df["Total_Confirmed"]).mul(100)

# Plot states from highest to lowest death rate.
by_death_rate = df.sort_values(by="Death_Rate", ascending=False)
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=by_death_rate, x="Death_Rate", y="State", ax=ax)
ax.set_title("Death Rate (%) by State")
plt.show()


## 9. Correlation Heatmap

In [None]:

# Pairwise correlation (pandas' default method) between the four count columns.
count_columns = ["Total_Confirmed", "Active", "Recovered", "Deaths"]
corr_matrix = df[count_columns].corr()

# Annotated heatmap: each cell shows the correlation coefficient.
fig, ax = plt.subplots(figsize=(8, 5))
sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", ax=ax)
ax.set_title("Correlation Heatmap")
plt.show()


## 10. Final Insights
- Major states contribute most of the cases
- Recovery rate is significantly higher than death rate
- Active cases are concentrated in a few regions
- Data visualization helps in quick decision making