# Import All Required Libraries

In [None]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as pt
import seaborn as sns
import plotly.express as px
from plotly.subplots import make_subplots
from datetime import datetime
import warnings
warnings.filterwarnings("ignore")

# Read File

In [None]:
# Load the COVID-19 India case data into the DataFrame `f`.
f=pd.read_csv(r"../input/covid-19-ind/covid_19_india.csv")


In [None]:
# Take a look at the first rows of the dataset
f.head()

In [None]:
# Print the column names, dtypes and non-null counts.
f.info()

In [None]:
# Summary statistics of the case data.
f.describe()

# Data cleaning 

In [None]:
## ConfirmedIndianNational should hold integers but is loaded as object dtype,
## so inspect its distinct values before changing the data type.
f["ConfirmedIndianNational"].unique()

In [None]:
# Count how often each distinct value occurs in ConfirmedIndianNational.
f["ConfirmedIndianNational"].value_counts()

In [None]:
## Rows holding the placeholder "-" (17664 of them according to the original
## analysis) are treated as 0 so the column can be converted to integers.
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: that chained in-place form warns in pandas 2.x and leaves
# `f` unchanged when Copy-on-Write is enabled.
f["ConfirmedIndianNational"] = (
    f["ConfirmedIndianNational"].replace(to_replace="-", value="0").astype("int")
)

In [None]:
# Re-inspect the dtypes after converting ConfirmedIndianNational.
f.info()

In [None]:
## ConfirmedForeignNational should hold integers but is loaded as object dtype,
## so inspect its distinct values before changing the data type.
f["ConfirmedForeignNational"].unique()

In [None]:
# Count how often each distinct value occurs in ConfirmedForeignNational.
f["ConfirmedForeignNational"].value_counts()

In [None]:
## Rows holding the placeholder "-" (17664 of them according to the original
## analysis) are treated as 0 so the column can be converted to integers.
# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: that chained in-place form warns in pandas 2.x and leaves
# `f` unchanged when Copy-on-Write is enabled.
f["ConfirmedForeignNational"] = (
    f["ConfirmedForeignNational"].replace(to_replace="-", value="0").astype("int")
)

In [None]:
# Re-inspect the dtypes after converting ConfirmedForeignNational.
f.info()

In [None]:
# Parse the Date column into pandas datetime values.
f["Date"] = pd.to_datetime(f["Date"])


In [None]:
# Parse the Time column into pandas datetime values.
f["Time"] = pd.to_datetime(f["Time"])

In [None]:
# Keep only the time-of-day component of the parsed Time values.
f["Time"] = f["Time"].dt.time

In [None]:
# Re-inspect the dtypes after converting the Date and Time columns.
f.info()

# Drop unnecessary columns

In [None]:
# Remove the Sno column from `f` in place.
f.drop(columns=["Sno"], inplace=True)

In [None]:
# Show the columns that remain after the drop.
print(f.columns)

In [None]:
# Draw the boolean missing-value mask of `f` as a heatmap.
# The trailing semicolon suppresses the notebook's text output for the cell.
missing_mask = f.isna()
sns.heatmap(missing_mask);

In [None]:
# Number of missing values in each column of `f`.
f.isna().sum()

# Notes


There are no null values, so no further handling is needed.

# Get some statistical information about the data

In [None]:
# Summary statistics of the cleaned case data.
f.describe()

In [None]:
# Pairwise correlation of the numeric columns only. `f` still contains
# non-numeric columns (state names, dates, times), and pandas >= 2.0 raises
# instead of silently dropping them when corr() is called on the whole frame.
f.select_dtypes(include="number").corr()

# Insights from the data using visualization

In [None]:
# Preview the cleaned data before aggregating it.
f.head()


In [None]:
## Which state has the highest confirmed cases over the whole period?

# Select the value columns with a list (double brackets). Selecting several
# columns with a bare tuple -- groupby(...)["a", "b", "c"] -- raises in
# pandas >= 2.0.
# NOTE(review): this sums every row per state; if the source columns are
# running (cumulative) totals, summing over dates over-counts -- confirm.
State_wise = (
    f.groupby("State/UnionTerritory")[["Confirmed", "Deaths", "Cured"]]
    .sum()
    .reset_index()
)
# Deaths as a percentage of confirmed cases, per state.
State_wise["Death_percentage"] = (State_wise["Deaths"] / State_wise["Confirmed"]) * 100
State_wise.style.background_gradient(cmap="magma")



In [None]:
# Select the ten states with the largest Confirmed value once, then plot them.
top_confirmed = State_wise.nlargest(10, "Confirmed")
px.bar(
    x=top_confirmed["State/UnionTerritory"],
    y=top_confirmed["Confirmed"],
    color_discrete_sequence=px.colors.diverging.RdGy,
    title="Top 10 state with highest number of confirmed Cases",
)

In [None]:
# Select the ten states with the largest Cured value once, then plot them.
top_cured = State_wise.nlargest(10, "Cured")
px.bar(
    x=top_cured["State/UnionTerritory"],
    y=top_cured["Cured"],
    color_discrete_sequence=px.colors.sequential.Blackbody,
    title="Top 10 State with highest number of Cured cases",
)

In [None]:
# Select the ten states with the largest Death_percentage once, then plot them.
top_death_pct = State_wise.nlargest(10, "Death_percentage")
px.bar(
    x=top_death_pct["State/UnionTerritory"],
    y=top_death_pct["Death_percentage"],
    color_discrete_sequence=px.colors.diverging.Tropic,
    title="Top 10 State with highest number of Death_percentage cases",
)

# Exploratory Data Analysis of State wise testing Details Data set

# Read Statewise Testing data

Let "t" be the state-wise COVID-19 testing data.

In [None]:
# Load the state-wise testing details into the DataFrame `t`.
t=pd.read_csv(r"../input/covid-19-ind/StatewiseTestingDetails.csv")

In [None]:
# Preview the first rows of the testing data.
t.head()

In [None]:
# Print the column names, dtypes and non-null counts of the testing data.
t.info()

In [None]:
# Cast the Date column to datetime64[ns].
t["Date"]=t["Date"].astype("datetime64[ns]")

In [None]:
# Re-inspect the dtypes after casting the Date column.
t.info()

In [None]:
# (rows, columns) of the testing data.
t.shape

In [None]:
# Summary statistics of the testing data.
t.describe()

In [None]:
# Pairwise correlation of the numeric columns only. `t` also holds
# non-numeric columns (state names, dates), and pandas >= 2.0 raises instead
# of silently dropping them when corr() is called on the whole frame.
t.select_dtypes(include="number").corr()

In [None]:
# Set Negative to total samples minus positives, then keep only the rows
# that have no missing value in any column.
t["Negative"] = t["TotalSamples"].sub(t["Positive"])
t = t.dropna(axis=0, how="any")
t.info()

In [None]:
# Reduce the testing data to one row per state, taking the maximum of each
# value column independently.
# Select the columns with a list (double brackets). Selecting several columns
# with a bare tuple -- groupby(...)["a", "b", "c"] -- raises in pandas >= 2.0.
t = (
    t.groupby("State")[["TotalSamples", "Negative", "Positive"]]
    .max()
    .reset_index()
)
# Positive results as a percentage of all samples, per state.
t["Positive_percentage"] = (t["Positive"] / t["TotalSamples"]) * 100
t.style.background_gradient(cmap="gist_earth_r")

# Top 10 states with the highest number of samples

In [None]:
# Select the ten states with the largest TotalSamples once, then plot them.
top_samples = t.nlargest(10, "TotalSamples")
px.bar(
    x=top_samples["State"],
    y=top_samples["TotalSamples"],
    labels={"y": "Total Samples", "x": "State"},
    color_discrete_sequence=px.colors.sequential.Hot,
    title="Top 10 State with highest  number of Total Samples",
)

# Top 10 states with highest number of Negative cases

In [None]:
# Select the ten states with the largest Negative value once, then plot them.
top_negative = t.nlargest(10, "Negative")
px.bar(
    x=top_negative["State"],
    y=top_negative["Negative"],
    labels={"y": "Total Negative Cases", "x": "State"},
    color_discrete_sequence=px.colors.sequential.Magenta_r,
    title="Top 10 State with highest  number of Negative Cases",
)

# Top 10 states with highest number of Positive cases

In [None]:
# Select the ten states with the largest Positive value once, then plot them.
top_positive = t.nlargest(10, "Positive")
px.bar(
    x=top_positive["State"],
    y=top_positive["Positive"],
    labels={"y": "Total Positive Cases", "x": "State"},
    color_discrete_sequence=px.colors.sequential.OrRd_r,
    title="Top 10 State with highest  number of Positive Cases",
)

# Read Vaccination Data set


Let "v" be the vaccination data.

In [None]:
# Load the state-wise vaccination data into the DataFrame `v` and preview it.
v=pd.read_csv(r"../input/covid-19-ind/covid_vaccine_statewise.csv")
v.head()

In [None]:
# Number of missing values in each column of `v`.
v.isna().sum()

In [None]:
# Summary statistics of the vaccination data.
v.describe()

In [None]:
# (rows, columns) of the vaccination data.
v.shape

In [None]:
# Columns removed from the vaccination data before the analysis below.
dropped_vaccine_columns = [
    "Sputnik V (Doses Administered)",
    "AEFI",
    "18-44 Years (Doses Administered)",
    "45-60 Years(Individuals Vaccinated)",
    "60+ Years(Individuals Vaccinated)",
]
v = v.drop(columns=dropped_vaccine_columns)

In [None]:
# Preview the vaccination data after dropping the columns.
v.head()

In [None]:
# Sum each gender column over every row of `v`, then compare the three totals
# in a pie chart.
male, female, trans = (
    v[gender_column].sum()
    for gender_column in (
        "Male(Individuals Vaccinated)",
        "Female(Individuals Vaccinated)",
        "Transgender(Individuals Vaccinated)",
    )
)
px.pie(
    names=["Male Vaccinated", "Female Vaccinated", "Trans Gender"],
    values=[male, female, trans],
    title="Male and Female Vaccinated ratio for Covid19",
)

In [None]:
# Print the column names, dtypes and non-null counts of the vaccination data.
v.info()

In [None]:
# Sum the dose and individual columns over every row of `v`, then compare the
# two totals in a pie chart.
Doses, Vaccinated = (
    v[total_column].sum()
    for total_column in (
        "Total Doses Administered",
        "Total Individuals Vaccinated",
    )
)
px.pie(
    names=["Doses Administered", "People Vaccinated"],
    values=[Doses, Vaccinated],
    title="Doses administered vs People Vaccinated",
)

In [None]:
# Sum the per-vaccine dose columns over every row of `v`, then compare the two
# totals in a pie chart.
# The leading space in the Covaxin column name is part of the key used here.
Covaxin, Covishield = (
    v[brand_column].sum()
    for brand_column in (
        " Covaxin (Doses Administered)",
        "CoviShield (Doses Administered)",
    )
)
px.pie(
    names=["Covaxin Vaccinated", "Covishield Vaccinated"],
    values=[Covaxin, Covishield],
    title="Covaxin and Covishield Vaccination",
)