In [83]:
import numpy as np
import pandas as pd 
import math
import datetime 


In [10]:
covid_df = pd.read_csv("covid_19_india.csv")

#### Imported the Covid CSV file and then understanding the data. 

In [20]:
covid_df.head(5)

Unnamed: 0,Sno,Date,Time,State/UnionTerritory,ConfirmedIndianNational,ConfirmedForeignNational,Cured,Deaths,Confirmed
0,1.0,2020-01-30,6:00 PM,Kerala,1,0,0.0,0.0,1.0
1,2.0,2020-01-31,6:00 PM,Kerala,1,0,0.0,0.0,1.0
2,3.0,2020-02-01,6:00 PM,Kerala,2,0,0.0,0.0,2.0
3,4.0,2020-02-02,6:00 PM,Kerala,3,0,0.0,0.0,3.0
4,5.0,2020-02-03,6:00 PM,Kerala,3,0,0.0,0.0,3.0


In [14]:
covid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15114 entries, 0 to 15113
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Sno                       15086 non-null  float64
 1   Date                      15086 non-null  object 
 2   Time                      15086 non-null  object 
 3   State/UnionTerritory      15086 non-null  object 
 4   ConfirmedIndianNational   15086 non-null  object 
 5   ConfirmedForeignNational  15086 non-null  object 
 6   Cured                     15086 non-null  float64
 7   Deaths                    15086 non-null  float64
 8   Confirmed                 15086 non-null  float64
dtypes: float64(4), object(5)
memory usage: 1.0+ MB


In [15]:
covid_df.describe()

Unnamed: 0,Sno,Cured,Deaths,Confirmed
count,15086.0,15086.0,15086.0,15086.0
mean,7543.5,174793.7,2721.084449,194282.0
std,4355.097416,364833.0,7182.672358,409518.4
min,1.0,0.0,0.0,0.0
25%,3772.25,1685.0,12.0,2935.5
50%,7543.5,19647.0,364.0,26081.5
75%,11314.75,208755.2,2170.0,221601.2
max,15086.0,4927480.0,83777.0,5433506.0


In [16]:
covid_df.isnull().sum()

Sno                         28
Date                        28
Time                        28
State/UnionTerritory        28
ConfirmedIndianNational     28
ConfirmedForeignNational    28
Cured                       28
Deaths                      28
Confirmed                   28
dtype: int64

#### Dropping unnecessary columns from the dataframe. 

In [31]:
covid_df.drop(["Sno","Time","ConfirmedIndianNational","ConfirmedForeignNational"],inplace = True, axis = 1)

In [34]:
covid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15114 entries, 0 to 15113
Data columns (total 5 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Date                  15086 non-null  object 
 1   State/UnionTerritory  15086 non-null  object 
 2   Cured                 15086 non-null  float64
 3   Deaths                15086 non-null  float64
 4   Confirmed             15086 non-null  float64
dtypes: float64(3), object(2)
memory usage: 590.5+ KB


#### Finding the active cases by summing cured cases and deaths and subtracting it with the confirmed cases.

In [36]:
covid_df["Active_Cases"] = covid_df["Confirmed"] - (covid_df["Deaths"] + covid_df["Cured"])
covid_df.sample(5)

Unnamed: 0,Date,State/UnionTerritory,Cured,Deaths,Confirmed,Active_Cases
6894,2020-10-02,Madhya Pradesh,107279.0,2336.0,130088.0,20473.0
12885,2021-03-19,Telengana,298262.0,1664.0,302360.0,2434.0
12298,2021-03-03,Maharashtra,2036790.0,52238.0,2169330.0,80302.0
10555,2021-01-14,Chandigarh,19794.0,329.0,20415.0,292.0
890,2020-04-12,Tripura,0.0,0.0,2.0,2.0


#### Analysing the cases statewise.

In [90]:
statewise = pd.pivot_table(covid_df, values = ["Confirmed","Deaths","Cured","Active_Cases"],index = "State/UnionTerritory",aggfunc = max)

In [91]:
statewise

Unnamed: 0_level_0,Active_Cases,Confirmed,Cured,Deaths
State/UnionTerritory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Andaman and Nicobar Islands,1154.0,6674.0,6359.0,92.0
Andhra Pradesh,211554.0,1475372.0,1254291.0,9580.0
Arunachal Pradesh,3068.0,22462.0,19977.0,88.0
Assam,47740.0,340858.0,290774.0,2344.0
Bihar,115152.0,664115.0,595377.0,4039.0
Cases being reassigned to states,9265.0,9265.0,0.0,0.0
Chandigarh,8653.0,56513.0,48831.0,647.0
Chhattisgarh,131245.0,925531.0,823113.0,12036.0
Dadra and Nagar Haveli and Daman and Diu,2081.0,9652.0,8944.0,4.0
Daman & Diu,2.0,2.0,0.0,0.0


#### State with the most active cases 

In [54]:
state_wise = statewise.sort_values(by = "Active_Cases",ascending = False)

In [56]:
state_wise["Active_Cases"]

State/UnionTerritory
Maharashtra                                 701614.0
Karnataka                                   605515.0
Kerala                                      445692.0
Uttar Pradesh                               310783.0
Tamil Nadu                                  242929.0
Rajasthan                                   212753.0
Andhra Pradesh                              211554.0
Gujarat                                     148297.0
West Bengal                                 131948.0
Chhattisgarh                                131245.0
Haryana                                     116867.0
Bihar                                       115152.0
Madhya Pradesh                              111366.0
Delhi                                       103424.0
Odisha                                       95743.0
Telangana                                    80695.0
Uttarakhand                                  80000.0
Punjab                                       79963.0
Telengana                

#### Finding the mortality rate and recovery rate State wise 

In [80]:
state_wise = statewise.sort_values(by = "Confirmed",ascending = False)
state_wise["Recovery"] = (state_wise["Cured"]/state_wise["Confirmed"]) * 100
state_wise["Mortality"] = (state_wise["Deaths"]/state_wise["Confirmed"]) * 100

In [81]:
state_wise

Unnamed: 0_level_0,Active_Cases,Confirmed,Cured,Deaths,Recovery,Mortality
State/UnionTerritory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Maharashtra,701614.0,5433506.0,4927480.0,83777.0,90.686934,1.541859
Karnataka,605515.0,2272374.0,1674487.0,22838.0,73.688882,1.005028
Kerala,445692.0,2200706.0,1846105.0,6612.0,83.886944,0.300449
Tamil Nadu,242929.0,1664350.0,1403052.0,18369.0,84.300297,1.103674
Uttar Pradesh,310783.0,1637663.0,1483249.0,18072.0,90.571076,1.103524
Andhra Pradesh,211554.0,1475372.0,1254291.0,9580.0,85.015237,0.649328
Delhi,103424.0,1402873.0,1329899.0,22111.0,94.798246,1.576123
West Bengal,131948.0,1171861.0,1026492.0,13576.0,87.59503,1.158499
Chhattisgarh,131245.0,925531.0,823113.0,12036.0,88.934136,1.300443
Rajasthan,212753.0,879664.0,713129.0,7080.0,81.06834,0.804853


In [99]:
state_wise.to_csv("D:\COVID_19_DATA_ANALYSIS_PROJECT_USING_PYTHON_AND_POWERBI\TRANSFORMED_DATASETS\state_wise_data.csv")

### Post this, the data was loaded in Microsoft Power Bi and the visuals were created 