# Data Analysis and Visualization using Numpy, Pandas and Seaborn

In [1]:
!pip install jovian --upgrade --quiet



In [2]:
project_name = "covid19-data-analysis"

In [3]:
import jovian

In [4]:
jovian.commit(project=project_name,filename='covid19-data-analysis',environment=None)

<IPython.core.display.Javascript object>

[jovian] Attempting to save notebook..


ConnectionError: HTTPSConnectionPool(host='api.jovian.ai', port=443): Max retries exceeded with url: /user/profile (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x000002C158A34BB0>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))

## Importing All the Libraries Needed

In [5]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

### Reading the CSV Data

In [6]:
covid_df = pd.read_csv('covid_19_india.csv')

### Exploring The Data for Range of Values

In [7]:
covid_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6386 entries, 0 to 6385
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   Sno                       6386 non-null   int64 
 1   Date                      6386 non-null   object
 2   Time                      6386 non-null   object
 3   State/UnionTerritory      6386 non-null   object
 4   ConfirmedIndianNational   6386 non-null   object
 5   ConfirmedForeignNational  6386 non-null   object
 6   Cured                     6386 non-null   int64 
 7   Deaths                    6386 non-null   int64 
 8   Confirmed                 6386 non-null   int64 
dtypes: int64(4), object(5)
memory usage: 449.1+ KB


In [8]:
covid_df.describe

<bound method NDFrame.describe of        Sno      Date     Time State/UnionTerritory ConfirmedIndianNational  \
0        1  30/01/20  6:00 PM               Kerala                       1   
1        2  31/01/20  6:00 PM               Kerala                       1   
2        3  01/02/20  6:00 PM               Kerala                       2   
3        4  02/02/20  6:00 PM               Kerala                       3   
4        5  03/02/20  6:00 PM               Kerala                       3   
...    ...       ...      ...                  ...                     ...   
6381  6382  17/09/20  8:00 AM            Telengana                       -   
6382  6383  17/09/20  8:00 AM              Tripura                       -   
6383  6384  17/09/20  8:00 AM          Uttarakhand                       -   
6384  6385  17/09/20  8:00 AM        Uttar Pradesh                       -   
6385  6386  17/09/20  8:00 AM          West Bengal                       -   

     ConfirmedForeignNational

In [9]:
covid_df.columns

Index(['Sno', 'Date', 'Time', 'State/UnionTerritory',
       'ConfirmedIndianNational', 'ConfirmedForeignNational', 'Cured',
       'Deaths', 'Confirmed'],
      dtype='object')

In [10]:
covid_df.shape

(6386, 9)

### By using the shape method we can see our data set has 6386 rows and columns
### From the info method we got to know the type of values each column contains

In [11]:
covid_df.Date

0       30/01/20
1       31/01/20
2       01/02/20
3       02/02/20
4       03/02/20
          ...   
6381    17/09/20
6382    17/09/20
6383    17/09/20
6384    17/09/20
6385    17/09/20
Name: Date, Length: 6386, dtype: object

### From the above Info We can see the dataset Contains Covid Data From 30th January 2020 till 17th September 2020

### To Explore the data In greater Depth We Will BreakDown the date into Year, Month, Day and Weekday

In [12]:
covid_df['year'] = pd.DatetimeIndex(covid_df['Date']).year
covid_df['month'] = pd.DatetimeIndex(covid_df['Date']).month

KeyError: 'month'

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000002C15A0080D0>

In [14]:
states = covid_df['State/UnionTerritory']

In [15]:
states.unique()

array(['Kerala', 'Telengana', 'Delhi', 'Rajasthan', 'Uttar Pradesh',
       'Haryana', 'Ladakh', 'Tamil Nadu', 'Karnataka', 'Maharashtra',
       'Punjab', 'Jammu and Kashmir', 'Andhra Pradesh', 'Uttarakhand',
       'Odisha', 'Puducherry', 'West Bengal', 'Chhattisgarh',
       'Chandigarh', 'Gujarat', 'Himachal Pradesh', 'Madhya Pradesh',
       'Bihar', 'Manipur', 'Mizoram', 'Andaman and Nicobar Islands',
       'Goa', 'Unassigned', 'Assam', 'Jharkhand', 'Arunachal Pradesh',
       'Tripura', 'Nagaland', 'Meghalaya', 'Dadar Nagar Haveli',
       'Cases being reassigned to states', 'Sikkim', 'Daman & Diu',
       'Dadra and Nagar Haveli and Daman and Diu', 'Telangana',
       'Telangana***', 'Telengana***'], dtype=object)

### Changing The 'State/UnionTerritory' Column Name to States 
### & Changing Columns 'ConfirmedIndianNational' and 'ConfirmedForeignNational' to Indian and Foreigner

In [16]:
d = {'State/UnionTerritory':'States','ConfirmedIndianNational':'Indian','ConfirmedForeignNational':'Foreigner'}
covid_df.rename(columns = d, inplace = True)

### Lockdown Imposed

In [17]:
lockdown = covid_df[covid_df.Foreigner == '-']

In [18]:
lockdown

Unnamed: 0,Sno,Date,Time,States,Indian,Foreigner,Cured,Deaths,Confirmed,year,month
446,447,29/03/20,7:30 PM,Andhra Pradesh,-,-,1,0,19,2020,3
447,448,29/03/20,7:30 PM,Andaman and Nicobar Islands,-,-,0,0,9,2020,3
448,449,29/03/20,7:30 PM,Bihar,-,-,0,1,11,2020,3
449,450,29/03/20,7:30 PM,Chandigarh,-,-,0,0,8,2020,3
450,451,29/03/20,7:30 PM,Chhattisgarh,-,-,0,0,7,2020,3
...,...,...,...,...,...,...,...,...,...,...,...
6381,6382,17/09/20,8:00 AM,Telengana,-,-,133555,1005,165003,2020,9
6382,6383,17/09/20,8:00 AM,Tripura,-,-,12956,222,20676,2020,9
6383,6384,17/09/20,8:00 AM,Uttarakhand,-,-,24432,447,35947,2020,9
6384,6385,17/09/20,8:00 AM,Uttar Pradesh,-,-,258573,4690,330265,2020,9


### From the above Data we can see the values for Indian And Foreigner were Entered as '-' From 29th March i.e The lockdown in India Started near About same date (25th March 2020)

### Max Deaths Reported and Maximum Cured In a Day

In [19]:
total_deaths = covid_df.Deaths.sum()
print('Total Deaths Reported in India due to Covid-19 till 17th September is {}'.format(total_deaths))

Total Deaths Reported in India due to Covid-19 till 17th September is 4029091


In [20]:
total_cured = covid_df.Cured.max()
print('Total Cured From Covid-19 till 17th September is {}'.format(total_cured))

Total Cured From Covid-19 till 17th September is 792832


## Exploring Data State Wise
# Maharashtra

In [21]:
Maharashtra_df = covid_df[covid_df.States == 'Maharashtra']
Maharashtra_df

Unnamed: 0,Sno,Date,Time,States,Indian,Foreigner,Cured,Deaths,Confirmed,year,month
76,77,09/03/20,6:00 PM,Maharashtra,2,0,0,0,2,2020,9
91,92,10/03/20,6:00 PM,Maharashtra,5,0,0,0,5,2020,10
97,98,11/03/20,6:00 PM,Maharashtra,2,0,0,0,2,2020,11
120,121,12/03/20,6:00 PM,Maharashtra,11,0,0,0,11,2020,12
133,134,13/03/20,6:00 PM,Maharashtra,14,0,0,0,14,2020,3
...,...,...,...,...,...,...,...,...,...,...,...
6230,6231,13/09/20,8:00 AM,Maharashtra,-,-,728512,29115,1037765,2020,9
6265,6266,14/09/20,8:00 AM,Maharashtra,-,-,740061,29531,1060308,2020,9
6300,6301,15/09/20,8:00 AM,Maharashtra,-,-,755850,29894,1077374,2020,9
6335,6336,16/09/20,8:00 AM,Maharashtra,-,-,775273,30409,1097856,2020,9


In [26]:
covid_df.loc[120:160]

Unnamed: 0,Sno,Date,Time,States,Indian,Foreigner,Cured,Deaths,Confirmed,year,month
120,121,12/03/20,6:00 PM,Maharashtra,11,0,0,0,11,2020,12
121,122,12/03/20,6:00 PM,Andhra Pradesh,1,0,0,0,1,2020,12
122,123,13/03/20,6:00 PM,Delhi,6,0,0,0,6,2020,3
123,124,13/03/20,6:00 PM,Haryana,0,14,0,0,14,2020,3
124,125,13/03/20,6:00 PM,Kerala,19,0,3,0,19,2020,3
125,126,13/03/20,6:00 PM,Rajasthan,1,2,0,0,3,2020,3
126,127,13/03/20,6:00 PM,Telengana,1,0,0,0,1,2020,3
127,128,13/03/20,6:00 PM,Uttar Pradesh,10,1,0,0,11,2020,3
128,129,13/03/20,6:00 PM,Ladakh,3,0,0,0,3,2020,3
129,130,13/03/20,6:00 PM,Tamil Nadu,1,0,0,0,1,2020,3


In [None]:
print("Total cases Confirmed In Maharashtra till 17th September are {}".format(Maharashtra_df.Confirmed.sum()))

In [None]:
Maharashtra_df.Deaths.sum()
print('Total Deaths In Maharshtra till 17th September are {}'.format(Maharashtra_df.Deaths.sum()))

In [None]:
print('Total Cured From Covid 19 in maharashtra till 17th September are {}'.format(Maharashtra_df.Cured.sum()))

In [None]:
print("Recovery rate of Maharashtra as of 17th Sept is {:2f} %".format(Maharashtra_df.Cured.sum()/Maharashtra_df.Confirmed.sum()))

In [None]:
confirmed_df = covid_df.groupby('month')[['Confirmed']].sum()
cured_df = covid_df.groupby('month')[['Cured']].sum()
deaths_df = covid_df.groupby('month')[['Deaths']].sum()

In [None]:
confirmed_df

In [None]:
month_df.max()

In [None]:
jovian.commit(project=project_name,filename='covid19-data-analysis',environment=None)

In [None]:
plt.figure(figsize=[12,8])
sns.lineplot(data=month_df, x="Cured", y="Confirmed", lw=8, ls='--');

In [None]:
months = range(1,13)
plt.figure(figsize=[12,8])
plt.plot(months,month_df.Deaths)
plt.plot(months,month_df.Cured)
plt.title("Deaths vs Cured")
plt.legend(['Deaths', 'Cured']);