In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
sns.set()
import plotly.graph_objs as go
import plotly.express as px 
from datetime import datetime

In [None]:
# Colab-only step: open the browser upload widget. `data` is later indexed by
# the uploaded file's name to obtain that file's raw bytes.
from google.colab import files
data = files.upload()

Saving covid_19_data.csv to covid_19_data.csv


In [None]:
import io

# Name the CSV is expected to be uploaded under.
CSV_NAME = 'covid_19_data.csv'

if not data:
    # Upload dialog was cancelled: fail with a readable message instead of a KeyError.
    raise ValueError("No file was uploaded; re-run the upload cell and select " + CSV_NAME)

# Colab may store a re-uploaded file under a suffixed key such as
# 'covid_19_data (1).csv'. Fall back to the first uploaded file when the exact
# key is missing so that re-running the notebook does not break.
csv_bytes = data[CSV_NAME] if CSV_NAME in data else next(iter(data.values()))

# Parse the in-memory bytes into the working DataFrame.
df = pd.read_csv(io.BytesIO(csv_bytes))

In [None]:
# Preview the first 10 rows to check that the CSV parsed as expected.
df.head(10)

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0
5,6,01/22/2020,Guangdong,Mainland China,1/22/2020 17:00,26.0,0.0,0.0
6,7,01/22/2020,Guangxi,Mainland China,1/22/2020 17:00,2.0,0.0,0.0
7,8,01/22/2020,Guizhou,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
8,9,01/22/2020,Hainan,Mainland China,1/22/2020 17:00,4.0,0.0,0.0
9,10,01/22/2020,Hebei,Mainland China,1/22/2020 17:00,1.0,0.0,0.0


In [None]:
# Summarise each column's dtype and non-null count, plus overall memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 306429 entries, 0 to 306428
Data columns (total 8 columns):
 #   Column           Non-Null Count   Dtype  
---  ------           --------------   -----  
 0   SNo              306429 non-null  int64  
 1   ObservationDate  306429 non-null  object 
 2   Province/State   228329 non-null  object 
 3   Country/Region   306429 non-null  object 
 4   Last Update      306429 non-null  object 
 5   Confirmed        306429 non-null  float64
 6   Deaths           306429 non-null  float64
 7   Recovered        306429 non-null  float64
dtypes: float64(3), int64(1), object(4)
memory usage: 18.7+ MB


In [None]:
# Rows without a province/state get an explicit "Unknown" placeholder;
# no other column is touched.
df = df.fillna({'Province/State': "Unknown"})

In [None]:
# Count the remaining missing values per column.
df.isna().sum()

SNo                0
ObservationDate    0
Province/State     0
Country/Region     0
Last Update        0
Confirmed          0
Deaths             0
Recovered          0
dtype: int64

In [None]:
# Give the two slash-containing columns shorter names in a single rename.
df = df.rename(columns={
    "Province/State": "State",
    "Country/Region": "Country",
})

In [None]:
# The case counts are read as floats; store them as integers.
df = df.astype({column: int for column in ['Confirmed', 'Deaths', 'Recovered']})

In [None]:
# Active cases = confirmed cases minus deaths minus recoveries.
df['Active'] = df['Confirmed'].sub(df['Deaths']).sub(df['Recovered'])
# Show the resulting column list.
df.columns

Index(['SNo', 'ObservationDate', 'State', 'Country', 'Last Update',
       'Confirmed', 'Deaths', 'Recovered', 'Active'],
      dtype='object')

In [None]:
# Duplicate the observation date into a separate 'Date' column, leaving the
# original 'ObservationDate' strings untouched.
df = df.assign(Date=df['ObservationDate'].copy())

DATA ANALYSIS OF WORLD COVID CASES


In [None]:
# Headline totals for the whole dataset, rendered as a one-row table.
# Series/DataFrame .sum() is vectorised; the built-in sum() used before
# looped over every row in Python. int() keeps the cells plain Python ints.
# NOTE(review): if the counts are cumulative per ObservationDate, adding every
# date together counts the same cases repeatedly. Confirm whether only the
# latest date should be summed.
world_totals = df[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum()

fig = go.Figure(data = [go.Table(
    header = dict(
        values = ['<b>Total Confirmed</b>','<b>Total Deaths</b>','<b>Total Recovered</b>','<b>Total Active Cases</b>'],
        line_color='darkslategray',
        fill_color= 'DarkCyan',
        align='center',
        font=dict(color='white', size=12)
    ),
    cells = dict(
        # One value per header column, in the same order as the header.
        values = [int(total) for total in world_totals],
        line_color='darkslategray',
        fill_color = 'white',
        font = dict(color = 'darkslategray', size = 12))
)])

fig.update_layout(title = 'World-Covid-19 Cases',
                  title_x = 0.5,
                  title_font = dict(size = 16, color = 'DarkCyan'))

fig.show()

In [None]:
# Share of active / recovered / death counts across the whole dataset.
labels = ["Active Cases","Recovered Cases","Death Cases"]

# Vectorised column sums (the built-in sum() iterated row by row in Python).
# NOTE(review): if the counts are cumulative per ObservationDate, these totals
# add the same cases once per date. Confirm the intended aggregation.
sumactive = int(df['Active'].sum())
sumrecoverd = int(df['Recovered'].sum())
sumdeaths = int(df['Deaths'].sum())

# The slices come from the three aggregated numbers above, so no data frame is
# passed: px.pie accepts array-like `values`/`names` on their own.
fig = px.pie(values = [sumactive,sumrecoverd,sumdeaths],
             names = labels,
             color_discrete_sequence = ['SkyBlue','PaleGreen','LightSlateGray'])

fig.update_traces(textposition = 'inside', textinfo = 'percent+label')

fig.update_layout(title = 'The percentage of Covid-19 Cases in the world',
                  title_x = 0.5,
                  title_font= dict(size = 18, color = 'MidnightBlue' ))

fig.show()

In [None]:
# Turn the 'Date' strings into real timestamps, then order rows chronologically.
parsed_dates = pd.to_datetime(df['Date'])
df = df.assign(Date=parsed_dates).sort_values(by="Date")

In [None]:
# One row per date: each metric summed over every row reported on that date.
dftime = (
    df.groupby("Date", as_index=False)[["Confirmed", "Active", "Recovered", "Deaths"]]
    .sum()
)

In [None]:
# Bar chart of the per-date confirmed totals; bar colour follows the count.
fig = px.bar(
    data_frame=dftime,
    x="Date",
    y="Confirmed",
    color="Confirmed",
    color_continuous_scale="rdpu",
    labels={"Confirmed": "Confirmed Cases"},
)

# Centre the title and name both axes.
fig.update_layout(
    title_text="Confirmed Cases",
    title_x=0.5,
    title_font_size=18,
    title_font_color="Purple",
    xaxis_title_text="Date",
    yaxis_title_text="Confirmed Cases",
)

fig.show()

In [None]:
# Bar chart of the per-date recovered totals; bar colour follows the count.
fig = px.bar(
    data_frame=dftime,
    x="Date",
    y="Recovered",
    color="Recovered",
    color_continuous_scale="greens",
    labels={"Recovered": "Recovered Cases"},
)

# Centre the title and name both axes.
fig.update_layout(
    title_text="Recovered Cases",
    title_x=0.5,
    title_font_size=18,
    title_font_color="DarkGreen",
    xaxis_title_text="Date",
    yaxis_title_text="Recovered Cases",
)

fig.show()

In [None]:
# Bar chart of the per-date active totals; bar colour follows the count.
fig = px.bar(
    data_frame=dftime,
    x="Date",
    y="Active",
    color="Active",
    color_continuous_scale="blues",
    labels={"Active": "Active Cases"},
)

# Centre the title and name both axes.
fig.update_layout(
    title_text="Active Cases",
    title_x=0.5,
    title_font_size=18,
    title_font_color="DarkBlue",
    xaxis_title_text="Date",
    yaxis_title_text="Active Cases",
)

fig.show()

In [None]:
# Bar chart of the per-date death totals; bar colour follows the count.
fig = px.bar(dftime,
             x = 'Date',
             y = 'Deaths',
             color = 'Deaths',
             color_continuous_scale = 'gray',
             # Label text appears in the colour bar and hover box
             # (was misspelled "Death Caaes").
             labels = {"Deaths":"Death Cases"})

# Centre the title and name both axes.
fig.update_layout(title = 'Death Cases',
                  title_x = 0.5,
                  title_font = dict(size= 18, color = 'DarkSlateGray'),
                  xaxis = dict(title = 'Date'),
                  yaxis = dict(title = 'Death Cases'))

fig.show()

In [None]:
# Rank countries by their summed 'Confirmed' values and keep the 30 largest.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate this ranks by a repeated total. Confirm the intent.
df1 = (
    df.groupby("Country")["Confirmed"]
    .sum()
    .sort_values(ascending=False)
    .head(30)
    .reset_index()
)

fig = px.bar(
    data_frame=df1,
    x="Country",
    y="Confirmed",
    color="Confirmed",
    color_continuous_scale="rdpu",
    labels={"Confirmed": "Confirmed Cases"},
)

# Centre the title and tilt the country names so they do not overlap.
fig.update_layout(
    title_text="Top 30 Countries with the most Confirmed Cases",
    title_x=0.5,
    title_font_size=18,
    title_font_color="Purple",
    yaxis_title_text="Confirmed Cases",
    xaxis_tickangle=45,
)
fig.show()

In [None]:
# Rank countries by their summed 'Active' values and keep the 30 largest.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate this ranks by a repeated total. Confirm the intent.
df1 = (
    df.groupby("Country")["Active"]
    .sum()
    .sort_values(ascending=False)
    .head(30)
    .reset_index()
)

fig = px.bar(
    data_frame=df1,
    x="Country",
    y="Active",
    color="Active",
    color_continuous_scale="blues",
    labels={"Active": "Active Cases"},
)

# Centre the title and tilt the country names so they do not overlap.
fig.update_layout(
    title_text="Top 30 Countries with the most Active Cases",
    title_x=0.5,
    title_font_size=18,
    title_font_color="DarkBlue",
    yaxis_title_text="Active Cases",
    xaxis_tickangle=45,
)

fig.show()

In [None]:
# Rank countries by their summed 'Recovered' values and keep the 30 largest.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate this ranks by a repeated total. Confirm the intent.
df1 = df.groupby("Country")["Recovered"].sum().sort_values(ascending = False).reset_index().head(30)

fig = px.bar(df1,
             x = 'Country',
             y = 'Recovered',
             color = 'Recovered',
             color_continuous_scale = 'greens',
             # Key must be the plotted column. The old key "Confirmed" matched
             # nothing, so the colour bar and hover showed the raw column name.
             labels = {"Recovered":"Recovered Cases"})

# Centre the title and tilt the country names so they do not overlap.
fig.update_layout(title = 'Top 30 Countries with the most Recovered Cases',
                  title_x = 0.5,
                  title_font = dict(size = 18, color = 'DarkGreen'),
                  yaxis = dict(title = 'Recovered Cases'),
                  xaxis = dict(tickangle = 45))

fig.show()

In [None]:
# Rank countries by their summed 'Deaths' values and keep the 30 largest.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate this ranks by a repeated total. Confirm the intent.
df1 = (
    df.groupby("Country")["Deaths"]
    .sum()
    .sort_values(ascending=False)
    .head(30)
    .reset_index()
)

fig = px.bar(
    data_frame=df1,
    x="Country",
    y="Deaths",
    color="Deaths",
    color_continuous_scale="gray",
    labels={"Deaths": "Death Cases"},
)

# Centre the title and tilt the country names so they do not overlap.
fig.update_layout(
    title_text="Top 30 Countries with the most Death Cases",
    title_x=0.5,
    title_font_size=18,
    title_font_color="DarkSlateGray",
    yaxis_title_text="Death Cases",
    xaxis_tickangle=45,
)

fig.show()


In [None]:
# The year is the last "/"-separated piece of the date string; it stays a string.
df['Year'] = df['ObservationDate'].str.split("/").str[-1]
df.head()

Unnamed: 0,SNo,ObservationDate,State,Country,Last Update,Confirmed,Deaths,Recovered,Active,Date,Year
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1,0,0,1,2020-01-22,2020
22,23,01/22/2020,Qinghai,Mainland China,1/22/2020 17:00,0,0,0,0,2020-01-22,2020
23,24,01/22/2020,Shaanxi,Mainland China,1/22/2020 17:00,0,0,0,0,2020-01-22,2020
24,25,01/22/2020,Shandong,Mainland China,1/22/2020 17:00,2,0,0,2,2020-01-22,2020
25,26,01/22/2020,Shanghai,Mainland China,1/22/2020 17:00,9,0,0,9,2020-01-22,2020


In [None]:
# Relabel 'India' as 'Ind'; the filter that builds `dfch` matches on this label.
df['Country'] = df['Country'].replace({'India': 'Ind'})

In [None]:
# Keep only the rows labelled 'Ind'; reset_index() retains the old row labels
# in an 'index' column.
is_india = df['Country'].eq('Ind')
dfch = df.loc[is_india].reset_index()

In [None]:
# Headline totals for the India subset, rendered as a one-row table.
# DataFrame.sum() is vectorised; the built-in sum() used before looped over
# every row in Python. int() keeps the cells plain Python ints.
# NOTE(review): if the counts are cumulative per ObservationDate, adding every
# date together counts the same cases repeatedly. Confirm whether only the
# latest date should be summed.
india_totals = dfch[['Confirmed', 'Deaths', 'Recovered', 'Active']].sum()

fig = go.Figure(data = [go.Table(
    header = dict(
        values = ['<b>Total Confirmed</b>','<b>Total Deaths</b>','<b>Total Recovered</b>','<b>Total Active Cases</b>'],
        line_color='darkslategray',
        fill_color= 'FireBrick',
        align='center',
        font=dict(color='white', size=12)
    ),
    cells = dict(
        # One value per header column, in the same order as the header.
        values = [int(total) for total in india_totals],
        line_color='darkslategray',
        fill_color = 'white',
        font = dict(color = 'darkslategray', size = 12))
)])

fig.update_layout(title = 'India - Covid-19 Cases',
                  title_x = 0.5,
                  title_font = dict(size = 18, color = 'FireBrick'))

fig.show()

In [None]:
# Share of active / recovered / death counts within the India subset.
labels = ["Active Cases","Recovered Cases","Deaths"]

# Vectorised column sums (the built-in sum() iterated row by row in Python).
# NOTE(review): if the counts are cumulative per ObservationDate, these totals
# add the same cases once per date. Confirm the intended aggregation.
sumactive = int(dfch['Active'].sum())
sumrecoverd = int(dfch['Recovered'].sum())
sumdeaths = int(dfch['Deaths'].sum())

# The slices come from the three aggregated numbers above. The world-wide `df`
# that used to be passed here was never read, so it is dropped: px.pie accepts
# array-like `values`/`names` on their own.
fig = px.pie(values = [sumactive,sumrecoverd,sumdeaths],
             names = labels,
             color_discrete_sequence = ['SkyBlue','PaleGreen','LightSlateGray'])

fig.update_traces(textposition = 'inside', textinfo = 'percent+label')

fig.update_layout(title = 'The percentage of Covid-19 Cases in India',
                  title_x = 0.5,
                  title_font= dict(size = 18, color = 'MidnightBlue' ))

fig.show()

In [None]:
# Per-date India totals. The columns are selected with a list: the bare
# comma-separated form on a groupby is deprecated (it produced the warning
# printed by this cell) and is rejected by pandas 2.x.
# The trailing reset_index() is kept from the original; it also adds an
# 'index' column to Data_CH.
Data_CH = (
    dfch.groupby("Date")[["Confirmed", "Deaths", "Recovered", "Active"]]
    .sum()
    .reset_index()
    .sort_values("Date")
    .reset_index()
)

# One line per metric on a shared time axis; active and death counts are dotted.
fig = go.Figure()

fig.add_trace(go.Scatter(x=Data_CH['Date'], 
                         y=Data_CH['Confirmed'],
                         mode='lines',
                         name='Confirmed Cases',
                         marker_color='purple'))

fig.add_trace(go.Scatter(x=Data_CH['Date'], 
                         y=Data_CH['Active'],
                         mode='lines',
                         name='Active Cases',
                         marker_color='RoyalBlue',
                         line=dict( dash='dot')))

fig.add_trace(go.Scatter(x=Data_CH['Date'], 
                         y=Data_CH['Deaths'],
                         name='Death Cases',
                         marker_color='DarkSlateGray',
                         mode='lines',
                         line=dict( dash='dot') ))

fig.add_trace(go.Scatter(x=Data_CH['Date'], 
                         y=Data_CH['Recovered'],
                         mode='lines',
                         name='Recovered Cases',
                         marker_color='green'))

fig.update_layout(title='Covid-19 Cases over the time in the India',
                  title_x = 0.5,
                  title_font = dict(size = 18, color = 'DarkSlateGray'),
                  template='plotly_white')

fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# Per-state India totals, largest 'Confirmed' first. The columns are selected
# with a list: the bare comma-separated form on a groupby is deprecated and is
# rejected by pandas 2.x.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate the shares are weighted by how long a state has reported.
# Confirm the intent.
State_ch = (
    dfch.groupby(["State"])[["Confirmed", "Active", "Deaths"]]
    .sum()
    .sort_values("Confirmed", ascending=False)
    .reset_index()
)

# Donut chart: one slice per state, sized by its 'Confirmed' total.
fig = px.pie(State_ch,
             values = 'Confirmed',
             names = 'State',
             hole = 0.3,
             color_discrete_sequence = px.colors.sequential.Rainbow)

fig.update_traces(textposition='inside', textinfo='percent+label')

fig.update_layout(title = 'Confirmed Cases in India by States',
                  title_font = dict(size = 16, color = 'Darkblue'))

fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# Per-state India totals, largest 'Active' first. The columns are selected
# with a list: the bare comma-separated form on a groupby is deprecated and is
# rejected by pandas 2.x.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate the shares are weighted by how long a state has reported.
# Confirm the intent.
State_ch = (
    dfch.groupby(["State"])[["Confirmed", "Active", "Deaths"]]
    .sum()
    .sort_values("Active", ascending=False)
    .reset_index()
)

# Donut chart: one slice per state, sized by its 'Active' total.
fig = px.pie(State_ch,
             values = 'Active',
             names = 'State',
             hole = 0.3,
             color_discrete_sequence = px.colors.sequential.Rainbow)

fig.update_traces(textposition='inside', textinfo='percent+label')

fig.update_layout(title = 'Active Cases in India by States',
                  title_font = dict(size = 16, color = 'Darkblue'))

fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [None]:
# Per-state India totals, largest 'Deaths' first. The columns are selected
# with a list: the bare comma-separated form on a groupby is deprecated and is
# rejected by pandas 2.x.
# NOTE(review): the sum runs over every date; if the counts are cumulative per
# ObservationDate the shares are weighted by how long a state has reported.
# Confirm the intent.
State_ch = (
    dfch.groupby(["State"])[["Confirmed", "Active", "Deaths"]]
    .sum()
    .sort_values("Deaths", ascending=False)
    .reset_index()
)

# Donut chart: one slice per state, sized by its 'Deaths' total.
fig = px.pie(State_ch,
             values = 'Deaths',
             names = 'State',
             hole = 0.3,
             color_discrete_sequence = px.colors.sequential.Rainbow)

fig.update_traces(textposition='inside', textinfo='percent+label')

fig.update_layout(title = 'Death Cases in India by States',
                  title_font = dict(size = 16, color = 'Darkblue'))

fig.show()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.

