<a href="https://colab.research.google.com/github/sanjsvk/Covid-analysis/blob/main/covid_19.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [94]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
pd.set_option('display.max_rows', None)
from plotly.subplots import make_subplots
import seaborn as sns
import datetime
import warnings
%matplotlib inline

In [95]:
line_list_data = pd.read_csv('/content/drive/My Drive/archive/COVID19_line_list_data.csv')
line_list_data = line_list_data.drop(['Unnamed: 3','Unnamed: 21','Unnamed: 22','Unnamed: 23','Unnamed: 24','Unnamed: 25','Unnamed: 26'], axis =1)
#open_line_list = pd.read_csv('/content/drive/My Drive/archive/COVID19_open_line_list.csv')
#open_line_list = open_line_list.drop(['Unnamed: 33','Unnamed: 34','Unnamed: 35','Unnamed: 36','Unnamed: 37','Unnamed: 38','Unnamed: 39','Unnamed: 40','Unnamed: 41','Unnamed: 42','Unnamed: 43','Unnamed: 44'],axis = 1)
data = pd.read_csv('/content/drive/My Drive/archive/covid_19_data.csv')

In [96]:
data.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14.0,0.0,0.0
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6.0,0.0,0.0
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1.0,0.0,0.0
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0.0,0.0,0.0


Using data-set - covid_19_data

Contains data from 22nd Jan, 2020 to 23rd Sept, 2020



In [97]:
print("Total data length - {} \n".format(len(data)))
data.dtypes

Total data length - 116805 



SNo                  int64
ObservationDate     object
Province/State      object
Country/Region      object
Last Update         object
Confirmed          float64
Deaths             float64
Recovered          float64
dtype: object

In [98]:
data[["Confirmed","Deaths","Recovered"]] =data[["Confirmed","Deaths","Recovered"]].astype(int)

Null Values

In [99]:
#Null values imputation
print("Missing values")
count = data.isnull().sum() 
print(count)
print("\nProvince/State has {}% missing values".format(np.round((count[2]/len(data))*100,2)))

print("Imputing missing values with 'unknown'")
data["Province/State"]= data["Province/State"].fillna('Unknown')

Missing values
SNo                    0
ObservationDate        0
Province/State     35353
Country/Region         0
Last Update            0
Confirmed              0
Deaths                 0
Recovered              0
dtype: int64

Province/State has 30.27% missing values
Imputing missing values with 'unknown'


Adding column 'Active_case'

In [100]:
data['Active_case'] = data['Confirmed'] - data['Deaths'] - data['Recovered']
data.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1,0,0,1
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14,0,0,14
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6,0,0,6
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1,0,0,1
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0,0,0,0


In [101]:
Data = data[data['ObservationDate'] == max(data['ObservationDate'])].reset_index()

In [102]:
#Total cases in the world
Data_world = Data.groupby(["ObservationDate"])[["Confirmed","Active_case","Recovered","Deaths"]].sum().reset_index()

In [103]:
Data_world

Unnamed: 0,ObservationDate,Confirmed,Active_case,Recovered,Deaths
0,09/23/2020,31779835,8914289,21890442,975104


In [104]:
labels = ["Active cases","Recovered","Deaths"]
values = Data_world.loc[0, ["Active_case","Recovered","Deaths"]]
fig = px.pie(Data_world, values=values, names=labels,color_discrete_sequence=['rgb(200,0,0)','rgb(0,200,0)','rgb(0,0,200)'],hole=0.5,width=800, height=400)
fig.update_layout(
    title='Total cases : '+str(Data_world["Confirmed"][0]),
)
fig.show()

##Cases over time

In [105]:
data_over_time= data.groupby(["ObservationDate"])[["Confirmed","Active_case","Recovered","Deaths"]].sum().reset_index().sort_values("ObservationDate",ascending=True).reset_index(drop=True)
data_over_time.head()

Unnamed: 0,ObservationDate,Confirmed,Active_case,Recovered,Deaths
0,01/22/2020,555,510,28,17
1,01/23/2020,653,605,30,18
2,01/24/2020,941,879,36,26
3,01/25/2020,1438,1357,39,42
4,01/26/2020,2118,2010,52,56


In [106]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Confirmed'],
                    mode='lines',
                    name='Confirmed cases'))

fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Active_case'],
                    mode='lines',marker_color='red',
                    name='Active cases'))

fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Recovered'],
                    mode='lines',
                    name='Recovered cases',marker_color='green'))

fig.add_trace(go.Scatter(x=data_over_time.index, y=data_over_time['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines'))

fig.update_layout(
    title='Cases over time',
    template='plotly_white',
    yaxis_title="Cases",
    xaxis_title="Days",
)
fig.show()

##Cases by countries

In [107]:
Data_per_country = Data.groupby(["Country/Region"])["Confirmed","Active_case","Recovered","Deaths"].sum().reset_index().sort_values("Confirmed",ascending=False).reset_index(drop=True)
Data_per_countryy = Data_per_country.head(10)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



In [108]:
fig = go.Figure()
fig.add_trace(go.Bar(
            x=Data_per_countryy['Active_case'],
            y=Data_per_countryy['Country/Region'],
            name = 'Active cases',
            orientation='h',
            marker_color='#DC3912',))

fig.add_trace(go.Bar(
            x=Data_per_countryy['Recovered'],
            y=Data_per_countryy['Country/Region'],
            name = "Recovered cases",
            orientation='h',
            marker_color='#2CA02C',))

fig.add_trace(go.Bar(
            x=Data_per_countryy['Confirmed'],
            y=Data_per_countryy['Country/Region'],
            name = 'Confirmed cases',
            marker_color= 'black',
            orientation='h'))

fig.update_layout(
    title='Cases In Each Country',
    template='plotly_white',
     xaxis_title="Cases",
    yaxis_title="Countries",

)
fig.show()

###Confirmed cases

In [109]:
fig = px.choropleth(Data_per_country, locations=Data_per_country['Country/Region'],
                    color=Data_per_country['Confirmed'],locationmode='country names', 
                    hover_name=Data_per_country['Country/Region'], 
                    color_continuous_scale=px.colors.sequential.Tealgrn,template='plotly_dark', )
fig.update_layout(
    title='Confirmed Cases In Each Country',
)
fig.show()

In [110]:
data_per_country = data.groupby(["Country/Region","ObservationDate"])[["Confirmed","Active_case","Recovered","Deaths"]].sum().reset_index().sort_values("ObservationDate",ascending=True).reset_index(drop=True)

In [111]:
fig = px.choropleth(data_per_country, locations=data_per_country['Country/Region'],
                    color=data_per_country['Confirmed'],locationmode='country names', 
                    hover_name=data_per_country['Country/Region'], 
                    color_continuous_scale=px.colors.sequential.deep,
                    animation_frame="ObservationDate")
fig.update_layout(

    title='Evolution of confirmed cases In Each Country',
)
fig.show()

###Recovered Cases

In [112]:
Recovered_per_country = Data.groupby(["Country/Region"])["Recovered"].sum().reset_index().sort_values("Recovered",ascending=False).reset_index(drop=True)

In [113]:
fig = px.pie(Recovered_per_country, values=Recovered_per_country['Recovered'], names=Recovered_per_country['Country/Region'],
             title='Recovered cases by countries',
            )
fig.update_traces(textposition='inside', textinfo='percent+label')
fig.update_layout(
    template='plotly_white'
)
fig.show()

###Active Cases

In [114]:
Active_per_country = Data.groupby(["Country/Region"])["Active_case"].sum().reset_index().sort_values("Active_case",ascending=False).reset_index(drop=True)

In [115]:
fig = go.Figure(data=[go.Scatter(
    x=Active_per_country['Country/Region'][0:10],
    y=Active_per_country['Active_case'][0:10],
    mode='markers',
    marker=dict(
        color=10+np.random.randn(200),

        size=Active_per_country['Active_case'][0:10]/15000,
        showscale=True
        )
)])
fig.update_layout(
    title='Active Cases by Countries',
    xaxis_title="Countries",
    yaxis_title="Active Cases",
        template='plotly_white'

)
fig.show()

###Death Cases

In [116]:
Deaths_per_country = Data.groupby(["Country/Region"])["Deaths"].sum().reset_index().sort_values("Deaths",ascending=False).reset_index(drop=True)

In [117]:
fig = go.Figure(data=[go.Bar(
            x=Deaths_per_country['Country/Region'][0:10], y=Deaths_per_country['Deaths'][0:10],
            text=Deaths_per_country['Deaths'][0:10],
            textposition='auto',
            marker_color='black'

        )])
fig.update_layout(
    title='Countries with most deaths',
    xaxis_title="Countries",
    yaxis_title="Deaths",
        template='plotly_white'

)
fig.show()

##Cases in China
Since the virus originated in Wuhan, China - let's take a look at cases in China

What percentage of people who tested positive were from Wuhan or were visiting Wuhan?

In [118]:
#Fill NA values in 'from Wuhan' column
print("Imputing 4 missing values with 0 since >85% of values are 0")

line_list_data['from Wuhan'] = line_list_data['from Wuhan'].fillna(0)

Imputing 4 missing values with 0 since >85% of values are 0


In [119]:
#print("from Wuhan",int(line_list_data['from Wuhan'].sum()))
#print("visiting Wuhan",line_list_data['visiting Wuhan'].sum())
#common people in both columns
count = 0
for i in range(len(line_list_data)):
  if line_list_data['from Wuhan'][i] == 1 and line_list_data['visiting Wuhan'][i] == 1:
    count +=1

print("Percentage of infected people from/visiting Wuhan - {}%".format(np.round((line_list_data['from Wuhan'].sum() + line_list_data['visiting Wuhan'].sum() - count)/len(line_list_data)*100,2)))

Percentage of infected people from/visiting Wuhan - 31.98%


In [120]:
Data_china = data [(data['Country/Region'] == 'Mainland China') ].reset_index(drop=True)
Data_china.head()

Unnamed: 0,SNo,ObservationDate,Province/State,Country/Region,Last Update,Confirmed,Deaths,Recovered,Active_case
0,1,01/22/2020,Anhui,Mainland China,1/22/2020 17:00,1,0,0,1
1,2,01/22/2020,Beijing,Mainland China,1/22/2020 17:00,14,0,0,14
2,3,01/22/2020,Chongqing,Mainland China,1/22/2020 17:00,6,0,0,6
3,4,01/22/2020,Fujian,Mainland China,1/22/2020 17:00,1,0,0,1
4,5,01/22/2020,Gansu,Mainland China,1/22/2020 17:00,0,0,0,0


In [121]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=Data_china['ObservationDate'], y=Data_china['Confirmed'],
                    mode='lines',
                    name='Confirmed cases'))

fig.add_trace(go.Scatter(x=Data_china['ObservationDate'], y=Data_china['Recovered'],
                    mode='lines',
                    name='Recovered cases',marker_color='green'))

fig.add_trace(go.Scatter(x=Data_china['ObservationDate'], y=Data_china['Active_case'],
                    mode='lines',
                    name='Active cases',marker_color='red'))

fig.add_trace(go.Scatter(x=Data_china['ObservationDate'], y=Data_china['Deaths'],name='Deaths',
                                   marker_color='black',mode='lines' ))

fig.update_layout(
    title='Evolution of cases over time in China',
        template='plotly_white'
)
fig.show()

In [122]:
Data_china_last = Data_china[Data_china['ObservationDate'] == max(Data_china['ObservationDate'])].reset_index()
Data_last_china = Data_china_last.groupby(["ObservationDate"])[["Confirmed","Active_case","Recovered","Deaths"]].sum().reset_index()
Data_last_china

Unnamed: 0,ObservationDate,Confirmed,Active_case,Recovered,Deaths
0,09/23/2020,85314,171,80509,4634


In [123]:
colors = ['rgb(255,252,244)','rgb(65,171,93)', 'rgb(200,0,0)']
labels = ["Active cases","Recovered","Deaths"]
values = Data_last_china.loc[0, ["Active_case","Recovered","Deaths"]]

fig = go.Figure(data=[go.Pie(labels=labels,
                             values=values)])
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=20,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.show()

---