# Libraries

### Libraries you probably need to install first



In [13]:
# for calendar map
# ! pip install calmap

# to get continent name from country name
# ! pip install pycountry_convert

# to get access to interactive plots
# ! pip install plotly

### Load necessary libraries

In [14]:
# datetime operations
from datetime import timedelta

# for numerical analysis
import numpy as np

# to access and use dataframes
import pandas as pd

# basic visualization package
import matplotlib.pyplot as plt

# advanced plotting
import seaborn as sns

# interactive visualization
import plotly.express as px
import plotly.graph_objs as go

# import plotly.figure_factory as ff
from plotly.subplots import make_subplots

# for offline plotting
from plotly.offline import plot, iplot, init_notebook_mode
# NOTE(review): connected=True presumably makes the notebook load plotly.js
# from the network instead of embedding it - confirm against plotly.offline docs.
init_notebook_mode(connected=True)

# ignore warnings
# NOTE: this filter hides every warning for the rest of the session,
# including deprecation notices from pandas and plotly.
import warnings
warnings.filterwarnings('ignore')

### Color palette

Creating a color palette to give all the graphs a uniform theme and to reduce redundancy.

In [15]:
# Shared hex colours, one per case category, reused by the charts below.
# dth / rec / act colour the Deaths / Recovered / Active series;
# cnf is presumably meant for confirmed cases (not used in the visible cells).
cnf = '#ef476f'
dth = '#ffd166'
rec = '#06d6a0'
act = '#118ab2'


# Dataset

In [16]:
# Complete cleaned dataset, loaded as-is from the input folder.

full_table = pd.read_csv(filepath_or_buffer='input/covid_19_clean_complete.csv')
# full_table.head()

In [17]:
# Table grouped by day and country; 'Date' is converted to datetime
# right after loading so it can be used as a time axis.

full_grouped = pd.read_csv('input/full_grouped.csv')
full_grouped = full_grouped.assign(Date=pd.to_datetime(full_grouped['Date']))
# full_grouped.head()

In [18]:
# Day-wise table
# ==============
# Loaded and given a datetime 'Date' column in a single expression.

day_wise = pd.read_csv('input/day_wise.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['Date'])
)
# day_wise.head()

In [19]:
# Country-wise latest figures
# ===========================
# Empty strings are turned into NaN and every NaN is then replaced by 0.

country_wise = (
    pd.read_csv('input/country_wise_latest.csv')
    .replace('', np.nan)
    .fillna(0)
)
# country_wise.head()

In [20]:
# Worldometer table
# =================
# Same clean-up as the other tables: blank strings -> NaN -> 0.

worldometer_data = pd.read_csv('input/worldometer_data.csv')
worldometer_data = worldometer_data.replace(to_replace='', value=np.nan)
worldometer_data = worldometer_data.fillna(value=0)
# worldometer_data.head()

In [24]:
# Last row of day_wise (the most recent day), reshaped to long form so that
# each of the three case categories becomes one row / one treemap tile.
temp = day_wise[['Date', 'Deaths', 'Recovered', 'Active']].tail(1)
temp = temp.melt(id_vars="Date", value_vars=['Active', 'Deaths', 'Recovered'])
# Bind each category to its palette colour by name. The previous positional
# sequence [act, rec, dth] did not line up with the categories
# (Active, Deaths, Recovered), so Deaths and Recovered did not receive
# their own palette colours.
fig = px.treemap(temp, path=["variable"], values="value", height=225,
                 color="variable",
                 color_discrete_map={'Active': act, 'Deaths': dth, 'Recovered': rec})
# Show the category label together with its value inside each tile.
fig.data[0].textinfo = 'label+text+value'


## Tree map
[Tree Maps](https://en.wikipedia.org/wiki/Treemapping) are primarily used to display data that is grouped and nested in a hierarchical (or tree-based) structure. In this particular example there are only 3 groups of data, so the graph looks rather simple and has only one level.

In [26]:
# Render the figure currently bound to `fig` (the treemap built above).
fig.show()

In [27]:
def plot_map(df, col, pal):
    """Show a choropleth map of ``col`` for the rows where it is positive.

    df  : table with a "Country/Region" column and the column ``col``.
    col : column that drives the colour; also the title and hover field.
    pal : continuous colour scale passed through to plotly.
    """
    positive_rows = df[df[col] > 0]
    choropleth = px.choropleth(
        positive_rows,
        locations="Country/Region",
        locationmode='country names',
        color=col,
        hover_name="Country/Region",
        hover_data=[col],
        title=col,
        color_continuous_scale=pal,
    )
    # Optional: choropleth.update_layout(coloraxis_showscale=False)
    choropleth.show()

## Map Plot

Map plots are perfect for comparing the accumulation of data in different parts of a map, and the map itself can be customized in terms of the visible areas. The function used to create the map plot in this example relies on plotly, so the result is interactive. The map may not be visible on third-party websites that do not have this package installed. There are also other variations of this kind of plot that can mark selected spots on a map, much like Google Maps.

In [28]:
# Map of the 'Confirmed' column of country_wise, using the 'matter' colour scale.
plot_map(country_wise, 'Confirmed', 'matter')

# Cases over the time

In [31]:
def plot_daywise(col, hue):
    """Show a 700px-wide bar chart of ``day_wise[col]`` against "Date".

    ``col`` is also used as the chart title, ``hue`` is the single bar
    colour, and both axis titles are left blank.
    """
    bar_chart = px.bar(
        data_frame=day_wise,
        x="Date",
        y=col,
        color_discrete_sequence=[hue],
        width=700,
    )
    bar_chart.update_layout(xaxis_title="", yaxis_title="", title=col)
    bar_chart.show()

In [32]:
def plot_daywise_line(col, hue):
    """Show a 700px-wide line chart of ``day_wise[col]`` against "Date".

    ``col`` is also used as the chart title, ``hue`` is the line colour,
    and both axis titles are left blank.
    """
    layout_options = dict(title=col, xaxis_title="", yaxis_title="")
    line_chart = px.line(day_wise, x="Date", y=col,
                         color_discrete_sequence=[hue], width=700)
    line_chart.update_layout(**layout_options)
    line_chart.show()

In [54]:
# Sum the three case counts over all rows that share a date.
# The columns are selected with a list (double brackets): selecting them
# with a bare tuple of keys after groupby() was deprecated in pandas 1.0
# and raises an error in pandas 2.x.
temp = full_grouped.groupby('Date')[['Recovered', 'Deaths', 'Active']].sum().reset_index()
# Long form: one row per (Date, Case) pair with its Count.
temp = temp.melt(id_vars="Date", value_vars=['Recovered', 'Deaths', 'Active'],
                 var_name='Case', value_name='Count')
#temp.head()

## Area Graph

Area Graphs are Line Graphs but with the area below the line filled in with a certain colour or texture. Area Graphs are drawn by first plotting data points on a Cartesian coordinate grid, joining a line between the points and finally filling in the space below the completed line. In this example, because the data is cumulative, the graph is always ascending.

In [53]:
# Area chart of Count per Date, one coloured area per Case value.
fig = px.area(
    data_frame=temp, x="Date", y="Count", color='Case',
    title='Cases over time',
    color_discrete_sequence=[rec, dth, act],
    width=700, height=600,
)
# Add a range slider under the x axis.
fig.update_layout(xaxis=dict(rangeslider=dict(visible=True)))
fig.show()

## Bar Chart

Bar charts have a discrete domain of categories, and are usually scaled so that all the data can fit on the chart. When there is no natural ordering of the categories being compared, bars on the chart may be arranged in any order. Categorical data is a grouping of data into discrete groups, such as months of the year, age group, shoe sizes, and animals. These categories are usually qualitative. In a column bar chart, the categories appear along the horizontal axis; the height of the bar corresponds to the value of each category.

You can see several bar charts below. Some of them are cumulative, so their values always ascend, while others are not, such as the number of new cases per day, which may differ from one day to the next. On the daily deaths graph, you can see how well or badly the world adapted to the new virus and controlled the cases. There is also a term called 'wave': a wave implies a rising number of sick individuals, a defined peak, and then a decline.

In [56]:
# Daily bar chart of the 'Confirmed' column, in dark grey.
plot_daywise('Confirmed', '#333333')

In [35]:
# Daily bar chart of the 'New cases' column, in dark grey.
plot_daywise('New cases', '#333333')

In [36]:
# Daily bar chart of the 'Deaths' column, in the deaths palette colour.
plot_daywise('Deaths', dth)

In [37]:
# Daily bar chart of the 'New deaths' column, in the deaths palette colour.
plot_daywise('New deaths', dth)

In [38]:
# Daily bar chart of the 'Recovered' column, in the recovered palette colour.
plot_daywise('Recovered', rec)

In [39]:
# Daily bar chart of the 'New recovered' column, in the recovered palette colour.
plot_daywise('New recovered', rec)

## Line graph

Line graphs are used to track changes over short and long periods of time. When smaller changes exist, line graphs are better to use than bar graphs. Line graphs can also be used to compare changes over the same period of time for more than one group. As you can see, the x-axis of these graphs always shows time, while the y-axis differs from one graph to another. For example, the graph below shows the number of deaths per 100 cases; in other words, how deadly the virus was at that time.

In [40]:
# Line chart of the 'Deaths / 100 Cases' column over time.
plot_daywise_line('Deaths / 100 Cases', dth)

In [41]:
# Line chart of the 'Deaths / 100 Recovered' column over time.
plot_daywise_line('Deaths / 100 Recovered', dth)

In [42]:
# Line chart of the 'Recovered / 100 Cases' column over time.
plot_daywise_line('Recovered / 100 Cases', rec)

This is another line graph, comparing the number of recovered cases with the number of active ones.

In [57]:
# Long-form view of the Recovered and Active columns so that each one is
# drawn as its own coloured line.
temp = day_wise.melt(id_vars='Date', value_vars=['Recovered', 'Active'],
                     var_name='Variable', value_name='Count')
# Last expression of the cell: the notebook displays the returned figure.
px.line(data_frame=temp, x='Date', y='Count', color='Variable')

Bar plot showing the total number of countries affected by COVID-19. The count is capped at 189 because the dataset I'm using only covers 189 countries.

In [58]:
# Daily bar chart of the 'No. of countries' column, in blue (#035aa6).
plot_daywise('No. of countries', '#035aa6')

# Top 15 Countries


In this section, the plots show the top 15 countries for each way the virus affected them: confirmed cases, deaths, active cases, etc. To represent the data in the best way possible I used horizontal bar charts, which are pretty straightforward. I also sorted the data in descending order since, as the title suggests, these graphs show only the top 15 countries.

In [59]:
def plot_hbar(df, col, n, hover_data=None):
    """Show a horizontal bar chart of the ``n`` rows with the largest ``col``.

    df         : table with "Country/Region", "WHO Region" and ``col`` columns.
    col        : column to rank by; also the bar length, bar label and title.
    n          : number of rows kept after sorting by ``col``.
    hover_data : optional extra columns for the hover box (default: none).
    """
    # A None sentinel replaces the former mutable default ``[]``, which would
    # be shared between calls.
    if hover_data is None:
        hover_data = []
    fig = px.bar(df.sort_values(col).tail(n),
                 x=col, y="Country/Region", color='WHO Region',
                 text=col, orientation='h', width=700, hover_data=hover_data,
                 color_discrete_sequence = px.colors.qualitative.Dark2)
    # 'total ascending' orders the bars by value even though they are split
    # into one trace per WHO Region.
    fig.update_layout(title=col, xaxis_title="", yaxis_title="",
                      yaxis_categoryorder = 'total ascending',
                      uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()

In [60]:
def _format_population(value):
    """Abbreviate a population threshold for a chart title (1000000 -> '1M')."""
    for scale, suffix in ((1000000000, 'B'), (1000000, 'M'), (1000, 'K')):
        if value >= scale:
            return '{:g}{}'.format(value / scale, suffix)
    return '{:g}'.format(value)


def plot_hbar_wm(col, n, min_pop=1000000, sort='descending'):
    """Show the ``n`` largest ``col`` values from ``worldometer_data``.

    col     : column to rank by; also the bar length, bar label and title.
    n       : number of rows kept after sorting by ``col``.
    min_pop : only rows whose 'Population' is strictly greater are kept.
    sort    : accepted for backward compatibility; it has no effect.
    """
    df = worldometer_data[worldometer_data['Population']>min_pop]
    df = df.sort_values(col, ascending=True).tail(n)
    fig = px.bar(df,
                 x=col, y="Country/Region", color='WHO Region',
                 text=col, orientation='h', width=700,
                 color_discrete_sequence = px.colors.qualitative.Dark2)
    # The title reports the threshold actually applied instead of a fixed
    # "1M"; for the default min_pop the text is unchanged.
    fig.update_layout(title='{} (Only countries with > {} Pop)'.format(
                          col, _format_population(min_pop)),
                      xaxis_title="", yaxis_title="",
                      yaxis_categoryorder = 'total ascending',
                      uniformtext_minsize=8, uniformtext_mode='hide')
    fig.show()

In [61]:
# 15 countries with the highest 'Confirmed' values.
plot_hbar(country_wise, 'Confirmed', 15)

In [62]:
# 15 countries with the highest 'Active' values.
plot_hbar(country_wise, 'Active', 15)

In [63]:
# 15 countries with the highest 'New cases' values.
plot_hbar(country_wise, 'New cases', 15)

In [64]:
# 15 countries with the highest 'Deaths' values.
plot_hbar(country_wise, 'Deaths', 15)

In [65]:
# 15 countries with the highest 'New deaths' values.
plot_hbar(country_wise, 'New deaths', 15)

In [66]:
# 15 countries with the highest 'Deaths / 100 Cases' values.
plot_hbar(country_wise, 'Deaths / 100 Cases', 15)

In [67]:
# 15 countries with the highest 'Recovered' values.
plot_hbar(country_wise, 'Recovered', 15)

In [68]:
# 15 countries with the highest 'New recovered' values.
plot_hbar(country_wise, 'New recovered', 15)

In [69]:
# 15 countries with the highest 'Recovered / 100 Cases' values.
plot_hbar(country_wise, 'Recovered / 100 Cases', 15)

In [70]:
# 15 countries with the highest '1 week change' values.
plot_hbar(country_wise, '1 week change', 15)

In [71]:
# 15 countries with the highest '1 week % increase' values.
plot_hbar(country_wise, '1 week % increase', 15)

In [72]:
# 15 highest 'Tot Cases/1M pop' values among rows with Population > 1,000,000.
plot_hbar_wm('Tot Cases/1M pop', 15, 1000000)

In [74]:
# 15 highest 'Deaths/1M pop' values among rows with Population > 1,000,000.
plot_hbar_wm('Deaths/1M pop', 15, 1000000)

In [75]:
# 15 highest 'TotalTests' values among rows with Population > 1,000,000.
plot_hbar_wm('TotalTests', 15, 1000000)

In [76]:
# 15 highest 'Tests/1M pop' values; min_pop is left at its default of 1000000.
plot_hbar_wm('Tests/1M pop', 15)

## Stacked Bar Plot

A stacked bar graph (or stacked bar chart) is a chart that uses bars to show comparisons between categories of data, but with ability to break down and compare parts of a whole. Each bar in the chart represents a whole, and segments in the bar represent different parts or categories of that whole.

In this example, the longer a country's bar is, the more people in that country were affected.

You may notice that I used only the 2000 reports with the highest values. This is simply to reduce lag and make the notebook a bit easier to load. All these plots are interactive, so you can zoom in and investigate the data yourself.

In [98]:
def plot_stacked(col, n=2000):
    """Show a bar chart of ``col`` per date, coloured by country.

    col : column of ``full_grouped`` to plot; also used as the title.
    n   : number of rows to draw, taken from the rows of ``full_grouped``
          with the largest ``col`` values. Defaults to 2000, the cap that
          was previously hard-coded.
    """
    # Keeping only the n largest rows limits how much plotly has to render.
    largest_rows = full_grouped.sort_values(col).tail(n)
    fig = px.bar(largest_rows, x="Date", y=col, color='Country/Region',
                 height=600, title=col,
                 color_discrete_sequence = px.colors.cyclical.mygbm)
    fig.update_layout(showlegend=True)
    fig.show()

In [93]:
# Per-country bars of 'Confirmed' by date, from the 2000 largest rows.
plot_stacked('Confirmed')

In [95]:
# Per-country bars of 'Deaths' by date, from the 2000 largest rows.
plot_stacked('Deaths')

In [96]:
# Per-country bars of 'New cases' by date, from the 2000 largest rows.
plot_stacked('New cases')

## Line plots

A similar representation to the stacked bar plot. It is equivalent to connecting the tops of the bars in a bar plot. Once again, only the 2000 highest-value reports are used for these graphs, to reduce lag.

In [102]:
def plot_line(col, n=2000):
    """Show a line chart of ``col`` per date, one line per country.

    col : column of ``full_grouped`` to plot; also used as the title.
    n   : number of rows to draw, taken from the rows of ``full_grouped``
          with the largest ``col`` values. Defaults to 2000, the cap that
          was previously hard-coded.
    """
    # Keeping only the n largest rows limits how much plotly has to render.
    largest_rows = full_grouped.sort_values(col).tail(n)
    fig = px.line(largest_rows, x="Date", y=col, color='Country/Region',
                  height=600, title=col,
                  color_discrete_sequence = px.colors.cyclical.mygbm)
    fig.update_layout(showlegend=True)
    fig.show()

In [103]:
# Per-country lines of 'Confirmed' by date, from the 2000 largest rows.
plot_line('Confirmed')

In [104]:
# Per-country lines of 'Deaths' by date, from the 2000 largest rows.
plot_line('Deaths')

In [105]:
# Per-country lines of 'Active' by date, from the 2000 largest rows.
plot_line('Active')

In [106]:
# Per-country lines of 'New cases' by date, from the 2000 largest rows.
plot_line('New cases')