In [None]:
pip install folium --quiet

In [None]:
pip install plotly --quiet

In [None]:
pip install pandas --quiet 

In [None]:
#Importing relevant modules 

#with plotly library, only a few lines of codes are necessary 
#to create aesthetically pleasing, interactive plots. 
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
#Folium is a Python library for visualizing geospatial data.
#It is easy to use yet powerful. Folium is a Python wrapper for Leaflet.js,
#a leading open-source JavaScript library for plotting interactive maps.
import folium


import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

import math
import random
#timedelta is a class of the datetime module that represents a duration.
#It is generally used for calculating differences between dates and for
#date arithmetic, e.g. shifting a date by a custom offset.
from datetime import timedelta

import warnings
#Suppress all Python warnings (e.g. library deprecation notices) in the notebook output
warnings.filterwarnings('ignore') 

#color templates/palettes to be used later in the code
cnf = "#393e46"  # dark grey, used e.g. for the confirmed-case bars
dth = "#ff2e63"  # pinkish red, used for death counts
rec = "#21bf73"  # green, used for recovered counts
act = "#fe9801"  # orange, used for active counts

In [None]:
import plotly as py
#Initialise plotly's notebook rendering so figures display inline.
#NOTE(review): connected = True is understood to load plotly.js from a CDN
#rather than embedding it - confirm if the notebook must work without internet.
py.offline.init_notebook_mode(connected = True)

In [None]:
import os

In [None]:
import shutil

#Remove any earlier clone so the `git clone` in the next cell starts clean.
#os.system() never raises on failure, so the previous try/except could not
#report anything; shutil.rmtree is portable and, with ignore_errors=True,
#is simply a no-op when the directory does not exist.
shutil.rmtree("Covid-19-Preprocessed-Dataset", ignore_errors=True)

In [None]:
!git clone https://github.com/laxmimerit/Covid-19-Preprocessed-Dataset.git

In [None]:
#Load the four preprocessed tables from the cloned repository.
#Every table that has a 'Date' column gets it parsed as datetime on read.

#one row per location and date
df = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/covid_19_data_cleaned.csv',
    parse_dates=['Date'],
)
#one row per country (no date column is parsed for this table)
countrywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/countrywise.csv',
)
#one row per country and date
country_daywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/country_daywise.csv',
    parse_dates=['Date'],
)
#one row per date
daywise = pd.read_csv(
    'Covid-19-Preprocessed-Dataset/preprocessed/daywise.csv',
    parse_dates=['Date'],
)

In [None]:
#Replace missing province/state names with an empty string, then preview the frame
province = df['Province/State']
df['Province/State'] = province.where(province.notna(), "")
df.head()

In [None]:
#Per-date totals across all locations.
#The column is selected BEFORE aggregating so that only it is summed;
#summing the whole frame first also aggregates the text columns, which is
#wasteful and fragile across pandas versions.
confirmed = df.groupby('Date')['Confirmed'].sum().reset_index()
recovered = df.groupby('Date')['Recovered'].sum().reset_index()
deaths = df.groupby('Date')['Deaths'].sum().reset_index()
deaths

In [None]:
#Number of missing values in each column
df.isna().sum()

In [None]:
#Print the column names, dtypes and non-null counts of the main frame
df.info()

In [None]:
#Rows for Azerbaijan strictly between 2021-12-01 and 2022-03-01.
#The previous version joined two query strings with Python's `and`, which
#evaluates to the second string only, so the date filter was silently ignored.
#Boolean masks apply both conditions; ISO date strings compare correctly
#against the 'Date' column.
in_window = (df['Date'] > '2021-12-01') & (df['Date'] < '2022-03-01')
df[in_window & (df['Country'] == 'Azerbaijan')]

# Total Confirmed, Recovered Cases and Deaths Worldwide

In [None]:
#Last rows of the per-date confirmed totals
confirmed.tail()

In [None]:
#Last rows of the per-date recovered totals
recovered.tail()

In [None]:
#Last rows of the per-date death totals
deaths.tail()

In [None]:
#Line chart of the global confirmed, recovered and death totals per date.
#Only the first N_POINTS rows of each series are drawn.
N_POINTS = 100

fig = go.Figure()

#(frame, value column, legend label, line colour) for each series
series_specs = [
    (confirmed, 'Confirmed', 'Confirmed Cases', "Orange"),
    (recovered, 'Recovered', 'Recovered Cases', "Green"),
    (deaths, 'Deaths', 'Deaths', "Red"),
]
for frame, column, label, colour in series_specs:
    subset = frame.head(N_POINTS)
    fig.add_trace(go.Scatter(x=subset['Date'], y=subset[column],
                             mode='lines+markers', name=label,
                             line=dict(color=colour, width=1)))

#Font sizes are set through title.font / tickfont; the older `titlefont`
#shorthand is deprecated and rejected by newer plotly releases.
fig.update_layout(
    title='Global Covid-19 Data',
    xaxis=dict(title=dict(text='Date', font=dict(size=15)), tickfont=dict(size=15)),
    yaxis=dict(title=dict(text='Number of Cases', font=dict(size=15))),
)
fig.show()

# COVID-19 Case Density Plot On World Map

In [None]:
#Convert the 'Date' column to strings in place. The density map below uses
#'Date' as its animation frame, and later cells re-parse the column with
#pd.to_datetime where date arithmetic is needed.
#NOTE: this mutates df, so cells above that compare 'Date' as a datetime
#see a string column if they are re-run after this cell.
df['Date'] = df['Date'].astype(str)
df.info()

In [None]:
#Animated heat map of the reported locations (state / province level)
#across countries, with one animation frame per date.
fig = px.density_mapbox(
    df,
    lat='Lat',
    lon='Long',
    hover_name='Country',
    hover_data=['Confirmed', 'Recovered', 'Deaths'],
    animation_frame='Date',
    color_continuous_scale='Portland',
    radius=7,
    zoom=0,
    height=700,
)
#title and base-map settings applied in a single layout update
fig.update_layout(
    title="Worldwide COVID-19 Cases Against Dates",
    mapbox_style='open-street-map',
    mapbox_center_lon=0,
)
fig.show()

# COVID-19 Cases Over Time With Area Plot

In [None]:
#Treemap of the Active / Recovered / Deaths split on the last of the first
#100 dates: each rectangle is one variable, sized by its value.

#Columns are selected with a LIST; selecting groupby columns with a bare
#tuple ('a', 'b', ...) raises in pandas >= 2.0.
cases = df.groupby('Date')[['Confirmed', 'Recovered', 'Deaths', 'Active']].sum().reset_index().head(100)
#keep only the most recent of those dates
cases = cases[cases['Date'] == cases['Date'].max()].reset_index(drop = True)
#long format: one row per (Date, variable) pair
cs = cases.melt(id_vars = 'Date', value_vars = ['Active', 'Recovered', 'Deaths'])

fig = px.treemap(cs, path = ['variable'], values = 'value', height = 250, width = 800, color_discrete_sequence = [act, rec, dth])
fig.data[0].textinfo = 'label+text+value'

fig.show()

In [None]:
#Area plot of active, recovered and death counts over the first 100 dates.

#Columns are selected with a LIST; selecting groupby columns with a bare
#tuple ('a', 'b', ...) raises in pandas >= 2.0.
cases = df.groupby('Date')[['Active', 'Recovered', 'Deaths']].sum().reset_index().head(100)
#long format: one row per (Date, Case) pair with its Count
cases = cases.melt(id_vars = 'Date', value_vars = ['Active', 'Recovered', 'Deaths'], var_name = 'Case', value_name = 'Count')

fig = px.area(cases, x = 'Date', y = 'Count', color = 'Case', height = 600, title = 'Cases over time', color_discrete_sequence = [act, rec, dth])
fig.update_layout(xaxis_rangeslider_visible = True)
fig.show()

# Folium Maps

In [None]:
#World map with one circle per location for the latest date in the data.
#Hovering a circle shows its confirmed, recovered and death counts.
cases = df[df['Date'] == df['Date'].max()]

#A map marker needs real coordinates, so rows without Lat/Long are skipped
#instead of aborting the whole loop.
located = cases.dropna(subset = ['Lat', 'Long'])

area_map = folium.Map(location = [0, 0], tiles = 'cartodbpositron', min_zoom = 1, max_zoom = 4, zoom_start = 1)
for _, row in located.iterrows():
    #valid HTML list items with bold labels (the former <l1>/<bold> tags are
    #not HTML elements, so the tooltip rendered as one unformatted line)
    tooltip = ('<li><b>Country:</b> ' + str(row['Country']) + '</li>' +
               '<li><b>Province/State:</b> ' + str(row['Province/State']) + '</li>' +
               '<li><b>Confirmed Cases:</b> ' + str(row['Confirmed']) + '</li>' +
               '<li><b>Recovered Cases:</b> ' + str(row['Recovered']) + '</li>' +
               '<li><b>Deaths:</b> ' + str(row['Deaths']) + '</li>')
    #radius is the square root of the confirmed count; the count is clamped
    #at 0 because a negative base would make ** 0.5 return a complex number
    folium.Circle(location = [row['Lat'], row['Long']],
                  color = 'red', fill = True, fill_color = 'red',
                  tooltip = tooltip,
                  radius = max(int(row['Confirmed']), 0) ** 0.5).add_to(area_map)
area_map

# Confirmed Cases Analysis with Choropleth Maps

In [None]:
#Choropleth maps are thematic maps in which areas are shaded in proportion
#to the number of confirmed cases. With the Inferno color scale used below,
#more confirmed cases are shown in lighter (yellow) colors and fewer cases in darker colors.

#using plotly express to design the map

In [None]:
#Animated choropleth of confirmed cases per country, one frame per date.
#Countries with darker colors (near black) have the lowest confirmed cases and
#countries near yellow have the highest. The colors change over time as the
#confirmed counts change from date to date.

#The color is the natural logarithm of the confirmed count. Zero counts are
#masked to NaN first: log(0) is -inf, which was only going unnoticed because
#warnings are globally silenced. Masked entries carry no color value.
confirmed_counts = country_daywise['Confirmed']
log_confirmed = np.log(confirmed_counts.where(confirmed_counts > 0))

fig = px.choropleth(country_daywise, locations = 'Country', locationmode = 'country names', color = log_confirmed,
                    hover_name = 'Country', animation_frame = country_daywise['Date'].dt.strftime('%Y-%m-%d'),
                    title = 'Cases over time', color_continuous_scale = px.colors.sequential.Inferno)
fig.update(layout_coloraxis_showscale = True)
fig.show()

# Confirmed Cases and Deaths Over Time (Bar Plots)

In [None]:
#Side-by-side bar charts of the confirmed and death counts per date.
fig_conf = px.bar(daywise, x='Date', y='Confirmed', color_discrete_sequence=[act])
fig_dth = px.bar(daywise, x='Date', y='Deaths', color_discrete_sequence=[dth])

fig = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.1,
    subplot_titles=['Confirmed Cases', 'Death Cases'],
)

#copy the single trace of each express figure into its panel
for panel_col, source in enumerate((fig_conf, fig_dth), start=1):
    fig.add_trace(source['data'][0], row=1, col=panel_col)

#no outline around the bars, fixed figure height
fig.update_traces(dict(marker_line_width=0))
fig.update_layout(height=350)

fig.show()

# Confirmed and Death Cases using Static Colormap

In [None]:
#Two static world maps side by side: log of confirmed cases and log of deaths.
fig_conf = px.choropleth(
    countrywise,
    locations='Country',
    locationmode='country names',
    color=np.log(countrywise['Confirmed']),
    hover_name='Country',
    hover_data=['Confirmed'],
)

#only countries with at least one death, so the logarithm is defined
temp = countrywise.loc[countrywise['Deaths'] > 0]
fig_dth = px.choropleth(
    temp,
    locations='Country',
    locationmode='country names',
    color=np.log(temp['Deaths']),
    hover_name='Country',
    hover_data=['Deaths'],
)

fig = make_subplots(
    rows=1,
    cols=2,
    subplot_titles=['Confirmed', 'Deaths'],
    specs=[[{'type': 'choropleth'}, {'type': 'choropleth'}]],
)

#copy the single trace of each express figure into its panel
for panel_col, source in enumerate((fig_conf, fig_dth), start=1):
    fig.add_trace(source['data'][0], row=1, col=panel_col)

fig.update(layout_coloraxis_showscale=False)

fig.show()

# Deaths and Recoveries per 100 Cases

In [None]:
#Three line graphs in one row: deaths per 100 cases, recoveries per 100 cases
#and deaths per 100 recoveries, each plotted against the date.
fig1 = px.line(daywise, x='Date', y='Deaths / 100 Cases', color_discrete_sequence=[dth])
fig2 = px.line(daywise, x='Date', y='Recovered / 100 Cases', color_discrete_sequence=[rec])
fig3 = px.line(daywise, x='Date', y='Deaths / 100 Recovered', color_discrete_sequence=[cnf])

fig = make_subplots(
    rows=1,
    cols=3,
    shared_xaxes=False,
    subplot_titles=('Deaths / 100 cases', 'Recoveries / 100 Cases', 'Deaths / 100 Recoveries'),
)

#copy the single trace of each express figure into its panel
for panel_col, source in enumerate((fig1, fig2, fig3), start=1):
    fig.add_trace(source['data'][0], row=1, col=panel_col)

fig.update_layout(height=400)

fig.show()

# New Cases Per Day and No. Of Countries Affected

In [None]:
#Side-by-side bar charts: new cases per date and number of countries per date.
fig_con = px.bar(daywise, x='Date', y='New Cases', color_discrete_sequence=[cnf])
#note: despite its name, fig_dth holds the 'No. of Countries' chart here
fig_dth = px.bar(daywise, x='Date', y='No. of Countries', color_discrete_sequence=[dth])

fig = make_subplots(
    rows=1,
    cols=2,
    shared_xaxes=False,
    horizontal_spacing=0.1,
    subplot_titles=('No. of New Cases per day', 'No. of Countries'),
)

#copy the single trace of each express figure into its panel
for panel_col, source in enumerate((fig_con, fig_dth), start=1):
    fig.add_trace(source['data'][0], row=1, col=panel_col)
fig.update_traces(dict(marker_line_width=0))

fig.show()


# Top 20 Countries Cases Analysis

In [None]:
#Grid of horizontal bar charts, each showing the `top` countries for one metric.
top = 20


def _top_countries_bar(column, colour):
    """Return a horizontal bar chart of the `top` countries ranked by `column`.

    The same column drives the ranking, the bar length and the bar label, so
    the three can never disagree. (Previously the Active panel drew bar
    lengths from 'Confirmed' and the Recovered panel from 'Deaths'.)
    """
    ranked = countrywise.sort_values(column).tail(top)
    return px.bar(ranked, x = column, y = 'Country', text = column,
                  orientation = 'h', color_discrete_sequence = [colour])


fig_con = _top_countries_bar('Confirmed', cnf)
fig_dth = _top_countries_bar('Deaths', dth)

fig_act = _top_countries_bar('Active', act)
fig_rec = _top_countries_bar('Recovered', rec)

fig_dthc = _top_countries_bar('Deaths / 100 Cases', '#f78345')
fig_nc = _top_countries_bar('New Cases', '#e82734')

fig_cpm = _top_countries_bar('Cases / Million People', '#b38472')
fig_clw = _top_countries_bar('Confirmed last week', '#c82374')

fig_wc = _top_countries_bar('1 week change', '#d32435')
fig_pwc = _top_countries_bar('1 week % increase', '#a12392')

fig = make_subplots(rows = 5, cols = 2, shared_xaxes = False, horizontal_spacing = 0.2, vertical_spacing = 0.05,
                   subplot_titles = ('Top 20 Countries by Confirmed Cases', 'Top 20 Countries by Deaths',
                                     'Top 20 Countries by Active Cases',
                                     'Top 20 Countries by Recovered',
                                     'Deaths per 100 Cases', 'New Cases', 'Cases per Million People',
                                    'Confirmed Last Week', '1 Week Change', '1 Week % Change'))

#panels in row-major order, matching the order of subplot_titles above
panels = [fig_con, fig_dth, fig_act, fig_rec, fig_dthc,
          fig_nc, fig_cpm, fig_clw, fig_wc, fig_pwc]
for position, panel in enumerate(panels):
    grid_row, grid_col = divmod(position, 2)
    fig.add_trace(panel['data'][0], row = grid_row + 1, col = grid_col + 1)
fig.update_layout(height = 3000, width = 1000)

fig.show()

# Deaths vs Confirmed Cases Scatter Plot

In [None]:
#Scatter plot of deaths against confirmed cases for the 20 countries with the
#most deaths; both axes are logarithmic and each point is labelled by country.
top_by_deaths = countrywise.sort_values('Deaths', ascending=False).iloc[:20]
fig = px.scatter(
    top_by_deaths,
    x='Confirmed',
    y='Deaths',
    color='Country',
    size='Confirmed',
    height=800,
    text='Country',
    log_x=True,
    log_y=True,
    title='Deaths vs Confirmed Cases (Cases are on log10 scale)',
)

#labels above the points; the chained call is the cell's last expression,
#so the notebook renders the returned figure
fig.update_traces(textposition='top center').update_layout(showlegend=True, xaxis_rangeslider_visible=True)

# Confirmed Cases, Deaths, New Cases vs Country and Date

In [None]:
#Confirmed cases per date as bars, coloured by country
fig = px.bar(
    country_daywise,
    x='Date',
    y='Confirmed',
    color='Country',
    title='Confirmed Cases',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()

In [None]:
#Deaths per date as bars, coloured by country
fig = px.bar(
    country_daywise,
    x='Date',
    y='Deaths',
    color='Country',
    title='Deaths',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()

In [None]:
#Recovered cases per date as bars, coloured by country
fig = px.bar(
    country_daywise,
    x='Date',
    y='Recovered',
    color='Country',
    title='Recovered Cases',
    height=600,
    color_discrete_sequence=px.colors.cyclical.mygbm,
)
fig.show()

# Line Plot

In [None]:
#One line chart per metric, with one line per country.
#(column to plot, chart title) pairs, shown in this order
line_specs = (
    ('Confirmed', 'Confirmed Cases'),
    ('Deaths', 'Deaths'),
    ('Recovered', 'Recovered Cases'),
)
for metric, heading in line_specs:
    fig = px.line(
        country_daywise,
        x='Date',
        y=metric,
        color='Country',
        height=600,
        title=heading,
        color_discrete_sequence=px.colors.cyclical.mygbm,
    )
    fig.show()

# Case Growth Rate After First 100 Cases

In [None]:
#Confirmed-case curves aligned on the day each country first exceeded 100 cases.

#countries that ever exceeded 100 confirmed cases
cond = country_daywise.loc[country_daywise['Confirmed'] > 100, 'Country'].unique()

#per-country, per-date confirmed totals, keeping only rows above 100 cases
temp = (
    df.loc[df['Country'].isin(cond)]
      .groupby(['Country', 'Date'])['Confirmed']
      .sum()
      .reset_index()
      .query('Confirmed > 100')
)

#first date above the threshold for every country
min_date = temp.groupby('Country')['Date'].min().rename('Base Date').reset_index()

#days elapsed since that first date ('Date' is re-parsed because it may be a string here)
from_100th_case = temp.merge(min_date, on='Country')
elapsed = pd.to_datetime(from_100th_case['Date']) - pd.to_datetime(from_100th_case['Base Date'])
from_100th_case['N days'] = elapsed.dt.days

fig = px.line(
    from_100th_case,
    x='N days',
    y='Confirmed',
    color='Country',
    title='N Days after 100 Cases',
    height=600,
)

fig.show()

# Case Growth Rate After First 1000 Cases

In [None]:
#Confirmed-case curves aligned on the day each country first exceeded 1000 cases.

#countries that ever exceeded 100 confirmed cases (pre-filter; the 1000
#threshold itself is applied to the grouped totals below)
cond = country_daywise.loc[country_daywise['Confirmed'] > 100, 'Country'].unique()

#per-country, per-date confirmed totals, keeping only rows above 1000 cases
temp = (
    df.loc[df['Country'].isin(cond)]
      .groupby(['Country', 'Date'])['Confirmed']
      .sum()
      .reset_index()
      .query('Confirmed > 1000')
)

#first date above the threshold for every country
min_date = temp.groupby('Country')['Date'].min().rename('Base Date').reset_index()

#days elapsed since that first date ('Date' is re-parsed because it may be a string here)
from_1000th_case = temp.merge(min_date, on='Country')
elapsed = pd.to_datetime(from_1000th_case['Date']) - pd.to_datetime(from_1000th_case['Base Date'])
from_1000th_case['N days'] = elapsed.dt.days

fig = px.line(
    from_1000th_case,
    x='N days',
    y='Confirmed',
    color='Country',
    title='N Days after 1000 Cases',
    height=600,
)

fig.show()

# Growth Rate After 10000 and 100000 Cases

In [None]:
#Confirmed-case curves aligned on the day each country first exceeded
#10000 and then 100000 cases. Both charts run the same pipeline, so it
#lives in one helper instead of two copy-pasted blocks.


def _days_since_threshold(cases, threshold):
    """Return per-country, per-date confirmed totals above `threshold`.

    `cases` needs 'Country', 'Date' and 'Confirmed' columns. The result has
    the columns 'Country', 'Date', 'Confirmed', 'Base Date' (the first date on
    which the country exceeded `threshold`) and 'N days' (days elapsed since
    that base date).
    """
    totals = cases.groupby(['Country', 'Date'])['Confirmed'].sum().reset_index()
    totals = totals[totals['Confirmed'] > threshold]

    base = totals.groupby('Country')['Date'].min().reset_index()
    base.columns = ['Country', 'Base Date']

    merged = pd.merge(totals, base, on = 'Country')
    #'Date' may be a string column at this point, so both sides are re-parsed
    merged['N days'] = (pd.to_datetime(merged['Date']) - pd.to_datetime(merged['Base Date'])).dt.days
    return merged


#countries that ever exceeded 100 confirmed cases (same pre-filter for both charts)
cond = country_daywise[country_daywise['Confirmed'] > 100]['Country'].unique()
temp = df[df['Country'].isin(cond)]

from_10000th_case = _days_since_threshold(temp, 10000)

fig = px.line(from_10000th_case, x = 'N days', y = 'Confirmed', color = 'Country',
             title = 'N Days after 10000 Cases', height = 600)

fig.show()


from_100000th_case = _days_since_threshold(temp, 100000)

fig = px.line(from_100000th_case, x = 'N days', y = 'Confirmed', color = 'Country',
             title = 'N Days after 100000 Cases', height = 600)

fig.show()