In [None]:
!pip install adjustText

Collecting adjustText
  Downloading https://files.pythonhosted.org/packages/9e/15/4157718bf323fd5f5b81c891c660d0f388e042d2689a558bf1389632dc44/adjustText-0.7.3.tar.gz
Building wheels for collected packages: adjustText
  Building wheel for adjustText (setup.py) ... [?25l[?25hdone
  Created wheel for adjustText: filename=adjustText-0.7.3-cp36-none-any.whl size=7099 sha256=c034bf6cec09441f5f8efc1dee352497b60bd2b089b8e4b117268a2ba9cabce5
  Stored in directory: /root/.cache/pip/wheels/41/95/74/7d347e136d672f8bc28e937032bc92baf4f80856763a7e7b72
Successfully built adjustText
Installing collected packages: adjustText
Successfully installed adjustText-0.7.3


In [None]:
# Data from here:
# https://www.ecdc.europa.eu/en/publications-data/download-todays-data-geographic-distribution-covid-19-cases-worldwide
import pandas as pd
import numpy as np
import seaborn as sns
from scipy import stats
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
from adjustText import adjust_text
from matplotlib.ticker import ScalarFormatter

from bokeh.models import ColumnDataSource, LabelSet, Arrow, NormalHead, OpenHead, VeeHead, ColorBar, Span
from bokeh.models import Label, Title, NumeralTickFormatter
from bokeh.plotting import figure, output_file, show
from bokeh.transform import linear_cmap
from bokeh.palettes import RdBu, Spectral10, Spectral6

In [None]:
# Load the cumulative master table and discard its 'Unnamed: 0' column
# (presumably an index column written by an earlier Excel export — verify).
covid_cumulative = (
    pd.read_excel("covid_a_master_cumulative_table.xlsx")
    .drop(columns=['Unnamed: 0'])
)

## FILTER THE LAST DAY OF THE RESULTS AND PICK THE TOP N IN # OF CASES

In [None]:
# N: number of countries to keep, ranked by their most recent case count
N = 50

# Order the rows chronologically, then let GroupBy.last() keep the final
# (non-null) value of every column for each country.
last_day = (
    covid_cumulative
    .sort_values('Date', ascending=True)
    .groupby('Countries')
    .last()
)
# The groupby moved the country names into the index; mirror them back as a column.
last_day['Countries'] = last_day.index
last_day = last_day.sort_values('Cases', ascending=False)

# First N countries by cases; 'Greece' is always appended to the list,
# whether or not it already ranks among the first N.
top_countries = last_day['Countries'].head(N).tolist() + ['Greece']
covid_top_countries = covid_cumulative[covid_cumulative['Countries'].isin(top_countries)]

## GENERATE THE RELEVANT METRICS: Case fatality rate, Infection rate per 1m AND Mortality rate per 1m


In [None]:
# One row per selected country holding the maximum of every column.
# Assumes Cases/Deaths are cumulative counters, so the maximum is the latest total — TODO confirm.
max_cases = covid_top_countries.groupby(by=['Countries']).max()

# Deaths as a percentage of confirmed cases.
max_cases['Case_fatality_rate'] = max_cases['Deaths'] / max_cases['Cases'] * 100

# sort_values returns a new frame; the result must be assigned back, otherwise
# the table silently stays in its groupby (alphabetical) order.
max_cases = max_cases.sort_values('Cases', ascending=False)

# Cases and deaths normalised to one million inhabitants.
max_cases['Infection_rate_per_1m'] = max_cases['Cases'] / max_cases['Population'] * 1000000
max_cases['Mortality_rate_per_1m'] = max_cases['Deaths'] / max_cases['Population'] * 1000000
# max_cases.to_excel('covid_b_cases_cumulative_table.xlsx')

In [None]:
# Marker colour for each continent, picked from the Spectral10 palette.
continent_palette = {
    'Asia': Spectral10[0],
    'Europe': Spectral10[3],
    'America': Spectral10[6],
    'Africa': Spectral10[9],
}
# NOTE(review): a continent that is not a key of the mapping ends up with a missing (NaN) Color.
max_cases['Color'] = max_cases['Continent'].map(continent_palette)

In [None]:
# Totals over all selected countries; every average below is a ratio of these
# sums (i.e. a pooled rate, not the mean of the per-country rates).
total_cases = max_cases['Cases'].sum()
total_deaths = max_cases['Deaths'].sum()
total_population = max_cases['Population'].sum()

Average_infection_rate_per_1m = total_cases / total_population * 1000000
Average_death_rate_per_1m = total_deaths / total_population * 1000000
# Deaths per 100 confirmed cases (the message below words it as "mortality rate").
Average_case_fatality_rate = total_deaths / total_cases * 100

print("The average infection rate is: {:.1f} per 1m people".format(Average_infection_rate_per_1m))
print("The average death rate is: {:.1f} per 1m people".format(Average_death_rate_per_1m))
print("The average mortality rate is: {:.1f} per 100 people infected".format(Average_case_fatality_rate))

The average infection rate is: 3852.8 per 1m people
The average death rate is: 134.2 per 1m people
The average mortality rate is: 3.5 per 100 people infected


In [None]:
# Wrap the per-country metrics table in a Bokeh data source shared by the plots below.
# max_cases is indexed by country (it comes from a groupby on 'Countries'); the
# tooltips and labels below reference a 'Countries' field, which presumably is
# that index exposed as a column by ColumnDataSource — verify against the Bokeh docs.
source_1 = ColumnDataSource(max_cases)

In [None]:
# Scatter plot: infection rate per 1m (log-scaled x-axis) against case fatality rate (%).
TITLE = "Infection rate vs Case fatality rate"
TOOLS = "hover, pan, wheel_zoom, box_zoom, reset, save"
p = figure(
    title=TITLE,
    tools=TOOLS,
    toolbar_location="above",
    plot_width=1200,
    x_axis_type="log",
)
p.toolbar.logo = "grey"
p.background_fill_color = "#dddddd"
p.grid.grid_line_color = "white"

# Axis titles, then one shared font size for titles and tick labels on both axes.
p.xaxis.axis_label = "Infection rate per 1m"
p.yaxis.axis_label = "Case fatality rate (%)"
for axis in (p.xaxis, p.yaxis):
    axis.axis_label_text_font_size = "14pt"
    axis.major_label_text_font_size = "14pt"
p.xaxis.formatter = NumeralTickFormatter(format="0,0")

# One marker per country; the fill comes from the 'Color' column and the
# legend gets one entry per distinct 'Continent' value.
p.circle(
    "Infection_rate_per_1m", "Case_fatality_rate",
    size=12,
    source=source_1,
    legend_group='Continent',
    color='Color',
    line_color='black',
    fill_alpha=0.8,
)

p.hover.tooltips = [
    ("Country", "@{Countries}"),
    ("Infection rate per 1m", "@Infection_rate_per_1m{0,0}"),
    ("Case fatality rate (%)", "@Case_fatality_rate{0.1}"),
    ("Cases", "@Cases{0,0}"),
    ("Deaths", "@Deaths{0,0}"),
]

# Country name drawn slightly above each marker.
labels = LabelSet(
    x="Infection_rate_per_1m", y="Case_fatality_rate", text="Countries",
    y_offset=8,
    text_font_size="11px", text_color="black", text_font_style='bold',
    source=source_1, text_align='center',
)
p.add_layout(labels)

# Dashed reference lines at the pooled averages of the two plotted metrics.
dashed_rule = dict(line_color='black', line_dash='dashed', line_width=1)

hline = Span(location=Average_case_fatality_rate, dimension='width', **dashed_rule)
p.add_layout(hline)

vline = Span(location=Average_infection_rate_per_1m, dimension='height', **dashed_rule)
p.add_layout(vline)

# Boxed note explaining the dashed lines, anchored at x=100, y=12.
citation = Label(
    x=100, y=12,
    text='Dashed lines show the average of each axis',
    text_font_size="12px",
    text_color="black",
    text_font_style='bold',
    render_mode='css',
    border_line_color='black',
    border_line_alpha=1.0,
    background_fill_color='white',
    background_fill_alpha=1.0,
)
p.add_layout(citation)

# Write the figure to a standalone HTML file and display it.
output_file("infection_rate_vs_case_fatality.html", title="infection rate vs case fatality")

show(p)

## PLOT THE INFECTION RATE PER 1m AGAINST THE MORTALITY RATE PER 1m

In [None]:
# Scatter plot: infection rate per 1m against mortality rate per 1m, both axes log-scaled.
TITLE = "Infection rate vs Mortality rate"
TOOLS = "hover, pan, wheel_zoom, box_zoom, reset, save"
p1 = figure(
    title=TITLE,
    tools=TOOLS,
    toolbar_location="above",
    plot_width=1200,
    x_axis_type="log",
    y_axis_type="log",
)
p1.toolbar.logo = "grey"
p1.background_fill_color = "#dddddd"
p1.grid.grid_line_color = "white"

# Axis titles, then one shared font size for titles and tick labels on both axes.
p1.xaxis.axis_label = "Infection rate per 1m"
p1.yaxis.axis_label = "Mortality rate per 1m"
for axis in (p1.xaxis, p1.yaxis):
    axis.axis_label_text_font_size = "14pt"
    axis.major_label_text_font_size = "14pt"
p1.xaxis.formatter = NumeralTickFormatter(format="0,0")

# One marker per country; the fill comes from the 'Color' column and the
# legend gets one entry per distinct 'Continent' value.
p1.circle(
    "Infection_rate_per_1m", "Mortality_rate_per_1m",
    size=12,
    source=source_1,
    legend_group='Continent',
    color='Color',
    line_color='black',
    fill_alpha=0.8,
)

p1.hover.tooltips = [
    ("Country", "@{Countries}"),
    ("Infection rate per 1m", "@Infection_rate_per_1m{0,0}"),
    ("Mortality rate per 1m", "@Mortality_rate_per_1m{0,0}"),
    ("Cases", "@Cases{0,0}"),
    ("Deaths", "@Deaths{0,0}"),
]

# Country name drawn slightly above each marker.
labels = LabelSet(
    x="Infection_rate_per_1m", y="Mortality_rate_per_1m", text="Countries",
    y_offset=8,
    text_font_size="11px", text_color="black", text_font_style='bold',
    source=source_1, text_align='center',
)
p1.add_layout(labels)

# Dashed reference lines at the pooled averages of the two plotted metrics.
dashed_rule = dict(line_color='black', line_dash='dashed', line_width=1)

hline = Span(location=Average_death_rate_per_1m, dimension='width', **dashed_rule)
p1.add_layout(hline)

vline = Span(location=Average_infection_rate_per_1m, dimension='height', **dashed_rule)
p1.add_layout(vline)

# Boxed note explaining the dashed lines, anchored at x=100, y=12.
citation = Label(
    x=100, y=12,
    text='Dashed lines show the average of each axis',
    text_font_size="12px",
    text_color="black",
    text_font_style='bold',
    render_mode='css',
    border_line_color='black',
    border_line_alpha=1.0,
    background_fill_color='white',
    background_fill_alpha=1.0,
)
p1.add_layout(citation)

# Write the figure to a standalone HTML file and display it.
output_file("infection_rate_vs_mortality_rate.html", title="infection rate vs Mortality rate")

show(p1)