In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from plotly import subplots
import plotly.figure_factory as ff
import matplotlib.pyplot as plt

from pandas_profiling import ProfileReport
import seaborn as sns
from sklearn import metrics
from scipy import stats

from copy import deepcopy

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _subdirs, files in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in files)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Shared colour palette (hex strings) reused by the charts in this notebook.
primary_blue = "#496595"
primary_blue2 = "#85a1c1"
primary_blue3 = "#3f4d63"
primary_grey = "#c6ccd8"
primary_grey2 = "#696969"
primary_black = "#202022"
primary_bgcolor = "#f4f0ea"

# Third entry of plotly's qualitative "Plotly" colour sequence.
primary_green = px.colors.qualitative.Plotly[2]

# <p style="background-color:skyblue; font-family:newtimeroman; font-size:250%; text-align:center; border-radius: 15px 50px;">💉 COVID-19 🦠🧬 World Vaccination Progress 💉</p>

The data contains the following information:  

* **Country** - this is the country for which the vaccination information is provided;     
* **Country ISO Code** - ISO code for the country;   
* **Date**- date for the data entry; for some of the dates we have only the daily vaccinations, for others, only the (cumulative) total;   
* **Total number of vaccinations** - this is the absolute number of total immunizations in the country;  
* **Total number of people vaccinated** - a person, depending on the immunization scheme, will receive one or more (typically 2) vaccines; at a certain moment, the number of vaccination might be larger than the number of people;  
* **Total number of people fully vaccinated** - this is the number of people that received the entire set of immunization according to the immunization scheme (typically 2); at a certain moment in time, there might be a certain number of people that received one vaccine and another number (smaller) of people that received all vaccines in the scheme;  
* **Daily vaccinations (raw)** - for a certain data entry, the number of vaccination for that date/country;  
* **Daily vaccinations** - for a certain data entry, the number of vaccination for that date/country;  
* **Total vaccinations per hundred** - ratio (in percent) between vaccination number and total population up to the date in the country;  
* **Total number of people vaccinated per hundred** - ratio (in percent) between population immunized and total population up to the date in the country;  
* **Total number of people fully vaccinated per hundred** - ratio (in percent) between population fully immunized and total population up to the date in the country;   
* **Number of vaccinations per day** - number of daily vaccination for that day and country;   
* **Daily vaccinations per million** -  ratio (in ppm) between vaccination number and total population for the current date in the country;    
* **Vaccines used in the country** - comma-separated list of the vaccines used in the country (up to date);  
* **Source name** - source of the information (national authority, international organization, local organization etc.);   
* **Source website** - website of the source of information;

In [None]:
# Load the country vaccination records from the Kaggle input dataset and preview the first rows.
df = pd.read_csv('/kaggle/input/d/gpreda/covid-world-vaccination-progress/country_vaccinations.csv')
df.head()

In [None]:
# Build a pandas-profiling report object for the raw frame.
report = ProfileReport(df)

In [None]:
# Bare expression: the notebook renders the profiling report as this cell's output.
report

<a id='1'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:150%; text-align:center; border-radius: 15px 50px;">1. Data cleaning ⚙️</p>

We are going to fix some of the missing values so that the EDA becomes easier and clearer.

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) to spot missing values.
df.info()

In [None]:
# Count, per country, the rows whose iso_code is missing.
df.loc[df['iso_code'].isnull(), 'country'].value_counts()

We can fix this easily by imputing the missing ISO code ('GBR').

In [None]:
# Replace every missing iso_code with 'GBR'.
# NOTE(review): this assumes all rows lacking an iso_code belong to the United Kingdom —
# confirm against the value counts in the previous cell before relying on it.
df['iso_code'] = df['iso_code'].fillna('GBR')

### Columns to drop

As we can see, 'daily_vaccinations_raw' has a cleaned variable 'daily_vaccinations' with less missing values, so we can now drop 'daily_vaccinations_raw'

In [None]:
# Drop the raw daily counts column; 'daily_vaccinations' is kept.
df = df.drop('daily_vaccinations_raw', axis=1)

### Let's transform the date column

In [None]:
# Parse the dates so rows can be ordered chronologically, then store them back
# as ISO 'YYYY-MM-DD' strings.
df = df.assign(date=pd.to_datetime(df['date'])).sort_values('date', ascending=True)
df['date'] = df['date'].dt.strftime('%Y-%m-%d')

In [None]:
# Every distinct date string present in the frame (used below to pad countries with missing dates).
unique_dates = df['date'].unique()

As we can see, not every ISO code has a row for every date, so we are going to create a row (with missing values) for each absent ISO code / date combination.

In [None]:
# Snapshot of the frame before padding rows are added; reused later for the per-region time series.
original_df = df.copy()

In [None]:
# Pad the frame so that every iso_code has at least one row for every date in `unique_dates`.
# The full (iso_code, date) grid is left-joined with the existing records in a single merge:
# pairs that already exist keep all their rows, absent pairs become rows whose other columns
# are missing. This replaces the nested loop that scanned the frame once per pair and grew it
# row by row, and it no longer depends on the frame having exactly 14 columns.
date_grid = pd.MultiIndex.from_product(
    [df['iso_code'].unique(), unique_dates],
    names=['iso_code', 'date'],
).to_frame(index=False)

# Restore the original column order after the merge (the join keys would otherwise come first).
df = date_grid.merge(df, on=['iso_code', 'date'], how='left')[df.columns]

In [None]:
# (rows, columns) of the frame after padding.
df.shape

Now re-order the dataframe based on '**date**' so we can plot correctly later

In [None]:
# Sort on real datetimes (string order would be wrong once the format changes),
# then store the dates as 'MM-DD-YYYY' strings.
chronological = df.assign(date=pd.to_datetime(df['date'])).sort_values('date', ascending=True)
df = chronological.assign(date=chronological['date'].dt.strftime('%m-%d-%Y'))

df.head()

<a id='2'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:150%; text-align:center; border-radius: 15px 50px;">2. Data visualization 📊 by Countries</p>

Special thanks to Sharlto (https://www.kaggle.com/dwin183287/covid-19-world-vaccination) for this amazing summary chart.

In [None]:
# I will adapt this to Plotly in future versions
plt.rcParams['figure.dpi'] = 300

fig = plt.figure(figsize=(5, 0.8), facecolor='#f6f5f5')
gs = fig.add_gridspec(1, 1)
gs.update(wspace=0, hspace=0)

background_color = "#f6f5f5"

ax0 = fig.add_subplot(gs[0, 0])

ax0.set_facecolor(primary_bgcolor)
for s in ["top","right", 'left', 'bottom']:
    ax0.spines[s].set_visible(False)
ax0.set_xticks([])
ax0.set_yticks([])

ax0.grid(which='major', axis='y', zorder=0, color='#EEEEEE')
ax0.text(-0.12, 0.8, 'General Overview', color=primary_black, fontsize=8, ha='left', weight='bold', va='bottom')
ax0.text(-0.12, 0.79, 'A quick glance of world vaccination progress', color='#292929', fontsize=6, ha='left', va='top')

ax0.text(0, 0, '150', color=primary_blue, fontsize=20, ha='center', weight='bold', va='bottom')
ax0.text(0, 0, 'country with\nVaccination Program', color=primary_grey2, fontsize=6, ha='center', va='top', weight='bold')

ax0.text(0.2, 0, '10', color=primary_blue, fontsize=20, ha='center', weight='bold', va='bottom')
ax0.text(0.2, 0, 'vaccines\nused', color=primary_grey2, fontsize=6, ha='center', va='top', weight='bold')

ax0.text(0.4, 0, '475', color=primary_blue, fontsize=20, ha='center', weight='bold', va='bottom')
ax0.text(0.4, 0, 'million of\ntotal vaccinations', color=primary_grey2, fontsize=6, ha='center', va='top', weight='bold')

ax0.text(0.65, 0, '269', color=primary_blue, fontsize=20, ha='center', weight='bold', va='bottom')
ax0.text(0.65, 0, 'million of\npeople vaccinated', color=primary_grey2, fontsize=6, ha='center', va='top', weight='bold')

ax0.text(0.9, 0, '103', color=primary_blue, fontsize=20, ha='center', weight='bold', va='bottom')
ax0.text(0.9, 0, 'million of people\nfully vaccinated', color=primary_grey2, fontsize=6, ha='center', va='top', weight='bold')

plt.show()

<a id='2.1'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:120%; text-align:center; border-radius: 15px 50px;">2.1 Distribution of vaccinations by country</p>

We have to fill the missing values using the forward-fill method from pandas.

As ***people_fully_vaccinated*** has many missing values and is only reported for a small number of countries, we are not going to plot it.

In [None]:
tdf = df.copy()

# Forward-fill gaps within each iso_code. `df` was sorted by date in the previous step, so
# within a group each missing value takes the most recent earlier value of that country.
# groupby().ffill() replaces the per-country masked assignment loop and the deprecated
# fillna(method='ffill') call.
fill_cols = [col for col in tdf.columns if col != 'iso_code']
tdf[fill_cols] = tdf.groupby('iso_code')[fill_cols].ffill()

# Anything still missing (rows before a country's first record) becomes 0.
tdf = tdf.fillna(0)


In [None]:
# Animated world map of total_vaccinations: locations come from the iso_code column,
# one animation frame per value of `date`, colour range pinned to 0-5,000,000.
fig = px.choropleth(
    data_frame=tdf,
    locations="iso_code",
    color="total_vaccinations",
    hover_name="country",
    animation_frame="date",
    color_continuous_scale='viridis',
    projection="natural earth",
    range_color=[0, 5000000],
    title='<span style="font-size:36px; font-family:Times New Roman">Number of vaccinations per country</span>',
)
fig.show()

In [None]:
# Animated world map of daily_vaccinations: locations come from the iso_code column,
# one animation frame per value of `date`, colour range pinned to 0-1,000,000.
fig = px.choropleth(
    data_frame=tdf,
    locations="iso_code",
    color="daily_vaccinations",
    hover_name="country",
    animation_frame="date",
    color_continuous_scale='viridis',
    projection="natural earth",
    range_color=[0, 1000000],
    title='<span style="font-size:36px; font-family:Times New Roman">Number of daily vaccinations</span>',
)
fig.show()

<a id='2.2'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:120%; text-align:center; border-radius: 15px 50px;">2.2 Top vaccine laboratories</p>

In [None]:
# Keep only the rows that name a vaccine, then collapse to one row per (iso_code, vaccines) pair.
tdf = df.dropna(subset=['vaccines'])
vac_df = tdf.groupby(['iso_code', 'vaccines']).max().reset_index()

# Turn the comma-separated vaccine string into a list of trimmed names.
vac_df['vaccines_split'] = vac_df['vaccines'].str.split(',').map(
    lambda parts: [part.strip() for part in parts]
)
vac_df.head()

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# One indicator column per vaccine name: 1 where the country's list contains it, else 0.
one_hot = MultiLabelBinarizer()
vac_data = one_hot.fit_transform(vac_df['vaccines_split'])
vac_names = one_hot.classes_
vac_countries = vac_df['country']

# Index by country first, then move the country back into a regular column.
final_vac_df = pd.DataFrame(vac_data, index=vac_countries, columns=vac_names).reset_index()
final_vac_df.head()

In [None]:
# Number of countries using each vaccine, in ascending order.
ncountrys_vac = final_vac_df[vac_names].sum(axis=0).sort_values()

# Three colour tiers sized from the actual number of bars: roughly the top 20% in the
# strongest blue, the next 40% in light blue, the rest in grey. With 10 vaccines this is
# the 4 / 4 / 2 split that used to be hard-coded, but it no longer leaves bars uncoloured
# (or surplus colours) when the number of vaccines differs.
n_bars = len(ncountrys_vac)
n_top = min(n_bars, max(1, round(0.2 * n_bars)))
n_mid = min(n_bars - n_top, round(0.4 * n_bars))
n_low = n_bars - n_top - n_mid
colors = [primary_grey] * n_low + [primary_blue2] * n_mid + [primary_blue] * n_top

fig = go.Figure(go.Bar(
    x = ncountrys_vac.values,
    y = ncountrys_vac.index,
    orientation = 'h',
))
fig.update_traces(
    marker_color=colors,
    marker_line_color=primary_black,
    marker_line_width=1.5,
    opacity=0.6,
)
fig.update_layout(
    title='<span style="font-size:36px; font-family:Times New Roman">Vaccines laboratory distribution by countries</span>',
)

fig.show()

<a id='2.3'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:120%; text-align:center; border-radius: 15px 50px;">2.3 Distribution of vaccinations by country</p>

In [None]:
# Lookup tables: ISO code -> region / flag URL, and per-country COVID summary figures.
regions_df = pd.read_csv('/kaggle/input/countries-iso-codes-continent-flags-url/countries_continents_codes_flags_url.csv')
world_summary_df = pd.read_csv('/kaggle/input/covid19-global-dataset/worldometer_coronavirus_summary_data.csv')

region_columns = regions_df[['alpha-3', 'region', 'image_url']]

# Inner-join on the ISO code (rows without a matching code are dropped), then left-join
# the summary figures on the country name (rows without a match keep missing values).
full_df = (
    df
    .merge(region_columns, left_on='iso_code', right_on='alpha-3')
    .merge(world_summary_df, on='country', how='left')
)
full_df.head()

In [None]:
# Add deaths / confirmed / recovered as a fraction of the population to both frames.
for frame in (full_df, world_summary_df):
    for metric in ('total_deaths', 'total_confirmed', 'total_recovered'):
        frame[f'{metric}_ratio'] = frame[metric] / frame['population']

In [None]:
def get_multi_line_title(title:str, subtitle:str):
    """Return an HTML title string: `title`, a line break, then `subtitle` in a <sub> tag,
    all wrapped in a 32px Times New Roman <span>."""
    span_open = '<span style="font-size:32px; font-family:Times New Roman">'
    body = f'{title}<br><sub>{subtitle}</sub>'
    return span_open + body + '</span>'

# Thanks to 
def plotly_bar_chart(data: pd.DataFrame, xcolumn: str, ycolumn:str, title:str, colors:str, ylabel="Count", n=None):
    """Show a bar chart of `ycolumn` per `xcolumn`, tallest bar first, with an image above each bar.

    Parameters
    ----------
    data : frame containing `xcolumn` and `ycolumn`. Rows whose `ycolumn` is missing are
        dropped. If an `image_url` column is present, each string URL in it is drawn as an
        image above the corresponding bar; non-string / empty entries are skipped.
    xcolumn : column providing the bar categories.
    ycolumn : column providing the bar heights (also drives the bar colour).
    title : figure title.
    colors : colorscale passed to the bar markers.
    ylabel : y-axis title, also used as the label in the hover template.
    n : keep only the first `n` bars after sorting; `None` keeps every bar.

    Returns
    -------
    None. The figure is displayed with `fig.show()`.
    """
    hovertemplate ='<br><b>%{x}</b>'+f'<br><b>{ylabel}: </b>'+'%{y}<br><extra></extra>'
    data = data.sort_values(ycolumn, ascending=False).dropna(subset=[ycolumn])

    if n is not None:
        data = data.iloc[:n]
        xaxis_title = f"Top {n} {xcolumn.title()}"
    else:
        # No limit requested: avoid the "Top  Country" title with an empty count.
        xaxis_title = xcolumn.title()

    fig = go.Figure(go.Bar(
        # NOTE(review): hoverinfo='skip' may suppress the hover box entirely, making the
        # hovertemplate below ineffective — confirm against the plotly docs.
        hoverinfo='skip',
        x=data[xcolumn],
        y=data[ycolumn],
        hovertemplate = hovertemplate,
        marker=dict(
            color = data[ycolumn],
            colorscale=colors,
        ),
    ))

    max_y_val = data[ycolumn].max()
    # The images are optional: without an image_url column the chart is drawn without them.
    flag_urls = data['image_url'] if 'image_url' in data.columns else [None] * len(data)
    for country, flag_url, ppl_vac in zip(data[xcolumn], flag_urls, data[ycolumn]):
        if not flag_url or not isinstance(flag_url, str):
            continue
        fig.add_layout_image(
            dict(
                source=flag_url,
                x=country,
                # Image sits 5% of the tallest bar above its own bar and is 8% of it high.
                y=ppl_vac + 0.05 * max_y_val,
                sizex=0.5,
                sizey=0.08 * max_y_val,
                xanchor="center", yanchor="bottom",
                sizing='stretch',
                xref='x',
                yref="y",
            ),
        )

    # 15% head-room so the images above the tallest bar stay inside the plot area.
    # Skipped for an empty frame, where the maximum is NaN and no range can be set.
    if len(data) > 0:
        fig.update_yaxes(range=[0, max_y_val + 0.15*max_y_val])

    fig.update_layout(
        title=title,
        xaxis_title=xaxis_title,
        yaxis_title=ylabel,
        plot_bgcolor='rgba(0,0,0,0)',
        hovermode="x",
    )

    fig.show()

In [None]:
# One row per country: after the descending sort, the first row kept for each country is
# the one with its highest people_vaccinated value.
ranked = full_df.sort_values('people_vaccinated', ascending=False)
tdf = ranked.drop_duplicates(subset=['country'], keep='first', ignore_index=True)
tdf.head()

In [None]:
# Top 10 countries by people_vaccinated.
title = get_multi_line_title(
    title="People Vaccinated",
    subtitle="Individuals who received the first dose of the vaccine",
)
plotly_bar_chart(data=tdf, xcolumn='country', ycolumn="people_vaccinated", title=title, colors="Blugrn", n=10)

In [None]:
# One row per country: the row with its highest people_vaccinated_per_hundred value.
ranked = full_df.sort_values('people_vaccinated_per_hundred', ascending=False)
tdf = ranked.drop_duplicates(subset=['country'], keep='first', ignore_index=True)

In [None]:
# Top 10 countries by people_vaccinated_per_hundred.
title = get_multi_line_title(
    title="People Vaccinated per Hundred",
    subtitle="Percent of individuals who received the first dose of the vaccine",
)
plotly_bar_chart(data=tdf, xcolumn='country', ycolumn="people_vaccinated_per_hundred", title=title, colors="Blugrn", ylabel='Percent', n=10)

In [None]:
# One row per country: the row with its highest total_vaccinations value.
ranked = full_df.sort_values('total_vaccinations', ascending=False)
tdf = ranked.drop_duplicates(subset=['country'], keep='first', ignore_index=True)

In [None]:
# Top 10 countries by total_vaccinations.
title = get_multi_line_title(
    title="Total Vaccinations",
    subtitle="Total number of vaccinations by country",
)
plotly_bar_chart(data=tdf, xcolumn='country', ycolumn="total_vaccinations", title=title, colors="Purp", n=10)

In [None]:
# One row per country: the row with its highest total_vaccinations_per_hundred value.
ranked = full_df.sort_values('total_vaccinations_per_hundred', ascending=False)
tdf = ranked.drop_duplicates(subset=['country'], keep='first', ignore_index=True)

In [None]:
# Top 10 countries by total_vaccinations_per_hundred. The y label is passed explicitly so
# that this per-hundred chart is not labelled with the default "Count"; the stray trailing
# ';' in the subtitle is removed.
title = get_multi_line_title("Total Vaccinations per Hundred", "Ratio between vaccination number and total population up to the date in the country")
plotly_bar_chart(tdf, 'country', "total_vaccinations_per_hundred", title, "Purp", n=10, ylabel='Vaccinations per hundred people')

<a id='3'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:150%; text-align:center; border-radius: 15px 50px;">3. Data visualization 📊 by Continent</p>

In this case, I think that the number of people vaccinated per hundred is the most representative value, so I will only plot these values.

In [None]:
# (Re)load the ISO code -> region / flag URL lookup table and preview it.
regions_df = pd.read_csv('/kaggle/input/countries-iso-codes-continent-flags-url/countries_continents_codes_flags_url.csv')

regions_df.head()

In [None]:
# Attach region and flag URL to each vaccination row by ISO code
# (default inner join: rows whose iso_code has no match in regions_df are dropped).
tdf = df.merge(regions_df[['alpha-3', 'region', 'image_url']], left_on='iso_code', right_on='alpha-3')
tdf.head()

In [None]:
# One row per country: the row with its highest people_vaccinated_per_hundred value.
ranked = tdf.sort_values('people_vaccinated_per_hundred', ascending=False)
continents_tdf = ranked.drop_duplicates(subset=['country'], keep='first', ignore_index=True)

continents_tdf.head()

In [None]:
continents = ['Europe', 'Africa', 'Asia', 'Americas']
# Continent whose trace is visible before any button is clicked.
default_continent = continents[0]

# Initialize figure
fig = go.Figure()

# One bar trace per continent: its five countries with the highest people_vaccinated_per_hundred.
for continent in continents:
    temp_df = continents_tdf.query(f'region == "{continent}"')
    temp_df = temp_df.sort_values('people_vaccinated_per_hundred', ascending=False).iloc[:5]

    # Add Traces
    fig.add_trace(
        go.Bar(
            y=temp_df['people_vaccinated_per_hundred'],
            x=temp_df['country'],
            name=continent,
            marker={'color': temp_df['people_vaccinated_per_hundred'], 'colorscale': 'tealgrn'},
            visible=(continent == default_continent),
        )
    )

# One button per continent: show only that continent's trace and retitle the figure.
# The visibility mask is sized from `continents`, so adding or removing a continent
# keeps one flag per trace.
buttons = []
for i, continent in enumerate(continents):
    visibility = [position == i for position in range(len(continents))]
    buttons.append(dict(
        label=continent,
        method="update",
        args=[{"visible": visibility},
              {"title": f"Top 5 countries for {continent}"}]
    ))

fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            active=0,
            x=0.57,
            y=1.2,
            buttons=buttons,
        )
    ])

# Initial title matches the trace that is visible on load (and the titles set by the buttons).
fig.update_layout(
    title_text=f"Top 5 countries for {default_continent}",
    xaxis_domain=[0.05, 1.0]
)

fig.show()

<a id='3.1'></a>
## <p style="background-color:skyblue; font-family:newtimeroman; font-size:140%; text-align:center; border-radius: 15px 50px;">3.1 World Map: vaccinations distribution</p>

In [None]:
# One row per country ordered by deaths ratio (highest first); countries without a ratio are dropped.
tdf = (
    world_summary_df
    .sort_values('total_deaths_ratio', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
    .dropna(subset=['total_deaths_ratio'])
)
# Preview as the cell's last expression so it is actually rendered
# (a bare .head() in the middle of a cell produces no output).
tdf.head()

In [None]:
# Thanks to https://www.kaggle.com/ivannatarov for the idea
fig = px.scatter_geo(
         tdf, # Passing the dataframe
         locations='country', # Select the column with the name of the countries
         color='continent',
         locationmode='country names', # We pass the parameter of determining the country on the map (by name)
         hover_name='country',  # Passing values for the signature on hover
         size='total_deaths_ratio' # Passing a column with values
)

fig.update_layout(
    # Set the name of the map
    title_text='Deaths ration by country <br><sub>Total number of deaths divided by the population</sub>',
    legend_orientation='h', # Place the legend caption under the chart
    legend_title_text='', # Remove the name of the legend group
    # Determine the map display settings (remove the frame, etc.)
    geo=dict(
       showframe=False,
       showcoastlines=False,
       projection_type='equirectangular'
    ),
    # Setting parameters for the text
    font=dict(
       family='TimesNewRoman',
       size=18, 
       color='black'
    )
)

fig.show()

In [None]:
# One row per country ordered by confirmed-cases ratio (highest first); countries without a ratio are dropped.
tdf = (
    world_summary_df
    .sort_values('total_confirmed_ratio', ascending=False)
    .drop_duplicates(subset=['country'], keep='first', ignore_index=True)
    .dropna(subset=['total_confirmed_ratio'])
)
# Preview as the cell's last expression so it is actually rendered
# (a bare .head() in the middle of a cell produces no output).
tdf.head()

In [None]:
# Thanks to https://www.kaggle.com/ivannatarov for the idea
fig = px.scatter_geo(
         tdf, # Passing the dataframe
         locations='country', # Select the column with the name of the countries
         color='continent',
         locationmode='country names', # We pass the parameter of determining the country on the map (by name)
         hover_name='country',  # Passing values for the signature on hover
         size='total_confirmed_ratio' # Passing a column with values
)

fig.update_layout(
    # Set the name of the map
    title_text='Confirmed cases ration by country <br><sub>Total number of confirmed cases divided by the population</sub>',
    legend_orientation='h', # Place the legend caption under the chart
    legend_title_text='', # Remove the name of the legend group
    # Determine the map display settings (remove the frame, etc.)
    geo=dict(
       showframe=False,
       showcoastlines=False,
       projection_type='equirectangular'
    ),
    # Setting parameters for the text
    font=dict(
       family='TimesNewRoman',
       size=18, 
       color='black'
    )
)

fig.show()

<a id='4'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:150%; text-align:center; border-radius: 15px 50px;">4. World Summary</p>

In [None]:
# Preview the per-country summary frame (including the ratio columns added earlier).
world_summary_df.head()

In [None]:
# Correlate every remaining column of the summary frame once the two label columns are removed.
corrdata = world_summary_df.drop(columns=['country', 'continent'])

## correlation
# Absolute correlations; the upper triangle (diagonal included) is masked out so each
# pair appears only once.
corr = corrdata.corr().abs()
# Built-in `bool` instead of the `np.bool` alias, which was removed in NumPy 1.24.
mask = np.triu(np.ones_like(corr, dtype=bool))
corr1 = corr.mask(mask)

fig = ff.create_annotated_heatmap(
    z=corr1.to_numpy().round(2),
    x=list(corr1.index.values),
    y=list(corr1.columns.values),
    xgap=3, ygap=3,
    zmin=0, zmax=1,
    colorscale='blugrn',
    colorbar_thickness=30,
    colorbar_ticklen=3,
)

fig.update_layout(
    title_text='<span style="font-size:32px; font-family:Times New Roman">Features Correlation Matrix</span>',
    font_family="Serif",
    # `title_font` is the current spelling of the deprecated `titlefont` attribute.
    title_font={'size': 24},
    width=800, height=700,
    xaxis={'side': 'bottom'},
    xaxis_showgrid=False,
    yaxis_showgrid=False,
    yaxis_autorange='reversed',
    paper_bgcolor=primary_bgcolor,
    plot_bgcolor=primary_bgcolor,
    margin=dict(l=70, r=70, t=70, b=70, pad=1),
)
fig.update_xaxes(
    ticklabelposition="outside bottom",
)

fig.show()

<a id='4.1'></a>
## <p style="background-color:skyblue; font-family:newtimeroman; font-size:140%; text-align:center; border-radius: 15px 50px;">4.1 Vaccination progress over time by continent</p>

In [None]:
# Rebuild full_df from `original_df` (the snapshot taken before the padding rows were added,
# whose dates are still 'YYYY-MM-DD' strings) and attach region / flag URL by ISO code.
regions_df = pd.read_csv('/kaggle/input/countries-iso-codes-continent-flags-url/countries_continents_codes_flags_url.csv')
full_df = original_df.merge(regions_df[['alpha-3', 'region', 'image_url']], left_on='iso_code', right_on='alpha-3')

full_df.head()

In [None]:
# Sum the three vaccination measures per (region, date), order by date, and keep the dates
# strictly between 2021-01-03 and 2021-04-27 (string comparison on 'YYYY-MM-DD' values).
summed_measures = {
    'people_vaccinated': 'sum',
    'daily_vaccinations': 'sum',
    'people_vaccinated_per_hundred': 'sum',
}
vaccination_progress_df = (
    full_df
    .groupby(['region', 'date'])
    .agg(summed_measures)
    .reset_index()
    .sort_values('date')
    .query('date > "2021-01-03" and date < "2021-04-27"')
)
vaccination_progress_df.head()

In [None]:
# One filled line per region showing people_vaccinated over time.
fig = go.Figure()
for region in vaccination_progress_df['region'].unique():
    region_rows = vaccination_progress_df.query(f'region == "{region}"')
    region_trace = go.Scatter(
        x=region_rows['date'],
        y=region_rows['people_vaccinated'],
        fill='tozeroy',
        mode='lines',
        name=region,
    )
    fig.add_trace(region_trace)

text_font = dict(family='Serif', size=18, color='black')
fig.update_layout(
    title_text='People vaccinated over time <br><sub>Total number of persons vaccinated between 03/01 and 27/04</sub>',
    font=text_font,
)
fig.show()

In [None]:
# Stacked area chart of people_vaccinated_per_hundred over time, one band per region.
fig = px.area(
    data_frame=vaccination_progress_df,
    x='date',
    y='people_vaccinated_per_hundred',
    color='region',
)
text_font = dict(family='Serif', size=18, color='black')
fig.update_layout(title_text='People vaccinated per hundred over time', font=text_font)
fig.show()

<a id='5'></a>
# <p style="background-color:skyblue; font-family:newtimeroman; font-size:150%; text-align:center; border-radius: 15px 50px;">5. Covid World Dataset</p>

In [None]:
# Fresh copy of the worldometer summary file (without the ratio columns added to
# world_summary_df). The absolute Kaggle path is used, as everywhere else in the notebook,
# so the read does not depend on the current working directory.
covid_cum = pd.read_csv('/kaggle/input/covid19-global-dataset/worldometer_coronavirus_summary_data.csv')
covid_cum.head()

In [None]:
# Horizontal bars of total deaths per 1M population for a hand-picked list of countries,
# ordered ascending by that rate.
countries = ['Spain', 'France', 'Germany', 'Turkey', 'UK', 'Italy']
is_selected = covid_cum['country'].isin(countries)
europe_mortality = covid_cum[is_selected].sort_values('total_deaths_per_1m_population')

mortality_bars = go.Bar(
    x=europe_mortality['total_deaths_per_1m_population'],
    y=europe_mortality['country'],
    orientation='h',
    marker={'color': europe_mortality['total_deaths_per_1m_population'], 'colorscale': 'tealgrn'},
)
fig = go.Figure(mortality_bars, layout=go.Layout(height=400, width=700))

# Dark outline and partial transparency for the bars.
fig.update_traces(
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)
# Last expression of the cell: update_layout returns the figure, so the notebook renders it.
fig.update_layout(
    title_text="<span style='font-size:30px; font-family:Serif'>Mortality rate - Some European countries</span>",
    xaxis_title='Total deaths per 1m population',
)

In [None]:
# Horizontal bars of total cases per 1M population for a hand-picked list of countries,
# ordered ascending by that rate.
countries= ['Spain', 'France', 'Germany', 'Turkey', 'UK', 'Italy']
europe_cases = covid_cum[covid_cum['country'].isin(countries)].sort_values('total_cases_per_1m_population')

fig = go.Figure(go.Bar(
    x=europe_cases['total_cases_per_1m_population'],
    y=europe_cases['country'],
    orientation='h',
    marker={'color': europe_cases['total_cases_per_1m_population'], 'colorscale': 'tealgrn'},
) ,layout=go.Layout(height=400, width=700))
# Customize aspect
fig.update_traces(
    marker_line_color='rgb(8,48,107)',
    marker_line_width=1.5,
    opacity=0.6,
)
# The title now names the plotted quantity (cases); it previously repeated the
# "Mortality rate" title of the deaths chart.
# Last expression of the cell: update_layout returns the figure, so the notebook renders it.
fig.update_layout(
    title_text="<span style='font-size:30px; font-family:Serif'>Confirmed cases rate - Some European countries</span>",
    xaxis_title='Total cases per 1m population',
)