In [168]:
# Install plotting library plotly Chart Studio
!pip install chart_studio

# Install cufflinks to connect plotly to pandas
!pip install cufflinks

# Install seaborn for styling matplotlib charts
!pip install seaborn

# Import everything we need 
import pandas as pd
import numpy as np
import chart_studio.plotly as py
import cufflinks as cf
import seaborn as sns
import plotly.express as px

# Import the extremely popular data science library called pandas
import pandas as pd

# Display plotting charts directly inside the notebook
%matplotlib inline

# Make Plotly work in your Jupyter Notebook
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

# Use Plotly locally instead of in the cloud 
cf.go_offline()


# Load the COVID dataset into a pandas DataFrame.
# The path is relative, so the CSV must be in the kernel's working directory.
df = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

# Leaving the frame as the cell's last expression renders a preview of it
df



Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259875,ZWE,Africa,Zimbabwe,2023-02-20,263642.0,0.0,79.857,5662.0,0.0,0.429,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
259876,ZWE,Africa,Zimbabwe,2023-02-21,263642.0,0.0,79.857,5662.0,0.0,0.429,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
259877,ZWE,Africa,Zimbabwe,2023-02-22,263921.0,279.0,39.857,5663.0,1.0,0.143,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,
259878,ZWE,Africa,Zimbabwe,2023-02-23,263921.0,,,5663.0,0.0,0.143,...,30.7,36.791,1.7,61.49,0.571,16320539.0,,,,


In [169]:
# Total confirmed deaths per location: sum the daily `new_deaths` within each
# (location, iso_code, continent) group. Grouping on all three keys preserves
# the ISO code and continent alongside the location.
#
# The column is selected *before* summing so that only `new_deaths` is
# aggregated. Calling .sum() on the whole group first would aggregate every
# column of df -- wasted work, and recent pandas versions no longer silently
# skip non-numeric columns when doing so.
#
# .to_frame(name=...) turns the resulting Series back into a DataFrame and
# names the column for what it now holds (a total rather than a daily count),
# so no separate in-place rename is needed.
df_country_deaths = (
    df.groupby(['location', 'iso_code', 'continent'])['new_deaths']
    .sum()
    .to_frame(name='total_confirmed_deaths')
)

In [170]:
# Rank the locations from the highest to the lowest total of confirmed deaths
df_country_deaths = df_country_deaths.sort_values('total_confirmed_deaths', ascending=False)

# Show the ranked table
df_country_deaths

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_confirmed_deaths
location,iso_code,continent,Unnamed: 3_level_1
United States,USA,North America,1120056.0
Brazil,BRA,South America,699054.0
India,IND,Asia,523391.0
Russia,RUS,Europe,388000.0
Mexico,MEX,North America,325918.0
...,...,...,...
Northern Cyprus,OWID_CYN,Asia,0.0
Niue,NIU,Oceania,0.0
England,OWID_ENG,Europe,0.0
Tuvalu,TUV,Oceania,0.0


In [171]:
# Reset the index so that `location` (together with `iso_code` and `continent`)
# is its own column and not an index level; the plotting cells refer to these
# columns by name.
#
# The guard keeps this cell safe to re-run: resetting an already-reset frame
# would add a spurious `index` column each time. Reassigning instead of using
# inplace=True also avoids mutating the existing object.
if 'location' in df_country_deaths.index.names:
    df_country_deaths = df_country_deaths.reset_index()

In [172]:
# Bar chart of the 25 locations with the most confirmed deaths, coloured by continent.
fig = px.bar(
    df_country_deaths.head(25), # first 25 rows; relies on the frame already being sorted by deaths, descending
    y='total_confirmed_deaths', # Use deaths for the y axis
    x='location', # Use the location for the x axis
    text='total_confirmed_deaths', # Display total confirmed deaths on each bar
    color='continent', # Apply the same color for bars that belong to the same continent
    title='Total confirmed COVID-19 deaths: top 25 locations',
    labels={
        'location': 'Location',
        'total_confirmed_deaths': 'Total confirmed deaths',
        'continent': 'Continent',
    },
)
# Put the bar total above each bar, formatted with 2 significant digits
# ('.2s' is the d3 SI-prefix format, e.g. 1.1M)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Minimum label font size of 8; uniformtext_mode='hide' hides a label that
# won't fit at that size. minsize has no effect unless a mode is set.
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')

# Rotate labels 45 degrees (gives the country labels a sloped angle)
fig.update_layout(xaxis_tickangle=-45)
fig

In [175]:
# Bar chart of the 25 locations with the most confirmed deaths, one colour per location.
fig = px.bar(
    df_country_deaths.head(25), # first 25 rows; relies on the frame already being sorted by deaths, descending
    y='total_confirmed_deaths', # Use deaths for the y axis
    x='location', # Use the location for the x axis
    text='total_confirmed_deaths', # Display total confirmed deaths on each bar
    color='location', # Give every location (bar) its own color
    title='Total confirmed COVID-19 deaths: top 25 locations',
    labels={
        'location': 'Location',
        'total_confirmed_deaths': 'Total confirmed deaths',
    },
)
# Put the bar total above each bar, formatted with 2 significant digits
# ('.2s' is the d3 SI-prefix format, e.g. 1.1M)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')

# Minimum label font size of 8; uniformtext_mode='hide' hides a label that
# won't fit at that size. minsize has no effect unless a mode is set.
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')

# Rotate labels 45 degrees (gives the country labels a sloped angle)
fig.update_layout(xaxis_tickangle=-45)
fig

In [176]:
# Globe view: one bubble per location, sized by its total confirmed deaths.
# Every row of df_country_deaths is plotted here, not just the top 25.
fig = px.scatter_geo(
    data_frame=df_country_deaths,
    projection="orthographic", # projection that renders the map as a 3D-looking globe
    locations="iso_code", # column holding the ISO code used to place each marker
    hover_name="location", # column shown as the title of the hover tooltip
    size="total_confirmed_deaths", # column that drives the size of the circle markers
    color="continent", # column that drives the marker color
)
fig