## Covid-19 Data Analysis

Dataset from - https://github.com/owid/covid-19-data/tree/master/public/data

1. Understanding Our Data
2. Covid 19 World Analysis - Most Recent Info
3. World Covid-19 Growth Over Time
4. Confirmed Cases in Each Country
5. Mapping Covid Around the World
6. Evolution of Confirmed Cases PM
7. Comparing Continents & Countries
8. Preparing Dataset for Flourish Bar Chart Race
9. Flourish - Making a Covid Bar Chart Race

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import seaborn as sns

## because we're pulling from the url, the data will always be up to date
url='https://covid.ourworldindata.org/data/owid-covid-data.csv'
df = pd.read_csv(url)
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


In [2]:
df.info

<bound method DataFrame.info of        iso_code continent  ... human_development_index excess_mortality
0           AFG      Asia  ...                   0.511              NaN
1           AFG      Asia  ...                   0.511              NaN
2           AFG      Asia  ...                   0.511              NaN
3           AFG      Asia  ...                   0.511              NaN
4           AFG      Asia  ...                   0.511              NaN
...         ...       ...  ...                     ...              ...
109513      ZWE    Africa  ...                   0.571              NaN
109514      ZWE    Africa  ...                   0.571              NaN
109515      ZWE    Africa  ...                   0.571              NaN
109516      ZWE    Africa  ...                   0.571              NaN
109517      ZWE    Africa  ...                   0.571              NaN

[109518 rows x 62 columns]>

In [3]:
df[df['location'] == "Canada"]

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
17676,CAN,North America,Canada,2020-01-26,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.78,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,4.03
17677,CAN,North America,Canada,2020-01-27,1.0,0.0,,,,,0.026,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.78,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
17678,CAN,North America,Canada,2020-01-28,2.0,1.0,,,,,0.053,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.78,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
17679,CAN,North America,Canada,2020-01-29,2.0,0.0,,,,,0.053,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.78,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
17680,CAN,North America,Canada,2020-01-30,2.0,0.0,,,,,0.053,0.000,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.78,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18238,CAN,North America,Canada,2021-08-10,1451040.0,1334.0,1395.000,26635.0,4.0,9.429,38446.133,35.345,36.961,705.710,0.106,0.250,1.53,207.0,5.485,512.0,13.566,,,,,45617.0,39070051.0,1035.183,1.209,59680.0,1.581,0.023,42.8,tests performed,50850367.0,27259827.0,23590540.0,,137421.0,155420.0,134.73,72.23,62.50,,4118.0,60.65,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
18239,CAN,North America,Canada,2021-08-11,1453000.0,1960.0,1535.000,26641.0,6.0,8.429,38498.065,51.931,40.671,705.869,0.159,0.223,,219.0,5.803,530.0,14.043,,,,,55134.0,39125185.0,1036.644,1.461,58793.0,1.558,0.026,38.3,tests performed,50988809.0,27288715.0,23700094.0,,138442.0,151146.0,135.10,72.30,62.79,,4005.0,60.65,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
18240,CAN,North America,Canada,2021-08-12,1455141.0,2141.0,1623.286,26647.0,6.0,6.571,38554.792,56.727,43.010,706.027,0.159,0.174,,233.0,6.173,571.0,15.129,,,,,73972.0,39199157.0,1038.604,1.960,61612.0,1.632,0.026,38.0,tests performed,51142544.0,27321586.0,23820958.0,,153735.0,145900.0,135.51,72.39,63.11,,3866.0,60.65,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
18241,CAN,North America,Canada,2021-08-13,1457458.0,2317.0,1732.571,26654.0,7.0,6.143,38616.182,61.390,45.905,706.213,0.185,0.163,,235.0,6.226,583.0,15.447,,,,,,,,,,,,,,51290984.0,27356765.0,23934219.0,,148440.0,141949.0,135.90,72.48,63.42,,3761.0,60.65,37742157.0,4.037,41.4,16.984,10.797,44017.591,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,


### Summary of the data:

In [4]:
## We'll use list comprehension to perform this 
## Since there we have quite a few null values, we'll use .isna() to find these values
## Then use the .mean function to get the average
## occurence and then multiply by 100 to convert to a percentage

NAN = [(c, df[c].isna().mean()*100) for c in df]

## Name the columns appropriately

NAN = pd.DataFrame(NAN, columns=["column_name", "percentage"])
NAN

Unnamed: 0,column_name,percentage
0,iso_code,0.000000
1,continent,4.624811
2,location,0.000000
3,date,0.000000
4,total_cases,4.285140
...,...,...
57,handwashing_facilities,55.187275
58,hospital_beds_per_thousand,18.823390
59,life_expectancy,5.092314
60,human_development_index,10.692306


In [5]:
# let's get an idea of how many countries we have in the dataset
print(df['location'].unique())
print(len(df['location'].unique()))

['Afghanistan' 'Africa' 'Albania' 'Algeria' 'Andorra' 'Angola' 'Anguilla'
 'Antigua and Barbuda' 'Argentina' 'Armenia' 'Aruba' 'Asia' 'Australia'
 'Austria' 'Azerbaijan' 'Bahamas' 'Bahrain' 'Bangladesh' 'Barbados'
 'Belarus' 'Belgium' 'Belize' 'Benin' 'Bermuda' 'Bhutan' 'Bolivia'
 'Bonaire Sint Eustatius and Saba' 'Bosnia and Herzegovina' 'Botswana'
 'Brazil' 'British Virgin Islands' 'Brunei' 'Bulgaria' 'Burkina Faso'
 'Burundi' 'Cambodia' 'Cameroon' 'Canada' 'Cape Verde' 'Cayman Islands'
 'Central African Republic' 'Chad' 'Chile' 'China' 'Colombia' 'Comoros'
 'Congo' 'Cook Islands' 'Costa Rica' "Cote d'Ivoire" 'Croatia' 'Cuba'
 'Curacao' 'Cyprus' 'Czechia' 'Democratic Republic of Congo' 'Denmark'
 'Djibouti' 'Dominica' 'Dominican Republic' 'Ecuador' 'Egypt'
 'El Salvador' 'Equatorial Guinea' 'Eritrea' 'Estonia' 'Eswatini'
 'Ethiopia' 'Europe' 'European Union' 'Faeroe Islands' 'Falkland Islands'
 'Fiji' 'Finland' 'France' 'French Polynesia' 'Gabon' 'Gambia' 'Georgia'
 'Germany' 'Ghana'

232 countries but we see that there is "World" and "International"

- let's delete world summary figures
- we should also delete the continents as well

In [6]:
df.drop(df[df.location == "Africa"].index, inplace=True)
df.drop(df[df.location == "Europe"].index, inplace=True)
df.drop(df[df.location == "North America"].index, inplace=True)
df.drop(df[df.location == "South America"].index, inplace=True)
df.drop(df[df.location == "European Union"].index, inplace=True)
df.drop(df[df.location == "World"].index, inplace=True)
df.drop(df[df.location == "Asia"].index, inplace=True)

In [7]:

# getting the most recent date (max gives you the latest date)
df_most_recent = df[df['date'] == max(df['date'])].reset_index()

df_most_recent.head()

Unnamed: 0,index,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,537,AFG,Asia,Afghanistan,2021-08-14,151770.0,0.0,280.0,7000.0,0.0,17.286,3898.702,0.0,7.193,179.818,0.0,0.444,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,1623,ALB,Europe,Albania,2021-08-14,135550.0,410.0,260.0,2463.0,2.0,0.571,47101.953,142.47,90.347,855.862,0.695,0.199,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2877800.0,104.871,38.0,13.188,8.643,11803.431,1.1,304.195,10.08,7.1,51.2,,2.89,78.57,0.795,
2,2160,DZA,Africa,Algeria,2021-08-14,186655.0,753.0,899.857,4766.0,36.0,35.143,4256.569,17.172,20.521,108.686,0.821,0.801,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,43851043.0,17.348,29.1,6.211,3.857,13913.839,0.5,278.364,6.73,0.7,30.4,83.741,1.9,76.88,0.748,
3,2691,AND,Europe,Andorra,2021-08-14,14924.0,0.0,12.571,129.0,0.0,0.143,193153.433,0.0,162.705,1669.579,0.0,1.849,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,77265.0,163.755,,,,,,109.135,7.97,29.0,37.8,,,83.73,0.868,
4,3204,AGO,Africa,Angola,2021-08-14,44534.0,206.0,134.571,1086.0,4.0,6.0,1355.006,6.268,4.095,33.043,0.122,0.183,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,32866268.0,23.89,16.8,2.405,1.362,5819.495,,276.045,3.94,,,26.664,,61.15,0.581,


### Create summary table

In [8]:
df_world = df_most_recent.groupby(["date"])[["total_cases","new_cases","new_deaths","total_deaths"]].sum().reset_index()

# Create Column headers/labels
labels = ["Last Update", "Total Confirmed","New Cases","New Deaths","Total Deaths"]

fig = go.Figure(data=[go.Table(header = dict(values=labels),
                 cells=dict(values=df_world.loc[0,["date","total_cases","new_cases","new_deaths","total_deaths"]]))
                     ])
fig.update_layout(title='Covid-19 World Summary: ',)
fig.show()

20,868 people died yesterday, August 12, 2021 from covid-19. 

In total 9.4 million people have died from covid.

## 3. World Covid-19 Growth Over Time

In [9]:
df_over_time = df.groupby(["date"])[["total_cases", "new_cases", "total_deaths", "new_deaths"]].sum().reset_index().sort_values("date", ascending=True).reset_index(drop=True)

In [10]:
df_over_time

Unnamed: 0,date,total_cases,new_cases,total_deaths,new_deaths
0,2020-01-01,0.0,0.0,0.0,0.0
1,2020-01-02,0.0,0.0,0.0,0.0
2,2020-01-03,0.0,0.0,0.0,0.0
3,2020-01-04,0.0,0.0,0.0,0.0
4,2020-01-05,0.0,0.0,0.0,0.0
...,...,...,...,...,...
587,2021-08-10,204117732.0,638009.0,4316018.0,11578.0
588,2021-08-11,204825644.0,707912.0,4325989.0,9971.0
589,2021-08-12,205511337.0,685693.0,4336284.0,10295.0
590,2021-08-13,206352879.0,841542.0,4347559.0,11275.0


## Confirmed Cases starting from day Zero (first covid case reported)

In [11]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_over_time.index, y=df_over_time['total_cases'],
                         mode='lines',
                         name="Confirmed cases"))

fig.update_layout(
    title="Evolution of Confirmed Covid-19 cases over time in the world",
    template="plotly_white",
    yaxis_title="Confirmed Cases",
    xaxis_title="Days",
)

fig.show()

Using Date axis instead of Days

In [12]:
fig = go.Figure(go.Bar(
    x=df_over_time['date'],
    y = df_over_time['total_cases'],
))

fig.update_layout(
    title="Confirmed Cases in Each Day",
    template='plotly_white',
    xaxis_title = "Confirmed Cases",
    yaxis_title="Days",
)

fig.show()

In [13]:
# let's see another layout with plotly just for fun.
# also need practice writing these plotly commands. repetition is key 

fig = go.Figure()

fig.add_trace(go.Scatter(x=df_over_time.index, y=df_over_time['total_deaths'],
                         mode='lines',marker_color='red',
                         name='Total Deaths',line=dict(dash='dot')))

fig.update_layout(
    title="Evolution of Covid-19 Deaths vs Confirmed Cases Over Time in the World",
    template="plotly_dark",
    yaxis_title="Number of Deaths",
    xaxis_title="Days",
)

fig.show()

Let's check ouf the new cases over time now

In [14]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=df_over_time.date, y=df_over_time['new_cases'],
                         mode='lines', marker_color='green',
                         name="New Covid Cases Daily", line=dict(dash="dot")))

fig.update_layout(
    title="Evolution of Covid-19 New Cases Over Time in the World",
    template="plotly_dark",
    yaxis_title="New Cases",
    xaxis_title="Days",
)

fig.show()

In [15]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=df_over_time.date, y=df_over_time['new_deaths'],
                    mode='lines',marker_color='red',
                    name='New Case Daily',line=dict( dash='dot')))

fig.update_layout(
    title='Evolution of Covid-19 New Deaths over time in the world',
        template='plotly_dark',
      yaxis_title="New Deaths",
    xaxis_title="Days",

)

fig.show()

## 4. Confirmed Cases in Each Country

In [16]:
df_per_country = df.groupby(['location'])['new_cases', 'new_deaths'].sum().reset_index().sort_values("new_cases", ascending=False).reset_index(drop=True)

df_per_country.head(10)


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,location,new_cases,new_deaths
0,United States,36636852.0,621226.0
1,India,32192576.0,431225.0
2,Brazil,20350142.0,568788.0
3,France,6819929.0,112705.0
4,Russia,6490421.0,166795.0
5,United Kingdom,6270673.0,131210.0
6,Turkey,5236551.0,53005.0
7,Argentina,5080908.0,108936.0
8,Colombia,4864629.0,123356.0
9,Spain,4693540.0,82470.0


In [17]:
# Let's make column names better and get rid of decimal points

In [18]:
df_per_country.columns = ['Country', 'Total Cases', 'Total Deaths']
df_per_country = df_per_country.astype({"Country": str, "Total Cases": int, "Total Deaths": int})
df_per_country.head(5)

Unnamed: 0,Country,Total Cases,Total Deaths
0,United States,36636852,621226
1,India,32192576,431225
2,Brazil,20350142,568788
3,France,6819929,112705
4,Russia,6490421,166795


### Let's display this as a barchart to see things in perspective

In [19]:
fig = go.Figure(go.Bar(
            x=df_per_country['Total Cases'],
            y=df_per_country['Country'],
            orientation='h'))
fig.update_layout(
    title='Confirmed Cases In Each Country',
    template='plotly_white',
     xaxis_title="Confirmed Cases",
    yaxis_title="Countries",
)
fig.show()

plotly cleans the graph nicely. Unfortuantely we can't specify which countries we want to see, but they selected the country names at random.

In [20]:
fig = go.Figure(go.Bar(
            x=df_per_country['Total Deaths'],
            y=df_per_country['Country'],
            orientation='h',
            marker_color='black'))
fig.update_layout(
    title='Deaths in Each Country',
    template='plotly_white',
     xaxis_title="Deaths",
    yaxis_title="Countries",
)
fig.show()

## 5. Mapping Covid-19 Across the World

Let's look at total case numbers on a world map

In [21]:
fig = px.choropleth(df_per_country, locations=df_per_country["Country"],
                    color=df_per_country['Total Cases'], locationmode='country names',
                    hover_name = df_per_country["Country"],
                    color_continuous_scale = px.colors.sequential.OrRd, template='plotly_dark',
                    ) 

fig.update_layout(
    title='Confirmed Cases in Each Country',
)
fig.show()

Let's adjust for population sizes as the above doesn't look very representative

In [22]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


We have total casese pr million as a column. Let's use that

In [23]:
# group by country and get the total number of cases ad deaths per million

df_per_country_pm = df.groupby(['location'])['new_cases_per_million','new_deaths_per_million'].sum().reset_index().sort_values("new_cases_per_million", ascending=False).reset_index(drop=True)
df_per_country_pm.columns = ['Country', 'Total Cases PM', 'Total Deaths PM']
df_per_country_pm = df_per_country_pm.astype({"Country": str, "Total Cases PM": int, "Total Deaths PM": int})
df_per_country_pm.head()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,Country,Total Cases PM,Total Deaths PM
0,Andorra,193153,1669
1,Seychelles,192139,996
2,Montenegro,168375,2614
3,Bahrain,159155,813
4,Czechia,156511,2836


In [24]:
# let's use this df to do the same map as before
fig = px.choropleth(df_per_country_pm, locations=df_per_country_pm["Country"],
                    color=df_per_country_pm['Total Cases PM'], locationmode='country names',
                    hover_name = df_per_country_pm["Country"],
                    color_continuous_scale = px.colors.sequential.Viridis, template='plotly_dark',
                    ) 

fig.update_layout(
    title='Confirmed Cases Per Million in Each Country',
)
fig.show()

Let's look at the top 10 countries per million on a Scatter Plot

In [25]:
fig = go.Figure(data=[go.Scatter(
    x=df_per_country_pm['Country'][0:10],
    y=df_per_country_pm["Total Cases PM"][0:10],
    mode='markers',
    marker=dict(
        color=100+np.random.randn(1500),
        size=(df_per_country_pm['Total Cases PM'][0:10]/1500), # Change 1500 to larger as data evolves
        showscale=True
    )
)])

fig.update_layout(
    title="Most Infected Countries PM",
    xaxis_title="Countries",
    yaxis_title="Total Cases PM",
    template='plotly_white'
)

fig.show()

In [26]:
# Sort in descending order

df_per_country_pm = df_per_country_pm.sort_values(by="Total Deaths PM", ascending=False)

fig = go.Figure(data=[go.Scatter(
    x=df_per_country_pm['Country'][0:10],
    y=df_per_country_pm['Total Deaths PM'][0:10],
    mode='markers',
    marker=dict(
        color=df_per_country_pm['Total Cases PM'],
        size=(df_per_country_pm['Total Deaths PM'][0:10]/100), # change number to larger as data evolves
        showscale=True
    )
)])

fig.update_layout(
    title="Most Affected Countries by Deaths PM",
    xaxis_title="Countries",
    yaxis_title="Total Deaths PM",
    template='plotly_white',
)

fig.show()

## 6. Evolution of Confirmed Cases PM

In [27]:
df.tail()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
109513,ZWE,Africa,Zimbabwe,2021-08-10,117258.0,405.0,689.0,3950.0,31.0,39.143,7889.294,27.249,46.357,265.762,2.086,2.634,0.7,,,,,,,,,2631.0,1025127.0,68.972,0.177,5909.0,0.398,0.117,8.6,tests performed,,,,,,53633.0,,,,,3609.0,,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
109514,ZWE,Africa,Zimbabwe,2021-08-11,117954.0,696.0,632.571,3991.0,41.0,40.0,7936.122,46.828,42.56,268.52,2.759,2.691,,,,,,,,,,6182.0,1031309.0,69.388,0.416,5524.0,0.372,0.115,8.7,tests performed,3017341.0,1932929.0,1084412.0,,,59050.0,20.3,13.01,7.3,,3973.0,,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
109515,ZWE,Africa,Zimbabwe,2021-08-12,118754.0,800.0,609.286,4047.0,56.0,41.857,7989.947,53.825,40.994,272.288,3.768,2.816,,,,,,,,,,,,,,,,,,,3193256.0,2013049.0,1180207.0,,175915.0,58465.0,21.48,13.54,7.94,,3934.0,,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
109516,ZWE,Africa,Zimbabwe,2021-08-13,119508.0,754.0,580.429,4073.0,26.0,38.286,8040.677,50.73,39.052,274.038,1.749,2.576,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
109517,ZWE,Africa,Zimbabwe,2021-08-14,119853.0,345.0,566.143,4090.0,17.0,37.714,8063.889,23.212,38.091,275.181,1.144,2.537,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14862927.0,42.729,19.6,2.822,1.882,1899.775,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,


In [28]:
fig = px.choropleth(df, locations = df['location'],
                    color = df['total_cases'], locationmode='country names',
                    hover_name = df['location'],
                    color_continuous_scale = px.colors.sequential.Inferno,
                    animation_frame = 'date')

fig.update_layout(
    title="Evolution of Confirmed Cases in each Country",
    template = "plotly_dark"
)

fig.show()

In [29]:
fig = px.choropleth(df, locations = df['location'],
                    color = df['total_cases_per_million'],locationmode='country names', 
                    hover_name = df['location'], 
                    color_continuous_scale = px.colors.sequential.GnBu,
                    animation_frame="date")
fig.update_layout(

    title='Evolution of total cases per million in each country',
    template='plotly_white'
)
fig.show()

## 7. Comparing Continents and Countries

In [30]:
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


In [31]:
df_per_country_pm = df.groupby(['continent'])['new_cases_per_million', 'new_deaths_per_million'].sum().reset_index().sort_values('new_cases_per_million', ascending=False).reset_index(drop=True)
df_per_country_pm.columns = ['Country', 'Total Cases PM', 'Total Deaths PM']
df_per_country_pm = df_per_country_pm.astype({"Country": str, "Total Cases PM": int, "Total Deaths PM": int})
df_per_country_pm.head()


Indexing with multiple keys (implicitly converted to a tuple of keys) will be deprecated, use a list instead.



Unnamed: 0,Country,Total Cases PM,Total Deaths PM
0,Europe,3843924,68577
1,Asia,1703616,17455
2,South America,783455,24170
3,North America,744063,14052
4,Africa,702649,11371


First let's compare USA vs the Rest of the World

In [32]:
df_usa = df[df['location'] == "United States"]
df_usa.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
103784,USA,North America,United States,2020-01-22,1.0,,,,,,0.003,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,331002647.0,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
103785,USA,North America,United States,2020-01-23,1.0,0.0,,,,,0.003,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,331002647.0,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
103786,USA,North America,United States,2020-01-24,2.0,1.0,,,,,0.006,0.003,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,331002647.0,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
103787,USA,North America,United States,2020-01-25,2.0,0.0,,,,,0.006,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,331002647.0,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,
103788,USA,North America,United States,2020-01-26,5.0,3.0,,,,,0.015,0.009,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,331002647.0,35.608,38.3,15.413,9.732,54225.446,1.2,151.089,10.79,19.1,24.6,,2.77,78.86,0.926,0.75


In [33]:
url='https://covid.ourworldindata.org/data/owid-covid-data.csv'
df = pd.read_csv(url)

df_world = df[df['location'] == "World"]
df_world.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
107427,OWID_WRL,,World,2020-01-22,557.0,0.0,,17.0,0.0,,0.071,0.0,,0.002,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7794799000.0,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,
107428,OWID_WRL,,World,2020-01-23,655.0,98.0,,18.0,1.0,,0.084,0.013,,0.002,0.0,,3.11,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7794799000.0,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,
107429,OWID_WRL,,World,2020-01-24,941.0,286.0,,26.0,8.0,,0.121,0.037,,0.003,0.001,,3.28,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7794799000.0,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,
107430,OWID_WRL,,World,2020-01-25,1433.0,492.0,,42.0,16.0,,0.184,0.063,,0.005,0.002,,3.44,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7794799000.0,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,
107431,OWID_WRL,,World,2020-01-26,2118.0,685.0,,56.0,14.0,,0.272,0.088,,0.007,0.002,,3.51,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,7794799000.0,58.045,30.9,8.696,5.355,15469.207,10.0,233.07,8.51,6.434,34.635,60.13,2.705,72.58,0.737,


In [34]:
df_india = df[df['location'] == "India"]
df_india.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
45250,IND,Asia,India,2020-01-30,1.0,1.0,,,,,0.001,0.001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.19,1380004000.0,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,59.55,0.53,69.66,0.645,
45251,IND,Asia,India,2020-01-31,1.0,0.0,,,,,0.001,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.19,1380004000.0,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,59.55,0.53,69.66,0.645,
45252,IND,Asia,India,2020-02-01,1.0,0.0,,,,,0.001,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.19,1380004000.0,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,59.55,0.53,69.66,0.645,
45253,IND,Asia,India,2020-02-02,2.0,1.0,,,,,0.001,0.001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.19,1380004000.0,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,59.55,0.53,69.66,0.645,
45254,IND,Asia,India,2020-02-03,3.0,1.0,,,,,0.002,0.001,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,10.19,1380004000.0,450.419,28.2,5.989,3.414,6426.674,21.2,282.28,10.39,1.9,20.6,59.55,0.53,69.66,0.645,


In [35]:
fig = go.Figure()

fig.add_trace(go.Scatter(x = df_usa['date'], y= df_usa['total_cases'],
                         mode='lines',
                         name='Confirmed Cases in US'))

fig.add_trace(go.Scatter(x = df_india['date'], y= df_india['total_cases'],
                         mode='lines',
                         name='Confirmed Cases in India'))

fig.add_trace(go.Scatter(x = df_world['date'], y= df_world['total_cases'],
                         mode='lines',
                         name='Confirmed Cases in the rest of the World'))

fig.update_layout(
    title='Evolution of Confirmed cases over time in the US and the Rest of the World',
    template='plotly_white'
)

fig.show()

Let's compare deaths in the US vs the Rest of the World

In [36]:
fig = go.Figure()

fig.add_trace(go.Scatter(x=df_usa['date'], y = df_usa['total_deaths'],
                         mode='lines',
                         name='Confirmed Deaths in the US'))

fig.add_trace(go.Scatter(x=df_world['date'], y = df_world['total_deaths'],
                         mode='lines',
                         name='Confirmed Deaths in the rest of the world'))

fig.update_layout(
    title='Evolution of Confirmed Deaths over time in US and The Rest Of The Word',
        template='plotly_white'

)

fig.show()


# Let's create a dataset we can import into Flourish

In [57]:
url='https://covid.ourworldindata.org/data/owid-covid-data.csv'
df = pd.read_csv(url)
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,2020-02-24,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,2020-02-25,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,2020-02-26,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
3,AFG,Asia,Afghanistan,2020-02-27,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
4,AFG,Asia,Afghanistan,2020-02-28,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


We'll have to wrangle the data bit to meet the flourish requirements

- country
- image URL (country)
- dates

In [58]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 109518 entries, 0 to 109517
Data columns (total 62 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   iso_code                               109518 non-null  object 
 1   continent                              104453 non-null  object 
 2   location                               109518 non-null  object 
 3   date                                   109518 non-null  object 
 4   total_cases                            104825 non-null  float64
 5   new_cases                              104822 non-null  float64
 6   new_cases_smoothed                     103812 non-null  float64
 7   total_deaths                           94384 non-null   float64
 8   new_deaths                             94539 non-null   float64
 9   new_deaths_smoothed                    103812 non-null  float64
 10  total_cases_per_million                104270 non-null  

In [59]:
# let's start with changing the date format from object to datetime

df['date'] = pd.to_datetime(df.date)

# change format 
df['date'] = df['date'].dt.strftime('%y/%m/%d')
df.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,20/02/24,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,20/02/25,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,20/02/26,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
3,AFG,Asia,Afghanistan,20/02/27,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
4,AFG,Asia,Afghanistan,20/02/28,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


In [60]:
# Let's set the date as the index
df = df.set_index('date')
df.head()

Unnamed: 0_level_0,iso_code,continent,location,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,total_cases_per_million,new_cases_per_million,new_cases_smoothed_per_million,total_deaths_per_million,new_deaths_per_million,new_deaths_smoothed_per_million,reproduction_rate,icu_patients,icu_patients_per_million,hosp_patients,hosp_patients_per_million,weekly_icu_admissions,weekly_icu_admissions_per_million,weekly_hosp_admissions,weekly_hosp_admissions_per_million,new_tests,total_tests,total_tests_per_thousand,new_tests_per_thousand,new_tests_smoothed,new_tests_smoothed_per_thousand,positive_rate,tests_per_case,tests_units,total_vaccinations,people_vaccinated,people_fully_vaccinated,total_boosters,new_vaccinations,new_vaccinations_smoothed,total_vaccinations_per_hundred,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,new_vaccinations_smoothed_per_million,stringency_index,population,population_density,median_age,aged_65_older,aged_70_older,gdp_per_capita,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1
20/02/24,AFG,Asia,Afghanistan,1.0,1.0,,,,,0.026,0.026,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
20/02/25,AFG,Asia,Afghanistan,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
20/02/26,AFG,Asia,Afghanistan,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
20/02/27,AFG,Asia,Afghanistan,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,
20/02/28,AFG,Asia,Afghanistan,1.0,0.0,,,,,0.026,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,8.33,38928341.0,54.422,18.6,2.581,1.337,1803.987,,597.029,9.59,,,37.746,0.5,64.83,0.511,


In [61]:
df_flourish = pd.pivot_table(df,values='total_cases',
                             index=['location'],
                             columns='date').reset_index()
df_flourish = df_flourish.fillna(0)
df_flourish.head()

date,location,20/01/22,20/01/23,20/01/24,20/01/25,20/01/26,20/01/27,20/01/28,20/01/29,20/01/30,20/01/31,20/02/01,20/02/02,20/02/03,20/02/04,20/02/05,20/02/06,20/02/07,20/02/08,20/02/09,20/02/10,20/02/11,20/02/12,20/02/13,20/02/14,20/02/15,20/02/16,20/02/17,20/02/18,20/02/19,20/02/20,20/02/21,20/02/22,20/02/23,20/02/24,20/02/25,20/02/26,20/02/27,20/02/28,20/02/29,...,21/07/06,21/07/07,21/07/08,21/07/09,21/07/10,21/07/11,21/07/12,21/07/13,21/07/14,21/07/15,21/07/16,21/07/17,21/07/18,21/07/19,21/07/20,21/07/21,21/07/22,21/07/23,21/07/24,21/07/25,21/07/26,21/07/27,21/07/28,21/07/29,21/07/30,21/07/31,21/08/01,21/08/02,21/08/03,21/08/04,21/08/05,21/08/06,21/08/07,21/08/08,21/08/09,21/08/10,21/08/11,21/08/12,21/08/13,21/08/14
0,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,127464.0,129021.0,130113.0,131586.0,131586.0,131586.0,134653.0,134653.0,136643.0,137853.0,137853.0,137853.0,137853.0,141489.0,142414.0,142414.0,143183.0,143439.0,143439.0,143439.0,144285.0,145008.0,145552.0,145996.0,146523.0,146523.0,146523.0,147985.0,148572.0,148933.0,149361.0,149810.0,149810.0,149810.0,150778.0,151013.0,151291.0,151563.0,151770.0,151770.0
1,Africa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,3.0,3.0,...,5730967.0,5775360.0,5822819.0,5869400.0,5914774.0,5950013.0,5984845.0,6023213.0,6067392.0,6115896.0,6157430.0,6196245.0,6232371.0,6262767.0,6296294.0,6329548.0,6368317.0,6399752.0,6435060.0,6467841.0,6502260.0,6533610.0,6582266.0,6630064.0,6673032.0,6708893.0,6743069.0,6777745.0,6810335.0,6855440.0,6903374.0,6945369.0,6981688.0,7015331.0,7048926.0,7083510.0,7117990.0,7164329.0,7204781.0,7238561.0
2,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,132544.0,132557.0,132565.0,132580.0,132587.0,132592.0,132597.0,132608.0,132616.0,132629.0,132647.0,132665.0,132686.0,132697.0,132740.0,132763.0,132797.0,132828.0,132853.0,132875.0,132891.0,132922.0,132952.0,132999.0,133036.0,133081.0,133121.0,133146.0,133211.0,133310.0,133442.0,133591.0,133730.0,133912.0,133981.0,134201.0,134487.0,134761.0,135140.0,135550.0
3,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,142447.0,143032.0,143652.0,144483.0,145296.0,146064.0,146942.0,147883.0,148797.0,149906.0,151103.0,152210.0,153309.0,154486.0,155784.0,157005.0,158213.0,159563.0,160868.0,162155.0,163660.0,165204.0,167131.0,168668.0,170189.0,171392.0,172564.0,173922.0,175229.0,176724.0,178013.0,179216.0,180356.0,181376.0,182368.0,183347.0,184191.0,185042.0,185902.0,186655.0
4,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,13991.0,14021.0,14050.0,14075.0,14075.0,14075.0,14155.0,14167.0,14167.0,14239.0,14273.0,14273.0,14273.0,14359.0,14379.0,14379.0,14464.0,14498.0,14498.0,14498.0,14577.0,14586.0,14586.0,14655.0,14678.0,14678.0,14678.0,14747.0,14766.0,14797.0,14809.0,14836.0,14836.0,14836.0,14836.0,14873.0,14891.0,14908.0,14924.0,14924.0


Let's use the flags in our plot for countries

In [62]:
file_name = "https://raw.githubusercontent.com/rajeevratan84/covid/master/flags_world.csv"
flags = pd.read_csv(file_name)
flags

Unnamed: 0,country,Image URL
0,Afghanistan,https://upload.wikimedia.org/wikipedia/commons...
1,Angola,https://cdn.countryflags.com/thumbs/angola/fla...
2,Albania,https://cdn.countryflags.com/thumbs/albania/fl...
3,Andorra,https://cdn.countryflags.com/thumbs/andorra/fl...
4,Argentina,https://cdn.countryflags.com/thumbs/argentina/...
...,...,...
203,Kosovo,https://cdn.countryflags.com/thumbs/kosovo/fla...
204,"Yemen, Rep.",https://flagpedia.net/data/flags/w580/ye.png
205,South Africa,https://cdn.countryflags.com/thumbs/south-afri...
206,Zambia,https://cdn.countryflags.com/thumbs/zambia/fla...


## Merging our data with the flag URL dataset

In [63]:
# merge flag onto flourish df
df_flourish = pd.merge(df_flourish, flags, left_on='location', right_on='country')

df_flourish.head()

Unnamed: 0,location,20/01/22,20/01/23,20/01/24,20/01/25,20/01/26,20/01/27,20/01/28,20/01/29,20/01/30,20/01/31,20/02/01,20/02/02,20/02/03,20/02/04,20/02/05,20/02/06,20/02/07,20/02/08,20/02/09,20/02/10,20/02/11,20/02/12,20/02/13,20/02/14,20/02/15,20/02/16,20/02/17,20/02/18,20/02/19,20/02/20,20/02/21,20/02/22,20/02/23,20/02/24,20/02/25,20/02/26,20/02/27,20/02/28,20/02/29,...,21/07/08,21/07/09,21/07/10,21/07/11,21/07/12,21/07/13,21/07/14,21/07/15,21/07/16,21/07/17,21/07/18,21/07/19,21/07/20,21/07/21,21/07/22,21/07/23,21/07/24,21/07/25,21/07/26,21/07/27,21/07/28,21/07/29,21/07/30,21/07/31,21/08/01,21/08/02,21/08/03,21/08/04,21/08/05,21/08/06,21/08/07,21/08/08,21/08/09,21/08/10,21/08/11,21/08/12,21/08/13,21/08/14,country,Image URL
0,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,...,130113.0,131586.0,131586.0,131586.0,134653.0,134653.0,136643.0,137853.0,137853.0,137853.0,137853.0,141489.0,142414.0,142414.0,143183.0,143439.0,143439.0,143439.0,144285.0,145008.0,145552.0,145996.0,146523.0,146523.0,146523.0,147985.0,148572.0,148933.0,149361.0,149810.0,149810.0,149810.0,150778.0,151013.0,151291.0,151563.0,151770.0,151770.0,Afghanistan,https://upload.wikimedia.org/wikipedia/commons...
1,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,132565.0,132580.0,132587.0,132592.0,132597.0,132608.0,132616.0,132629.0,132647.0,132665.0,132686.0,132697.0,132740.0,132763.0,132797.0,132828.0,132853.0,132875.0,132891.0,132922.0,132952.0,132999.0,133036.0,133081.0,133121.0,133146.0,133211.0,133310.0,133442.0,133591.0,133730.0,133912.0,133981.0,134201.0,134487.0,134761.0,135140.0,135550.0,Albania,https://cdn.countryflags.com/thumbs/albania/fl...
2,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,143652.0,144483.0,145296.0,146064.0,146942.0,147883.0,148797.0,149906.0,151103.0,152210.0,153309.0,154486.0,155784.0,157005.0,158213.0,159563.0,160868.0,162155.0,163660.0,165204.0,167131.0,168668.0,170189.0,171392.0,172564.0,173922.0,175229.0,176724.0,178013.0,179216.0,180356.0,181376.0,182368.0,183347.0,184191.0,185042.0,185902.0,186655.0,Algeria,https://cdn.countryflags.com/thumbs/algeria/fl...
3,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14050.0,14075.0,14075.0,14075.0,14155.0,14167.0,14167.0,14239.0,14273.0,14273.0,14273.0,14359.0,14379.0,14379.0,14464.0,14498.0,14498.0,14498.0,14577.0,14586.0,14586.0,14655.0,14678.0,14678.0,14678.0,14747.0,14766.0,14797.0,14809.0,14836.0,14836.0,14836.0,14836.0,14873.0,14891.0,14908.0,14924.0,14924.0,Andorra,https://cdn.countryflags.com/thumbs/andorra/fl...
4,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,39593.0,39791.0,39881.0,39958.0,40055.0,40138.0,40327.0,40530.0,40631.0,40707.0,40805.0,40906.0,41061.0,41227.0,41405.0,41629.0,41736.0,41780.0,41879.0,42110.0,42288.0,42486.0,42646.0,42777.0,42815.0,42970.0,43070.0,43158.0,43269.0,43487.0,43592.0,43662.0,43747.0,43890.0,43998.0,44174.0,44328.0,44534.0,Angola,https://cdn.countryflags.com/thumbs/angola/fla...


In [44]:
# bring image url to second column between location and jan 1
# we have extra country column we don't need

Generating our final file for flourish

In [64]:
from datetime import datetime

# use current date for timestamped filename 
dateTimeObj = datetime.now()
timestampStr = dateTimeObj.strftime('%d_%b_%Y')

# delete unnecessary columns
# del df_flourish['country']

# Create a series to hold the image urls
mid = df_flourish['Image URL']

# delete the existing image urls
df_flourish.drop(labels=['Image URL'], axis=1, inplace=True)

# Insert Mid into the second position in the dataframe 
df_flourish.insert(1, 'Image URL', mid)

# Give the CSV a name with the current timestamp
csv_name = 'world_covid_cases_' +timestampStr + '.csv'
df_flourish.to_csv(csv_name)
from google.colab import files
files.download(csv_name)
df_flourish.head()

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,location,Image URL,20/01/22,20/01/23,20/01/24,20/01/25,20/01/26,20/01/27,20/01/28,20/01/29,20/01/30,20/01/31,20/02/01,20/02/02,20/02/03,20/02/04,20/02/05,20/02/06,20/02/07,20/02/08,20/02/09,20/02/10,20/02/11,20/02/12,20/02/13,20/02/14,20/02/15,20/02/16,20/02/17,20/02/18,20/02/19,20/02/20,20/02/21,20/02/22,20/02/23,20/02/24,20/02/25,20/02/26,20/02/27,20/02/28,...,21/07/07,21/07/08,21/07/09,21/07/10,21/07/11,21/07/12,21/07/13,21/07/14,21/07/15,21/07/16,21/07/17,21/07/18,21/07/19,21/07/20,21/07/21,21/07/22,21/07/23,21/07/24,21/07/25,21/07/26,21/07/27,21/07/28,21/07/29,21/07/30,21/07/31,21/08/01,21/08/02,21/08/03,21/08/04,21/08/05,21/08/06,21/08/07,21/08/08,21/08/09,21/08/10,21/08/11,21/08/12,21/08/13,21/08/14,country
0,Afghanistan,https://upload.wikimedia.org/wikipedia/commons...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,...,129021.0,130113.0,131586.0,131586.0,131586.0,134653.0,134653.0,136643.0,137853.0,137853.0,137853.0,137853.0,141489.0,142414.0,142414.0,143183.0,143439.0,143439.0,143439.0,144285.0,145008.0,145552.0,145996.0,146523.0,146523.0,146523.0,147985.0,148572.0,148933.0,149361.0,149810.0,149810.0,149810.0,150778.0,151013.0,151291.0,151563.0,151770.0,151770.0,Afghanistan
1,Albania,https://cdn.countryflags.com/thumbs/albania/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,132557.0,132565.0,132580.0,132587.0,132592.0,132597.0,132608.0,132616.0,132629.0,132647.0,132665.0,132686.0,132697.0,132740.0,132763.0,132797.0,132828.0,132853.0,132875.0,132891.0,132922.0,132952.0,132999.0,133036.0,133081.0,133121.0,133146.0,133211.0,133310.0,133442.0,133591.0,133730.0,133912.0,133981.0,134201.0,134487.0,134761.0,135140.0,135550.0,Albania
2,Algeria,https://cdn.countryflags.com/thumbs/algeria/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,...,143032.0,143652.0,144483.0,145296.0,146064.0,146942.0,147883.0,148797.0,149906.0,151103.0,152210.0,153309.0,154486.0,155784.0,157005.0,158213.0,159563.0,160868.0,162155.0,163660.0,165204.0,167131.0,168668.0,170189.0,171392.0,172564.0,173922.0,175229.0,176724.0,178013.0,179216.0,180356.0,181376.0,182368.0,183347.0,184191.0,185042.0,185902.0,186655.0,Algeria
3,Andorra,https://cdn.countryflags.com/thumbs/andorra/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,14021.0,14050.0,14075.0,14075.0,14075.0,14155.0,14167.0,14167.0,14239.0,14273.0,14273.0,14273.0,14359.0,14379.0,14379.0,14464.0,14498.0,14498.0,14498.0,14577.0,14586.0,14586.0,14655.0,14678.0,14678.0,14678.0,14747.0,14766.0,14797.0,14809.0,14836.0,14836.0,14836.0,14836.0,14873.0,14891.0,14908.0,14924.0,14924.0,Andorra
4,Angola,https://cdn.countryflags.com/thumbs/angola/fla...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,39491.0,39593.0,39791.0,39881.0,39958.0,40055.0,40138.0,40327.0,40530.0,40631.0,40707.0,40805.0,40906.0,41061.0,41227.0,41405.0,41629.0,41736.0,41780.0,41879.0,42110.0,42288.0,42486.0,42646.0,42777.0,42815.0,42970.0,43070.0,43158.0,43269.0,43487.0,43592.0,43662.0,43747.0,43890.0,43998.0,44174.0,44328.0,44534.0,Angola


Index(['07/12/21', '07/13/20', '07/13/21', '07/14/20', '07/14/21', '07/15/20',
       '07/15/21', '07/16/20', '07/16/21', '07/17/20',
       ...
       '12/23/20', '12/24/20', '12/25/20', '12/26/20', '12/27/20', '12/28/20',
       '12/29/20', '12/30/20', '12/31/20', 'country'],
      dtype='object', length=207)

In [None]:
# f.to_csv('sample.csv')
# from google.colab import files
# files.download("sample.csv")