In [1]:
import pandas as pd
import tqdm.notebook
import util

---
## Load data downloaded from CSSE repository
- [COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE)](https://github.com/CSSEGISandData/COVID-19/tree/master/csse_covid_19_data/csse_covid_19_daily_reports)

In [2]:
# Gather the daily-report files found under data/. Each entry is later handed
# to util.extract_confirmed as a file path.
# NOTE(review): presumably a plain list of paths - confirm in util.get_data_list.
data_list = util.get_data_list('data/')
# Number of report files found.
len(data_list)

1025

In [3]:
# Build one wide table: a row per country, a column per daily report.
#
# Each report is read once and indexed by country, then all of them are aligned
# in a single outer concat. The previous version outer-merged every report onto
# the growing table, which re-copied all accumulated columns on each of the
# ~1000 iterations.
#
# NOTE(review): assumes util.extract_confirmed returns a DataFrame with a
# 'Country_Region' column whose values are unique within a single report
# (pd.concat raises on duplicate index labels) - confirm in util.
daily_frames = [
    util.extract_confirmed(filepath).set_index('Country_Region')
    for filepath in tqdm.tqdm(data_list)
]

# join='outer' keeps every country seen in any report; a country missing from
# a given report gets NaN in that report's column, as with how='outer' merges.
# Columns stay in data_list order and are sorted chronologically later on.
df_all = (
    pd.concat(daily_frames, axis=1, join='outer')
    .rename_axis('Country_Region')  # make sure the index name survives the union
    .reset_index()                  # back to a regular first column
)

100%|██████████| 1024/1024 [01:25<00:00, 12.01it/s]


---
## Process data

In [4]:
# Inspect the raw merged table. The date columns are not in chronological order
# yet and countries absent from a report show up as NaN; the next cell
# addresses both.
df_all

Unnamed: 0,Country_Region,2022.01.21,2020.02.26,2022.01.20,2020.02.27,2021.07.04,2021.07.05,2022.02.12,2022.02.13,2021.12.31,...,2021.10.12,2021.07.29,2021.09.30,2021.07.28,2022.03.03,2022.03.02,2021.05.26,2021.05.27,2021.06.15,2021.06.14
0,Afghanistan,159516.0,5.0,159303.0,5.0,124748.0,125937.0,170152.0,170604.0,158084.0,...,155599.0,145996.0,155174.0,145552.0,174214.0,174073.0,67743.0,68366.0,93272.0,91458.0
1,Albania,244182.0,,241512.0,,132535.0,132537.0,267551.0,268008.0,210224.0,...,175664.0,132999.0,170131.0,132952.0,271825.0,271825.0,132244.0,132264.0,132469.0,132461.0
2,Algeria,232325.0,1.0,230470.0,1.0,141471.0,141966.0,261226.0,261752.0,218432.0,...,204790.0,168668.0,203359.0,167131.0,265130.0,265079.0,127646.0,127926.0,134115.0,133742.0
3,Andorra,33025.0,,32201.0,,13918.0,13918.0,37140.0,37140.0,23740.0,...,15307.0,14655.0,15222.0,14586.0,38342.0,38249.0,13671.0,13682.0,13828.0,13826.0
4,Angola,95676.0,,95220.0,,39230.0,39300.0,98514.0,98514.0,81593.0,...,61794.0,42486.0,56583.0,42288.0,98746.0,98746.0,33338.0,33607.0,36921.0,36790.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,Curacao,,,,,,,,,,...,,,,,,,,,,
243,Cape Verde,,,,,,,,,,...,,,,,,,,,,
244,East Timor,,,,,,,,,,...,,,,,,,,,,
245,St. Martin,,,,,,,,,,...,,,,,,,,,,


In [5]:
# Every column after the first is a report date labelled 'YYYY.MM.DD', so a
# plain string sort puts them in chronological order.
date_columns = sorted(df_all.columns[1:])

# Keep only the date columns, in sorted order, and count "country missing from
# this report" as 0 confirmed cases.
df = df_all.loc[:, date_columns].fillna(0)

# Re-attach the country names (taken unfilled from df_all) as the first column.
df.insert(0, 'Country_Region', df_all['Country_Region'])
df

Unnamed: 0,Country_Region,2020.01.22,2020.01.23,2020.01.24,2020.01.25,2020.01.26,2020.01.27,2020.01.28,2020.01.29,2020.01.30,...,2022.11.02,2022.11.03,2022.11.04,2022.11.05,2022.11.06,2022.11.07,2022.11.08,2022.11.09,2022.11.10,2022.11.11
0,Afghanistan,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,203265.0,203395.0,203497.0,203574.0,203681.0,203829.0,203942.0,204094.0,204287.0,204392.0
1,Albania,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,332996.0,332996.0,333027.0,333046.0,333055.0,333058.0,333071.0,333088.0,333103.0,333125.0
2,Algeria,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,270840.0,270847.0,270856.0,270862.0,270873.0,270881.0,270891.0,270906.0,270917.0,270924.0
3,Andorra,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,46588.0,46588.0,46588.0,46588.0,46588.0,46588.0,46588.0,46664.0,46664.0,46664.0
4,Angola,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,Curacao,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243,Cape Verde,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
244,East Timor,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
245,St. Martin,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Checkpoint the cleaned table; the flag section below reloads it from this
# file instead of relying on the in-memory frame.
df.to_csv('intermediate/processed_data.csv')

---
## Retrieve country flags
- [countryflags](https://www.countryflags.com/)

In [7]:
# Reload the checkpoint written above. index_col=0 turns the first CSV column
# (the row index that to_csv wrote) back into the index.
df = pd.read_csv('intermediate/processed_data.csv', index_col=0)

In [8]:
# Build one flag-image URL per country name.
flag_urls_series = df['Country_Region'].apply(util.make_flag_url)

# 'Others' is not a country, so it gets an empty URL instead of a flag.
is_others = df['Country_Region'] == 'Others'

# Work on a copy so `df` stays as loaded, and place the URL column right after
# the country name.
df_flags = df.copy()
df_flags.insert(
    loc=1,
    column='flag_url',
    value=flag_urls_series.mask(is_others, ''),
)
df_flags

Unnamed: 0,Country_Region,flag_url,2020.01.22,2020.01.23,2020.01.24,2020.01.25,2020.01.26,2020.01.27,2020.01.28,2020.01.29,...,2022.11.02,2022.11.03,2022.11.04,2022.11.05,2022.11.06,2022.11.07,2022.11.08,2022.11.09,2022.11.10,2022.11.11
0,Afghanistan,https://cdn.countryflags.com/thumbs/afghanista...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,203265.0,203395.0,203497.0,203574.0,203681.0,203829.0,203942.0,204094.0,204287.0,204392.0
1,Albania,https://cdn.countryflags.com/thumbs/albania/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,332996.0,332996.0,333027.0,333046.0,333055.0,333058.0,333071.0,333088.0,333103.0,333125.0
2,Algeria,https://cdn.countryflags.com/thumbs/algeria/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,270840.0,270847.0,270856.0,270862.0,270873.0,270881.0,270891.0,270906.0,270917.0,270924.0
3,Andorra,https://cdn.countryflags.com/thumbs/andorra/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,46588.0,46588.0,46588.0,46588.0,46588.0,46588.0,46588.0,46664.0,46664.0,46664.0
4,Angola,https://cdn.countryflags.com/thumbs/angola/fla...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
242,Curacao,https://cdn.countryflags.com/thumbs/curacao/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
243,Cape Verde,https://cdn.countryflags.com/thumbs/cape-verde...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
244,East Timor,https://cdn.countryflags.com/thumbs/east-timor...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
245,St. Martin,https://cdn.countryflags.com/thumbs/st.-martin...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
# Checkpoint the table with flag URLs; the continent section reloads it from
# this file.
df_flags.to_csv('intermediate/result_flags.csv')

---
## Retrieve continent and population data
- [Coronavirus Pandemic Data](https://ourworldindata.org/coronavirus)

In [10]:
# Load the Our World in Data (OWID) COVID-19 dataset.
# The CSV is large and is not committed to GitHub, so it has to be downloaded
# separately into the working directory before this cell is run.
owid = pd.read_csv('owid-covid-data.csv')
# Preview only the first rows; the file has one row per location per date.
owid.head()

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,population,excess_mortality_cumulative_absolute,excess_mortality_cumulative,excess_mortality,excess_mortality_cumulative_per_million
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,...,,37.746,0.5,64.83,0.511,41128772.0,,,,


In [11]:
# Reduce OWID to a country -> continent lookup keyed like the CSSE data.
df_cont = (
    owid[['continent', 'location']]
    # Drop rows that have no continent.
    .dropna(subset=['continent'])
    # OWID has one row per location per date; one row per location is enough.
    .drop_duplicates(subset='location')
    .rename(columns={'location': 'Country_Region'})
    # The case-count table calls the United States 'US', so align the name here.
    # NOTE(review): only this one name is harmonised; any other spelling that
    # differs between the two sources is dropped by the later inner join.
    .replace({'Country_Region': {'United States': 'US'}})
    .reset_index(drop=True)
)
df_cont

Unnamed: 0,continent,Country_Region
0,Asia,Afghanistan
1,Europe,Albania
2,Africa,Algeria
3,Europe,Andorra
4,Africa,Angola
...,...,...
230,Oceania,Wallis and Futuna
231,Africa,Western Sahara
232,Asia,Yemen
233,Africa,Zambia


In [12]:
# Reload the flag checkpoint (index_col=0 restores the saved row index).
df_flags = pd.read_csv('intermediate/result_flags.csv', index_col=0)

# Key both tables by country name so they can be joined on the index.
continent_by_country = df_cont.set_index('Country_Region')
flags_by_country = df_flags.set_index('Country_Region')

# Inner join: keep only countries present in both the OWID continent lookup
# and the case-count table; continent ends up as the first column.
df_flags_cont = continent_by_country.join(flags_by_country, how='inner')
df_flags_cont

Unnamed: 0_level_0,continent,flag_url,2020.01.22,2020.01.23,2020.01.24,2020.01.25,2020.01.26,2020.01.27,2020.01.28,2020.01.29,...,2022.11.02,2022.11.03,2022.11.04,2022.11.05,2022.11.06,2022.11.07,2022.11.08,2022.11.09,2022.11.10,2022.11.11
Country_Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Afghanistan,Asia,https://cdn.countryflags.com/thumbs/afghanista...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,203265.0,203395.0,203497.0,203574.0,203681.0,203829.0,203942.0,204094.0,204287.0,204392.0
Albania,Europe,https://cdn.countryflags.com/thumbs/albania/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,332996.0,332996.0,333027.0,333046.0,333055.0,333058.0,333071.0,333088.0,333103.0,333125.0
Algeria,Africa,https://cdn.countryflags.com/thumbs/algeria/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,270840.0,270847.0,270856.0,270862.0,270873.0,270881.0,270891.0,270906.0,270917.0,270924.0
Andorra,Europe,https://cdn.countryflags.com/thumbs/andorra/fl...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,46588.0,46588.0,46588.0,46588.0,46588.0,46588.0,46588.0,46664.0,46664.0,46664.0
Angola,Africa,https://cdn.countryflags.com/thumbs/angola/fla...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0,103131.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Venezuela,South America,https://cdn.countryflags.com/thumbs/venezuela/...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,545926.0,545951.0,545963.0,546047.0,546110.0,546110.0,546122.0,546202.0,546202.0,546262.0
Vietnam,Asia,https://cdn.countryflags.com/thumbs/vietnam/fl...,0.0,2.0,2.0,2.0,2.0,2.0,2.0,2.0,...,11504091.0,11504910.0,11505249.0,11505608.0,11505849.0,11506214.0,11506656.0,11507124.0,11507540.0,11508170.0
Yemen,Asia,https://cdn.countryflags.com/thumbs/yemen/flag...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,11944.0,11944.0,11945.0,11945.0,11945.0,11945.0,11945.0,11945.0,11945.0,11945.0
Zambia,Africa,https://cdn.countryflags.com/thumbs/zambia/fla...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,333684.0,333685.0,333685.0,333685.0,333685.0,333685.0,333685.0,333685.0,333685.0,333685.0


In [13]:
# Final output: this is the file to upload to Flourish in the steps below.
# Country_Region is the index here, so it is written as the first CSV column.
df_flags_cont.to_csv('out/result_flags_cont.csv')

### Completed! 
### It's time for visualization!
1. Go to [Flourish](https://flourish.studio/).
2. Sign in and choose 'Bar chart race'.
3. Upload `result_flags_cont.csv`.
4. Modify the options on the right side bar as you like.

---
There's another way to get CSSE data: use [Akshare datasets](https://www.akshare.xyz/tutorial.html#id1).

In [14]:
# akshare is imported here rather than in the first cell, so the main pipeline
# above does not need it installed.
import akshare
# NOTE(review): presumably fetches the data over the network - confirm in the
# akshare docs. The preview below shows a wide table with one column per date.
data_ak = akshare.covid_19_csse_global_confirmed()
data_ak.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,11/7/22,11/8/22,11/9/22,11/10/22,11/11/22,11/12/22,11/13/22,11/14/22,11/15/22,11/16/22
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,203829,203942,204094,204287,204392,204417,204510,204610,204724,204820
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,333058,333071,333088,333103,333125,333138,333156,333161,333197,333215
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,270881,270891,270906,270917,270924,270929,270939,270952,270969,270981
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,46588,46588,46664,46664,46664,46664,46664,46664,46664,46824
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,103131,103131,103131,103131,103131,103131,103131,103131,103131,103131
