In [40]:
import pandas as pd

import plotly.graph_objects as go

import pickle

In [41]:
# Root of the raw time-series CSV files in the CSSEGISandData/COVID-19 GitHub repository.
# The raw.githubusercontent.com host is used (not the github.com ".../blob/master/..." page)
# so that the URL points at the file contents themselves.
base_url = (
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/"
    "csse_covid_19_data/csse_covid_19_time_series/"
)
# Same location as base_url; kept as a separate name in case other cells refer to it.
new_base_url = base_url

def loadData(fileName, columnName, baseUrl=None):
    """Load one wide time-series CSV and return it in long (one row per date) form.

    Parameters
    ----------
    fileName : str
        Name of the CSV file, appended directly to the base location.
    columnName : str
        Name given to the melted value column (e.g. "CumConfirmed").
    baseUrl : str, optional
        Location prefix to read from. Defaults to the notebook-level
        ``base_url``; pass a local directory prefix to read an offline copy.

    Returns
    -------
    pandas.DataFrame
        Columns: 'Province/State', 'Country/Region', 'Lat', 'Long', 'date'
        and ``columnName``. Rows with no province get '<all>' in
        'Province/State'; 'date' is converted to datetimes.
    """
    root = base_url if baseUrl is None else baseUrl
    data = (
        pd.read_csv(root + fileName)
        .melt(
            id_vars=['Province/State', 'Country/Region', 'Lat', 'Long'],
            var_name='date',
            value_name=columnName,
        )
        # Only the province label gets the '<all>' placeholder. Filling the whole
        # frame would also write the string '<all>' into missing Lat/Long/count
        # cells and silently turn those numeric columns into object dtype.
        .fillna({'Province/State': '<all>'})
    )
    # The date column holds the former column headers, expected in m/d/yy form
    # (e.g. '1/22/20'); an unexpected format raises instead of being mis-parsed.
    data['date'] = pd.to_datetime(data['date'], format='%m/%d/%y')
    return data

In [42]:
# Download the confirmed-case and death series and join them into one long table.
# The join keys and join type are written out explicitly; they are the columns the
# two frames share and the default inner join, so the result is unchanged.
all_data = loadData("time_series_covid19_confirmed_global.csv", "CumConfirmed").merge(
    loadData("time_series_covid19_deaths_global.csv", "CumDeaths"),
    how="inner",
    on=["Province/State", "Country/Region", "Lat", "Long", "date"],
)

all_data.head()

Unnamed: 0,Province/State,Country/Region,Lat,Long,date,CumConfirmed,CumDeaths
0,<all>,Afghanistan,33.0,65.0,2020-01-22,0,0
1,<all>,Albania,41.1533,20.1683,2020-01-22,0,0
2,<all>,Algeria,28.0339,1.6596,2020-01-22,0,0
3,<all>,Andorra,42.5063,1.5218,2020-01-22,0,0
4,<all>,Angola,-11.2027,17.8739,2020-01-22,0,0


In [43]:
# One row per country with its peak cumulative confirmed cases and deaths.
# Province-level rows are first summed into a national total per date; taking
# `max` directly over the raw rows would return the largest single province
# for countries that are reported by province, not the country-wide figure.
grouped_countries = (
    all_data
    .groupby(['Country/Region', 'date'])[['CumConfirmed', 'CumDeaths']]
    .sum()
    .groupby(level='Country/Region')
    .max()
    .reset_index()
)
grouped_countries.head()

Unnamed: 0,Country/Region,CumConfirmed,CumDeaths
0,Afghanistan,281,6
1,Albania,304,17
2,Algeria,1171,105
3,Andorra,439,16
4,Angola,8,2


In [44]:
# Load the pickled country-name -> number lookup that is later mapped into `pop_65`.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# unpickle files that were produced by this project.
with open('./pickled_files/age_dict.pkl', 'rb') as age_file:
    age_dict = pickle.load(age_file)
age_dict

{'Aruba': 13.5509471396854,
 'Afghanistan': 2.58492693988024,
 'Angola': 2.21637364776329,
 'Albania': 13.7447359109736,
 'Andorra': nan,
 'Arab World': 4.55787642361103,
 'United Arab Emirates': 1.0850009848448101,
 'Argentina': 11.1177888760774,
 'Armenia': 11.2538176570131,
 'American Samoa': nan,
 'Antigua and Barbuda': 8.79982551980558,
 'Australia': 15.656475227559099,
 'Austria': 19.0015664595899,
 'Azerbaijan': 6.195182750714929,
 'Burundi': 2.24694046033717,
 'Belgium': 18.7887437383395,
 'Benin': 3.25360529746338,
 'Burkina Faso': 2.4069808286635497,
 'Bangladesh': 5.15839063962068,
 'Bulgaria': 21.021914434268,
 'Bahrain': 2.42633387832394,
 'Bahamas, The': 7.25760235662035,
 'Bosnia and Herzegovina': 16.4703174710621,
 'Belarus': 14.8451481743098,
 'Belize': 4.736458776571441,
 'Bermuda': nan,
 'Bolivia': 7.19194739218447,
 'Brazil': 8.92283783244003,
 'Barbados': 15.802693962133802,
 'Brunei Darussalam': 4.8731475675058205,
 'Bhutan': 6.003011712805611,
 'Botswana': 4.2238

In [45]:
# Attach each country's age_dict value as the `pop_65` column and index the table by country.
# NOTE(review): the earlier comment called these "mean ages" while the column is
# named pop_65 — confirm what age_dict actually holds.
# Written without in-place mutation so the cell can be re-run: with
# set_index(..., inplace=True) a second run raised KeyError because
# 'Country/Region' had already moved from the columns into the index.
if grouped_countries.index.name != 'Country/Region':
    grouped_countries = grouped_countries.set_index('Country/Region')
# Country names with no entry in age_dict end up as NaN.
grouped_countries = grouped_countries.assign(
    pop_65=grouped_countries.index.to_series().map(age_dict)
)
grouped_countries.head()

Unnamed: 0_level_0,CumConfirmed,CumDeaths,pop_65
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Afghanistan,281,6,2.584927
Albania,304,17,13.744736
Algeria,1171,105,6.362497
Andorra,439,16,
Angola,8,2,2.216374


In [47]:
# Countries left without a pop_65 value: their name is either absent from
# age_dict or stored there as nan.
grouped_countries.loc[lambda frame: frame['pop_65'].isna()]

Unnamed: 0_level_0,CumConfirmed,CumDeaths,pop_65
Country/Region,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Andorra,439,16,
Bahamas,24,1,
Brunei,134,1,
Burma,20,1,
Congo (Brazzaville),22,2,
Congo (Kinshasa),134,13,
Czechia,4091,53,
Diamond Princess,712,11,
Dominica,12,0,
Egypt,985,66,
