# covid stuff

----------------------------------------

- **created** by z: `2020-03-30`
- last **updated**: `2020-04-02T11:25:14PDT`

## _preamble_

#### import packages

In [51]:
import pathlib
import requests
import re
import math
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt

#### disable request warning

In [52]:
# Silence urllib3's InsecureRequestWarning for the rest of the session.
# NOTE(review): the download cell does not pass verify=False, so this
# suppression may be unnecessary — confirm before keeping it.
from requests.packages.urllib3.exceptions import InsecureRequestWarning
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

#### directories

In [53]:
# alternative locations kept for reference (not active):
# cwd = pathlib.Path.cwd()
# cwd
# docs_dir = pathlib.Path("/private/var/mobile/Library/Mobile Documents/iCloud~AsheKube~Carnets/Documents")
# data_dir = docs_dir / "data"

# where data will be saved/loaded
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere
data_dir = pathlib.Path("/Users/zarek/Dropbox/code/github/zcovid/data")
if not data_dir.is_dir():
    # parents=True creates any missing intermediate directories (a bare
    # mkdir() raises FileNotFoundError when the parent does not exist);
    # exist_ok=True tolerates the directory appearing between the check
    # above and this call.
    data_dir.mkdir(parents=True, exist_ok=True)
    print(">>> created dir {}".format(data_dir))

#### URLs

In [54]:
# base URL for data downloads; the download cell appends the
# time-series sub-directory and per-key CSV file name to this prefix
base_tsdata_url = "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data"

#### regexes

In [55]:
# recognises date-like column labels of the form digits/digits/digits
# (e.g. "1/22/20"); it is applied with .match(), so only the start of the
# label is anchored
date_re = re.compile(r"\d+/\d+/\d+")

#### time-series keys

In [56]:
# for file names and saving data
# each key has the form "<datum>_<domain>"; the clean-up cell splits on "_"
# to fill the 'datum' and 'domain' columns, and the same keys index the
# data container `d`
tskeys = [
    'confirmed_US',
    'confirmed_global',
    'deaths_US',
    'deaths_global',
    'recovered_global'
]

#### utility functions

In [57]:
# sinum(): format number SI

# SI prefixes keyed by order of magnitude expressed in powers of the log
# base (key k stands for base**k, i.e. 1000**k or, in binary mode, 1024**k)
SINUM_PREFIXES = {
    -6: {'short': "a", 'long': "atto"},
    -5: {'short': "f", 'long': "femto"},
    -4: {'short': "p", 'long': "pico"},
    -3: {'short': "n", 'long': "nano"},
    -2: {'short': "μ", 'long': "micro"},
    -1: {'short': "m", 'long': "milli"},
    0: {'short': " ", 'long': "-"},
    1: {'short': "k", 'long': "kilo"},
    2: {'short': "M", 'long': "mega"},
    3: {'short': "G", 'long': "giga"},
    4: {'short': "T", 'long': "tera"},
    5: {'short': "P", 'long': "peta"},
    6: {'short': "E", 'long': "exa"}
}

def sinum(num, unit='B', fmt="{coef:.3f} {pfx}{unit}", long_pfx=False, strip_zeros=True, binary=False, verbose=False):
    """Format a number with an SI (or binary) magnitude prefix.

    Parameters
    ----------
    num : int or float
        Value to format. Zero and non-finite floats (inf, nan) are rendered
        without any scaling.
    unit : str
        Unit label appended after the prefix. With ``binary=True`` an "i"
        is inserted in front of it (e.g. "G" + "iB").
    fmt : str
        ``str.format`` template receiving ``coef``, ``pfx`` and ``unit``.
    long_pfx : bool
        Use the long prefix name ("kilo") instead of the short one ("k").
    strip_zeros : bool
        Accepted for backward compatibility; currently has no effect.
    binary : bool
        Scale by powers of 1024 instead of powers of 1000.
    verbose : bool
        Print the call and each intermediate value.

    Returns
    -------
    str
        The formatted string, e.g. ``sinum(786770) == "786.770 kB"``.

    Raises
    ------
    AssertionError
        If ``num`` is not an int/float or ``unit`` is not a str.
    """

    # check inputs
    assert isinstance(num, (int, float))
    assert isinstance(unit, str)

    # verbosity...
    if verbose:
        print(f">>> sinum(num={num!r}, unit={unit!r}, fmt={fmt!r}, long_pfx={long_pfx}, strip_zeros={strip_zeros}, binary={binary}, verbose={verbose})")
    def _verb(name, value):
        if verbose:
            print("\t{:<12s} = {:>20s}".format(
                name,
                value if isinstance(value, str) else repr(value)
            ))

    # binary mods
    if binary:
        log_base = 1024
        unit = f"i{unit!s}"
    else:
        log_base = 1000
        unit = str(unit)

    # supported prefix range, taken from the table so the two cannot drift
    min_oom = min(SINUM_PREFIXES)
    max_oom = max(SINUM_PREFIXES)

    # order of magnitude
    if num == 0 or (isinstance(num, float) and not math.isfinite(num)):
        # log() is undefined for 0, and floor(log(inf/nan)) raises
        oom = 0
    else:
        magnitude = abs(num)
        oom = math.floor(math.log(magnitude, log_base))
        oom = max(min_oom, min(max_oom, oom))
        # the float logarithm can land a hair above or below an exact power
        # of the base; nudge so that 1 <= |coef| < log_base inside the range
        if oom < max_oom and magnitude >= log_base ** (oom + 1):
            oom += 1
        elif oom > min_oom and magnitude < log_base ** oom:
            oom -= 1
    _verb('oom', oom)

    # coefficient
    coef = num / (log_base ** oom)
    _verb('coef', coef)

    # SI prefix
    pfx = SINUM_PREFIXES[oom]['long' if long_pfx else 'short']
    _verb('pfx', pfx)

    # string out
    # NOTE: stripping of trailing zeros (strip_zeros) is not implemented
    out = fmt.format(coef=coef, pfx=pfx, unit=unit)
    _verb('out', out)

    return out


# tests (disabled: switch the guard to True to print every sample call)
if False:
    _fmt = "{coef:12.5f} // {pfx} // {unit}"
    # (number, keyword arguments) pairs, run in the listed order
    _cases = [
        (82457891234, dict(verbose=True)),
        (82457891234, dict(verbose=True, unit='')),
        (82457891234, dict(verbose=True, unit='bloops')),
        (82457891234, dict(binary=True, verbose=True)),
        (0.00082457891234, dict(verbose=True)),
        (824578912342345.2345, dict(verbose=True)),
        (0.00082457891234, dict(binary=True, verbose=True)),
        (824578912342345.2345, dict(binary=True, verbose=True)),
        (824578912342345.2345, dict(binary=True, verbose=True, fmt=_fmt)),
        (824578912342345.2345, dict(binary=True, fmt=_fmt)),
        (1, dict(verbose=True)),
        (-1, dict(verbose=True)),
        (0, dict(verbose=True)),
        (-82457891234, dict(verbose=True)),
        (8786996786798967896872457891234, dict(verbose=True)),
    ]
    for _num, _kwargs in _cases:
        print(sinum(_num, **_kwargs), end='\n\n')

## load data

#### initialize data container `d`

In [58]:
# container: one independent (initially empty) dict per time-series key
d = {key: {} for key in tskeys}
d

{'confirmed_US': {},
 'confirmed_global': {},
 'deaths_US': {},
 'deaths_global': {},
 'recovered_global': {}}

#### download data from github, save to file

In [59]:
# same load and save process for each tskey:
# build the URL, download it, and write the payload to data_dir
print(f">>> loading CSVs, saving in ``{data_dir}''\n")
for tsk in tskeys:
    print(">>> getting data for '{}'".format(tsk))
    d[tsk]['url'] = f"{base_tsdata_url}/csse_covid_19_time_series/time_series_covid19_{tsk}.csv"
    # the repository is public, so no credentials are sent; the timeout keeps
    # a stalled connection from hanging the notebook
    d[tsk]['req'] = requests.get(d[tsk]['url'], timeout=60)
    # fail loudly instead of saving an HTTP error page as if it were CSV
    d[tsk]['req'].raise_for_status()
    d[tsk]['csv'] = data_dir / f"time_series_covid19_{tsk}.csv"
    if d[tsk]['csv'].is_file() and d[tsk]['csv'].stat().st_size > 0:
        print(f"--> CSV for '{tsk}' will be overwritten")
    print("--> writing ``.../{}'' ... ".format(d[tsk]['csv'].name), end='')
    # write the raw bytes and close the file before measuring it, so the
    # reported size is the final on-disk size rather than a partially
    # flushed buffer
    d[tsk]['csv'].write_bytes(d[tsk]['req'].content)
    print("wrote {}\n".format(sinum(d[tsk]['csv'].stat().st_size)))
# d

>>> loading CSVs, saving in ``/Users/zarek/Dropbox/code/github/zcovid/data''

>>> getting data for 'confirmed_US'
--> CSV for 'confirmed_US' will be overwritten
--> writing ``.../time_series_covid19_confirmed_US.csv'' ... wrote 786.770 kB

>>> getting data for 'confirmed_global'
--> CSV for 'confirmed_global' will be overwritten
--> writing ``.../time_series_covid19_confirmed_global.csv'' ... wrote 54.836 kB

>>> getting data for 'deaths_US'
--> CSV for 'deaths_US' will be overwritten
--> writing ``.../time_series_covid19_deaths_US.csv'' ... wrote 796.938 kB

>>> getting data for 'deaths_global'
--> CSV for 'deaths_global' will be overwritten
--> writing ``.../time_series_covid19_deaths_global.csv'' ... wrote 45.681 kB

>>> getting data for 'recovered_global'
--> CSV for 'recovered_global' will be overwritten
--> writing ``.../time_series_covid19_recovered_global.csv'' ... wrote 46.789 kB



#### load data from CSV just saved

In [60]:
# iterate tskeys, loading from CSV saved above
for tsk in tskeys:
    d[tsk]['df'] = pd.read_csv(d[tsk]['csv'])
    print(f"--> read CSV data for {tsk}")

# create backup of d
# d.copy() would be shallow: the backup would share the inner dicts (and the
# dataframes inside them) with d, so later edits to d would also change the
# "backup". Copy each inner dict and its dataframe instead.
d_BAK = {}
for _key, _entry in d.items():
    d_BAK[_key] = dict(_entry)
    if 'df' in _entry:
        d_BAK[_key]['df'] = _entry['df'].copy()

--> read CSV data for confirmed_US
--> read CSV data for confirmed_global
--> read CSV data for deaths_US
--> read CSV data for deaths_global
--> read CSV data for recovered_global


## clean up data

#### add index columns to `d[tsk]['df']` (the actual dataframe for the key), then reorder as desired

In [61]:
# raw CSV header -> normalised column name
col_subs = {
    'Province_State': 'subregion',
    'Province/State': 'subregion',
    'Country_Region': 'region',
    'Country/Region': 'region',
    'Long_': 'long'
}

# index columns that should lead the frame, in this order
priority_cols = [
    'locid',
    'region',
    'subregion',
    'combined_key',
    'lat',
    'long',
    'population'
]

# union of column names seen across all frames (they differ per tskey)
all_indx_cols = []
all_date_cols = []


def _append_missing(target, names):
    """Append to `target`, in order, each name it does not already contain."""
    for name in names:
        if name not in target:
            target.append(name)


# clean up the dataframe of every tskey in turn
for tsk in tskeys:

    print(f">>> cleaning up dataframe for '{tsk}'")

    entry = d[tsk]
    frame = entry['df']

    # extra index columns derived from the key and the row index
    tsk_parts = tsk.split('_')
    frame['tskey'] = tsk
    frame['domain'] = tsk_parts[1]
    frame['datum'] = tsk_parts[0]
    frame['locid'] = frame.index

    # normalise column names: apply substitutions, then lower-case
    renamed = [col_subs.get(name, name).lower() for name in frame.columns]
    frame.columns = renamed

    # split into date columns and everything else (index columns)
    date_cols = [name for name in renamed if date_re.match(name)]
    indx_cols = [name for name in renamed if name not in date_cols]

    # positions of the priority columns first (in priority order),
    # followed by the remaining positions in their original order
    front = [indx_cols.index(name) for name in priority_cols if name in indx_cols]
    col_idxs = front + [pos for pos in range(len(indx_cols)) if pos not in front]
    print(col_idxs)

    entry['date_cols'] = date_cols
    entry['indx_cols'] = [indx_cols[pos] for pos in col_idxs]

    # grow the cross-frame unions
    _append_missing(all_indx_cols, entry['indx_cols'])
    _append_missing(all_date_cols, entry['date_cols'])

    # store the frame with index columns first, then the date columns
    entry['all_cols'] = [*entry['indx_cols'], *entry['date_cols']]
    entry['df'] = frame[entry['all_cols']]

# d[tsk]

print(all_indx_cols)
print(all_date_cols)

d[tsk]['df']


>>> cleaning up dataframe for 'confirmed_US'
[14, 7, 6, 10, 0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 13]
>>> cleaning up dataframe for 'confirmed_global'
[7, 1, 0, 2, 3, 4, 5, 6]
>>> cleaning up dataframe for 'deaths_US'
[15, 7, 6, 10, 0, 1, 2, 3, 4, 5, 8, 9, 11, 12, 13, 14]
>>> cleaning up dataframe for 'deaths_global'
[7, 1, 0, 2, 3, 4, 5, 6]
>>> cleaning up dataframe for 'recovered_global'
[7, 1, 0, 2, 3, 4, 5, 6]
['locid', 'region', 'subregion', 'combined_key', 'uid', 'iso2', 'iso3', 'code3', 'fips', 'admin2', 'lat', 'long', 'tskey', 'domain', 'datum', 'population']
['1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20', '1/28/20', '1/29/20', '1/30/20', '1/31/20', '2/1/20', '2/2/20', '2/3/20', '2/4/20', '2/5/20', '2/6/20', '2/7/20', '2/8/20', '2/9/20', '2/10/20', '2/11/20', '2/12/20', '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20', '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20', '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20', '3/1/20', '

Unnamed: 0,locid,region,subregion,lat,long,tskey,domain,datum,1/22/20,1/23/20,...,3/23/20,3/24/20,3/25/20,3/26/20,3/27/20,3/28/20,3/29/20,3/30/20,3/31/20,4/1/20
0,0,Afghanistan,,33.000000,65.000000,recovered_global,global,recovered,0,0,...,1,1,2,2,2,2,2,2,5,5
1,1,Albania,,41.153300,20.168300,recovered_global,global,recovered,0,0,...,2,10,17,17,31,31,33,44,52,67
2,2,Algeria,,28.033900,1.659600,recovered_global,global,recovered,0,0,...,65,24,65,29,29,31,31,37,46,61
3,3,Andorra,,42.506300,1.521800,recovered_global,global,recovered,0,0,...,1,1,1,1,1,1,1,10,10,10
4,4,Angola,,-11.202700,17.873900,recovered_global,global,recovered,0,0,...,0,0,0,0,0,0,0,0,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
237,237,United Kingdom,Turks and Caicos Islands,21.694000,-71.797900,recovered_global,global,recovered,0,0,...,0,0,0,0,0,0,0,0,0,0
238,238,MS Zaandam,,0.000000,0.000000,recovered_global,global,recovered,0,0,...,0,0,0,0,0,0,0,0,0,0
239,239,Botswana,,-22.328500,24.684900,recovered_global,global,recovered,0,0,...,0,0,0,0,0,0,0,0,0,0
240,240,Burundi,,-3.373100,29.918900,recovered_global,global,recovered,0,0,...,0,0,0,0,0,0,0,0,0,0


#### create backups of `d[tsk]['df']`

In [62]:
for tsk in tskeys:
    # Take a backup when none exists yet, or while the frame still has flat
    # (non-MultiIndex) columns. The previous test, isinstance(..., pd.Index),
    # was always true because pd.MultiIndex is a subclass of pd.Index, so
    # re-running this cell after the multi-index conversion replaced the
    # backup with the already-converted frame.
    if ('df_BAK' not in d[tsk]) or not isinstance(d[tsk]['df'].columns, pd.MultiIndex):
        d[tsk]['df_BAK'] = d[tsk]['df'].copy()

#### convert `d[tsk]['df']` such that rows are dates and columns are multi-index

In [94]:
for tsk in tskeys:

    entry = d[tsk]
    backup = entry['df_BAK']

    # index columns only: each row becomes one column label of the new frame
    mindx_df = backup[entry['indx_cols']]
    mindx = pd.MultiIndex.from_frame(mindx_df)

    # date columns only, flipped so that every row is a date
    by_date = backup[entry['date_cols']].T
    by_date.columns = mindx
    # row labels: date strings -> datetimes
    by_date.index = pd.to_datetime(by_date.index)

    entry['df'] = by_date
    
# mindx_df
# mindx

# d[tskeys[0]]['df']

#### add levels to the multi-index of `d[tsk]['df']` so they are uniform across all dataframes

In [95]:
for tsk in tskeys:
    frame = d[tsk]['df']
    # expand the column labels into a table with exactly the columns of
    # all_indx_cols (levels this frame lacks are filled with NaN), then turn
    # that table back into the column MultiIndex
    levels = frame.columns.to_frame().reindex(columns=all_indx_cols)
    frame.columns = pd.MultiIndex.from_frame(levels)

d[tskeys[3]]['df']

locid,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Australia,...,Canada,Kosovo,Burma,United Kingdom,United Kingdom,United Kingdom,MS Zaandam,Botswana,Burundi,Sierra Leone
subregion,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Australian Capital Territory,New South Wales,...,Yukon,NaN,NaN,Anguilla,British Virgin Islands,Turks and Caicos Islands,NaN,NaN,NaN,NaN
combined_key,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
uid,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
iso2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
iso3,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
code3,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
fips,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
admin2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
lat,33.000000,41.153300,28.033900,42.506300,-11.202700,17.060800,-38.416100,40.069100,-35.473500,-33.868800,...,64.282300,42.602636,21.916200,18.220600,18.420700,21.694000,0.000000,-22.328500,-3.373100,8.460555
long,65.000000,20.168300,1.659600,1.521800,17.873900,-61.796400,-63.616700,45.038200,149.012400,151.209300,...,-135.000000,20.902977,95.956000,-63.068600,-64.640000,-71.797900,0.000000,24.684900,29.918900,-11.779889
tskey,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,...,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global
domain,global,global,global,global,global,global,global,global,global,global,...,global,global,global,global,global,global,global,global,global,global
datum,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
population,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
2020-01-22,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-28,4,10,29,3,0,0,18,1,0,8,...,0,1,0,0,0,0,0,0,0,0
2020-03-29,4,10,31,6,2,0,19,3,0,8,...,0,1,0,0,0,0,0,0,0,0
2020-03-30,4,11,35,8,2,0,23,3,1,8,...,0,1,0,0,0,0,0,0,0,0
2020-03-31,4,15,44,12,2,0,27,3,1,8,...,0,1,1,0,0,0,0,1,0,0


In [65]:
# join the US and global 'confirmed' frames side by side on their row index;
# the result is only displayed, not stored
d['confirmed_US']['df'].join(d['confirmed_global']['df'])



Unnamed: 0,"(0, US, American Samoa, American Samoa, US, 16, AS, ASM, 16, 60.0, nan, -14.270999999999999, -170.132, confirmed_US, US, confirmed)","(1, US, Guam, Guam, US, 316, GU, GUM, 316, 66.0, nan, 13.4443, 144.7937, confirmed_US, US, confirmed)","(2, US, Northern Mariana Islands, Northern Mariana Islands, US, 580, MP, MNP, 580, 69.0, nan, 15.0979, 145.6739, confirmed_US, US, confirmed)","(3, US, Puerto Rico, Puerto Rico, US, 630, PR, PRI, 630, 72.0, nan, 18.2208, -66.5901, confirmed_US, US, confirmed)","(4, US, Virgin Islands, Virgin Islands, US, 850, VI, VIR, 850, 78.0, nan, 18.3358, -64.8963, confirmed_US, US, confirmed)","(5, US, Alabama, Autauga, Alabama, US, 84001001, US, USA, 840, 1001.0, Autauga, 32.53952745, -86.64408227, confirmed_US, US, confirmed)","(6, US, Alabama, Baldwin, Alabama, US, 84001003, US, USA, 840, 1003.0, Baldwin, 30.72774991, -87.72207058, confirmed_US, US, confirmed)","(7, US, Alabama, Barbour, Alabama, US, 84001005, US, USA, 840, 1005.0, Barbour, 31.868263, -85.3871286, confirmed_US, US, confirmed)","(8, US, Alabama, Bibb, Alabama, US, 84001007, US, USA, 840, 1007.0, Bibb, 32.99642064, -87.12511459999996, confirmed_US, US, confirmed)","(9, US, Alabama, Blount, Alabama, US, 84001009, US, USA, 840, 1009.0, Blount, 33.98210918, -86.56790593, confirmed_US, US, confirmed)",...,"(246, Canada, Yukon, 64.2823, -135.0, confirmed_global, global, confirmed)","(247, Kosovo, nan, 42.602636, 20.902977, confirmed_global, global, confirmed)","(248, Burma, nan, 21.9162, 95.956, confirmed_global, global, confirmed)","(249, United Kingdom, Anguilla, 18.2206, -63.0686, confirmed_global, global, confirmed)","(250, United Kingdom, British Virgin Islands, 18.4207, -64.64, confirmed_global, global, confirmed)","(251, United Kingdom, Turks and Caicos Islands, 21.69400000000001, -71.7979, confirmed_global, global, confirmed)","(252, MS Zaandam, nan, 0.0, 0.0, confirmed_global, global, confirmed)","(253, Botswana, nan, -22.3285, 24.6849, confirmed_global, 
global, confirmed)","(254, Burundi, nan, -3.3731, 29.9189, confirmed_global, global, confirmed)","(255, Sierra Leone, nan, 8.460555000000001, -11.779889, confirmed_global, global, confirmed)"
2020-01-22,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-28,0,55,0,100,22,6,10,0,0,5,...,4,91,8,2,2,4,2,0,0,0
2020-03-29,0,56,0,127,0,6,15,0,0,5,...,4,94,10,2,2,4,2,0,0,0
2020-03-30,0,58,0,174,0,6,18,0,2,5,...,4,94,14,2,2,5,2,3,0,0
2020-03-31,0,69,2,239,30,7,19,0,3,5,...,5,112,15,2,3,5,2,4,2,1


In [66]:
# ##### fignum = 0
# fig = plt.figure(fignum)
# plot_data = deaths_global.iloc[:, [0, 1, 2]]
# # plot_data.columns.names
# plot_regions = plot_data.columns[[0, 1, 2]].get_level_values('region')
# plt.plot(plot_data)
# plt.legend(plot_regions)

In [93]:
for tsk in tskeys:
    frame = d[tsk]['df']
    # rebuild the column MultiIndex so it carries every level listed in
    # all_indx_cols (levels absent from this frame become NaN)
    levels = frame.columns.to_frame().reindex(columns=all_indx_cols)
    frame.columns = pd.MultiIndex.from_frame(levels)

# inspect: expected level names vs. the columns of the leftover mindx_df
print(all_indx_cols)
print(mindx_df.columns)
d[tskeys[3]]['df']

['locid', 'region', 'subregion', 'combined_key', 'uid', 'iso2', 'iso3', 'code3', 'fips', 'admin2', 'lat', 'long', 'tskey', 'domain', 'datum', 'population']
Index(['locid', 'region', 'subregion', 'combined_key', 'uid', 'iso2', 'iso3',
       'code3', 'fips', 'admin2', 'lat', 'long', 'tskey', 'domain', 'datum',
       'population'],
      dtype='object')


locid,0,1,2,3,4,5,6,7,8,9,...,246,247,248,249,250,251,252,253,254,255
region,Afghanistan,Albania,Algeria,Andorra,Angola,Antigua and Barbuda,Argentina,Armenia,Australia,Australia,...,Canada,Kosovo,Burma,United Kingdom,United Kingdom,United Kingdom,MS Zaandam,Botswana,Burundi,Sierra Leone
subregion,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,Australian Capital Territory,New South Wales,...,Yukon,NaN,NaN,Anguilla,British Virgin Islands,Turks and Caicos Islands,NaN,NaN,NaN,NaN
combined_key,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
uid,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
iso2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
iso3,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
code3,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
fips,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
admin2,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
lat,33.000000,41.153300,28.033900,42.506300,-11.202700,17.060800,-38.416100,40.069100,-35.473500,-33.868800,...,64.282300,42.602636,21.916200,18.220600,18.420700,21.694000,0.000000,-22.328500,-3.373100,8.460555
long,65.000000,20.168300,1.659600,1.521800,17.873900,-61.796400,-63.616700,45.038200,149.012400,151.209300,...,-135.000000,20.902977,95.956000,-63.068600,-64.640000,-71.797900,0.000000,24.684900,29.918900,-11.779889
tskey,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,...,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global,deaths_global
domain,global,global,global,global,global,global,global,global,global,global,...,global,global,global,global,global,global,global,global,global,global
datum,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,...,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths,deaths
population,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,...,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN
2020-01-22,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-23,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-24,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-25,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2020-01-26,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-03-28,4,10,29,3,0,0,18,1,0,8,...,0,1,0,0,0,0,0,0,0,0
2020-03-29,4,10,31,6,2,0,19,3,0,8,...,0,1,0,0,0,0,0,0,0,0
2020-03-30,4,11,35,8,2,0,23,3,1,8,...,0,1,0,0,0,0,0,0,0,0
2020-03-31,4,15,44,12,2,0,27,3,1,8,...,0,1,1,0,0,0,0,1,0,0


In [87]:
# scratch check: build (and display) a MultiIndex from `mindx_df`
# NOTE(review): relies on `mindx_df` still being defined by an earlier cell
pd.MultiIndex.from_frame(mindx_df)

MultiIndex([(  0,         'Afghanistan',                            nan, ...),
            (  1,             'Albania',                            nan, ...),
            (  2,             'Algeria',                            nan, ...),
            (  3,             'Andorra',                            nan, ...),
            (  4,              'Angola',                            nan, ...),
            (  5, 'Antigua and Barbuda',                            nan, ...),
            (  6,           'Argentina',                            nan, ...),
            (  7,             'Armenia',                            nan, ...),
            (  8,           'Australia', 'Australian Capital Territory', ...),
            (  9,           'Australia',              'New South Wales', ...),
            ...
            (246,              'Canada',                        'Yukon', ...),
            (247,              'Kosovo',                            nan, ...),
            (248,               'Bur

In [79]:
# scratch check: (rows, columns) of `mindx_df`
# NOTE(review): relies on `mindx_df` still being defined by an earlier cell
print(mindx_df.shape)

(256, 8)


In [80]:
# scratch check: show `mindx_df` with an extra all-NaN column `a`;
# the result is displayed only and `mindx_df` itself is not reassigned
mindx_df.assign(a=np.nan)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,locid,region,subregion,lat,long,tskey,domain,datum,a
locid,region,subregion,lat,long,tskey,domain,datum,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,Afghanistan,,33.000000,65.000000,confirmed_global,global,confirmed,0,Afghanistan,,33.000000,65.000000,confirmed_global,global,confirmed,
1,Albania,,41.153300,20.168300,confirmed_global,global,confirmed,1,Albania,,41.153300,20.168300,confirmed_global,global,confirmed,
2,Algeria,,28.033900,1.659600,confirmed_global,global,confirmed,2,Algeria,,28.033900,1.659600,confirmed_global,global,confirmed,
3,Andorra,,42.506300,1.521800,confirmed_global,global,confirmed,3,Andorra,,42.506300,1.521800,confirmed_global,global,confirmed,
4,Angola,,-11.202700,17.873900,confirmed_global,global,confirmed,4,Angola,,-11.202700,17.873900,confirmed_global,global,confirmed,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
251,United Kingdom,Turks and Caicos Islands,21.694000,-71.797900,confirmed_global,global,confirmed,251,United Kingdom,Turks and Caicos Islands,21.694000,-71.797900,confirmed_global,global,confirmed,
252,MS Zaandam,,0.000000,0.000000,confirmed_global,global,confirmed,252,MS Zaandam,,0.000000,0.000000,confirmed_global,global,confirmed,
253,Botswana,,-22.328500,24.684900,confirmed_global,global,confirmed,253,Botswana,,-22.328500,24.684900,confirmed_global,global,confirmed,
254,Burundi,,-3.373100,29.918900,confirmed_global,global,confirmed,254,Burundi,,-3.373100,29.918900,confirmed_global,global,confirmed,
