# Interactive Data Visualization - Week 03 - Experiments with maps

Working on the course project during Week 03. Data wrangling and visualization experiments that will be described in more detail in the learning diary that is this week's core deliverable.

Goal of these experiments is to create an interactive choropleth of COVID-19 data.

## Experiments on choropleths from blog posts

For this to work, the World Bank development indicators data needs to be downloaded: https://www.kaggle.com/worldbank/world-development-indicators#Indicators.csv

In [43]:
# Source: https://medium.com/datadriveninvestor/visualising-geospatial-data-with-python-d3b1c519f31

# First, fetch the data.

import folium
import pandas as pd

country_geo = 'world-countries.json'

data = pd.read_csv('tmp/indicators.csv')

hist_indicator =  'Life expectancy at birth'
hist_year = 2013

mask1 = data['IndicatorName'].str.contains(hist_indicator) 
mask2 = data['Year'].isin([hist_year])

data = data[mask1 & mask2][['CountryCode','Value']]

Unnamed: 0,CountryCode,Value
5377669,ARB,72.536117
5377670,ARB,68.848383
5377671,ARB,70.631305
5378129,CSS,74.757382
5378130,CSS,69.183365
...,...,...
5533190,ZMB,57.592000
5533191,ZMB,59.237366
5533979,ZWE,56.872000
5533980,ZWE,54.453000


In [25]:
# Second, do the mapping.

from IPython.display import HTML

map = folium.Map(location=[100, 0], zoom_start=1.5)

map.choropleth(
    geo_data=country_geo,
    data=data,
    columns=['CountryCode', 'Value'],
    key_on='feature.id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name=hist_indicator,
)

# We need to save the interactive map to HTML and read it back to the notebook
choropleth_filename = "choropleth.html"
map.save(choropleth_filename)
HTML('<iframe src="{}" width=700 height=450></iframe>'.format(choropleth_filename))

# Building a choropleth of COVID-19 prevalence

Using the experiments below, let's try to build a COVID-19 choropleth for the JHU data.

In [91]:
# Experiment with converting the country names to country codes
import pycountry

def rebase(country_name):
    country = pycountry.countries.get(name=country_name)
    return country.alpha_3

tmp = [
    ['Germany', 100],
    ['France', 200],
    ['Spain', 300],
]

data = pd.DataFrame(data=tmp, columns=['Country', 'Cases'])
data.Country = data.Country.apply(rebase)

data

Unnamed: 0,Country,Cases
0,DEU,100
1,FRA,200
2,ESP,300


In [96]:
# Imports and data setup

import folium
import pandas as pd
from IPython.display import HTML
import pycountry

country_geo = 'world-countries.json'

datasource = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
data = pd.read_csv(datasource)

data = data.drop(columns=["Lat", "Long"])  # clean out spatial location columns we don't need
# FIXME: Cleaning out countries with regional data; removes e.g. Canada and Australia
data = data[data['Province/State'].isna()].drop(columns=['Province/State'])

data = data.rename(columns={"Country/Region": "Country"})

# Convert the country names
def rebase(country_name):
    country = pycountry.countries.get(name=country_name)
    if not country:
        return 'N/A'  # FIXME
    return country.alpha_3

data.Country = data.Country.apply(rebase)



data


Unnamed: 0,Country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,4/10/20,4/11/20,4/12/20,4/13/20,4/14/20,4/15/20,4/16/20,4/17/20,4/18/20,4/19/20
0,AFG,0,0,0,0,0,0,0,0,0,...,521,555,607,665,714,784,840,906,933,996
1,ALB,0,0,0,0,0,0,0,0,0,...,416,433,446,467,475,494,518,539,548,562
2,DZA,0,0,0,0,0,0,0,0,0,...,1761,1825,1914,1983,2070,2160,2268,2418,2534,2629
3,AND,0,0,0,0,0,0,0,0,0,...,601,601,638,646,659,673,673,696,704,713
4,AGO,0,0,0,0,0,0,0,0,0,...,19,19,19,19,19,19,19,19,24,24
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
257,MWI,0,0,0,0,0,0,0,0,0,...,9,12,13,16,16,16,16,17,17,17
260,SSD,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4
261,ESH,0,0,0,0,0,0,0,0,0,...,4,4,6,6,6,6,6,6,6,6
262,STP,0,0,0,0,0,0,0,0,0,...,4,4,4,4,4,4,4,4,4,4


In [99]:
# Render the map


map = folium.Map() #location=[100, 0], zoom_start=1.5)

map.choropleth(
    geo_data=country_geo,
    data=data,
    columns=['Country', '4/19/20'],  # FIXME: Hardwired date!
    key_on='feature.id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Cases of COVID-19",
)

# We need to save the interactive map to HTML and read it back to the notebook
choropleth_filename = "choropleth.html"
map.save(choropleth_filename)
HTML('<iframe src="{}" width=700 height=450></iframe>'.format(choropleth_filename))

