# Interactive Data Visualization - Week 03 - Experiments with maps

Working on the course project during Week 03. Data wrangling and visualization experiments that will be described in more detail in the learning diary that is this week's core deliverable.

Goal of these experiments is to create an interactive choropleth of COVID-19 data.

**Update: This notebook has been used for also implementing Week 04 experiments before porting them to the visualization Django app.**

## Experiments on choropleths from blog posts

For this to work, the World Bank development indicators data needs to be downloaded: https://www.kaggle.com/worldbank/world-development-indicators#Indicators.csv

In [2]:
# Source: https://medium.com/datadriveninvestor/visualising-geospatial-data-with-python-d3b1c519f31

# First, fetch the data.

import folium
import pandas as pd

country_geo = 'world-countries.json'

data = pd.read_csv('tmp/indicators.csv')

hist_indicator =  'Life expectancy at birth'
hist_year = 2013

mask1 = data['IndicatorName'].str.contains(hist_indicator) 
mask2 = data['Year'].isin([hist_year])

data = data[mask1 & mask2][['CountryCode','Value']]

FileNotFoundError: [Errno 2] File tmp/indicators.csv does not exist: 'tmp/indicators.csv'

In [3]:
# Second, do the mapping.

from IPython.display import HTML

map = folium.Map(location=[100, 0], zoom_start=1.5)

map.choropleth(
    geo_data=country_geo,
    data=data,
    columns=['CountryCode', 'Value'],
    key_on='feature.id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name=hist_indicator,
)

# We need to save the interactive map to HTML and read it back to the notebook
choropleth_filename = "choropleth.html"
map.save(choropleth_filename)
HTML('<iframe src="{}" width=700 height=450></iframe>'.format(choropleth_filename))

NameError: name 'data' is not defined

# Building a choropleth of COVID-19 prevalence

Using the experiments below, let's try to build a COVID-19 choropleth for the JHU data.

In [4]:
# Experiment with converting the country names to country codes
import pycountry

def rebase(country_name):
    country = pycountry.countries.get(name=country_name)
    return country.alpha_3

tmp = [
    ['Germany', 100],
    ['France', 200],
    ['Spain', 300],
]

data = pd.DataFrame(data=tmp, columns=['Country', 'Cases'])
data.Country = data.Country.apply(rebase)

data

Unnamed: 0,Country,Cases
0,DEU,100
1,FRA,200
2,ESP,300


In [6]:
# Imports and data setup

import folium
import pandas as pd
from IPython.display import HTML
import pycountry

country_geo = 'world-countries.json'

datasource = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
data = pd.read_csv(datasource)

data = data.drop(columns=["Lat", "Long"])  # clean out spatial location columns we don't need
# FIXME: Cleaning out countries with regional data; removes e.g. Canada and Australia
data = data[data['Province/State'].isna()].drop(columns=['Province/State'])

data = data.rename(columns={"Country/Region": "Country"})

# Convert the country names
def rebase(country_name):
    country = pycountry.countries.get(name=country_name)
    if not country:
        return 'N/A'  # FIXME
    return country.alpha_3

data.Country = data.Country.apply(rebase)

data


Unnamed: 0,Country,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,1/28/20,1/29/20,1/30/20,...,4/30/20,5/1/20,5/2/20,5/3/20,5/4/20,5/5/20,5/6/20,5/7/20,5/8/20,5/9/20
0,AFG,0,0,0,0,0,0,0,0,0,...,2171,2335,2469,2704,2894,3224,3392,3563,3778,4033
1,ALB,0,0,0,0,0,0,0,0,0,...,773,782,789,795,803,820,832,842,850,856
2,DZA,0,0,0,0,0,0,0,0,0,...,4006,4154,4295,4474,4648,4838,4997,5182,5369,5558
3,AND,0,0,0,0,0,0,0,0,0,...,745,745,747,748,750,751,751,752,752,754
4,AGO,0,0,0,0,0,0,0,0,0,...,27,30,35,35,35,36,36,36,43,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,ESH,0,0,0,0,0,0,0,0,0,...,6,6,6,6,6,6,6,6,6,6
262,STP,0,0,0,0,0,0,0,0,0,...,14,16,16,16,23,174,174,187,208,208
263,YEM,0,0,0,0,0,0,0,0,0,...,6,7,10,10,12,22,25,25,34,34
264,COM,0,0,0,0,0,0,0,0,0,...,1,1,3,3,3,3,8,8,8,11


In [5]:
# Render the map

import json
import requests

map = folium.Map(
    location=[45,0],
    zoom_start=2.0,
)

folium.Choropleth(
    geo_data=country_geo,
    data=data,
    columns=['Country', '4/19/20'],  # FIXME: Hardwired date!
    key_on='feature.id',
    fill_color='YlGnBu',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Cases of COVID-19",
).add_to(map)

folium.Marker(
    location=[51.50, 0],
    popup='<b>Greenwich</b>',
    icon=folium.Icon(icon='cloud')
).add_to(map)

url = 'https://raw.githubusercontent.com/python-visualization/folium/master/examples/data'
dataviz = json.loads(requests.get(f'{url}/vis2.json').text)

folium.Marker(
    location=[60.17, 20.94],
    popup=folium.Popup(max_width=450).add_child(
        folium.Vega(dataviz, width=450, height=250))
).add_to(map)

map


KeyError: '4/19/20'