## Generate points for the cases tab

This notebook generates the GeoJSON of points shown on the Cases tab of the dashboard. The point locations are taken from the JHU confirmed-cases data. It also produces some intermediate outputs, such as mappings from location codes to feature IDs, for use in the case data processing notebook that runs as part of the data update pipeline.

In [None]:
import json
import io
import os
from datetime import datetime

import requests
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, shape, mapping
from slugify import slugify

In [None]:
# Root directory that every input and output path in this notebook is joined onto.
data_dir = '/opt/src/data'

In [None]:
def get_code(admin0, admin1=None, admin2=None):
    """Return the slugified code identifying a location.

    The label passed to ``slugify`` lists the finest level first, i.e.
    "admin2 admin1 admin0", leaving out any level that is None.
    """
    label = admin0
    # Prepend each finer level that is present, coarsest of the two first,
    # so the finest one ends up at the front of the label.
    for finer_level in (admin1, admin2):
        if finer_level is not None:
            label = "{} {}".format(finer_level, label)
    return slugify(label)

def fetch_df(url):
    """Fetch a CSV from a remote source and parse it into a pandas DataFrame.

    Args:
        url: Address of the CSV file to download.

    Returns:
        The DataFrame parsed from the response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    r = requests.get(url)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # CSV parser and carrying on with garbage data.
    r.raise_for_status()
    return pd.read_csv(io.BytesIO(r.content))

In [None]:
# Global confirmed-cases time series from the JHU CSSE COVID-19 repository.
# The cells below read its Country/Region, Province/State, Lat and Long columns.
cases_df = fetch_df('https://github.com/CSSEGISandData/COVID-19/raw/master/'
                    'csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')

In [None]:
# Rows for these Province_State values are kept by filter_us regardless of
# their Lat / FIPS values.
us_territories = [
    'American Samoa',
    'Guam',
    'Northern Mariana Islands',
    'Puerto Rico',
    'Virgin Islands'
]

def filter_us(df):
    """Return the rows of the US cases frame that should become points.

    A row is kept when its last column is non-zero (presumably the most
    recent date's count) and it is either a US territory or has a non-null,
    non-zero ``Lat`` together with a non-null ``FIPS``.
    """
    latest_counts = df.iloc[:, -1]
    reporting = df[latest_counts != 0]

    is_territory = reporting['Province_State'].isin(us_territories)
    has_location = (
        reporting['Lat'].notnull() &
        (reporting['Lat'] != 0.0) &
        reporting['FIPS'].notnull()
    )

    return reporting[is_territory | has_location]

# Download the US confirmed-cases time series and keep only the rows that
# filter_us accepts.
us_cases_df = filter_us(
    fetch_df(
        'https://github.com/CSSEGISandData/COVID-19/raw/master/'
        'csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv'
    )
)

In [None]:
# Country geometries used to place each case point in a country. The cells
# below read the geometry, NAME, ADMIN, ADM0_A3 and ISO_A2 columns.
countries_gdf = gpd.read_file(os.path.join(data_dir, 'published/countries.geojson'))

In [None]:
# Outputs accumulated by the loops in this notebook and written out at the end.
country_data = {}       # code -> {'name', 'alpha3', 'alpha2', 'bounds'}
case_features = []      # GeoJSON point features
codes_to_id = {}        # code -> feature id
codes_to_alpha2 = {}    # code -> ISO_A2 of the matched country

# Next feature id to hand out; shared by the global loop and the US loop.
current_id = 0

def add_country_data(country, code=None):
    """Record a country's name, ISO codes and bounding box in ``country_data``.

    Args:
        country: A row of ``countries_gdf``; its ``geometry``, ``ADMIN``,
            ``ADM0_A3`` and ``ISO_A2`` fields are read.
        code: Key to store the entry under. When omitted, the slug of the
            country's ``ADMIN`` name is used.

    Raises:
        Exception: If ``ADM0_A3`` or ``ISO_A2`` is None or a float (NaN).
    """
    # The key is taken explicitly rather than read from a module-level
    # ``code``: that variable holds whatever location a loop processed last,
    # so a call made outside the loop would file the entry under a stale key.
    if code is None:
        code = get_code(country['ADMIN'])

    bounds = country['geometry'].bounds
    bounds = [[bounds[0], bounds[1]], [bounds[2], bounds[3]]]

    # A float in these string columns is treated as a missing value (NaN).
    if country['ADM0_A3'] is None or type(country['ADM0_A3']) is float:
        raise Exception('ADM0_A3 is None or nan for {}'.format(country['ADMIN']))

    if country['ISO_A2'] is None or type(country['ISO_A2']) is float:
        raise Exception('ISO_A2 is None or nan for {}'.format(country['ADMIN']))

    country_data[code] = {
        'name':  country['ADMIN'],
        'alpha3': country['ADM0_A3'],
        'alpha2': country['ISO_A2'],
        'bounds': bounds
    }

for _, row in cases_df.sort_values(by=['Country/Region', 'Province/State']).iterrows():
    name = row['Country/Region']
    # A float Province/State is treated as missing (no region for this row).
    region_name = None if type(row['Province/State']) is float else row['Province/State']
    if name == 'Congo (Brazzaville)':
        # Fix the lat/lng of Congo, which is the same location as
        # the Democratic Republic of Congo in the JHU data.
        lat, lon = -1.402385, 15.405892
    else:
        lat, lon = row['Lat'], row['Long']

    pt = Point(lon, lat)

    # Rows located at exactly (0, 0) get no point.
    if (lon, lat) == (0, 0):
        print('Skipping {}'.format(get_code(name, region_name)))
        continue

    # First try to find the country whose geometry contains the point;
    # if none does, fall back to matching on the country name.
    matching_countries = countries_gdf[countries_gdf['geometry'].contains(pt)]
    if len(matching_countries) < 1:
        if name == 'Saint Vincent and the Grenadines':
            # countries_gdf lists this country under an abbreviated NAME.
            fallback_name = 'St. Vin. and Gren.'
        else:
            fallback_name = name
        matching_countries = countries_gdf[countries_gdf['NAME'] == fallback_name]

    if len(matching_countries) < 1:
        print(row['Country/Region'])
        print('  Not found: {}'.format(pt))
        continue

    country = matching_countries.iloc[0]
    code = get_code(name, region_name)

    point_id = current_id
    current_id += 1

    if region_name is not None:
        display_name = '{}, {}'.format(region_name, country['ADMIN'])
    else:
        display_name = country['ADMIN']

    case_features.append({
        'id': point_id,
        'type': 'Feature',
        'geometry': mapping(pt),
        'properties': {
            'displayName': display_name,
            'code': code,
            'id': point_id
        }
    })

    codes_to_id[code] = point_id
    codes_to_alpha2[code] = country['ISO_A2']

    # Process countries: only country-level rows (no region) add a
    # country_data entry, keyed by this row's code.
    if region_name is None:
        add_country_data(country, code)

# When True, county-level rows still get an id and a code mapping below,
# but no point feature is emitted for them.
SKIP_COUNTIES = True

for _, row in us_cases_df.sort_values(by=['Country_Region', 'Province_State', 'Admin2']).iterrows():
    region_name = row['Province_State']
    # A float Admin2 is treated as missing, i.e. a state/territory-level row.
    county_name = None if type(row['Admin2']) is float else row['Admin2']

    if county_name is not None and (
        county_name.startswith('Out of') or
        county_name == 'Unassigned'):
        print('Skipping {}, {}'.format(county_name, region_name))
        continue

    lat, lon = row['Lat'], row['Long_']

    pt = Point(lon, lat)

    # Rows located at exactly (0, 0) get neither an id nor a point.
    if (lon, lat) == (0, 0):
        print('Skipping {}'.format(get_code('US', region_name, county_name)))
        continue

    code = get_code('US', admin1=region_name, admin2=county_name)

    # The id is allocated before the SKIP_COUNTIES check, so skipped
    # counties still consume an id and appear in codes_to_id.
    point_id = current_id
    current_id += 1

    if county_name is not None:
        display_name = '{}, {}, US'.format(county_name, region_name)
    else:
        display_name = '{}, US'.format(region_name)

    if not (SKIP_COUNTIES and county_name is not None):
        case_features.append({
            'id': point_id,
            'type': 'Feature',
            'geometry': mapping(pt),
            'properties': {
                'displayName': display_name,
                'code': code,
                'id': point_id
            }
        })

    codes_to_id[code] = point_id
    codes_to_alpha2[code] = 'US'

Add country information for countries that appear only as regions (provinces/states) in the JHU data and therefore have no country-level row of their own.

In [None]:
for _, row in countries_gdf[countries_gdf['ADMIN'].isin(['Australia', 'Canada', 'China'])].iterrows():
    # Bind `code` to this country's own slug first: the loops above leave
    # `code` set to the last location they processed, and the country entry
    # must not be stored under that leftover key.
    code = get_code(row['ADMIN'])
    add_country_data(row)
    codes_to_alpha2[code] = row['ISO_A2']
    

In [None]:
# Write every collected point feature as a single GeoJSON FeatureCollection.
with open(os.path.join(data_dir, 'published/case-points.geojson'), 'w') as f:
    json.dump(
        {'type': 'FeatureCollection', 'features': case_features},
        f,
        sort_keys=True
    )

In [None]:
# Write the code -> feature id mapping.
# NOTE(review): the file name is misspelled ("intermidiate"); it is kept as-is
# because other notebooks presumably read it by this exact name — confirm
# before renaming.
with open(os.path.join(data_dir, 'case-codes-to-ids-intermidiate.json'), 'w') as f:
    json.dump(codes_to_id, f, sort_keys=True)

In [None]:
# Write the per-country configuration (name, alpha3, alpha2, bounds) keyed by code.
with open(os.path.join(data_dir, 'case-country-config.json'), 'w') as f:
    json.dump(country_data, f, sort_keys=True)

In [None]:
# Write the code -> ISO alpha-2 country code mapping.
with open(os.path.join(data_dir, 'case-codes-to-alpha2.json'), 'w') as f:
    json.dump(codes_to_alpha2, f, sort_keys=True)