### Generate regions GeoJSON

This notebook combines the admin1 boundaries with country-specific regions that can be matched against for datasets that work with sub-national regions. Not all data that works with regions match exactly to Natural Earth's Admin1 boundaries; we add in country-specific region data for cases where we need another breakdown of a country's regions. This GeoJSON will therefor contain overlapping polygons for regions that have both admin1 and country-specific region boundaries.

The features produced by this processing will contain the following columns:

- `name`: The name of the region.
- `name_en`: Optionally an alternate name in english, used for matching.
- `iso_a2`: the ISO Alpha 2 code of the country the region belongs to.

The region data is set up to match with both Google mobility data and JHU regions.


In [3]:
# Static, should not need to be run through the update
data_dir = '/opt/src/data'
region_data_dir = '/opt/src/data/regions'

In [4]:
import io
import os
import json
from collections import defaultdict

import numpy as np
import pandas as pd
import geopandas as gpd
import requests
from shapely.geometry import mapping
from shapely.algorithms.polylabel import polylabel
import pycountry
from shapely.ops import cascaded_union
from jenkspy import jenks_breaks

In [5]:
features = []

In [6]:
def add_feature(geom, iso_a2, name, name_en=None):
    features.append(
        {
            'type': 'Feature',
            'geometry': geom if type(geom) is dict else mapping(geom),
            'properties': {
                'iso_a2': iso_a2,
                'name': name,
                'name_en': name_en
            }
        }
    )
    


In [7]:
columns = [
    'name',
    'name_en',
    'iso_a2',
]

In [8]:
admin1_gdf = gpd.read_file(os.path.join(region_data_dir,'admin1.geojson'))


In [9]:
for _, row in admin1_gdf.iterrows():
    add_feature(row['geometry'], 
                row['iso_a2'], 
                row['name'], 
                row['name_en'])

In [10]:
# Spain
spain_gdf = gpd.read_file(os.path.join(region_data_dir, 'spain-comunidad.json'))

for _, row in spain_gdf.iterrows():
    add_feature(row['geometry'], 
                'ES', 
                row['NAME_1'])
    
# Spain - Canary Islands

canary_islands_gdf = gpd.read_file(os.path.join(region_data_dir, 'canary-islands-province.json'))

for _, row in canary_islands_gdf.iterrows():
    add_feature(row['geometry'], 
                'ES', 
                'Canary Islands')
    

In [12]:
# Italy
italy_gdf = gpd.read_file(os.path.join(region_data_dir, 'italy_regions.geojson'))

for _, row in italy_gdf.iterrows():
    add_feature(row['geometry'], 
                'IT', 
                row['reg_name'])

In [13]:
# Belgium
be_regions = {
    'Flanders': ['East Flanders', 'West Flanders', 
                 'Limburg', 'Antwerp', 'Flemish Brabant'],
    'Wallonia': ['Hainaut', 'Liege', 'Luxembourg',
                 'Namur','Walloon Brabant' ]
}

for r in be_regions:
    add_feature(cascaded_union(admin1_gdf[
                    (admin1_gdf['iso_a2'] == 'BE') & 
                    (admin1_gdf['name'].isin(be_regions[r]))
                ]['geometry']),
                'BE',
                r)

In [14]:
# France
france_gdf = gpd.read_file(os.path.join(region_data_dir, 'france-regions.geojson'))

for _, row in france_gdf.iterrows():
    add_feature(row['geometry'], 
                'FR', 
                row['nom'])


In [15]:
# Great Britian

gb_regions = {
    'Mid And East Antrim': ['Ballymena', 'Larne', 'Carrickfergus'],
    'Lisburn and Castlereagh': ['Lisburn', 'Castlereagh'],
    'Fermanagh And Omagh': ['Fermanagh','Omagh'],
    'Derry And Strabane': ['Derry','Strabane'],
    'Causeway Coast and Glens': ['Ballymoney','Coleraine', 'Limavady', 'Moyle'],
    'Armagh City, Banbridge And Craigavon':['Armagh', 'Banbridge', 'Craigavon'],
    'Ards And North Down': ['Ards','North Down'],
    'Antrim And Newtownabbey': ['Antrim','Newtownabbey']
}

counties_accounted_for = set([])
gb_admin1 = set(admin1_gdf[admin1_gdf['iso_a2'] == 'GB']['name'])
for path in os.listdir(os.path.join(data_dir, 'gb-counties')):
    with open(os.path.join(data_dir, 'gb-counties', path)) as f:
        feat = json.loads(f.read())
        name = feat['properties']['name']
        if not name in gb_admin1:
            feat['properties'] = {
                'name': name,
                'name_en': None,
                'iso_a2':  'GB'
            }
            features.append(feat)
        
for r in gb_regions:
    add_feature(cascaded_union(admin1_gdf[
                    (admin1_gdf['iso_a2'] == 'GB') & 
                    (admin1_gdf['name'].isin(gb_regions[r]))
                ]['geometry']),
                'GB',
                r)

FileNotFoundError: [Errno 2] No such file or directory: '/opt/src/data/gb-counties'

In [16]:
# Greece

gr_regions = {
    'Decentralized Administration of Epirus and Western Macedonia': [
        'Ipeiros', 'Dytiki Makedonia'
    ],
    'Decentralized Administration of Macedonia and Thrace': [
        'Kentriki Makedonia', 'Anatoliki Makedonia kai Thraki'
    ],
    'Decentralized Administration of Peloponnese, Western Greece and the Ionian': [
        'Peloponnisos', 'Dytiki Ellada', 'Ionioi Nisoi'
    ],
    'Decentralized Administration of the Aegean': [
        'Notio Aigaio', 'Voreio Aigaio'
    ],
    'Decentralized Administration of Thessaly and Central Greece': [
        'Thessalia', 'Stereá Elláda'
    ]
}

for r in gr_regions:
    add_feature(cascaded_union(admin1_gdf[
                    (admin1_gdf['iso_a2'] == 'GR') & 
                    (admin1_gdf['name'].isin(gr_regions[r]))
                ]['geometry']),
                'GR',
                r)

In [17]:
# Kenya 
kenya_gdf = gpd.read_file(os.path.join(region_data_dir, 'kenya-counties.geojson'))

for _, row in kenya_gdf.iterrows():
    if not row['COUNTY_NAM']:
        continue
    name = row['COUNTY_NAM'].capitalize() \
            .replace(' - ', '-') \
            .replace(' ', '-') + ' County'
    add_feature(row['geometry'], 
                'KE', 
                name)
                                                                   

In [18]:
# Norway

no_gdf = gpd.read_file(os.path.join(region_data_dir, 'norway-counties.geojson'))
no_admin1 = set(admin1_gdf[admin1_gdf['iso_a2'] == 'NO']['name'])

for _, row in no_gdf.iterrows():
    name = row['navn']
    if not name in no_admin1:
        add_feature(row['geometry'], 
                    'NO', 
                    name)


In [19]:
features = sorted(features, key=lambda f: (
    f['properties']['iso_a2'], 
    'x' if f['properties']['name'] is None else f['properties']['name']
))
for i, f in enumerate(features):
    f['properties']['id'] = i
                  

In [20]:
fc = {
    'type': 'FeatureCollection',
    'features': features
}

In [21]:
with open(os.path.join(data_dir, 'published/regions.geojson'), 'w') as f:
    f.write(json.dumps(fc))