In [1]:
import os
import json 

import pandas as pd
import requests

In [2]:
# Load the results table; later cells read its `strat_name_id` column.
RESULTS_CSV = '../output/results.csv'
data = pd.read_csv(RESULTS_CSV)

Some entries list more than one `strat_name_id`, separated by tildes (`~`); the cell below splits them and counts how often each id occurs.

In [3]:
# Tally how often each stratigraphic name id occurs in the results.
# `strat_ids` maps id (str) -> {'occurrence': count}.
strat_ids = {}

# dropna(): a missing value is read by pandas as float NaN, which has no
# .split() and would otherwise abort the loop with an AttributeError.
for raw_ids in data['strat_name_id'].dropna():
    # One entry may hold several ids joined by '~'.
    # The loop variable is not called `id`, so the builtin stays usable in
    # every later cell of the notebook.
    for strat_id in raw_ids.split('~'):
        record = strat_ids.setdefault(strat_id, {'occurrence': 0})
        record['occurrence'] += 1

print("Seen {0} distinct stratigraphic names".format(len(strat_ids)))

Seen 1558 distinct stratigraphic names


Collect the associated geodata for the Macrostrat units from the API. The cell checks for the cached file first (it is included in the repository) and only queries the API when that file is missing.

In [4]:
# https://macrostrat.org/api/units?strat_name_id=8011&format=geojson

def macrostrat_geo(timeout=30):
    """Fetch the GeoJSON features for every id in ``strat_ids`` from Macrostrat.

    Sends one request per key of the module-level ``strat_ids`` dict and
    stores the returned feature list under that record's ``'features'`` key.
    ``strat_ids`` is modified in place and nothing is returned. This is only
    needed if the JSON data is not already cached with the notebook.

    Ids whose request fails, or whose response does not have the expected
    ``success -> data -> features`` layout, are printed and given an empty
    feature list, so a single bad id no longer aborts the whole harvest.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for each HTTP response before giving up on that id.
    """
    MACROSTRAT = "https://macrostrat.org/api/units?strat_name_id={0}&format=geojson"

    for strat_id, record in strat_ids.items():
        features = []
        try:
            response = requests.get(MACROSTRAT.format(strat_id), timeout=timeout)
            # requests only raises HTTPError when asked to via raise_for_status().
            response.raise_for_status()
            features = response.json()['success']['data']['features']
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
            # RequestException: network or HTTP failure.
            # ValueError: the body is not valid JSON.
            # KeyError / TypeError: JSON without the expected layout.
            print(strat_id)
        # Assigning to an existing record does not change the dict's keys, so
        # it is safe while iterating over items().
        record['features'] = features
    

# Location of the cached Macrostrat features; when the file exists it is
# loaded instead of querying the API again.
FEATURE_DATA = '../output/macrostrat_features.json'
if not os.path.isfile(FEATURE_DATA):
    # strat_name_id '0' contains too many results to be meaningful, so drop it
    # before fetching instead of requesting it and discarding the answer.
    # pop() with a default does not fail when the key is absent.
    strat_ids.pop('0', None)
    macrostrat_geo()
    # Write to the path held in FEATURE_DATA (not to a file literally named
    # 'FEATURE_DATA'), otherwise the isfile() check above never succeeds.
    with open(FEATURE_DATA, 'w') as output:
        json.dump(strat_ids, output)
else:
    # The context manager closes the file handle after loading.
    with open(FEATURE_DATA) as cached:
        strat_ids = json.load(cached)

Use ipyleaflet to do a quick web map visualisation; see the [installation notes](https://ipyleaflet.readthedocs.io/en/latest/installation.html#using-pip).

We see a very limited geographic coverage for our result set. What's the cause? 
 * A side-effect of our sub-set of publications? Or studies overfocused on particular areas? We should do the complete DeepDive.
 * Limits in the stratigraphic units available via Macrostrat? 
 * Limits in the ability of GeoDeepDive to georeference marine features?
 

In [11]:
from ipyleaflet import Map, Marker, GeoJSON

# Longitude must lie in [-180, 180]; the previous value 360.121558 denotes the
# same meridian as 0.121558.
center = (52.204793, 0.121558)
m = Map(center=center, zoom=0)

# Shared outline/fill styling for every stratigraphic unit layer.
UNIT_STYLE = {'color': 'green', 'opacity': 1, 'weight': 1.9, 'dashArray': '9', 'fillOpacity': 0.1}

# The loop variables are deliberately not named `id` / `data`: at cell level
# they are globals, and `data` is the results DataFrame loaded at the top of
# the notebook, so reusing the name would break re-running the earlier cells.
for strat_id, record in strat_ids.items():
    features = record.get('features')
    if not features:
        # Nothing to draw for this id; skip the empty layer.
        continue
    # Pass a well-formed FeatureCollection rather than the whole record, which
    # also carries the 'occurrence' counter and has no GeoJSON 'type'.
    collection = {'type': 'FeatureCollection', 'features': features}
    m.add_layer(GeoJSON(data=collection, style=dict(UNIT_STYLE)))

display(m)
    

Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_zoom': 19, 'attribution': 'Map …