# Compare geocoding results with curated geopositions
- Uses the BioTIME database and the mordecai geoparser
- Calculate the geodesic distance between computed geoposition and curated longitude, latitude coordinates

Make sure that edited Python helper scripts are reloaded automatically

In [59]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


Set up mordecai's geoparser

In [60]:
# NOTE(review): mordecai's Geoparser presumably depends on external resources
# (gazetteer index, language model) being available — confirm the setup
# requirements before running on a fresh machine.
from mordecai import Geoparser
# One parser instance, reused by every geocoding cell further down.
geo = Geoparser()

Load examples from BioTIME via a Python helper

In [61]:
import biotime_examples

Show all examples

In [62]:
# The keys are the study identifiers for which the helper module provides a
# sample-site description text.
biotime_examples.biotime_sample_sites.keys()

dict_keys(['Anderson et al. - 2011', 'Farah et al. - 2014', 'Holmes et al. - 1986', 'Rocha et al. - 2017', 'Thomsen et al. - 2016', 'Woods - 2009'])

Prepare a text for geocoding

In [63]:
# Pick the site description of one study; `text` is the input of the
# geocoding and country-inference cells that follow.
text = biotime_examples.biotime_sample_sites['Anderson et al. - 2011']
# Bare expression so the notebook displays the selected description.
text

'This historical data set consists of 44 permanent 1-m 2 quadrats located on northern mixed prairie in Miles City, Montana, USA.'

Geocode the entire text and show the resulting list of locations


In [64]:
# As the output shows, geoparse returns one dict per place name found in the
# text: the matched word, its character spans, the predicted country with a
# confidence score, and the resolved entry under 'geo' (lat/lon as strings,
# geonameid, place name, feature class/code).
geocoding_res = geo.geoparse(text)
# Display the full result list.
geocoding_res

[{'word': 'Miles City',
  'spans': [{'start': 102, 'end': 112}],
  'country_predicted': 'USA',
  'country_conf': 0.999931,
  'geo': {'admin1': 'Montana',
   'lat': '46.40834',
   'lon': '-105.84056',
   'country_code3': 'USA',
   'geonameid': '5666176',
   'place_name': 'Miles City',
   'feature_class': 'P',
   'feature_code': 'PPLA2'}},
 {'word': 'Montana',
  'spans': [{'start': 114, 'end': 121}],
  'country_predicted': 'USA',
  'country_conf': 0.9991698,
  'geo': {'admin1': 'Montana',
   'lat': '47.00025',
   'lon': '-109.75102',
   'country_code3': 'USA',
   'geonameid': '5667009',
   'place_name': 'Montana',
   'feature_class': 'A',
   'feature_code': 'ADM1'}},
 {'word': 'USA',
  'spans': [{'start': 123, 'end': 126}],
  'country_predicted': 'USA',
  'country_conf': 0.999931,
  'geo': {'admin1': 'California',
   'lat': '34.00474',
   'lon': '-117.33588',
   'country_code3': 'USA',
   'geonameid': '7195491',
   'place_name': 'Sgi-Usa Riverside Community Center',
   'feature_class': '

Infer the country of the sampling site

In [65]:
# Country inference only: per the output, every detected mention gets a
# 'country_predicted' code and a 'country_conf' score, together with the
# features behind the prediction. The result is displayed, not stored.
geo.infer_country(text)

[{'text': 'Miles City',
  'label': 'United States',
  'word': 'Miles City',
  'spans': [{'start': 102, 'end': 112}],
  'features': {'maj_vote': 'USA',
   'word_vec': 'USA',
   'first_back': 'USA',
   'most_alt': 'USA',
   'most_pop': 'USA',
   'ct_mention': 'USA',
   'ctm_count1': 2,
   'ct_mention2': '',
   'ctm_count2': 0,
   'wv_confid': 17.08477783203125,
   'class_mention': 'P',
   'code_mention': ''},
  'country_predicted': 'USA',
  'country_conf': 0.999931,
  'all_countries': array(['USA'], dtype='<U3'),
  'all_confidence': array([0.999931], dtype=float32)},
 {'text': 'Montana',
  'label': 'United States',
  'word': 'Montana',
  'spans': [{'start': 114, 'end': 121}],
  'features': {'maj_vote': 'USA',
   'word_vec': 'USA',
   'first_back': 'ESP',
   'most_alt': 'USA',
   'most_pop': 'USA',
   'ct_mention': 'USA',
   'ctm_count1': 2,
   'ct_mention2': '',
   'ctm_count2': 0,
   'wv_confid': 46.95623016357422,
   'class_mention': '',
   'code_mention': ''},
  'country_predicted': '

Geocoding for another study

In [66]:
# Look the study text up through the imported helper module. The visible cells
# only import `biotime_examples`, so a bare `biotime_sample_sites` name raises
# NameError on a fresh kernel.
computed_geolocation_rocha = geo.geoparse(
    biotime_examples.biotime_sample_sites["Rocha et al. - 2017"]
)
# Display the locations mordecai found for this study.
computed_geolocation_rocha

[{'word': 'Manaus',
  'spans': [{'start': 62, 'end': 68}],
  'country_predicted': 'BRA',
  'country_conf': 0.993103,
  'geo': {'admin1': 'Amazonas',
   'lat': '-3.04361',
   'lon': '-60.01282',
   'country_code3': 'BRA',
   'geonameid': '6319328',
   'place_name': 'Manaus',
   'feature_class': 'A',
   'feature_code': 'ADM2'}},
 {'word': 'Brazil',
  'spans': [{'start': 87, 'end': 93}],
  'country_predicted': 'BRA',
  'country_conf': 0.9998105,
  'geo': {'admin1': 'NA',
   'lat': '-10',
   'lon': '-55',
   'country_code3': 'BRA',
   'geonameid': '3469034',
   'place_name': 'Federative Republic of Brazil',
   'feature_class': 'A',
   'feature_code': 'PCLI'}}]

Extract the GPS location from the geocoding result

In [67]:
# extract_geolocation comes from the `utils` module. For this input it yields
# a (latitude, longitude) pair of floats equal to the coordinates of the first
# geoparse hit ('Manaus'); how the entry is selected is defined in utils and
# not visible here. The pair is only displayed by this cell, not bound to a name.
from utils import extract_geolocation
extract_geolocation(computed_geolocation_rocha)

(-3.04361, -60.01282)

Extract the curated GPS coordinates from the BioTIME data


In [68]:
# Curated reference coordinates of the same study, as provided by the
# biotime_examples helper; unpacked into latitude and longitude.
actual_latitude, actual_longitude = biotime_examples.geocoordinates_from_study("Rocha et al. - 2017")
# Echo the pair so it appears in the cell output.
actual_latitude, actual_longitude

(-2.386381, -59.918769)

Calculate the distance between the extracted and the actual geolocation

In [69]:
import utils

# Bind the computed coordinates explicitly. No visible cell assigns
# computed_latitude / computed_longitude, so without this line the distance
# call raises NameError after Restart & Run All.
computed_latitude, computed_longitude = utils.extract_geolocation(computed_geolocation_rocha)

# Distance between the geocoded position and the curated BioTIME position.
# NOTE(review): the displayed value (~73.4e3 for points about 0.66 degrees of
# latitude apart) suggests the unit is metres — confirm in utils.geodesic_distance.
utils.geodesic_distance(
    (computed_latitude, computed_longitude),
    (actual_latitude, actual_longitude),
)

73422.87288501344