# Getting started with geoparsing libraries



__Imports__

In [1]:
try:
    import geopandas
except:
    !pip install geopandas


In [2]:
try:
    import geotext
except:
    !pip install geotext


In [3]:
try:
    import geopy
except:
    !pip install geopy


In [4]:
try:
    import descartes
except:
    !pip install descartes


In [5]:
try:
    import geonamescache
except:
    !pip install geonamescache

Collecting geonamescache
  Downloading geonamescache-1.2.0-py3-none-any.whl (2.6 MB)
[K     |████████████████████████████████| 2.6 MB 1.1 MB/s eta 0:00:01
[?25hInstalling collected packages: geonamescache
Successfully installed geonamescache-1.2.0
You should consider upgrading via the '/Users/eva/anaconda3/bin/python -m pip install --upgrade pip' command.[0m


In [6]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

import pandas as pd
import geopandas as gpd

from urllib import request
from geotext import GeoText

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut

from shapely.geometry import Point, Polygon
import descartes

import geonamescache

__GeoNamesCache__

In [7]:
## test geonamescache ##

gc = geonamescache.GeonamesCache()

# town
paris = gc.get_cities_by_name("Paris")
print(paris) 

[{'2988507': {'geonameid': 2988507, 'name': 'Paris', 'latitude': 48.85341, 'longitude': 2.3488, 'countrycode': 'FR', 'population': 2138551, 'timezone': 'Europe/Paris', 'admin1code': '11', 'alternatenames': ['Baariis', 'Bahliz', 'Ile-de-France', 'Lungsod ng Paris', 'Lutece', 'Lutetia', 'Lutetia Parisorum', 'Lutèce', 'PAR', 'Pa-ri', 'Paarys', 'Palika', 'Paname', 'Pantruche', 'Paraeis', 'Paras', 'Pari', 'Paries', 'Parigge', 'Pariggi', 'Parighji', 'Parigi', 'Pariis', 'Pariisi', 'Pariizu', 'Pariižu', 'Parij', 'Parijs', 'Paris', 'Parisi', 'Parixe', 'Pariz', 'Parize', 'Parizh', 'Parizh osh', "Parizh'", 'Parizo', 'Parizs', 'Pariž', 'Parys', 'Paryz', 'Paryzh', 'Paryzius', 'Paryż', 'Paryžius', 'Paräis', 'París', 'Paríž', 'Parîs', 'Parĩ', 'Parī', 'Parīze', 'Paříž', 'Páras', 'Párizs', 'Ville-Lumiere', 'Ville-Lumière', 'ba li', 'barys', 'pairisa', 'pali', 'pari', 'paris', 'parys', 'paryzh', 'perisa', 'pryz', 'pyaris', 'pyarisa', 'pyrs', 'Île-de-France', 'Παρίσι', 'Париж', 'Париж ош', 'Парижь', 'Пар

Provides 2 possibilities: Paris, France and Paris, US ! Idea : by default retrieve only the first possibility ?

In [12]:
#print countries / dictionary
countries = gc.get_countries()

print(countries.keys())
print(countries['GG'])
print(countries['AE'])

dict_keys(['AD', 'AE', 'AF', 'AG', 'AI', 'AL', 'AM', 'AO', 'AQ', 'AR', 'AS', 'AT', 'AU', 'AW', 'AX', 'AZ', 'BA', 'BB', 'BD', 'BE', 'BF', 'BG', 'BH', 'BI', 'BJ', 'BL', 'BM', 'BN', 'BO', 'BQ', 'BR', 'BS', 'BT', 'BV', 'BW', 'BY', 'BZ', 'CA', 'CC', 'CD', 'CF', 'CG', 'CH', 'CI', 'CK', 'CL', 'CM', 'CN', 'CO', 'CR', 'CU', 'CV', 'CW', 'CX', 'CY', 'CZ', 'DE', 'DJ', 'DK', 'DM', 'DO', 'DZ', 'EC', 'EE', 'EG', 'EH', 'ER', 'ES', 'ET', 'FI', 'FJ', 'FK', 'FM', 'FO', 'FR', 'GA', 'GB', 'GD', 'GE', 'GF', 'GG', 'GH', 'GI', 'GL', 'GM', 'GN', 'GP', 'GQ', 'GR', 'GS', 'GT', 'GU', 'GW', 'GY', 'HK', 'HM', 'HN', 'HR', 'HT', 'HU', 'ID', 'IE', 'IL', 'IM', 'IN', 'IO', 'IQ', 'IR', 'IS', 'IT', 'JE', 'JM', 'JO', 'JP', 'KE', 'KG', 'KH', 'KI', 'KM', 'KN', 'KP', 'KR', 'XK', 'KW', 'KY', 'KZ', 'LA', 'LB', 'LC', 'LI', 'LK', 'LR', 'LS', 'LT', 'LU', 'LV', 'LY', 'MA', 'MC', 'MD', 'ME', 'MF', 'MG', 'MH', 'MK', 'ML', 'MM', 'MN', 'MO', 'MP', 'MQ', 'MR', 'MS', 'MT', 'MU', 'MV', 'MW', 'MX', 'MY', 'MZ', 'NA', 'NC', 'NE', 'NF', 'NG',

In [13]:
# print continents / dictionary
continents = gc.get_continents()
print(continents)

{'AF': {'lng': '21.09375', 'geonameId': 6255146, 'timezone': {'gmtOffset': 1, 'timeZoneId': 'Africa/Bangui', 'dstOffset': 1}, 'bbox': {'east': 63.49390709700003, 'south': -37.432486921640724, 'north': 37.5598406350001, 'west': -25.36118769, 'accuracyLevel': 0}, 'toponymName': 'Africa', 'asciiName': 'Africa', 'astergdem': 589, 'fcl': 'L', 'population': 1031833000, 'wikipediaURL': 'en.wikipedia.org/wiki/Africa', 'adminName5': '', 'srtm3': 592, 'adminName4': '', 'adminName3': '', 'alternateNames': [{'name': 'アフリカ', 'lang': 'ja'}, {'isPreferredName': True, 'name': '아프리카', 'lang': 'ko'}, {'name': 'แอฟริกา', 'lang': 'th'}, {'isPreferredName': True, 'name': 'Aafrika', 'lang': 'et'}, {'name': 'Affrica', 'lang': 'cy'}, {'isPreferredName': True, 'name': 'Africa', 'lang': 'en'}, {'isPreferredName': True, 'name': 'Africa', 'lang': 'it'}, {'name': 'Africa', 'lang': 'la'}, {'isShortName': True, 'isPreferredName': True, 'name': 'África', 'lang': 'es'}, {'name': 'África', 'lang': 'pt'}, {'name': 'Àfri

In [49]:
from geonamescache.mappers import country
mapper = country(from_key='name', to_key='iso3')

iso3 = mapper('Spain') # iso3 is assigned ESP

print(iso3)

ESP


__GeoText__

Only for English ? uses SpaCy for retrieval and classification of LOC entities 

In [19]:
my_str = "Hilton hotel in Paris is a great place. However I would prefer a vacation in Landerneau. Do you know that French people call the capital of England Londres ?"

In [21]:
places = GeoText(my_str)
cities = places.cities
print(cities)
countries = places.countries
print(countries)

['Paris', 'Landerneau']
[]


issue : Londres is French so it is not recognized as a city (London). England is not captured because it is neither a city nor a country ? Need to go through the documentation

__Geopy__

Works with French !! Issue : works only with cities... in fact it is useful for retrieving informations from coordinates or from a city name but it can't be used to get information about highre level places.

In [48]:
geopy.geocoders.options.default_user_agent = "my-application1"
geolocator = Nominatim(timeout=2)

cities = ['Paris', 'Landerneau', 'Londres', 'Acapulco', 'Saint-Jean de Luz', 'Munich']

lat_lon = []
for city in cities: 
    try:
        location = geolocator.geocode(city)
        if location:
            print(location.latitude, location.longitude)
            lat_lon.append(location)
    except GeocoderTimedOut as e:
        print("Error: geocode failed on input %s with message %s" %(city, e))
print(lat_lon)

48.8566969 2.3514616
48.4514803 -4.2557901
51.5073219 -0.1276474
16.8680495 -99.8940182
43.3870763 -1.6635695
48.1371079 11.5753822
[Location(Paris, Île-de-France, France métropolitaine, France, (48.8566969, 2.3514616, 0.0)), Location(Landerneau, Brest, Finistère, Bretagne, France métropolitaine, 29800, France, (48.4514803, -4.2557901, 0.0)), Location(London, Greater London, England, United Kingdom, (51.5073219, -0.1276474, 0.0)), Location(Acapulco, Acapulco de Juárez, Guerrero, 39300, México, (16.8680495, -99.8940182, 0.0)), Location(Saint-Jean-de-Luz, Bayonne, Pyrénées-Atlantiques, Nouvelle-Aquitaine, France métropolitaine, 64500, France, (43.3870763, -1.6635695, 0.0)), Location(München, Bayern, Deutschland, (48.1371079, 11.5753822, 0.0))]


In [36]:
df = pd.DataFrame(lat_lon, columns=['City Name', 'Coordinates'])
df.head(7)


Unnamed: 0,City Name,Coordinates
0,"Paris, Île-de-France, France métropolitaine, F...","(48.8566969, 2.3514616)"
1,"Landerneau, Brest, Finistère, Bretagne, France...","(48.4514803, -4.2557901)"
2,"London, Greater London, England, United Kingdom","(51.5073219, -0.1276474)"
3,"Acapulco, Acapulco de Juárez, Guerrero, 39300,...","(16.8680495, -99.8940182)"
4,"Saint-Jean-de-Luz, Bayonne, Pyrénées-Atlantiqu...","(43.3870763, -1.6635695)"
5,"München, Bayern, Deutschland","(48.1371079, 11.5753822)"


In [37]:
#test
Latitude = "48.8566969"
Longitude = "2.3514616"
 
location = geolocator.reverse(Latitude+","+Longitude) 
  
# Display 
print(location)

L'Art, Place de l'Hôtel-de-Ville - Esplanade de la Libération, Beaubourg, Quartier Saint-Merri, Paris 4e Arrondissement, Paris, Île-de-France, France métropolitaine, 75004, France


In [31]:
# get address from geolocation
address = location.raw['address'] 
print(address)

{'tourism': "L'Art", 'road': "Place de l'Hôtel-de-Ville - Esplanade de la Libération", 'neighbourhood': 'Beaubourg', 'suburb': 'Paris 4e Arrondissement', 'city': 'Paris', 'municipality': 'Paris', 'county': 'Paris', 'state': 'Île-de-France', 'country': 'France', 'postcode': '75004', 'country_code': 'fr'}


In [32]:
# parsing
city = address.get('city', '') 
state = address.get('state', '') 
country = address.get('country', '') 
code = address.get('country_code') 
zipcode = address.get('postcode') 
print('City : ',city) 
print('State : ',state) 
print('Country : ',country) 
print('Zip Code : ', zipcode) 

City :  Paris
State :  Île-de-France
Country :  France
Zip Code :  75004


In [47]:
print(geolocator.geocode("paris", language="fr"))
print(geolocator.geocode("paris", language="en"))
print(geolocator.geocode("paris", language="de"))

Paris, Île-de-France, France métropolitaine, France
Paris, Ile-de-France, Metropolitan France, France
Paris, Île-de-France, Metropolitanes Frankreich, Frankreich
