In [None]:
%load_ext autoreload
%autoreload 2
# default_exp indexers.geo.geo_indexer

In [None]:
# export
from integrators.data.schema import *
from integrators.data.itembase import *
from integrators.pod.client import PodClient
from integrators.indexers.indexer import IndexerBase, get_indexer_run_data, IndexerData, test_registration
from integrators.indexers import *
import pycountry, requests
import reverse_geocoder as rg

# GeoIndexer

In [None]:
# export

# Edge-name constant. GeoIndexer below does not reference it (items expose their
# locations through `item.location`); presumably kept for callers elsewhere in
# the package -- TODO confirm it is still needed.
LOCATION_EDGE = "hasLocation"

class GeoIndexer(IndexerBase):
    """Adds Countries and Cities to items with a location.

    For every item that has exactly one location with a complete
    latitude/longitude pair, the indexer reverse-geocodes the coordinates,
    stores the resulting city name on the item (``item.city``) and links the
    item to a ``Country`` node through a ``"country"`` edge.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def latlong2citycountry(self, latlong):
        """Reverse-geocode a ``(latitude, longitude)`` pair.

        Args:
            latlong: tuple ``(latitude, longitude)``.

        Returns:
            tuple: ``(city_name, country_name)``. The city is the ``"name"``
            field of the first ``reverse_geocoder`` result; the country is the
            ``pycountry`` name for that result's ``"cc"`` code.
        """
        geo_result = rg.search([latlong])[0]
        city_name = geo_result["name"]
        # NOTE(review): if pycountry has no entry for this country code the
        # lookup may yield None and `.name` would raise -- confirm whether a
        # fallback is wanted.
        country_name = pycountry.countries.get(alpha_2=geo_result["cc"]).name
        return city_name, country_name

    def get_country_by_name(self, client, name):
        """Return the first ``Country`` item named ``name`` found via ``client``.

        Returns:
            The first search hit, or ``None`` when the search returns ``None``
            or an empty list.
        """
        data = client.search_by_fields({"_type": "Country", "name": name})
        if data is None or data == []:
            return None
        return data[0]

    def get_lat_long(self, item):
        """Extract the coordinates of an item that has exactly one location.

        Returns:
            tuple: ``(latlong, skip)``. ``latlong`` is ``(latitude, longitude)``
            and ``skip`` is ``False`` when the item is usable. Otherwise
            ``(None, True)`` is returned: either the item does not have exactly
            one location, or one of the two coordinates is ``None``.
        """
        locations = item.location
        if len(locations) != 1:
            print(f"skipping item {item}, found {len(locations)} locations")
            return None, True

        latlong = locations[0].latitude, locations[0].longitude

        if None in latlong:
            print(f"skipping item {item}, incomplete latlong")
            # The `return` was missing here, so incomplete coordinates used to
            # fall through and be reported as valid.
            return None, True

        return latlong, False

    def get_data(self, client, indexer_run):
        """Collect the items of ``indexer_run`` that have a usable location.

        Every item returned by ``get_indexer_run_data`` is expanded through
        ``client``; items are kept when at least one of their locations has a
        latitude that is not ``None``.

        Returns:
            IndexerData: with the kept items in ``items_with_location``.
        """
        items_expanded = [d.expand(client) for d in get_indexer_run_data(client, indexer_run)]
        items_with_location = [
            x for x in items_expanded
            if any(loc.latitude is not None for loc in x.location)
        ]
        print(f"{len(items_with_location)} items found to index")
        return IndexerData(items_with_location=items_with_location)

    def index(self, data, indexer_run, client=None):
        """Annotate each item with its city and link it to a country.

        Args:
            data: object exposing ``items_with_location``.
            indexer_run: run object; its ``progress`` attribute is set to the
                percentage (0-100) of items handled so far and, when a client
                is given, pushed via ``indexer_run.update``.
            client: optional pod client. When ``None`` no existing countries
                are looked up and no progress updates are sent.

        Returns:
            tuple: ``(items_with_location, new_nodes)`` where ``new_nodes``
            holds the ``Country`` objects created because no existing country
            was found.
        """
        items_with_location = data.items_with_location
        total = len(items_with_location)
        print(f"indexing {total} items")

        new_nodes = []
        for n, item in enumerate(items_with_location):

            latlong, skip = self.get_lat_long(item)
            if skip:
                continue

            # get geo info
            city_name, country_name = self.latlong2citycountry(latlong)

            # add information to indexer objects
            item.city = city_name
            country = self.get_country_by_name(client, country_name) if client is not None else None

            if country is None:
                # No stored country found (or no client): create a new node
                # and hand it back to the caller.
                country = Country(name=country_name)
                new_nodes.append(country)

            item.add_edge("country", country)

            # Parenthesise (n + 1): without it, division binds tighter and the
            # value was n + 100/total rather than a percentage.
            progress = int((n + 1) / total * 100)

            indexer_run.progress = progress
            if client is not None:
                indexer_run.update(client, edges=False)

        return items_with_location, new_nodes

# Example

First, let's create a test dataset.

In [None]:
# A pod client is created here; note that the index() call later in this
# example is made without passing it.
client = PodClient()

# Coordinates that the assertion at the end of the example expects to resolve
# to Melbourne, Australia.
location = Location.from_data(latitude=-37.81, longitude=144.96)
address = Address.from_data()
indexer = GeoIndexer.from_data()
indexer_run = IndexerRun.from_data(progress=0, targetDataType="Address")

# Wire the graph: the run points at its indexer, the address at its location.
indexer_run.add_edge("indexer", indexer)
address.add_edge("location", location)

In [None]:
# Presumably verifies that GeoIndexer is registered with the indexer framework;
# see test_registration in integrators.indexers.indexer -- TODO confirm.
test_registration(GeoIndexer)

In [None]:
# Build the indexer input by hand (the same shape GeoIndexer.get_data returns)
# from the single address created above.
data = IndexerData(items_with_location= [address])

In [None]:
# Index without a client: every country is created fresh and returned in
# `new_items`, while the annotated items come back in `updated_items`.
updated_items, new_items = indexer.index(data, indexer_run)

# Check the country first, then the city -- one assertion per expectation.
assert new_items[0].name == "Australia"
assert updated_items[0].city == "Melbourne"

indexing 1 items


In [None]:
# hide
# Run nbdev's notebook-to-script conversion; the captured output of this cell
# lists each notebook that was converted.
from nbdev.export import *
notebook2script()

Converted basic.ipynb.
Converted index.ipynb.
Converted indexers.GeoIndexer.ipynb.
Converted indexers.NoteListIndexer.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted pod.client.ipynb.
