In [None]:
%load_ext autoreload
%autoreload 2
# default_exp indexers.geo.geo_indexer

In [None]:
# export
from integrators.data.schema import *
from integrators.data.itembase import *
from integrators.pod.client import PodClient
from integrators.indexers.indexer import IndexerBase, get_indexer_run_data
from integrators.indexers import *
import pycountry, requests
import reverse_geocoder as rg

# GeoIndexer

In [None]:
# export

# Edge-name constant. It is not referenced by the code visible in this
# notebook (GeoIndexer reads `item.location` directly).
# NOTE(review): presumably the pod-side name of the item -> location edge; confirm it is still needed.
LOCATION_EDGE = "hasLocation"

class GeoIndexer(IndexerBase):
    """Indexer that annotates located items with a city name and a country.

    For every item that has exactly one location with a complete
    (latitude, longitude) pair, the coordinates are reverse-geocoded with
    ``reverse_geocoder``. The resulting city name is stored on ``item.city``
    and a ``"country"`` edge is added from the item to a ``Country`` node:
    the one already present in the pod when a lookup by name succeeds, or a
    newly created one otherwise.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

    def latlong2citycountry(self, latlong):
        """Reverse-geocode a ``(latitude, longitude)`` pair.

        Args:
            latlong: tuple ``(latitude, longitude)``.

        Returns:
            ``(city_name, country_name)`` where ``city_name`` is the ``"name"``
            field of the first ``reverse_geocoder`` hit and ``country_name`` is
            the ``pycountry`` name for that hit's ``"cc"`` (alpha-2) code.
        """
        geo_result = rg.search([latlong])[0]
        city_name = geo_result["name"]
        country_name = pycountry.countries.get(alpha_2=geo_result["cc"]).name
        return city_name, country_name

    def get_country_by_name(self, api, name):
        """Look up a ``Country`` by name through ``api``.

        Args:
            api: object exposing ``search_by_fields``.
            name: country name to search for.

        Returns:
            The first search result, or ``None`` when the search yields
            nothing (``None`` or an empty collection).
        """
        data = api.search_by_fields({"_type": "Country", "name": name})
        if not data:
            return None
        return data[0]

    def get_lat_long(self, item):
        """Extract the single ``(latitude, longitude)`` pair of ``item``.

        Args:
            item: object whose ``location`` attribute is a sequence of objects
                with ``latitude`` and ``longitude`` attributes.

        Returns:
            ``(latlong, skip)``. ``skip`` is ``True`` (and ``latlong`` is
            ``None``) when the item does not have exactly one location or when
            either coordinate is missing; otherwise ``skip`` is ``False``.
        """
        locations = item.location
        if len(locations) != 1:
            print(f"skipping item {item}, found {len(locations)} locations")
            return None, True

        latlong = locations[0].latitude, locations[0].longitude

        if None in latlong:
            print(f"skipping item {item}, incomplete latlong")
            # The original evaluated this tuple without returning it, so
            # incomplete coordinates were passed on to the geocoder.
            return None, True

        return latlong, False

    def index(self, api, indexer_run):
        """Annotate all located items of ``indexer_run`` with city and country.

        Args:
            api: pod client, used to expand items, look up countries and
                persist progress.
            indexer_run: run descriptor; its ``progress`` (0-100) is updated
                and saved after every processed item.

        Returns:
            ``(items_with_location, new_nodes)``: the expanded items that have
            at least one location with a latitude (including any that were
            skipped), and the ``Country`` nodes created during this run.
        """
        items_expanded = [d.expand(api) for d in get_indexer_run_data(api, indexer_run)]
        items_with_location = [
            x for x in items_expanded
            if any(loc.latitude is not None for loc in x.location)
        ]

        total = len(items_with_location)
        print(f"{total} items found to index")

        new_nodes = []
        # Countries already resolved in this run, keyed by name. This avoids a
        # pod lookup per item and, more importantly, avoids creating several
        # Country nodes for the same name.
        countries_by_name = {}

        for n, item in enumerate(items_with_location):
            latlong, skip = self.get_lat_long(item)

            if not skip:
                # get geo info
                city_name, country_name = self.latlong2citycountry(latlong)

                # add information to indexer objects
                item.city = city_name

                country = countries_by_name.get(country_name)
                if country is None:
                    country = self.get_country_by_name(api, country_name)
                    if country is None:
                        country = Country(name=country_name)
                        new_nodes.append(country)
                    countries_by_name[country_name] = country

                item.add_edge("country", country)

            # Progress is reported for skipped items too, so that a run ending
            # in skipped items still reaches 100. Parentheses matter here:
            # `n+1 / total * 100` would parse as `n + (1 / total * 100)`.
            indexer_run.progress = int((n + 1) / total * 100)
            indexer_run.update(api, edges=False)

        return items_with_location, new_nodes

# Create a toy dataset

In [None]:
# PodClient built with its default arguments.
# NOTE(review): presumably requires a reachable pod — confirm before running.
client = PodClient()

In [None]:
# A location with explicit coordinates (approximately Melbourne, Australia).
node1 = Location.from_data(latitude=-37.81, longitude=144.96)
# An address created without any properties.
node2 = Address.from_data()

# Link the address to the location through a "location" edge.
# NOTE(review): presumably this is what GeoIndexer sees as `item.location` — confirm.
node2.add_edge("location", node1)

In [None]:
# Describe the indexer by class name, create a run at 0% progress that
# targets "Address" items, and link the run to its indexer.
indexer = Indexer.from_data(indexerClass="GeoIndexer", name="GeoIndexer")
indexer_run = IndexerRun(progress=0, targetDataType="Address")
indexer_run.add_edge("indexer", indexer)

In [None]:
# hide
# Convert the project's notebooks to Python modules with nbdev
# (each converted notebook is listed in the cell output).
from nbdev.export import *
notebook2script()

Converted basic.ipynb.
Converted index.ipynb.
Converted indexers.GeoIndexer.ipynb.
Converted indexers.indexer.ipynb.
Converted itembase.ipynb.
Converted pod.client.ipynb.
