## GeoPoint - the below will go fine

In [2]:
import pandas as pd
import chardet

# https://simplemaps.com/data/us-cities

# Determine the encoding of the file
with open("../data/us_cities.csv", 'rb') as f:
    result = chardet.detect(f.read())
encoding = result['encoding']

print("File encoding:", encoding)

df = pd.read_csv("../data/us_cities.csv", encoding=encoding)
df["GeoJSON"] = df.apply(lambda x: {"type": "Point", "coordinates": [x["LONGITUDE"], x["LATITUDE"]]}, axis=1)
RENAME_COLUMNS = {"city": "CITY", "state_id": "STATE_CODE","id":"ID"}
df.rename(columns=RENAME_COLUMNS,inplace=True)
NEEDED_COLUMNS = ["ID","CITY", "STATE_CODE","GeoJSON"]
df.info()
print(df.head(1))
df = df[NEEDED_COLUMNS]

File encoding: ascii
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29880 entries, 0 to 29879
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   ID          29880 non-null  int64  
 1   STATE_CODE  29880 non-null  object 
 2   STATE_NAME  29880 non-null  object 
 3   CITY        29880 non-null  object 
 4   COUNTY      29858 non-null  object 
 5   LATITUDE    29880 non-null  float64
 6   LONGITUDE   29880 non-null  float64
 7   GeoJSON     29880 non-null  object 
dtypes: float64(2), int64(1), object(5)
memory usage: 1.8+ MB
   ID STATE_CODE STATE_NAME  CITY          COUNTY   LATITUDE   LONGITUDE  \
0   1         AK     Alaska  Adak  Aleutians West  55.999722 -161.207778   

                                             GeoJSON  
0  {'type': 'Point', 'coordinates': [-161.207778,...  


In [3]:
from elasticsearch import Elasticsearch, helpers  # Import the helpers module
from elasticsearch.helpers import *
# BEGIN: Import Elasticsearch exceptions
# END: Import Elasticsearch exceptions
def bulk_load_documents(es: Elasticsearch, index_name: str, documents: list) -> None:
    """
    Bulk loads a list of documents into Elasticsearch.
    Parameters:
    - es (Elasticsearch): The Elasticsearch client instance.
    - index_name (str): The name of the index where the documents will be inserted.
    - documents (list): A list of documents to be inserted into Elasticsearch.
    Returns:
    - None
    Raises:
    - None
    """

    try: 
        # Prepare the actions for bulk indexing
        actions = [
            {
                "_index": index_name,
                "_source": doc,
                "_id": doc["ID"]
            }
            for doc in documents
        ]

        # Perform the bulk load
        success, errors = helpers.bulk(es, actions, 
                                   index=index_name,
                                   raise_on_error=False,
                                   raise_on_exception=False)
        
        print(f"Successful operations: {success}")
    
        if errors:
            print("Errors encountered:")
            for error in errors:
                print(f"Document ID: {error['index']['_id']}")
                print(f"Error reason: {error['index']['error']['reason']}")
                print("---")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    

In [4]:
from elasticsearch import Elasticsearch
import json
es = Elasticsearch(hosts=["http://192.168.0.111:9200"])

# Define the index name
index_name = "points"

# Define the mapping
mapping = {
  "mappings": {
    "properties": {
      "GeoJSON": {
        "type": "geo_point"
      }
    }
  }
}

es.indices.create(index=index_name, body=mapping)
bulk_load_documents(es, index_name, json.loads(df.to_json(orient="records")))


Successful operations: 29880


## Query Geopoint - geo_bounding_box
- For Pittsburg and Raleigh, in jupyter notebook we see below error (but really have to dig deeper using dev console by taking the input json)
```
BadRequestError(400, 'x_content_parse_exception', '[1:165] [bool] failed to parse field [filter]')
```
- For Pittsburg and Raleigh, below error (in dev tools) : 
```
"caused_by": {
      "type": "illegal_argument_exception",
      "reason": "top is below bottom corner: 35.772701 vs. 40.474536"
}```  

- Searching online will not get much help. People will ask to use [prepair](https://github.com/tudelft3d/prepair) etc, but ultimately it comes down to ensuring you have correct lat, long and that elasticsearch is not so forgiving with geo spatial queries

In [20]:
# Raleigh to Charlotte geo-bounding box
# BEGIN: Get rows for Raleigh and Charlotte in NC
chicago_raleigh_df = df[
  (df['CITY'] == 'Pittsburgh') & (df['STATE_CODE'] == 'PA') |
  (df['CITY'] == 'Raleigh') & (df['STATE_CODE'] == 'NC')
]

# Set display options to show full row contents
pd.set_option('display.max_colwidth', None)
display(chicago_raleigh_df["GeoJSON"])
# Create a geo_bounding_box for Elasticsearch search
geo_bounding_box = {
    "GeoJSON": {
    "top_left": chicago_raleigh_df["GeoJSON"].iloc[0]["coordinates"],
    "bottom_right": chicago_raleigh_df["GeoJSON"].iloc[1]["coordinates"]
}
}

print(geo_bounding_box)

QUERY =  {
  "query": {
    "bool": {
      "must": {
        "match_all": {}
      },
      "filter": {
        "geo_bounding_box": geo_bounding_box
      }
    }
  }
}

search_results = es.search(index="points", body=QUERY, error_trace=True)

# Process search results
for hit in search_results["hits"]["hits"]:
    print(hit["_source"])
    print(hit["_score"])


15484    {'type': 'Point', 'coordinates': [-78.632439, 35.772701]}
22810    {'type': 'Point', 'coordinates': [-79.952524, 40.474536]}
Name: GeoJSON, dtype: object

{'GeoJSON': {'top_left': [-78.632439, 35.772701], 'bottom_right': [-79.952524, 40.474536]}}


BadRequestError: BadRequestError(400, 'x_content_parse_exception', '[1:156] [bool] failed to parse field [filter]')

# Load my_geoshapes

In [14]:
from elasticsearch import Elasticsearch
import json
es = Elasticsearch(hosts=["http://192.168.0.111:9200"])

# Define the index name
index_name = "locations"

# Define the mapping
mapping = {
  "mappings": {
    "properties": {
      "geoshape_geojson": {
        "type": "geo_shape"
      }
    }
  }
}

es.indices.create(index=index_name, body=mapping)
bulk_load_documents(es, index_name, json.loads(df_geo_shape.to_json(orient="records")))


Successful operations: 0
Errors encountered:
Document ID: 0
Error reason: [1:284] failed to parse field [geoshape_geojson] of type [geo_shape]
---


In [19]:
# Raleigh to Charlotte geo-bounding box
# BEGIN: Get rows for Raleigh and Charlotte in NC
minneapolis_norfolk_df = df[
  (df['CITY'] == 'Pittsburgh') & (df['STATE_CODE'] == 'PA') |
  (df['CITY'] == 'Raleigh') & (df['STATE_CODE'] == 'NC')
]

# Set display options to show full row contents
pd.set_option('display.max_colwidth', None)
display(minneapolis_norfolk_df["GeoJSON"])
# Create a geo_bounding_box for Elasticsearch search
geo_bounding_box = {
    "geoshape_geojson": {
    "top_left": minneapolis_norfolk_df["GeoJSON"].iloc[0]["coordinates"],
    "bottom_right": minneapolis_norfolk_df["GeoJSON"].iloc[1]["coordinates"]
}
}

print(geo_bounding_box)

QUERY =  {
  "query": {
    "bool": {
      "must": {
        "match_all": {}
      },
      "filter": {
        "geo_bounding_box": geo_bounding_box
      }
    }
  }
}

search_results = es.search(index="locations", body=QUERY, error_trace=True)

# Process search results
for hit in search_results["hits"]["hits"]:
    print(hit["_source"])
    print(hit["_score"])


15484    {'type': 'Point', 'coordinates': [-78.632439, 35.772701]}
22810    {'type': 'Point', 'coordinates': [-79.952524, 40.474536]}
Name: GeoJSON, dtype: object

{'geoshape_geojson': {'top_left': [-78.632439, 35.772701], 'bottom_right': [-79.952524, 40.474536]}}


BadRequestError: BadRequestError(400, 'x_content_parse_exception', '[1:165] [bool] failed to parse field [filter]')