# OSDEV-1199: SPIKE. Prototype name & address search.


## Prep work for the execution

Handling things such as:

1. Importing the necessary libraries.

1. Checking that we have access to the OpenSearch cluster.

1. Defining helper functions that we'll use later on.


In [291]:
# importing necessary libraries
import opensearchpy
import pandas as pd
import json
import googlemaps
import os
import dotenv
import folium

In [292]:
# fail fast when the OpenSearch node on localhost:9200 does not answer a ping
search_client = opensearchpy.OpenSearch(
    hosts=[dict(host="localhost", port=9200)],
)

if not search_client.ping():
    raise Exception("Could not connect to the cluster!")

In [293]:
# high-level Search object bound to the production locations index;
# every query below is derived from it
PRODUCTION_LOCATION_INDEX = "production-locations"
search = opensearchpy.Search(
    index=PRODUCTION_LOCATION_INDEX,
    using=search_client,
)

In [294]:
# build the Google Maps client from the API key found in the environment
# (load_dotenv() is called first so a local .env file can provide the key)
dotenv.load_dotenv()
maps_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")

if maps_api_key:
    maps_client = googlemaps.Client(key=maps_api_key)
else:
    raise Exception("GOOGLE_MAPS_API_KEY environment variable is not set!")

In [295]:
# define the functions that will allow us to interact with the OpenSearch cluster
def map_hit(hit: dict) -> dict:
    """Flatten a single raw search hit into a one-level record.

    Args:
        hit: A hit dictionary carrying ``_id``, ``_score`` and a ``_source``
            document with ``name``, ``address``, ``country.alpha_2`` and
            ``coordinates.lat`` / ``coordinates.lon``.

    Returns:
        A dictionary with the keys ``os_id``, ``name``, ``address``,
        ``country_code``, ``score``, ``lat`` and ``lon`` (in that order).

    Raises:
        KeyError: If any of the expected keys is missing from the hit.
    """
    source = hit["_source"]
    country_info = source["country"]
    geo_point = source["coordinates"]

    record = {"os_id": hit["_id"]}
    record["name"] = source["name"]
    record["address"] = source["address"]
    record["country_code"] = country_info["alpha_2"]
    record["score"] = hit["_score"]
    record["lat"] = geo_point["lat"]
    record["lon"] = geo_point["lon"]
    return record


def get_hits(response: any) -> list[dict]:
    """Convert every hit of a search response into a flat record.

    Args:
        response: An executed search response; it only needs to expose a
            ``to_dict()`` method whose result contains ``["hits"]["hits"]``.

    Returns:
        A list with one ``map_hit`` record per hit, in response order. The
        list is empty when the response contains no hits.
    """
    raw_hits = response.to_dict()["hits"]["hits"]

    # Materialise the records: the previous lazy ``map`` object contradicted
    # the ``list[dict]`` annotation, could be consumed only once and did not
    # support ``len()`` or indexing.
    return [map_hit(hit) for hit in raw_hits]


def get_frame(response: any) -> pd.DataFrame:
    """Build a data frame from the hits of a search response.

    Args:
        response: An executed search response, forwarded to ``get_hits``.

    Returns:
        A ``pd.DataFrame`` constructed from the records produced by
        ``get_hits``.
    """
    return pd.DataFrame(get_hits(response))

In [296]:
# define the functions that will allow us to interact with the Folium API


def plot_geocode_result(map: folium.Map, geocode_result: list[dict]):
    """Add one red marker to the map for every geocoding result.

    Args:
        map: The folium map that receives the markers.
        geocode_result: Geocoding results; each entry is read for
            ``geometry.location.lat`` / ``geometry.location.lng`` and
            ``formatted_address`` (used as the popup text).
    """
    for entry in geocode_result:
        point = entry["geometry"]["location"]
        pin = folium.Marker(
            location=[point["lat"], point["lng"]],
            popup=entry["formatted_address"],
            icon=folium.Icon(color="red"),
        )

        map.add_child(pin)


def plot_data_frame(map: folium.Map, data_frame: pd.DataFrame):
    """Add one blue marker to the map for every row of the data frame.

    Args:
        map: The folium map that receives the markers.
        data_frame: Frame with the columns ``lat``, ``lon``, ``name``,
            ``address`` and ``os_id``; the last three are shown in the
            marker popup, one paragraph each.
    """
    # Local import keeps this cell self-contained.
    import html

    for _, row in data_frame.iterrows():
        # Escape the values before embedding them into the popup HTML, so
        # names such as "T & K GARMENT" cannot produce broken markup.
        name, address, os_id = (
            html.escape(str(row[column]))
            for column in ("name", "address", "os_id")
        )

        # The values are bound to plain names first: reusing double quotes
        # inside a double-quoted f-string only parses on Python 3.12+.
        # Each paragraph is now properly closed with </p>.
        marker = folium.Marker(
            location=[row["lat"], row["lon"]],
            popup=f"<p>{name}</p><p>{address}</p><p>{os_id}</p>",
            icon=folium.Icon(color="blue"),
        )

        marker.add_to(map)

## Handling the experimental inputs


In [297]:
# experimental input: facility name, raw address and ISO alpha-2 country code
name_and_address_inp = dict(
    name="KASHION INDUSTRY CO. LTD",
    address="PHL.LUM PHM TRO PNG PO, CHOM CHOA KHAN PO SEN CHEY PHNOM PENH 0",
    country_code="KH",
)

### #1 Complete name and address search (no geocoding)


In [298]:
# full-text match on the name, then additionally on the address
query = search.query("match", name=name_and_address_inp["name"])
query = query.query("match", address=name_and_address_inp["address"])

cmn_frame = get_frame(query.execute())

# print(json.dumps(query.to_dict(), indent=2)) # uncomment to see the query
cmn_frame

Unnamed: 0,os_id,name,address,country_code,score,lat,lon
0,KH2020106BEENKD,KASHION INDUSTRY LIMITED,"PHL.LUM PHM TRO PNG PO, CHOM CHOA KHAN PO SEN ...",KH,124.58914,11.563274,104.856635
1,KH20191080E99T2,T & K GARMENT INDUSTRY CO LTD,"BUILDING #5,6,7,8,9, TOUL PONG ROR, SANGKAT CH...",KH,38.429237,11.533314,104.824603
2,KH2021029959ES1,GG FASHION (CAMBODIA) CO LTD,"NATIONAL ROAD 4, TROPEANG TOULVILLAGE, SANGKAT...",KH,36.794605,11.517915,104.773283
3,KH20201062BDWAY,FOOK WAH KUN KEE KNITTING FTY LTD,"TRAPAING THLEUNG VILLAGE, PHNOM PENH, SANGKAT ...",KH,35.572643,11.533314,104.824603
4,KH2019108R4V9PY,8 STAR SPORTSWEAR LTD,"LOT 0176 DAMNAK THOM VILLAGE, STOEUNG MEAN CHE...",KH,34.83528,11.537885,104.893626
5,KH2022298NP8S91,GRAND DW CO LTD,"978 PHUM DOMNAK THOM VILLAGE 2, SANGKAT STEUNG...",KH,34.83236,11.539648,104.886328
6,KH2019181BJWP7Z,JI HONG YUAN (CAMBODIA) GARMENT CO LTD,"NO4174 ST30 THREA VILLAGE, SANGKT STEUNG MEAN ...",KH,34.066074,11.539648,104.886328
7,KH2019268VDRAE7,CAMBO HANDSOME LTD,"PHUM ANGKEO, SANGKAT CHOM CHAO, KHAN PORSENCHE...",KH,33.448788,11.533314,104.824603
8,KH2021168NDF956,"NAN KUANG GARMENT (CAMBODIA) CO., LTD","DAMNAK THOM VILLAGE, STUENG MEAN CHEY DISTRICT...",KH,33.40309,11.537885,104.893626
9,KH2021320Z10KFZ,"SURPASSING GARMENT FACTORY CO., LTD","PHUM TROPAING THLEUNG, SANGKAT CHOM CHAO I, KH...",KH,32.003387,11.533314,104.824603


### #2 Complete name and address search (with geocoding)

In [299]:
# geocode the raw input address with the Google Maps client
cnm_geocode_result = maps_client.geocode(
    name_and_address_inp["address"],
)
# print(json.dumps(cnm_geocode_result, indent=2)) # uncomment to see the result

In [300]:
# centre the map on the first search hit, then draw the search hits
# (blue markers) and the geocoding candidates (red markers) on it
map_osm = folium.Map(
    location=[cmn_frame["lat"].iloc[0], cmn_frame["lon"].iloc[0]],
)

plot_data_frame(map_osm, cmn_frame)
plot_geocode_result(map_osm, cnm_geocode_result)

map_osm

### #3 Complete name and address search + country (with geocoding)

In [301]:
# geocode the input address, restricting the lookup to the input country
cnm_c_geocode_result = maps_client.geocode(
    name_and_address_inp["address"],
    components=dict(country=name_and_address_inp["country_code"]),
)
# print(json.dumps(cnm_c_geocode_result, indent=2)) # uncomment to see the results

In [302]:
# full-text match on name and address, restricted to the input country
# through a term filter on the alpha-2 code
query = search.query("match", name=name_and_address_inp["name"])
query = query.query("match", address=name_and_address_inp["address"])
query = query.filter(
    {"term": {"country.alpha_2": name_and_address_inp["country_code"]}},
)

cmn_c_frame = get_frame(query.execute())

# print(json.dumps(query.to_dict(), indent=2)) # uncomment to see the query
cmn_c_frame

Unnamed: 0,os_id,name,address,country_code,score,lat,lon
0,KH2020106BEENKD,KASHION INDUSTRY LIMITED,"PHL.LUM PHM TRO PNG PO, CHOM CHOA KHAN PO SEN ...",KH,124.58914,11.563274,104.856635
1,KH20191080E99T2,T & K GARMENT INDUSTRY CO LTD,"BUILDING #5,6,7,8,9, TOUL PONG ROR, SANGKAT CH...",KH,38.429237,11.533314,104.824603
2,KH2021029959ES1,GG FASHION (CAMBODIA) CO LTD,"NATIONAL ROAD 4, TROPEANG TOULVILLAGE, SANGKAT...",KH,36.794605,11.517915,104.773283
3,KH20201062BDWAY,FOOK WAH KUN KEE KNITTING FTY LTD,"TRAPAING THLEUNG VILLAGE, PHNOM PENH, SANGKAT ...",KH,35.572643,11.533314,104.824603
4,KH2019108R4V9PY,8 STAR SPORTSWEAR LTD,"LOT 0176 DAMNAK THOM VILLAGE, STOEUNG MEAN CHE...",KH,34.83528,11.537885,104.893626
5,KH2022298NP8S91,GRAND DW CO LTD,"978 PHUM DOMNAK THOM VILLAGE 2, SANGKAT STEUNG...",KH,34.83236,11.539648,104.886328
6,KH2019181BJWP7Z,JI HONG YUAN (CAMBODIA) GARMENT CO LTD,"NO4174 ST30 THREA VILLAGE, SANGKT STEUNG MEAN ...",KH,34.066074,11.539648,104.886328
7,KH2019268VDRAE7,CAMBO HANDSOME LTD,"PHUM ANGKEO, SANGKAT CHOM CHAO, KHAN PORSENCHE...",KH,33.448788,11.533314,104.824603
8,KH2021168NDF956,"NAN KUANG GARMENT (CAMBODIA) CO., LTD","DAMNAK THOM VILLAGE, STUENG MEAN CHEY DISTRICT...",KH,33.40309,11.537885,104.893626
9,KH2021320Z10KFZ,"SURPASSING GARMENT FACTORY CO., LTD","PHUM TROPAING THLEUNG, SANGKAT CHOM CHAO I, KH...",KH,32.003387,11.533314,104.824603


In [303]:
# Centre the map on the first hit of the country-filtered search. The cell
# previously used `cmn_frame` (the unfiltered search) here, so the map
# centre did not follow the data actually plotted below.
map_osm = folium.Map(
    location=[cmn_c_frame.iloc[0]["lat"], cmn_c_frame.iloc[0]["lon"]],
)

# search hits are drawn in blue, geocoding candidates in red
plot_data_frame(map_osm, cmn_c_frame)
plot_geocode_result(map_osm, cnm_c_geocode_result)

map_osm