In [14]:
import h3
from sklearn.cluster import DBSCAN
import numpy as np

class LandmarkPriority:
    """Pick a single "priority" landmark near a given location.

    The landmarks that fall into the H3 cell of the location (or one of its
    immediately neighbouring cells) are clustered with DBSCAN. From the most
    populated cluster, the landmark whose ``type`` ranks highest in
    ``PRIORITY_ORDER`` is returned.

    Landmarks are expected to be mappings with at least the keys ``'lat'``,
    ``'lon'`` and ``'type'``.
    """

    # Landmark types from most to least preferred.
    # NOTE(review): the notebook fills 'type' from the `tags_amenity` column;
    # confirm these values actually occur in that column.
    PRIORITY_ORDER = ("temple", "tourist_spot", "bus_stop", "government_building", "market", "school")

    # Label scikit-learn's DBSCAN assigns to points that belong to no cluster.
    NOISE_LABEL = -1

    def __init__(self, hex_resolution=4, eps=0.005, min_samples=5):
        """Store the H3 resolution and the DBSCAN parameters.

        Args:
            hex_resolution: H3 resolution used to bucket coordinates.
            eps: DBSCAN neighbourhood radius. It is applied to the raw
                (lat, lon) values, i.e. it is expressed in degrees.
            min_samples: DBSCAN minimum neighbourhood size for a core point.
        """
        self.hex_resolution = hex_resolution
        self.eps = eps
        self.min_samples = min_samples

    def get_hexagons(self, lat, lon):
        """Return the H3 cell containing (lat, lon) together with its ring-1 neighbours."""
        h3_index = h3.geo_to_h3(lat, lon, self.hex_resolution)
        return h3.k_ring(h3_index, 1)

    def cluster_landmarks(self, landmarks):
        """Cluster landmarks by coordinates and return one DBSCAN label per landmark.

        Points that DBSCAN considers noise get the label ``NOISE_LABEL`` (-1).
        An empty input yields an empty label array instead of an error.
        """
        if len(landmarks) == 0:
            return np.array([], dtype=int)
        coords = np.array([(landmark['lat'], landmark['lon']) for landmark in landmarks])
        clustering = DBSCAN(eps=self.eps, min_samples=self.min_samples).fit(coords)
        return clustering.labels_

    def rank_clusters(self, labels, include_noise=True):
        """Return ``(label, count)`` pairs sorted by count, largest first.

        Args:
            labels: Cluster label per point, as produced by ``cluster_landmarks``.
            include_noise: When False, the DBSCAN noise label is left out of
                the ranking. Defaults to True, which keeps every label.
        """
        unique, counts = np.unique(labels, return_counts=True)
        # sorted() is stable, so equally sized clusters stay in ascending label order.
        ranked = sorted(zip(unique, counts), key=lambda item: item[1], reverse=True)
        if not include_noise:
            ranked = [(label, count) for label, count in ranked if label != self.NOISE_LABEL]
        return ranked

    def select_priority_landmark(self, landmarks, labels):
        """Choose the preferred landmark from the largest real cluster.

        Noise points are not treated as a cluster. If DBSCAN found no cluster
        at all, every landmark is considered a candidate. Among the candidates
        the first one matching the earliest entry of ``PRIORITY_ORDER`` wins;
        if none matches, the first candidate is returned.

        Returns:
            The selected landmark, or None when ``landmarks`` is empty.
        """
        if len(landmarks) == 0:
            return None

        ranked = self.rank_clusters(labels, include_noise=False)
        if ranked:
            top_cluster_label = ranked[0][0]
            candidates = [
                landmark
                for landmark, label in zip(landmarks, labels)
                if label == top_cluster_label
            ]
        else:
            # Everything was labelled as noise: fall back to all landmarks.
            candidates = list(landmarks)

        for wanted_type in self.PRIORITY_ORDER:
            for landmark in candidates:
                if landmark['type'] == wanted_type:
                    return landmark

        # No candidate has a prioritised type.
        return candidates[0]

    def get_priority_landmark_for_hex(self, lat, lon, landmarks):
        """Return the priority landmark around (lat, lon).

        Only landmarks whose H3 cell is the location's cell or one of its
        ring-1 neighbours are considered.

        Returns:
            The selected landmark, or None if no landmark lies in those cells.
        """
        hexagons = self.get_hexagons(lat, lon)
        landmarks_in_hex = [
            landmark
            for landmark in landmarks
            if h3.geo_to_h3(landmark['lat'], landmark['lon'], self.hex_resolution) in hexagons
        ]
        if not landmarks_in_hex:
            return None
        labels = self.cluster_landmarks(landmarks_in_hex)
        return self.select_priority_landmark(landmarks_in_hex, labels)

In [15]:
import pandas as pd

# Load the landmark table, keep only the columns the selector needs, and give
# them the short names ('name', 'lat', 'lon', 'type') used by LandmarkPriority.
landmarks = (
    pd.read_csv('landmarks_clean.csv')[["tags_name", "lat", "lon", "tags_amenity"]]
    .rename(columns={"tags_name": "name", "tags_amenity": "type"})
)

# One dict per row, e.g. {'name': ..., 'lat': ..., 'lon': ..., 'type': ...}.
landmarks_dict = landmarks.to_dict(orient='records')

In [16]:
# Sanity check: number of landmark records built from the CSV.
len(landmarks_dict)

494

In [19]:
# Destination coordinates (latitude, longitude) used for the lookup.
destination_home_lat, destination_home_lon = 27.7, 85.3

# Selector with its default settings; ask it for the priority landmark
# around the destination.
selector = LandmarkPriority()
priority_landmark = selector.get_priority_landmark_for_hex(
    lat=destination_home_lat,
    lon=destination_home_lon,
    landmarks=landmarks_dict,
)


In [24]:
# Show the selected landmark's name with whitespace replaced by underscores.
# get_priority_landmark_for_hex returns None when no landmark lies in the
# searched hexagons, so guard before indexing into the result.
if priority_landmark is None:
    print("No priority landmark found for the destination")
else:
    # split() without an argument collapses repeated, leading and trailing
    # whitespace, so the joined name never contains empty "__" segments.
    print("_".join(priority_landmark["name"].split()))

नेपाल_बनेपा_पोलिटेक्निक_संस्थान
