# Implementation for clustering hotspots

In [11]:
import geopy.distance
import folium

In [5]:
class Point:
    """A located sample together with the bookkeeping flags used by DBSCAN."""

    def __init__(self, id, coordinates):
        # Identity and position exactly as supplied by the caller.
        self.id, self.coordinates = id, coordinates
        # Clustering state: untouched, unassigned and not (yet) noise.
        self.visited = False
        self.cluster = None
        self.is_noise = False

    def distance(self, other_point):
        """
        Return the geodesic distance in kilometres between this point and
        ``other_point``, as computed by ``geopy.distance.geodesic`` from the
        two ``coordinates`` values.
        """
        separation = geopy.distance.geodesic(self.coordinates, other_point.coordinates)
        return separation.km

    def __str__(self):
        # Human-readable summary: id, raw coordinates and current cluster.
        return f"Point {self.id}[{self.coordinates!s}, cluster:{self.cluster}]"

In [6]:
class DBSCAN:
    """
    Density-based clustering over a collection of point objects.

    Every point must provide ``distance(other)`` and the mutable attributes
    ``visited``, ``cluster`` and ``is_noise``; results are written directly
    onto those attributes rather than returned.
    """

    def __init__(self, dataset, eps, min_pts):
        """
        Args:
            dataset: Re-iterable collection of points (it is scanned once per
                region query).
            eps: Neighbourhood radius, in whatever unit ``point.distance``
                returns. The comparison is strict (``distance < eps``).
            min_pts: Minimum neighbourhood size for a point to seed or grow a
                cluster. The neighbourhood comes from a scan of the whole
                dataset, so the queried point counts itself whenever its
                self-distance is below ``eps``.
        """
        self.dataset = dataset
        self.eps = eps
        self.min_pts = min_pts
        # Id of the most recently created cluster; 0 means "none yet".
        self.cluster_id = 0

    def region_query(self, point):
        """
        Returns all points strictly within eps-distance from the given point
        in the dataset, in dataset order.
        """
        return [
            candidate_point
            for candidate_point in self.dataset
            if point.distance(candidate_point) < self.eps
        ]

    def expand_cluster(self, point, neighbors):
        """
        Expands the cluster of the given point and its neighbors.

        ``point`` and every point reachable through dense neighbourhoods are
        labelled with the current ``self.cluster_id``. ``neighbors`` is used
        as the work queue and is extended in place; each point is queued at
        most once.
        """
        point.cluster = self.cluster_id
        # Identity-based membership so points need not be hashable and a point
        # reachable from several core points is not queued again and again.
        queued = {id(member) for member in neighbors}
        i = 0
        while i < len(neighbors):
            neighbor = neighbors[i]
            i += 1
            if not neighbor.visited:
                neighbor.visited = True
                new_neighbors = self.region_query(neighbor)
                # Only dense (core) points propagate the cluster further.
                if len(new_neighbors) >= self.min_pts:
                    for candidate in new_neighbors:
                        if id(candidate) not in queued:
                            queued.add(id(candidate))
                            neighbors.append(candidate)
            if neighbor.cluster is None:
                neighbor.cluster = self.cluster_id
                # A point flagged as noise earlier in fit() may turn out to be
                # a border point of this cluster; it is no longer noise.
                neighbor.is_noise = False

    def fit(self):
        """
        Run the DBSCAN clustering algorithm.

        Labels are stored on the points: ``cluster`` receives a 1-based
        cluster id and ``is_noise`` is set for points whose neighbourhood is
        too small. Points whose ``visited`` flag is already set are skipped,
        so calling ``fit`` again on the same point objects does not
        re-cluster them.
        """
        for point in self.dataset:
            if point.visited:
                continue
            point.visited = True
            neighbors = self.region_query(point)
            if len(neighbors) < self.min_pts:
                # Provisional: may be cleared if a later cluster absorbs it.
                point.is_noise = True
            else:
                self.cluster_id += 1
                self.expand_cluster(point, neighbors)

In [7]:
# Sample hotspot coordinates. NOTE: despite the variable name, each tuple is
# handed unchanged to geopy's geodesic(), which reads a tuple as
# (latitude, longitude).
long_lats = [
    (1.31990020343696, 103.95674626542143),
    (1.3409742208234072, 103.9640548435533),
    (1.3412846014636721, 103.96400052881947),
    (1.3277199609269097, 103.95972731843452),
    (1.3173137658654994, 103.9511365061205),
    (1.3150333487355417, 103.9470373314858),
    (1.362225122765516, 103.88294192838914),
    (1.362222442765516, 103.88294193224891),
    (1.32222442765516, 103.88294193224891),
    (1.3124085243656334, 103.93869585305949),
    (1.3118890275098984, 103.93232351063924),
]

# One Point per coordinate pair; the list position doubles as the point id.
dataset = [Point(idx, coords) for idx, coords in enumerate(long_lats)]

# Running DBSCAN using OOP
# eps is compared against Point.distance(), which returns kilometres, so
# eps=1 means a 1 km neighbourhood radius.
dbscan = DBSCAN(dataset, eps=1, min_pts=2)
dbscan.fit()

# Output clusters
for point in dataset:
    print(f"Point: {point}, Cluster ID: {point.cluster}")

Point: Point 0[(1.31990020343696, 103.95674626542143), cluster:1], Cluster ID: 1
Point: Point 1[(1.3409742208234072, 103.9640548435533), cluster:2], Cluster ID: 2
Point: Point 2[(1.3412846014636721, 103.96400052881947), cluster:2], Cluster ID: 2
Point: Point 3[(1.3277199609269097, 103.95972731843452), cluster:1], Cluster ID: 1
Point: Point 4[(1.3173137658654994, 103.9511365061205), cluster:1], Cluster ID: 1
Point: Point 5[(1.3150333487355417, 103.9470373314858), cluster:1], Cluster ID: 1
Point: Point 6[(1.362225122765516, 103.88294192838914), cluster:3], Cluster ID: 3
Point: Point 7[(1.362222442765516, 103.88294193224891), cluster:3], Cluster ID: 3
Point: Point 8[(1.32222442765516, 103.88294193224891), cluster:None], Cluster ID: None
Point: Point 9[(1.3124085243656334, 103.93869585305949), cluster:1], Cluster ID: 1
Point: Point 10[(1.3118890275098984, 103.93232351063924), cluster:1], Cluster ID: 1


In [8]:
def visualize_clusters(dataset, center=None, zoom_start=15):
    """
    Draw every point of ``dataset`` as a coloured marker on a folium map.

    Args:
        dataset: Iterable of points exposing ``id``, ``coordinates`` and
            ``cluster`` (``None`` for unclustered points).
        center: Optional ``[lat, lon]`` for the initial map centre. Defaults
            to the location this function has always used.
        zoom_start: Initial zoom level passed to ``folium.Map``.

    Returns:
        The ``folium.Map`` with one marker per point.
    """
    if center is None:
        # Historical default: the coordinates of sample point 1 in this
        # notebook (an earlier comment mislabelled this location as NYC).
        center = [1.3409742208234072, 103.9640548435533]

    # Create a base map
    m = folium.Map(location=center, zoom_start=zoom_start)

    # Unclustered points get a colour that is deliberately absent from the
    # cluster palette, so no cluster can be mistaken for noise.
    noise_color = 'gray'

    # Define cluster colors (you can expand this list for more clusters)
    colors = ['red', 'blue', 'green', 'purple', 'orange', 'darkred', 'lightred', 'beige', 'darkblue', 'darkgreen', 'cadetblue', 'darkpurple', 'pink', 'lightblue', 'lightgreen', 'black', 'lightgray']

    # Add points to the map
    for point in dataset:
        cluster_id = point.cluster
        if cluster_id is None:
            color = noise_color
        else:
            # Cluster ids start at 1; wrap around when the palette runs out.
            color = colors[(cluster_id - 1) % len(colors)]
        folium.Marker(
            location=point.coordinates,
            icon=folium.Icon(color=color),
            popup=f"ID:{point.id}, Cluster: {cluster_id}"
        ).add_to(m)

    return m

# Visualize: build the folium map for the clustered sample points and keep it
# in map_result so it can be rendered separately.
map_result = visualize_clusters(dataset)

In [9]:
# Render the map inline. NOTE(review): `display` is not imported in the
# visible cells; presumably the IPython/Jupyter built-in - confirm if this
# code is ever run outside a notebook.
display(map_result)

In [10]:
# Show how far apart two members of one cluster can be: the distance is
# measured once and reused (it was previously computed twice, with the first
# result discarded). The "same cluster" wording is fixed text, not a check.
gap_km = dataset[3].distance(dataset[10])
print(f'{dataset[3]} and {dataset[10]} are {gap_km}km apart but still the same cluster')

Point 3[(1.3277199609269097, 103.95972731843452), cluster:1] and Point 10[(1.3118890275098984, 103.93232351063924), cluster:1] are 3.516444736331772km apart but still the same cluster


## Break up large clusters

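DBSCAN keeps adding points to a cluster as long as each new point lies within `eps` of a sufficiently dense neighbour, so clusters can chain together and grow far beyond `eps` in overall extent (above, points 3 and 10 are about 3.5 km apart yet share cluster 1). A cluster of that size may be too large for a single drone to explore, so we need a way to limit the size of each cluster.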