# ML from scratch: DBSCAN, step-by-step code with Python

By Sabrine Bendimerad

[Article link](https://)



In [21]:
import numpy as np

def dbscan(D, eps, MinPts):
    """
    Cluster a dataset with DBSCAN (Density-Based Spatial Clustering of
    Applications with Noise).

    A point is a *core* point when at least ``MinPts`` points lie within
    Euclidean distance ``eps`` of it. The point itself is part of that count
    because its distance to itself is 0. Clusters are grown outward from core
    points; points that are reachable from a core point without being core
    themselves become border points of that cluster, and everything else is
    noise.

    Args:
        D: Array-like dataset where each row is a data point. A flat
            sequence of scalars is treated as one-dimensional points.
        eps: The maximum distance between two points to be considered
            neighbors (inclusive: a point at exactly ``eps`` is a neighbor).
        MinPts: The minimum number of points required to form a dense region.

    Returns:
        An integer NumPy array with one label per data point, where:
            - -1: Noise
            - Positive value: Cluster ID (numbered from 1 in discovery order)
    """

    UNCLASSIFIED = 0  # Internal marker only; never present in the result
    NOISE = -1

    n_points = len(D)
    labels = np.full(n_points, UNCLASSIFIED, dtype=int)
    if n_points == 0:
        return labels  # Nothing to cluster

    # One flat float row per point, so lists, integer arrays and 1-D inputs
    # all go through the same vectorized distance computation.
    points = np.asarray(D, dtype=float).reshape(n_points, -1)

    def find_neighbors(P):
        """
        Return the indices of all points within eps of point P.

        Args:
            P: The index of the point in the dataset.

        Returns:
            A list of indices of the neighboring points, in ascending order.
        """
        distances = np.linalg.norm(points - points[P], axis=1)
        return np.flatnonzero(distances <= eps).tolist()

    def expand_cluster(P, neighbors, cluster_id):
        """
        Grow a cluster from core point P by visiting every point that is
        density-reachable from it.

        Args:
            P: The index of the seed (core) point.
            neighbors: Indices of the eps-neighborhood of P. Not modified.
            cluster_id: The ID of the cluster being expanded.
        """
        labels[P] = cluster_id

        # Work on a copy so the caller's list is untouched, and remember what
        # has been queued so each point is enqueued at most once.
        queue = list(neighbors)
        queued = set(queue)

        i = 0
        while i < len(queue):
            Pn = queue[i]
            i += 1

            if labels[Pn] == NOISE:
                # Previously rejected as a seed, but reachable from a core
                # point: it is a border point of this cluster.
                labels[Pn] = cluster_id
            elif labels[Pn] == UNCLASSIFIED:
                labels[Pn] = cluster_id
                Pn_neighbors = find_neighbors(Pn)
                # Only core points propagate the cluster further.
                if len(Pn_neighbors) >= MinPts:
                    for candidate in Pn_neighbors:
                        if candidate not in queued:
                            queued.add(candidate)
                            queue.append(candidate)
            # Points already owned by a cluster are left as they are.

    cluster_id = 0
    for P in range(n_points):
        if labels[P] != UNCLASSIFIED:
            continue  # Already assigned to a cluster or marked as noise

        neighbors = find_neighbors(P)

        if len(neighbors) < MinPts:
            # Provisional: may later be claimed as a border point.
            labels[P] = NOISE
        else:
            cluster_id += 1
            expand_cluster(P, neighbors, cluster_id)

    return labels


In [22]:
 # Example usage with dummy data
# Twelve 2-D sample points, one per row
D = np.array([
    [1, 2],
    [2, 2],
    [2, 3],
    [8, 7],
    [8, 8],
    [7, 8],
    [0, 1],
    [5, 5],
    [5, 6],
    [7, 6],
    [10, 1],
    [9, 2],
])

# Neighborhood radius and minimum neighborhood size for a dense region
eps, MinPts = 1.5, 3

# Cluster the sample points
labels = dbscan(D, eps=eps, MinPts=MinPts)

# Show the label assigned to each point
print("Labels:", labels)

Labels: [ 1  1  1  2  2  2  1 -1 -1  2 -1 -1]


In [25]:
import numpy as np
import plotly.graph_objects as go

# Marker color per label; labels missing from this mapping fall back to
# DEFAULT_COLOR instead of raising KeyError.
color_dict = {
    1: 'blue',   # Cluster 1
    2: 'green',  # Cluster 2
    -1: 'red'    # Noise points
}
DEFAULT_COLOR = 'gray'

# Legend text per label; other cluster IDs get a generated "Cluster <id>" name.
legend_labels = {
    1: 'Cluster 1',
    2: 'Cluster 2',
    -1: 'Noise'
}

# Draw one trace per distinct label so that every cluster (and the noise
# group) gets its own legend entry and color.
fig = go.Figure()
for label in np.unique(labels):
    members = labels == label  # Boolean mask selecting this label's points
    fig.add_trace(go.Scatter(
        x=D[members, 0],  # X-coordinates of the points
        y=D[members, 1],  # Y-coordinates of the points
        mode='markers',   # Display as markers
        name=legend_labels.get(label, f'Cluster {label}'),
        marker=dict(
            color=color_dict.get(label, DEFAULT_COLOR),
            size=10
        ),
        text=labels[members]  # Raw label as hover text for each point
    ))

# Update plot layout
fig.update_layout(
    title='Points with Cluster Colors',  # Set plot title
    xaxis_title='X-axis',                # Set x-axis title
    yaxis_title='Y-axis'                 # Set y-axis title
)

# Display the generated Plotly plot
fig.show()
