# Practice DBC

## Density-Based Clustering



In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons, make_circles, make_blobs, make_swiss_roll, make_gaussian_quantiles
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Function to generate different datasets
def generate_dataset(choice, random_state):
    """Build one of five 300-sample synthetic datasets.

    Args:
        choice: Selector for the generator. 0 -> make_moons, 1 -> make_circles,
            2 -> make_blobs, 3 -> make_swiss_roll (reduced to two columns);
            any other value -> make_gaussian_quantiles.
        random_state: Seed forwarded unchanged to the scikit-learn generator.

    Returns:
        The two-element result of the selected generator. For the swiss roll,
        the first element keeps only columns 0 and 2 of the generated points
        and the second element is passed through untouched.
    """
    sample_count = 300

    if choice == 0:
        return make_moons(n_samples=sample_count, noise=0.05, random_state=random_state)

    if choice == 1:
        return make_circles(
            n_samples=sample_count, noise=0.05, factor=0.5, random_state=random_state
        )

    if choice == 2:
        return make_blobs(
            n_samples=sample_count, centers=3, cluster_std=0.6, random_state=random_state
        )

    if choice == 3:
        roll_points, roll_values = make_swiss_roll(
            n_samples=sample_count, noise=0.05, random_state=random_state
        )
        # Keep two of the generated columns so the data can be drawn in 2D.
        flat_points = roll_points[:, [0, 2]]
        return flat_points, roll_values

    # Every remaining selector value falls back to Gaussian quantiles.
    return make_gaussian_quantiles(
        n_samples=sample_count, n_features=2, random_state=random_state
    )
    

# Number of parameter settings tried per dataset, and number of datasets
# (selector values 0..4 of generate_dataset) that each experiment visits.
_SWEEP_STEPS = 30
_DATASET_COUNT = 5


def _plot_sweep_summary(values):
    """Draw one per-step metric of a sweep as a line plot against its step index."""
    steps = np.arange(len(values))

    plt.figure(figsize=(8, 5))
    plt.plot(steps, values, marker='o', linestyle='-', color='b', label="Data values")

    plt.xlabel("Index")
    plt.ylabel("Value")
    plt.title("Index vs Value Graph")
    plt.legend()

    plt.grid(True, linestyle='--', alpha=0.6)

    plt.show()


def _run_dbscan_sweep(eps_for_step, min_samples_for_step):
    """Run a DBSCAN parameter sweep on every dataset and plot the outcome.

    For each dataset the sweep fits DBSCAN once per step, shows a scatter
    plot of the resulting labels, and finally shows two summary plots: the
    number of noise points per step and the number of clusters per step.

    Args:
        eps_for_step: Callable mapping a step index (0-based) to the ``eps``
            value passed to DBSCAN.
        min_samples_for_step: Callable mapping a step index (0-based) to the
            ``min_samples`` value passed to DBSCAN.
    """
    for dataset_idx in range(_DATASET_COUNT):
        # The data depends only on the dataset index and the fixed seed, so it
        # is generated and standardised once instead of once per sweep step.
        X, _ = generate_dataset(dataset_idx, random_state=42)
        X = StandardScaler().fit_transform(X)

        noise_counts = np.zeros(_SWEEP_STEPS)
        cluster_counts = np.zeros(_SWEEP_STEPS)

        for step in range(_SWEEP_STEPS):
            dbscan = DBSCAN(
                eps=eps_for_step(step),
                min_samples=min_samples_for_step(step),
            )
            labels = dbscan.fit_predict(X)

            # Noise points carry the label -1 and do not count as a cluster.
            noise_counts[step] = np.sum(labels == -1)
            cluster_counts[step] = len(set(labels)) - (1 if -1 in labels else 0)

            plt.figure()
            plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', edgecolor='k')
            # Title the figure with the dataset actually plotted (1-based).
            plt.title(f"DBSCAN Clustering - Dataset {dataset_idx + 1}")
            plt.xlabel("Feature 1")
            plt.ylabel("Feature 2")
            plt.show()

        _plot_sweep_summary(noise_counts)
        _plot_sweep_summary(cluster_counts)


def changeEpsilon():
    """Sweep ``eps`` from 0.06 in steps of 0.02 with ``min_samples`` fixed at 5."""
    _run_dbscan_sweep(
        eps_for_step=lambda step: 0.02 * step + 0.06,
        min_samples_for_step=lambda step: 5,
    )


def changeMinPoints():
    """Sweep ``min_samples`` over 1..30 with ``eps`` fixed at 0.3.

    The sweep starts at 1 rather than 0 because scikit-learn's DBSCAN does not
    accept ``min_samples`` below 1.
    """
    _run_dbscan_sweep(
        eps_for_step=lambda step: 0.3,
        min_samples_for_step=lambda step: step + 1,
    )


def changeBothInverse():
    """Sweep ``eps`` upward from 0.06 while ``min_samples`` falls from 30 to 1."""
    _run_dbscan_sweep(
        eps_for_step=lambda step: 0.02 * step + 0.06,
        min_samples_for_step=lambda step: _SWEEP_STEPS - step,
    )


def changeBoth():
    """Sweep ``eps`` upward from 0.06 while ``min_samples`` rises from 1 to 30.

    ``min_samples`` starts at 1 rather than 0 because scikit-learn's DBSCAN
    does not accept values below 1; this also mirrors changeBothInverse.
    """
    _run_dbscan_sweep(
        eps_for_step=lambda step: 0.02 * step + 0.06,
        min_samples_for_step=lambda step: step + 1,
    )

#changeEpsilon()
#changeMinPoints()
#changeBoth()
#changeBothInverse()
