# In [None]:
from sklearn.cluster import OPTICS
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Build the input data frame: ten samples described by five principal components.
data = {
    'PC1': [
        4.268221, 43.018170, 12.690767, -11.206257, -12.890127,
        -6.877754, 11.830796, -13.748025, -5.242339, 6.089911,
    ],
    'PC2': [
        -51.277805, -5.458323, -1.213626, -12.641719, -0.349350,
        -2.149095, -0.143538, -8.015132, 5.318019, -3.421382,
    ],
    'PC3': [
        -10.480102, 4.651520, -1.368500, 8.621291, -0.633938,
        -0.108776, 0.335296, 15.943814, -8.142536, 1.917324,
    ],
    'PC4': [
        -8.524084, 3.484968, 0.835437, 23.878764, 12.915047,
        8.001788, 0.264470, 5.850048, -4.727296, 3.624886,
    ],
    'PC5': [
        0.649149, 0.952031, -0.575149, -7.124372, 4.973803,
        0.706754, 0.242483, 1.744722, -3.060403, 0.457345,
    ],
}

X = pd.DataFrame.from_dict(data)

# Keep only two of the dimensions (here PC1 and PC2) for clustering and plotting.
selected_dimensions = ['PC1', 'PC2']
X_selected = X.loc[:, selected_dimensions]

# Function to perform OPTICS clustering and visualize the results
def OPTICS_cluster(X, eps, min_samples):
    """Cluster ``X`` with OPTICS, plot the first two columns, and return the labels.

    Args:
        X: Samples to cluster, as a DataFrame or 2-D array-like with at least
            two columns. Every column is used for clustering; only the first
            two are drawn on the scatter plot.
        eps: Neighbourhood radius used to extract DBSCAN-style clusters from
            the OPTICS ordering.
        min_samples: Forwarded to ``OPTICS`` as ``min_samples``.

    Returns:
        The label array produced by ``fit_predict``; ``-1`` marks noise points.
    """
    # scikit-learn only honours ``eps`` when ``cluster_method="dbscan"``; with
    # the default "xi" extraction the value passed here is silently ignored.
    optics = OPTICS(eps=eps, min_samples=min_samples, cluster_method="dbscan")

    # Fit the model and obtain one label per sample.
    cluster_labels = optics.fit_predict(X)

    # Real clusters only (noise is labelled -1); sorted so that the colour
    # assignment and legend order are deterministic.
    unique_labels = sorted(set(cluster_labels) - {-1})
    num_clusters = len(unique_labels)

    # Positional indexing on a plain array works for DataFrames and ndarrays.
    points = np.asarray(X)

    fig, ax = plt.subplots(figsize=(10, 10))

    # ``plt.get_cmap`` replaces ``matplotlib.cm.get_cmap``, which was removed
    # in Matplotlib 3.9. Request at least one entry so that a result with no
    # clusters does not ask for an empty colormap.
    palette = plt.get_cmap('tab20', max(num_clusters, 1))

    for k, col in zip(unique_labels, palette(np.arange(num_clusters))):
        class_member_mask = (cluster_labels == k)
        ax.plot(points[class_member_mask, 0], points[class_member_mask, 1], "o",
                markerfacecolor=col, markeredgecolor="k", markersize=6,
                label=f'Cluster {k}')

    # Draw noise points in light gray; skipped when there are none so the
    # legend does not show an empty "Noise" entry.
    noise_mask = (cluster_labels == -1)
    if noise_mask.any():
        ax.plot(points[noise_mask, 0], points[noise_mask, 1], "o",
                markerfacecolor="lightgray", markeredgecolor="k", markersize=6,
                label='Noise')

    ax.set_title(f"OPTICS Cluster Results (eps={eps}, min_samples={min_samples})")
    ax.legend()
    plt.show()

    # Report the number of clusters formed.
    print(f"For the epsilon value of {eps} and min_samples of {min_samples}, the number of clusters is {num_clusters}")

    return cluster_labels

# Function to optimize and get the optimal eps value
def optimise_epsilon(X, threshold=12.5, n_neighbors=2):
    """Plot the sorted neighbour-distance curve and return the chosen epsilon.

    The function does not search for the elbow itself: it draws the sorted
    distance curve with a horizontal guide line at ``threshold`` so the value
    can be checked visually, and then returns ``threshold``.

    Args:
        X: Samples (DataFrame or 2-D array-like) used to fit the
            nearest-neighbour model.
        threshold: Candidate epsilon drawn as a dashed line and returned.
            Defaults to 12.5, the value previously hard-coded here.
        n_neighbors: Number of neighbours requested from ``NearestNeighbors``.
            Each point is expected to come back as its own first neighbour at
            distance 0, so the default of 2 plots the distance to the closest
            other point.

    Returns:
        The value of ``threshold``.
    """
    from sklearn.neighbors import NearestNeighbors

    # Fit on X and query the same points; the first element of the result is
    # the distance array with one row per sample.
    nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(X)
    distances = nbrs.kneighbors(X)[0]

    # Keep only the farthest requested neighbour (the last column) and sort
    # those distances in ascending order across all samples.
    k_distances = np.sort(distances[:, -1])

    # Plot "Euclidean Distance" against "Data Point"; the elbow of this curve
    # is the usual visual cue for a suitable epsilon.
    plt.plot(k_distances, color="r")
    plt.title("Sorted Euclidean Distance to Nearest Neighbor")
    plt.xlabel("Data Point")
    plt.ylabel("Euclidean Distance")
    plt.axhline(y=threshold, color='black', linestyle='--')  # candidate epsilon
    plt.show()

    # The epsilon is chosen manually via ``threshold`` rather than computed.
    optimal_eps = threshold

    return optimal_eps

# Obtain the epsilon value (this also displays the sorted neighbour-distance plot).
optimal_eps = optimise_epsilon(X_selected)

# Cluster the two selected dimensions with that epsilon and show the result.
OPTICS_cluster(X_selected, optimal_eps, min_samples=5)
