In [2]:
import numpy as np
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.neighbors import KernelDensity
from sklearn.datasets import make_moons, make_blobs, make_classification
from sklearn.manifold import TSNE
from scipy.sparse.csgraph import laplacian
from scipy.spatial.distance import pdist, squareform
from lund.lund import LearningbyUnsupervisedNonlinearDiffusion
from lund.utils import GraphExtractor, diffusion_distance
from model.utils import loadHSI,calculate_aligned_accuracy
import matplotlib.pyplot as plt
import scipy.io


# data_path, gt_path, data_name, gt_name = '/Users/seoli/Desktop/DIAMONDS/Tufts2024/data/SalinasA_corrected.mat', '/Users/seoli/Desktop/DIAMONDS/Tufts2024/data/SalinasA_gt.mat', 'salinasA_corrected', 'salinasA_gt'


# X, M, N, D, HSI, GT, Y, n, K = loadHSI(data_path, gt_path, data_name, gt_name)
X, GT = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, n_clusters_per_class=1, random_state=42)
scipy.io.savemat('data.mat', {'X': X, 'GT': GT})

grid_size = int(np.ceil(np.sqrt(X.shape[0])))
print("GRID SIZE: ", grid_size)

print("Data shape:", X.shape)

# GT = GT - 1
# HSI = X.reshape((M, N, D))
Hyperparameters = {
    'Sigma': 1.0,
    'DiffusionNN': 10,
    
}

kde = KernelDensity(bandwidth=1.0)
kde.fit(X)
log_density = kde.score_samples(X)
p = np.exp(log_density)

graph_extractor = GraphExtractor()
G = graph_extractor.extract_graph(X)
print(G)
t = 1
distances, embedding = diffusion_distance(G, t)

# Plot the diffusion map (embedding)
plt.figure(figsize=(8, 6))
plt.scatter(embedding[:, 1], embedding[:, 2], c=np.arange(len(embedding)), cmap='viridis')
plt.title('Diffusion Map')
plt.xlabel('Component 1')
plt.ylabel('Component 2')
plt.colorbar()
plt.show()

# Plot the diffusion distances
plt.figure(figsize=(8, 6))
plt.imshow(distances, cmap='viridis')
plt.title('Diffusion Distances')
plt.colorbar()
plt.show()

C, K, Dt = LearningbyUnsupervisedNonlinearDiffusion(X, t, G, p)


# print("Cluster labels:", C)
# print("GT",GT)
print("Number of clusters:", K)

accuracy = calculate_aligned_accuracy(GT, C)
print("Aligned Accuracy:", accuracy)

def plot_clusters(X, labels, title):
    plt.scatter(X[:, 0], X[:, 1], c=labels, cmap='viridis', s=50)
    plt.title(title)
    plt.show()
    
def plot_kde_density(X, title):
    # Create the KDE plot
    plt.figure(figsize=(10, 6))
    sns.kdeplot(x=X[:, 0], y=X[:, 1], cmap='viridis', fill=True, levels=20, thresh=0)
    plt.scatter(X[:, 0], X[:, 1], c='red', s=5, edgecolor='k')
    plt.title(title)
    plt.show()


plot_clusters(X, C, f"LUND Clustering (Aligned Accuracy: {accuracy:.2f})")
plot_clusters(X, GT, "Ground Truth")
# plot_kde_density(X, "KDE Density Estimation")

# kmeans = KMeans(n_clusters=2, random_state=42)
# kmeans_labels = kmeans.fit_predict(X)
# kmeans_accuracy = calculate_aligned_accuracy(GT, kmeans_labels)
# print("Aligned Accuracy (KMeans Clustering):", kmeans_accuracy)
# plot_clusters(X, kmeans_labels, f"KMeans Clustering (Aligned Accuracy: {kmeans_accuracy:.2f})")




GRID SIZE:  32
Data shape: (1000, 2)
Initialized with NEigs = 1000 (type: <class 'int'>)
[[0.         2.71213211 2.59563926 ... 2.60488795 0.63249972 3.13851077]
 [2.71213211 0.         0.63901434 ... 2.50514732 2.89902198 3.47108215]
 [2.59563926 0.63901434 0.         ... 1.88806756 2.91528799 2.85288259]
 ...
 [2.60488795 2.50514732 1.88806756 ... 0.         3.19692937 0.96601409]
 [0.63249972 2.89902198 2.91528799 ... 3.19692937 0.         3.76737337]
 [3.13851077 3.47108215 2.85288259 ... 0.96601409 3.76737337 0.        ]]
entering try


  eigvals, eigvecs = eigs(P, k = self.NEigs)


EigenDecomposition of P failed
{'Hyperparameters': {'Sigma': 1.0, 'DiffusionNN': 100}, 'EigenVecs': nan, 'EigenVals': nan, 'StationaryDist': nan, 'P': nan, 'W': nan}


TypeError: object of type 'float' has no len()