In [28]:
import scipy.io
import numpy as np
from sklearn.model_selection import train_test_split
# Read the MATLAB file; the result is indexed by variable name below
# (e.g. mat["indian_pines"]).
dataset_path = "Indian_pines.mat"
mat = scipy.io.loadmat(dataset_path)


In [29]:
# The value stored under "indian_pines" is already an array, so np.asarray
# is enough here and avoids making a second full copy of the cube.
data = np.asarray(mat["indian_pines"])
# Collapse the first two axes so that every row of data_2d holds all band
# values of a single pixel: (rows, cols, bands) -> (rows * cols, bands).
n_pixels = data.shape[0] * data.shape[1]
n_bands = data.shape[2]
data_2d = data.reshape(n_pixels, n_bands)
# Last expression of the cell: display the shape of the original cube.
data.shape

(145, 145, 220)

In [17]:
data_2d.shape 

(21025, 220)

In [18]:
# Hold out 30% of the pixel rows for testing; the fixed seed keeps the
# partition reproducible between runs.
X_train, X_test = train_test_split(
    data_2d,
    test_size=0.3,
    random_state=42,
)

In [21]:
# Truncated SVD of the training set: X_train = U @ diag(s) @ Vt.
# NOTE: X_train is not mean-centred before the decomposition, so the rows of
# Vt are singular directions of the raw data, not PCA axes of its covariance.
U, s, Vt = np.linalg.svd(X_train, full_matrices=False)
k = 10  # number of leading singular components to keep
U_k = U[:, :k]
s_k = np.diag(s[:k])
Vt_k = Vt[:k, :]
# Rank-k reconstruction of the training samples, shape (n_train, n_bands).
X_train_recon_svd = U_k @ s_k @ Vt_k
# U_k only has rows for the training samples, so it cannot be reused for the
# test set. Reconstruct test samples by projecting them onto the subspace
# spanned by the top-k right singular vectors learned from the training data.
# For the training set this projection equals U_k @ s_k @ Vt_k, so both
# reconstructions live in the same subspace. Result: (n_test, n_bands).
X_test_recon_svd = X_test @ Vt_k.T @ Vt_k



In [23]:
from sklearn.decomposition import NMF
# Factorise the training set as X_train ~= W @ H using 10 non-negative
# components, with a seeded random initialisation.
model = NMF(
    n_components=10,
    init='random',
    random_state=0,
)
W = model.fit_transform(X_train)  # per-sample coefficients for the training rows
H = model.components_             # the 10 learned components, one per row
X_train_recon_nmf = W @ H

# Encode the held-out rows against the already-fitted components, then map
# them back to the original feature space with the same H.
W_test = model.transform(X_test)
X_test_recon_nmf = W_test @ H






In [25]:
X_train_recon_nmf.shape

(14717, 220)

In [26]:
X_train_recon_svd.shape

(14717, 220)

In [37]:
from sklearn.cluster import KMeans
# Group the SVD-reconstructed training samples into 16 clusters
# (seeded so the centroid initialisation is repeatable).
kmeans = KMeans(n_clusters=16, random_state=0)
kmeans.fit(X_train_recon_svd)
# Cluster index assigned to every training sample by the fitted model.
X_train_recon_svd_kmeans = kmeans.predict(X_train_recon_svd)


(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15]),
 array([ 906,  619,  860, 1466, 1399,  620, 1075, 1016,  735,  815,  952,
        1406,  384, 1513,  860,   91], dtype=int64))

In [38]:
# Count how many training samples landed in each cluster of the
# SVD-based K-means run.
svd_cluster_sizes = np.unique(X_train_recon_svd_kmeans, return_counts=True)
unique_labels, counts = svd_cluster_sizes
# Report one line per cluster id together with its size.
for label, count in zip(unique_labels, counts):
    print(f"Label {label}: {count}")

Label 0: 906
Label 1: 619
Label 2: 860
Label 3: 1466
Label 4: 1399
Label 5: 620
Label 6: 1075
Label 7: 1016
Label 8: 735
Label 9: 815
Label 10: 952
Label 11: 1406
Label 12: 384
Label 13: 1513
Label 14: 860
Label 15: 91


In [39]:
# Group the NMF-reconstructed training samples into 16 clusters
# (seeded so the centroid initialisation is repeatable).
# Note: this rebinds `kmeans`, replacing the model fitted on the SVD data.
kmeans = KMeans(n_clusters=16, random_state=0)
kmeans.fit(X_train_recon_nmf)
X_train_recon_nmf_kmeans = kmeans.predict(X_train_recon_nmf)

# Count how many training samples landed in each cluster and report one
# line per cluster id together with its size.
nmf_cluster_sizes = np.unique(X_train_recon_nmf_kmeans, return_counts=True)
unique_labels, counts = nmf_cluster_sizes
for label, count in zip(unique_labels, counts):
    print(f"Label {label}: {count}")

Label 0: 1734
Label 1: 1012
Label 2: 919
Label 3: 633
Label 4: 632
Label 5: 1400
Label 6: 1541
Label 7: 1194
Label 8: 1039
Label 9: 709
Label 10: 356
Label 11: 851
Label 12: 1012
Label 13: 851
Label 14: 741
Label 15: 93
