In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np

# Read the cluster-labelled samples from the Excel workbook.
df = pd.read_excel("data_files/multi_scale_samples_cluster_labels.xlsx")

# Split the raw values: the first 28 columns are used as features, the last
# column is used as the cluster label.
data_array = df.values
label = data_array[:, -1]
train_data = data_array[:, :28]

# Rescale the features with a default-configured MinMaxScaler before PCA.
scaler = MinMaxScaler()
train_data = scaler.fit_transform(train_data)

# Reduce the scaled features to two principal components.
pca = PCA(n_components=2)
trans_data = pca.fit_transform(train_data)

# Show the 2-D projection and the variance ratio reported for each component.
print(trans_data)
print(pca.explained_variance_ratio_)

# Append the label as a third column next to the two components.
new_data = np.column_stack((trans_data, label))
print("Transformed data:", new_data)

# Write the combined table to an Excel file.
# NOTE: this rebinds `df`, so the originally loaded sheet is no longer
# reachable through that name after this point.
df = pd.DataFrame(new_data)
df.to_excel("ClusterVisualization.xlsx")

# Create scatter plot with different colors for each cluster.
# Labels 0-6 each map to a fixed color; any other label value falls back to
# lawn green.
cluster_colors = {
    0: '#FF00FF',  # Magenta
    1: 'r',        # Red
    2: 'g',        # Green
    3: 'b',        # Blue
    4: '#696969',  # Dark Gray
    5: '#FFA500',  # Orange
    6: '#00BFFF',  # Deep Sky Blue
}
fallback_color = '#7CFC00'  # Lawn Green

# Resolve one color per sample (in row order), then draw every point with a
# single scatter call instead of issuing one call per point.
point_colors = [
    cluster_colors.get(cluster, fallback_color) for cluster in label
]
plt.scatter(trans_data[:, 0], trans_data[:, 1], c=point_colors)

plt.show()
