In [1]:
import numpy as np
import pandas as pd
import xlrd


In [None]:
def load_data(input_file):
    """Read an Excel workbook and expose it both as a DataFrame and as an array.

    Parameters
    ----------
    input_file : path or file-like
        Passed straight to ``pandas.read_excel``.

    Returns
    -------
    tuple
        ``(source_data, np_data)`` where ``source_data`` is the DataFrame
        returned by ``read_excel`` and ``np_data`` holds the same cells as a
        NumPy array.
    """
    source_data = pd.read_excel(input_file)
    # The whole frame is wanted, so take its values directly.
    np_data = source_data.values
    return source_data, np_data

In [None]:
def data_process(np_data):
    """Min-max scale each column of ``np_data`` independently.

    Parameters
    ----------
    np_data : 2-D array
        Each column is passed on its own to
        ``sklearn.preprocessing.minmax_scale``.

    Returns
    -------
    numpy.ndarray
        A new float array with the same shape as ``np_data`` holding the
        scaled columns; the input is not modified.
    """
    from sklearn.preprocessing import minmax_scale

    n_columns = np_data.shape[1]
    scaled = np.zeros(np_data.shape)
    for col in range(n_columns):
        # Scale one column at a time and drop it into the output buffer.
        scaled[:, col] = minmax_scale(np_data[:, col])
    return scaled

In [None]:
def cluster_split(array_data):
    """Return the columns used for clustering.

    Keeps every row and drops the first and the last column of
    ``array_data``.
    """
    feature_columns = slice(1, -1)
    return array_data[:, feature_columns]

In [None]:
def train_cluster(train_data, np_data, source_data,
                  output_file='cluster_data2.xls', random_state=None):
    """Cluster ``train_data`` with KMeans, export the result to Excel and plot it.

    The whole routine lives in a single cell: a function body cannot be
    continued across notebook cells.

    Parameters
    ----------
    train_data : 2-D array
        Feature matrix the KMeans model is fitted on; also handed to
        ``plot_cluster``.
    np_data : 2-D array
        Row-aligned data written to the workbook. Its first two columns are
        exported as ``'temp'`` and ``'stress'`` on the label sheet.
    source_data : pandas.DataFrame
        Only its column names are used, as headers of the per-cluster sheets;
        it must have as many columns as ``np_data``.
    output_file : str, optional
        Workbook path. The default keeps the original file name. Note that
        pandas 2.0+ no longer ships a writer for the legacy ``.xls`` format,
        so pass an ``.xlsx`` path on such installations.
    random_state : int or None, optional
        Forwarded to ``KMeans`` so a run can be made reproducible. ``None``
        (the default) keeps the estimator's own default seeding.

    Returns
    -------
    None
    """
    from sklearn.cluster import KMeans

    # Fit the model (default number of clusters) and keep one label per row.
    model = KMeans(random_state=random_state)
    model.fit(train_data)
    labels = model.labels_

    # Append the label as an extra last column so rows can be filtered by it.
    combine = np.concatenate((np_data, labels[:, None]), axis=1)

    # The context manager saves and closes the workbook even if a sheet write
    # fails; it replaces the explicit ``writer.save()`` call, which recent
    # pandas releases no longer provide.
    with pd.ExcelWriter(output_file) as writer:
        # Sheet 1: first two data columns next to the assigned label.
        label_sheet = pd.concat(
            [pd.DataFrame(np_data[:, 0:2]), pd.DataFrame(labels)], axis=1
        )
        label_sheet.columns = ['temp', 'stress', 'label']
        label_sheet.to_excel(writer, sheet_name='cluster_label')

        # One sheet per label that actually occurs.
        for cluster_id in np.unique(labels):
            cluster_subset = combine[combine[:, -1] == cluster_id][:, :-1]
            # 'alloy' is a running row number within the cluster.
            running_index = pd.DataFrame(range(len(cluster_subset[:, 0])))
            cluster_sheet = pd.concat(
                [running_index, pd.DataFrame(cluster_subset)], axis=1
            )
            cluster_sheet.columns = ['alloy'] + list(source_data.columns)
            cluster_sheet.to_excel(writer, sheet_name='cluster_' + str(cluster_id))

    # Plot only after the workbook is safely on disk.
    plot_cluster(train_data, labels)

In [None]:
def plot_cluster(data_zs, labels):
    """Show a 2-D t-SNE scatter of ``data_zs`` coloured by cluster label.

    Parameters
    ----------
    data_zs : 2-D array
        Data to embed with ``sklearn.manifold.TSNE`` (default settings).
    labels : array-like of int
        One cluster label per row of ``data_zs``.

    Every label present in ``labels`` is drawn as dots. Colours come from an
    eight-entry palette that is reused cyclically when there are more labels
    than colours.
    """
    from sklearn.manifold import TSNE
    import matplotlib.pyplot as plt

    # Embed the high-dimensional data into 2-D.
    embedding = pd.DataFrame(TSNE().fit_transform(data_zs))

    labels = np.asarray(labels)
    palette = ['k', 'r', 'y', 'g', 'c', 'm', 'b', '#EE82EE']

    for cluster_id in np.unique(labels):
        points = embedding[labels == cluster_id]
        # Marker and colour are passed separately: a bare colour such as
        # '#EE82EE' used as the format string would draw a connected line
        # instead of dots.
        plt.plot(
            points[0], points[1],
            linestyle='none', marker='.',
            color=palette[int(cluster_id) % len(palette)],
        )

    plt.show()

In [None]:
def run_cluster():
    """Run the full pipeline: load, scale, select features, cluster and export.

    Reads ``multi_scale_samples_revision.xlsx``, min-max scales it, and hands
    the feature columns to ``train_cluster``. The scaled array (not the raw
    values) is what ``train_cluster`` receives as the data to export.
    """
    print("Starting the clustering process...")
    input_path = 'multi_scale_samples_revision.xlsx'
    frame, raw_values = load_data(input_path)
    scaled_values = data_process(raw_values)
    features = cluster_split(scaled_values)
    train_cluster(features, scaled_values, frame)

# Entry point: greet the user, then run the whole clustering pipeline.
if __name__ == '__main__':
    print("Welcome to the world of clustering!")
    run_cluster()