In [None]:
import sys
from IPython.display import display
from pathlib import Path as path
import numpy as np
import pandas as pd
import hyperspy.api as hs
import xarray as xr

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib import ticker
from matplotlib.colors import LinearSegmentedColormap, to_rgba

In [None]:
# Plot defaults: apply the personal matplotlib style sheet
plt.style.use(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-python\Figure\liuchzzyy.mplstyle')
# display(plt.style.available)

# Colours: make the local `colors` module importable and take its 'vibrant' set
sys.path.append(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Python\Figure')
from colors import tol_cmap, tol_cset
colors = list(tol_cset('vibrant'))
# Register the extra colormaps only once so re-running the cell does not
# attempt a second registration ('rainbow_PuRd' is kept as a spare for plasma)
for cmap_name in (r'sunset', r'rainbow_PuRd'):
    if cmap_name not in plt.colormaps():
        plt.colormaps.register(tol_cmap(cmap_name))

# Output folder for every figure and data file written by this notebook
path_out = path(r"C:\Users\chengliu\Desktop\Figure")

In [None]:
from mpl_toolkits.axes_grid1.anchored_artists import AnchoredSizeBar
def add_sizebar(ax, size, data, color):
    """Attach a labelled scale bar to the lower-left corner of ``ax``.

    Parameters
    ----------
    ax : matplotlib axes showing the image (pixel coordinates).
    size : bar length expressed in the units of the signal's "y" axis.
    data : signal whose ``axes_manager["y"]`` supplies ``scale`` and ``units``.
    color : colour used for the bar and its label.

    The bar length in pixels is ``size / scale`` of the "y" axis.
    NOTE(review): the bar is drawn horizontally but calibrated with the "y"
    axis, so this presumably assumes square pixels - confirm.
    """
    bar_length_px = size / data.axes_manager["y"].scale
    bar_label = f"{size} {data.axes_manager['y'].units}"
    sizebar = AnchoredSizeBar(
        ax.transData,
        bar_length_px,
        bar_label,
        loc='lower left',
        pad=0.1,
        borderpad=0.5,
        sep=0.5,
        frameon=False,
        color=color,
        label_top=True,
    )
    ax.add_artist(sizebar)
        
def transparent_single_color_cmap(color):
    """Build an unnamed two-stop colormap of a single colour.

    The first stop is ``color`` with alpha 0 and the last stop is ``color``
    with alpha 1, so the lowest values are fully transparent and the highest
    values are fully opaque.
    """
    rgba_clear = to_rgba(color, 0)
    rgba_solid = to_rgba(color, 1)
    return LinearSegmentedColormap.from_list("", [rgba_clear, rgba_solid])

### 读取数据

In [None]:
# Acquisition folder; the .emd file lives in its "Data" sub-folder
path_file = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\TEM\ExSitu\αMnO2\Charge\1st0.9V\αMnO2 + PVDF + SP\1M ZnSO4 + 1M MnSO4\2024-EMCA\EDS\0003 - B8_HAADF_67000_x')
file = path_file / r'Data' / r'0003 - B8_HAADF_67000_x.emd'
# `data` is iterated and indexed below, i.e. the file yields several signals
data = hs.load(file) # type: ignore
data

In [None]:
# Bring every signal with a 2-D image space to nanometres and, for 3-D
# datasets, reset the image-axis offsets to zero.
for file in data:
    manager = file.axes_manager

    # Pick the pair of axes that spans the image: navigation first, then signal
    if len(manager.navigation_shape) == 2:
        axes_kind, image_axes = "navigation", manager.navigation_axes
    elif len(manager.signal_shape) == 2:
        axes_kind, image_axes = "signal", manager.signal_axes
    else:
        continue

    n_dims = len(manager.shape)
    if n_dims >= 2 and image_axes[0].units == r'µm':
        manager.convert_units(axes=axes_kind, units='nm', same_units=True, factor=1000)

    if n_dims == 3:
        for axis in image_axes:
            axis.offset = 0

data[-1].axes_manager

In [None]:
# HADDF 图
%matplotlib inline
plt.close('all')

fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None, wspace=0, hspace=0, figure=fig)

subfig = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig.add_subplot()
ax.set_position([0, 0, 1.0, 1.0])

ax.imshow(data[5].data, cmap='gray', aspect=1.0) # HADDF
add_sizebar(ax, 50, data[5], 'w')
ax.set_axis_off()

plt.savefig(path.joinpath(path_out, r'TEM_EDS_HADDF_600.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600, transparent=False, pil_kwargs={"compression": "tiff_lzw"})
plt.show()

### EDS mappings

In [None]:
# TEM 图
%matplotlib inline

plt.close('all')
fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None, wspace=0, hspace=0, figure=fig)

subfig = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig.add_subplot()
ax.set_position([0, 0, 1.0, 1.0])

ax.imshow(data[4].data, cmap=transparent_single_color_cmap(colors[0]), aspect=1.0)
ax.imshow(data[6].data, cmap=transparent_single_color_cmap(colors[1]), aspect=1.0)
ax.imshow(data[10].data, cmap=transparent_single_color_cmap(colors[2]), aspect=1.0, alpha=1.0)
add_sizebar(ax, 50, data[4], 'k')
ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, labelbottom=False, labelleft=False,)

# plt.savefig(path.joinpath(path_out, r'TEM_EDS_Mixed_Mappings_Mn_Zn_S_300_V0_0.tif'), pad_inches=0.05, bbox_inches='tight', dpi=300, transparent=False, pil_kwargs={"compression": "tiff_lzw"})
# plt.savefig(path.joinpath(path_out, r'TEM_EDS_Mixed_Mappings_Mn_Zn_S_600_V0_0.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600, transparent=False, pil_kwargs={"compression": "tiff_lzw"})
plt.show()

### EDSsum quantification

In [None]:
from typing import Tuple, List
from IPython.utils.io import capture_output

def EDS_Fit(
    data,
    elements: List[str],
    energy_range: Tuple[float, float] = (0.14, 10.0),
    data_plot: bool = True,
    offsetA: float = 1000,
    colors: List[str] = colors[0:4],
    axislims: Tuple[List[float], List[float]] = ([-0.01, 10.0], [-2000, 10000]),
    tickers: Tuple[List[float], List[float]] = ([2, 1], [2000, 1000]),
    save_data: bool = True,
    output_dir: path = path_out,
    output_name: str = "Sum"
) -> None:
    """Sum ``data``, fit an EDS model to it and report a quantification.

    Parameters
    ----------
    data : input signal; it is summed and cropped to ``energy_range``.
    elements : list of element symbols assigned to the spectrum.
    energy_range : ``(low, high)`` bounds used to slice the signal axis.
    data_plot : when True, draw and save the fit via ``plot_spectrum``.
    offsetA : vertical offset subtracted from the residual curve in the plot.
    colors : colours forwarded to ``plot_spectrum`` (data, fit, residuals).
    axislims : ``(xlim, ylim)`` forwarded to ``plot_spectrum``.
    tickers : ``([x_major, x_minor], [y_major, y_minor])`` tick spacings.
    save_data : when True, store spectra and model via
        ``save_quantification_data``.
    output_dir : directory receiving every output file.
    output_name : tag inserted into the output file names.

    Raises
    ------
    ValueError
        If ``energy_range`` is not a tuple of length two.
    TypeError
        If ``elements`` is not a list.

    Side effects
    ------------
    Always writes ``EDS_Model_Parameters_<output_name>.txt`` into
    ``output_dir`` and prints the ratio table.
    """
    # Argument validation
    if not isinstance(energy_range, tuple) or len(energy_range) != 2:
        raise ValueError("energy_range must be a tuple of two floats")
    if not isinstance(elements, list):
        raise TypeError("elements must be a list of strings")

    # Data preparation: summed spectrum restricted to the requested window
    spectrum = data.sum().isig[energy_range[0]:energy_range[1]].deepcopy()
    spectrum.set_elements(elements)
    model = spectrum.create_model()

    # Model calibration and fitting
    model.fit_background()
    # model.calibrate_energy_axis(calibrate='resolution')
    model.calibrate_xray_lines('energy', bound=50)
    model.calibrate_xray_lines('width', bound=10)
    # model.calibrate_xray_lines('sub_weight', bound=10)
    model.multifit()

    # Quantification (needed whether or not the data are saved).
    # The line intensities are evaluated once and then indexed.
    # NOTE(review): the indices below are hard-coded and presumably match the
    # line order produced by the element list used in this notebook - confirm
    # before calling with a different `elements` list.
    line_intensities = model.get_lines_intensity()
    intensities = [line_intensities[i] for i in [4,5,6,7,9,11,12]]
    # K_Ka, Mn_Ka, Mn_La, O_Ka, S_Ka, Zn_Ka, Zn_La
    kfactors = [0.606, 0.753, 1.84, 0.691, 0.585, 0.950, 1.23]
    quant = spectrum.quantification(
        intensities,
        method="CL",
        factors=kfactors,
        plot_result=False,
        composition_units="atomic"
    )

    # Visualisation
    if data_plot:
        plot_spectrum(
            spectrum=spectrum,
            model=model,
            offsetA=offsetA,
            colors=colors,
            axislims=axislims,
            tickers=tickers,
            save_path=output_dir,
            save_name=output_name
        )

    # Persist spectra, fit and model
    if save_data:
        save_quantification_data(
            spectrum=spectrum,
            model=model,
            quant=quant,
            save_path=output_dir,
            save_name=output_name
        )

    # Capture the parameter table that the model displays/prints
    with capture_output() as captured:
        model.print_current_values()
    if captured.outputs:
        parameter_text = str(captured.outputs[0].data['text/plain'])
    else:
        # Nothing was sent through the rich display machinery; use stdout
        parameter_text = captured.stdout

    # The ratio table is built once and reused for the file and the console
    report_text = "\n".join(create_attrs_dict(quant, displayA=True))

    # Write ratios and parameter details; explicit UTF-8 keeps the write
    # independent of the platform default encoding
    report_path = path.joinpath(output_dir, f'EDS_Model_Parameters_{output_name}.txt')
    with open(report_path, 'w', encoding='utf-8') as report_file:
        report_file.write(report_text)
        report_file.write("\n\n")
        report_file.write(parameter_text)

    # Print the quantification summary
    print(report_text)

    return None

def plot_spectrum(
    spectrum,
    model,
    offsetA: float,
    colors: List[str],
    axislims: Tuple[List[float], List[float]],
    tickers: Tuple[List[float], List[float]],
    save_path: path,
    save_name: str,
    dpi: int = 600,
) -> None:
    """Plot the experimental spectrum, the fit and the shifted residuals.

    Parameters
    ----------
    spectrum : fitted signal; its "X-ray energy" axis supplies the x values.
    model : fitted model; ``model.as_signal()`` supplies the fit curve.
    offsetA : amount subtracted from the residuals so they sit below the data.
    colors : at least three colours (experimental, fit, residuals).
    axislims : ``(xlim, ylim)``.
    tickers : ``([x_major, x_minor], [y_major, y_minor])`` tick spacings.
    save_path : output directory.
    save_name : tag inserted into the file name.
    dpi : resolution of the saved TIFF; it is also written into the file
        name (``EDS_Fit_<save_name>_<dpi>.tif``) so the name always matches
        the resolution actually used. The default of 600 is the resolution
        this function has always saved at (the name used to say 300).
    """
    save_path = path.joinpath(save_path, f'EDS_Fit_{save_name}_{dpi}.tif')

    plt.close('all')
    fitted_signal = model.as_signal()
    energy_axis = spectrum.axes_manager["X-ray energy"].axis
    original_data = spectrum.data
    fitted_data = fitted_signal.data
    residual = original_data - fitted_data

    # dpi here only sets the on-screen figure resolution
    fig = plt.figure(figsize=(3.3, 2.5), dpi=300)
    ax = fig.add_subplot(111)
    ax.set_position([0.15, 0.15, 0.8, 0.8])  # adjust the margins

    ax.plot(energy_axis, original_data, "-", color=colors[0], label="Experimental")
    ax.plot(energy_axis, fitted_data, "--", color=colors[1], label="Fit")
    ax.plot(energy_axis, residual - offsetA, "--", color=colors[2], label="Residuals")

    ax.set_xlabel("Energy (keV)", fontsize=11)
    ax.set_ylabel("Total Counts", fontsize=11)
    ax.set_xlim(axislims[0])
    ax.set_ylim(axislims[1])

    # Tick spacing
    ax.xaxis.set_major_locator(ticker.MultipleLocator(tickers[0][0]))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(tickers[0][1]))
    ax.yaxis.set_major_locator(ticker.MultipleLocator(tickers[1][0]))
    ax.yaxis.set_minor_locator(ticker.MultipleLocator(tickers[1][1]))

    ax.tick_params(axis="both", which="both", labelsize=9)
    ax.legend(
        loc="upper right",
        frameon=False,
        fontsize=9,
        handlelength=1.5,
        borderpad=0.2
    )

    plt.savefig(
        save_path,
        bbox_inches="tight",
        dpi=dpi,
        transparent=False,
        pil_kwargs={"compression": "tiff_lzw"}
    )
    plt.show()

def save_quantification_data(
    spectrum,
    model,
    quant,
    save_path: path,
    save_name: str,
) -> None:
    """Save the spectrum, its fit, the residuals and the model to ``save_path``.

    Parameters
    ----------
    spectrum : fitted signal (``.data`` is stored as "EDS_Sum_Data").
    model : fitted model; evaluated once with ``as_signal()``.
    quant : quantification result passed to ``create_attrs_dict``; the
        resulting dictionary becomes the attributes of "EDS_Sum_Fit".
    save_path : output directory.
    save_name : tag inserted into the output file names.

    Files written
    -------------
    ``EDS_Fit_<save_name>.NETCDF4`` (h5netcdf engine),
    ``EDS_Model_Parameters_<save_name>.npz`` and
    ``EDS_Models_<save_name>`` (overwritten if present).
    """
    # Evaluate the model once; the original rebuilt it for every use
    fitted_signal = model.as_signal()
    fitted_data = fitted_signal.data
    energy_coord = fitted_signal.axes_manager["X-ray energy"].axis

    datasets = [
        xr.DataArray(
            data=spectrum.data,
            coords={"Energy": energy_coord},
            dims=["Energy"],
            name="EDS_Sum_Data"
        ),
        xr.DataArray(
            data=fitted_data,
            coords={"Energy": energy_coord},
            dims=["Energy"],
            name="EDS_Sum_Fit",
            attrs=create_attrs_dict(quant, displayA=False)
        ),
        xr.DataArray(
            data=spectrum.data - fitted_data,
            coords={"Energy": energy_coord},
            dims=["Energy"],
            name="EDS_Sum_Residuals"
        )
    ]

    # Merge the three arrays into one dataset and write it out
    xr.merge(datasets).to_netcdf(
        path.joinpath(save_path, f"EDS_Fit_{save_name}.NETCDF4"),
        engine="h5netcdf"
    )

    # Persist the model parameters and the model itself
    model.save_parameters2file(path.joinpath(save_path, f"EDS_Model_Parameters_{save_name}.npz"))
    model.save(path.joinpath(save_path, f"EDS_Models_{save_name}"), overwrite=True)

def create_attrs_dict(quant, displayA: bool = False):
    """Derive atomic ratios and composition strings from a quantification.

    Parameters
    ----------
    quant : sequence of seven items whose ``.data[0]`` holds the value for,
        in order, K_Ka, Mn_Ka, Mn_La, O_Ka, S_Ka, Zn_Ka and Zn_La.
    displayA : when True return the table as a list of text lines,
        otherwise return the dictionary itself.

    Returns
    -------
    dict or list of str
        Keys ending in ``_1`` use the Ka values only for Mn and Zn; keys
        ending in ``_2`` use the Ka + La sums. The ``K_Mn_O_Zn_S_Others_*``
        strings list their values in the order the key names: K, Mn, O, Zn,
        S, remainder to 100.
    """
    k, mn_ka, mn_la, o, s, zn_ka, zn_la = (quant[i].data[0] for i in range(7))

    # Totals and the remainder to 100
    total_mn = mn_ka + mn_la
    total_zn = zn_ka + zn_la
    others_1 = 100 - k - mn_ka - o - s - zn_ka
    others_2 = 100 - k - total_mn - o - s - total_zn

    params = {
        "Ratio_S_Zn_1": s / zn_ka,
        "Ratio_K_Mn_1": k / mn_ka,
        "Ratio_Zn_Mn_1": zn_ka / mn_ka,
        # Value order follows the key (K, Mn, O, Zn, S, Others); previously
        # S and Zn were emitted in swapped positions
        "K_Mn_O_Zn_S_Others_1": f"{k}, {mn_ka}, {o}, {zn_ka}, {s}, {others_1}",
        "Ratio_S_Zn_2": s / total_zn,
        "Ratio_K_Mn_2": k / total_mn,
        "Ratio_Zn_Mn_2": total_zn / total_mn,
        "K_Mn_O_Zn_S_Others_2": f"{k}, {total_mn}, {o}, {total_zn}, {s}, {others_2}",
    }
    if not displayA:
        return params

    # Fixed-width text table: header, rule, then one row per entry
    lines = [f"{'Ratio_at':20} | {'Value':>12}", "=" * 40]
    lines.extend(f"{key:20} | {str(value):>12}" for key, value in params.items())
    return lines


#### 考虑全局

In [None]:
# Fit and quantify the last signal of the file as a whole
sum_fit_options = dict(
    elements=['K', 'Mn', 'O', 'S', 'Si', 'Zn', 'C', 'Cu', 'Al', 'P'],
    energy_range=(0.14, 10.0),
    data_plot=True,
    offsetA=500,
    axislims=([-0.01, 10.0], [-1000, 4000]),
    tickers=([2, 1], [1000, 500]),
    save_data=True,
    output_dir=path_out,
    output_name=r"Sum",
)
EDS_Fit(data=data[-1], **sum_fit_options)

#### 选择某些区域

In [None]:
%matplotlib ipympl
# Show signal 5 and attach an interactive rectangular ROI to it.
# NOTE: `rectangular_roi_1` and `roi2D_1` are bound again by the next cell,
# so the values created here only survive until that cell runs.
data[5].plot()
rectangular_roi_1 = hs.roi.RectangularROI(left=170, right=400, top=180, bottom=200)
roi2D_1 = rectangular_roi_1.interactive(data[5], color="yellow")

In [None]:
%matplotlib ipympl
# Plot the last signal and define the two ROIs used for the regional fits.
# The ROI bounds are the starting values; the interactive widgets are
# created on data[-1] and drawn in yellow.
data[-1].plot()
rectangular_roi_1 = hs.roi.RectangularROI(left=280, right=420, top=110, bottom=140)
rectangular_roi_2 = hs.roi.RectangularROI(left=250, right=420, top=180, bottom=200)
# rectangular_roi_3 = hs.roi.RectangularROI(left=730, right=1000, top=2200, bottom=2700)
# rectangular_roi_4 = hs.roi.RectangularROI(left=2300, right=2700, top=1350, bottom=1650)

# roi2D_1 / roi2D_2 are the ROI-restricted signals consumed by the fitting loop
roi2D_1 = rectangular_roi_1.interactive(data[-1], color="yellow")
roi2D_2 = rectangular_roi_2.interactive(data[-1], color="yellow")
# roi2D_3 = rectangular_roi_3.interactive(data[-1], color="yellow")
# roi2D_4 = rectangular_roi_4.interactive(data[-1], color="yellow")

In [None]:
%matplotlib inline

plt.close('all')
fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None,
                        wspace=0, hspace=0, figure=fig)
# 图 A
subfiga = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfiga.add_subplot()
ax.set_position([0.0, 0, 1.0, 1.0])
ax.imshow(data[6].data, cmap='gray',)
add_sizebar(ax, 20, data[6], 'w')
ax.set_axis_off()
ax.tick_params(axis='both', which='both', bottom=False, top=False, left=False, labelbottom=False, labelleft=False,)

rect1 = mpl.patches.Rectangle((int(rectangular_roi_1.x/data[-1].axes_manager[0].scale), int(rectangular_roi_1.y/data[-1].axes_manager[1].scale)), int(rectangular_roi_1.width/data[-1].axes_manager[0].scale), int(rectangular_roi_1.height/data[-1].axes_manager[1].scale), linewidth=1, edgecolor='y', facecolor='none', transform=ax.transData, zorder=5)
rect2 = mpl.patches.Rectangle((int(rectangular_roi_2.x/data[-1].axes_manager[0].scale), int(rectangular_roi_2.y/data[-1].axes_manager[1].scale)), int(rectangular_roi_2.width/data[-1].axes_manager[0].scale), int(rectangular_roi_2.height/data[-1].axes_manager[1].scale), linewidth=1, edgecolor='y', facecolor='none', transform=ax.transData, zorder=5)
# rect3 = mpl.patches.Rectangle((int(rectangular_roi_3.x/data[-1].axes_manager[0].scale), int(rectangular_roi_3.y/data[-1].axes_manager[1].scale)), int(rectangular_roi_3.width/data[-1].axes_manager[0].scale), int(rectangular_roi_3.height/data[-1].axes_manager[1].scale), linewidth=1, edgecolor='y', facecolor='none', transform=ax.transData, zorder=5)
# rect4 = mpl.patches.Rectangle((int(rectangular_roi_4.x/data[-1].axes_manager[0].scale), int(rectangular_roi_4.y/data[-1].axes_manager[1].scale)), int(rectangular_roi_4.width/data[-1].axes_manager[0].scale), int(rectangular_roi_4.height/data[-1].axes_manager[1].scale), linewidth=1, edgecolor='y', facecolor='none', transform=ax.transData, zorder=5)
ax.add_patch(rect1)
ax.add_patch(rect2)
# ax.add_patch(rect3)
# ax.add_patch(rect4)
ax.text(rectangular_roi_1.x/data[-1].axes_manager[0].scale+10, rectangular_roi_1.y/data[-1].axes_manager[1].scale, r'Roi_1', fontsize=10, color='w', ha='center', va='center', transform=ax.transData)
ax.text(rectangular_roi_2.x/data[-1].axes_manager[0].scale+10, rectangular_roi_2.y/data[-1].axes_manager[1].scale-5, r'Roi_2', fontsize=10, color='w', ha='center', va='center', transform=ax.transData)
# ax.text(rectangular_roi_3.x/data[-1].axes_manager[0].scale, rectangular_roi_3.y/data[-1].axes_manager[1].scale-15, r'Roi_3', fontsize=10, color='w', ha='center', va='center', transform=ax.transData)
# ax.text(rectangular_roi_4.x/data[-1].axes_manager[0].scale, rectangular_roi_4.y/data[-1].axes_manager[1].scale-50, r'Roi_4', fontsize=10, color='w', ha='center', va='center', transform=ax.transData)

plt.savefig(path.joinpath(path_out, r'TEM_EDS_Selected_Regions_600.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600, transparent=False, pil_kwargs={"compression": "tiff_lzw"})
plt.show()

#### EDSsum Fitting

In [None]:
# Per-ROI fit settings; entry i of each sequence belongs to ROI i
rois = [roi2D_1, roi2D_2,]
offsetA = [20, 20, 20]
axislims = ([[-0.01, 10.0], [-50, 300]], [[-0.01, 10.0], [-40, 250]], )
tickers = ([[2, 1], [60, 30]], [[2, 1], [50, 25]],)
for i, (file, roi_offset, roi_axislims, roi_tickers) in enumerate(zip(rois, offsetA, axislims, tickers)):
    EDS_Fit(
        data=file,
        elements=['K', 'Mn', 'O', 'S', 'Si', 'Zn', 'C', 'Cu', 'Al', 'P'],
        energy_range=(0.14, 10.0),
        data_plot=True,
        offsetA=roi_offset,
        axislims=roi_axislims,
        tickers=roi_tickers,
        save_data=True,
        output_dir=path_out,
        output_name=f"roi2D_{i+1}"
    )

### EDS-Sigma 分析

In [None]:
# 导入相关的包
import sigma
from sigma.utils import normalisation as norm
from sigma.utils import visualisation as visual
from sigma.utils.load import SEMDataset
from sigma.utils.loadtem import TEMDataset
from sigma.src.utils import same_seeds
from sigma.src.dim_reduction import Experiment
from sigma.models.autoencoder import AutoEncoder, VariationalAutoEncoder
from sigma.src.segmentation import PixelSegmenter
from sigma.gui import gui

#### 数据预处理

In [None]:
# Dataset analysed with sigma (a different acquisition from the one loaded above)
path_file = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\TEM\ExSitu\αMnO2\Pristine\αMnO2\2023-EMCA\EDS\0007 - B2 HAADF\Data')
file = path_file / r'0007 - B2 HAADF.emd'

In [None]:
# NOTE: `data` is rebound here from the HyperSpy signals loaded earlier to a
# sigma TEMDataset; cells above this one cannot be re-run afterwards without
# reloading.
data = TEMDataset(file)
# Select the X-ray lines to analyse
data.set_xray_lines(['O_Ka', 'Mn_Ka', 'K_Ka', 'Cu_Ka', 'Zn_Ka', 'S_Ka', 'P_Ka', 'Si_Ka', 'Al_Ka'])
# visual.plot_sum_spectrum(data.spectra)
# Pre-processing
# # Rebin both edx and bse dataset
# data.rebin_signal(size=(7,7))
# Remove the first peak, up to end=0.4 (presumably keV - confirm)
data.remove_fist_peak(end=0.4)
# Peak-intensity normalisation (per the original note: makes the spectrum of
# each pixel sum to 1 - TODO confirm against the sigma documentation)
data.peak_intensity_normalisation()
# Plot the sum spectrum again to check the effect of the steps above
visual.plot_sum_spectrum(data.spectra)

In [None]:
# Results for this dataset go into <path_out>/<file stem>.
# `path_out_folder` was never defined in this notebook (NameError on a fresh
# kernel); `path_out` is the only output directory that is defined.
path_normalisation = path_out / file.stem
path_normalisation.mkdir(parents=True, exist_ok=True)
# Normalise the dataset by applying the listed methods in sequence.
data.normalisation([
    # norm.neighbour_averaging,
    norm.zscore,
    norm.softmax,])
# gui.view_intensity_maps(spectra=data.normalised_elemental_data,

#### Training Method 1: Autoencoder

In [None]:
# Every later cell refers to this folder as `path_AutoEncoder` (capital E),
# so that is the name that must exist; the original spelling is kept as an
# alias for anything that still uses it.
path_AutoEncoder = path.joinpath(path_normalisation, r'Autoencoder')
path_AutoEncoder.mkdir(parents=True, exist_ok=True)
path_Autoencoder = path_AutoEncoder

In [None]:
# Two passes: 'eval' (train/validation split, for hyperparameter selection)
# and 'all' (final model); `ex` and `latent` end up referring to the 'all' run.
for testtype in ['eval', 'all']:
    batch_size = 64
    # The seed fixes the (pseudo-random) initial model parameters; a different
    # integer can change the dimensionality reduction and the latent space.
    same_seeds(1)

    # Folder in which the experiment stores the trained model
    path_result_folder = path_AutoEncoder
    # Experiment definition: model structure, training dataset, etc.
    ex = Experiment(
        descriptor=f'softmax-100-{batch_size}-{testtype}-AutoEncoder',
        general_results_dir=path_result_folder,
        model=AutoEncoder,
        model_args={'hidden_layer_sizes':(512,256,128)},
        chosen_dataset=data.normalised_elemental_data,
        save_model_every_epoch=False,
    )
    # Train the model
    ex.run_model(
        num_epochs=100,
        batch_size=batch_size,
        learning_rate=1e-4,
        weight_decay=0.0,
        task=f'train_{testtype}',
        criterion='MSE',
    )
    # Keep the loss curves of each pass
    if ex.task == 'train_eval':
        model_training_loss, model_validation_loss = ex.train_loss, ex.test_loss
    else:
        model_all_data_loss = ex.train_loss

latent = ex.get_latent()
# # (Optional) Load pre-trained Autoencoder
# model_path = r'C:\Users\chengliu\Desktop\Cheng\2024-05-07_Model-softmax\params\Model-softmax_epoch100'  # model path (the model path should be stored in the folder 'result_folder_path')
# ex.load_trained_model(model_path)
# latent = ex.get_latent()

#### Pixel segmentation Method 1: Gaussian mixture modelling (GMM) clustering

In [None]:
# BIC scan over Gaussian-mixture sizes, used to choose the number of clusters
bic_list = PixelSegmenter.bic(
    latent=latent,
    model='GaussianMixture',
    n_components=14,
    model_args=dict(random_state=6, init_params='kmeans'),
)
# Plot the scores to judge roughly how many clusters to use
pd.DataFrame(bic_list).plot()

In [None]:
n_components = 5
# random_state can be changed to another integer (e.g. 10 or 0) to adjust the
# clustering result.
ps = PixelSegmenter(
    latent=latent,
    dataset=data,
    method="GaussianMixture",
    method_args=dict(n_components=n_components, random_state=6, init_params='kmeans'),
)
# visualise the latent space
# gui.check_latent_space(ps=ps, ratio_to_be_shown=1.0, show_map=True)
gui.view_clusters_sum_spectra(ps=ps, normalisation=True, spectra_range=(0, 10))

In [None]:
# Remember to keep this parameter record in sync with the cells above.
# The original literal repeated 'n_components', 'random_state' and
# 'init_params', so the BIC-scan settings were silently overwritten by the
# segmenter settings. The surviving keys keep their previous values and the
# BIC-scan settings are stored under 'bic_*' keys.
BIC_GMM_parameters = {
    'model': 'GaussianMixture',
    'n_components': n_components,
    'random_state': 6,
    'init_params': 'kmeans',
    'method': "GaussianMixture",
    'bic_n_components': 14,
    'bic_random_state': 6,
    'bic_init_params': 'kmeans',
}
spectra_profiles = []
binary_maps = []
binary_map_indicess = []
# One binary map, index set and mean spectrum per cluster
for i in range(ps.n_components):
    binary_map, binary_map_indices, spectra_profile = ps.get_binary_map_spectra_profile(cluster_num=i, use_label=True)
    spectra_profiles.append(spectra_profile["intensity"])
    binary_maps.append(binary_map)
    binary_map_indicess.append(binary_map_indices)
binary_maps = np.asarray(binary_maps)
spectra_profiles = np.vstack(spectra_profiles)
# Index sets may differ in length per cluster, hence the object array
binary_map_indicess = np.asarray(binary_map_indicess, dtype=object)
np.savez(
    path.joinpath(path_AutoEncoder, r'BIC_GMM.npz'),
    energy=ps.energy_axis,
    raw_data=data.spectra.sum().data,
    normalised_elemental_data=data.normalised_elemental_data,
    peak_list=ps.peak_list,
    peak_dict=list(ps.peak_dict.items()),
    latent=latent,
    bic=bic_list,
    labels=ps.labels,
    model_means=ps.model.means_,
    model_covariances=ps.model.covariances_,
    model_weights=ps.model.weights_,
    model_training_loss=model_training_loss,
    model_validation_loss=model_validation_loss,
    model_all_data_loss=ex.train_loss,
    prob_maps=ps.prob_map,
    spectra_profiles=spectra_profiles,
    binary_map=binary_maps,
    BIC_GMM_parameters=BIC_GMM_parameters,
    binary_map_indices=binary_map_indicess,
)

#### 4. Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# NOTE(review): the segmenter above was built with 5 components, while this
# list names clusters 5, 7 and 9 - confirm these indices exist in `ps`.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
# NOTE(review): confirm every index listed here is a cluster present in `ps`.
number_clusters = [0,1,2,3,4,5,6,7,8]
# Exclusive upper bound for the NMF rank scan: ranks 2 .. min(6, len(clusters)).
# It has its own name so the global `n_components` (the GMM cluster count
# recorded by the save cell) is no longer overwritten by this cell.
nmf_rank_stop = min(7, len(number_clusters) + 1)
with pd.ExcelWriter(path.joinpath(path_AutoEncoder, r'BIC_GMM_NMF.xlsx')) as writer:
    for i in range(2, nmf_rank_stop):
        weights, components = ps.get_unmixed_spectra_profile(clusters_to_be_calculated=number_clusters,
                                                         n_components=i,
                                                         normalised=True,
                                                         method='NMF',
                                                         method_args={'init':'nndsvd'})
        # One sheet per rank: weights | energy axis | components, side by side
        (pd.concat([weights.reset_index(), pd.DataFrame(ps.energy_axis), components], axis=1, ignore_index=True,)
         .to_excel(writer, sheet_name=f"n_components_{i}", header=True))

#### Pixel segmentation Method 2: HDBSCAN clustering

In [None]:
# Hyperparameter guidance: https://scikit-learn.org/stable/auto_examples/cluster/plot_hdbscan.html#hyperparameter-robustness
min_cluster_size, min_samples = 25, 25
# NOTE: this rebinds `ps`, replacing the Gaussian-mixture segmenter
ps = PixelSegmenter(
    latent=latent,
    dataset=data,
    method="HDBSCAN",
    method_args={
        'min_cluster_size': min_cluster_size,
        'min_samples': min_samples,
        'max_cluster_size': int(len(latent)/10),
        'cluster_selection_epsilon': 2e-1,
    },
)
# visualise the latent space
# gui.check_latent_space(ps=ps, ratio_to_be_shown=1.0, show_map=True)
gui.view_clusters_sum_spectra(ps=ps, normalisation=True, spectra_range=(0, 10))

In [None]:
# Remember to keep this parameter record in sync with the segmenter cell.
# 'max_cluster_size' now stores the evaluated limit; previously it stored the
# source text 'int(len(latent)/10)', which says nothing once `latent` is gone.
HDBSCAN_parameters = {
    'method': "HDBSCAN",
    'min_cluster_size': min_cluster_size,
    'min_samples': min_samples,
    'max_cluster_size': int(len(latent)/10),
    'cluster_selection_epsilon': 2e-1,
}
spectra_profiles = []
binary_maps = []
binary_map_indicess = []
# One binary map, index set and mean spectrum per cluster
for i in range(ps.n_components):
    binary_map, binary_map_indices, spectra_profile = ps.get_binary_map_spectra_profile(cluster_num=i, use_label=True)
    spectra_profiles.append(spectra_profile["intensity"])
    binary_maps.append(binary_map)
    binary_map_indicess.append(binary_map_indices)
binary_maps = np.asarray(binary_maps)
spectra_profiles = np.vstack(spectra_profiles)
# Index sets may differ in length per cluster, hence the object array
binary_map_indicess = np.asarray(binary_map_indicess, dtype=object)
# NOTE: `bic` is the list computed by the earlier Gaussian-mixture BIC cell
np.savez(
    path.joinpath(path_AutoEncoder, r'HDBSCAN.npz'),
    energy=ps.energy_axis,
    raw_data=data.spectra.sum().data,
    normalised_elemental_data=data.normalised_elemental_data,
    peak_list=ps.peak_list,
    peak_dict=list(ps.peak_dict.items()),
    latent=latent,
    bic=bic_list,
    labels=ps.labels,
    HDBSCAN_parameters=HDBSCAN_parameters,
    prob_maps=ps.prob_map,
    model_training_loss=model_training_loss,
    model_validation_loss=model_validation_loss,
    model_all_data_loss=ex.train_loss,
    spectra_profiles=spectra_profiles,
    binary_map=binary_maps,
    binary_map_indices=binary_map_indicess,
)

#### 4. Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# NOTE(review): confirm these cluster indices exist in the HDBSCAN result.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
# NOTE(review): confirm every index listed here is a cluster present in `ps`.
number_clusters = [0,1,2,3,4,5,6,7,8]
# Exclusive upper bound for the NMF rank scan: ranks 2 .. min(6, len(clusters)).
# It has its own name so the global `n_components` used by the
# Gaussian-mixture cells is no longer overwritten by this cell.
nmf_rank_stop = min(7, len(number_clusters) + 1)
with pd.ExcelWriter(path.joinpath(path_AutoEncoder, r'HDBSCAN_NMF.xlsx')) as writer:
    for i in range(2, nmf_rank_stop):
        weights, components = ps.get_unmixed_spectra_profile(clusters_to_be_calculated=number_clusters,
                                                         n_components=i,
                                                         normalised=True,
                                                         method='NMF',
                                                         method_args={'init':'nndsvd'})
        # One sheet per rank: weights | energy axis | components, side by side
        (pd.concat([weights.reset_index(), pd.DataFrame(ps.energy_axis), components], axis=1, ignore_index=True,)
         .to_excel(writer, sheet_name=f"n_components_{i}", header=True))

#### Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# NOTE(review): `ps` is whichever segmenter was created last (HDBSCAN when the
# notebook runs top to bottom) - confirm these cluster indices exist in it.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
number_clusters = [0,1,2,3]
# Exclusive upper bound for the NMF rank scan: ranks 2 .. min(6, len(clusters)).
# It has its own name so the global `n_components` used by the
# Gaussian-mixture cells is no longer overwritten by this cell.
nmf_rank_stop = min(7, len(number_clusters) + 1)
# FIXME(review): `path_UMAP` is not defined anywhere above this cell, so the
# next line raises NameError on a fresh kernel. The intended folder cannot be
# determined from this notebook - define it (and create it) before running.
with pd.ExcelWriter(path.joinpath(path_UMAP, r'BIC_GMM_NMF.xlsx')) as writer:
    for i in range(2, nmf_rank_stop):
        weights, components = ps.get_unmixed_spectra_profile(clusters_to_be_calculated=number_clusters,
                                                         n_components=i,
                                                         normalised=True,
                                                         method='NMF',
                                                         method_args={'init':'nndsvd'})
        # One sheet per rank: weights | energy axis | components, side by side
        (pd.concat([weights.reset_index(), pd.DataFrame(ps.energy_axis), components], axis=1, ignore_index=True,)
         .to_excel(writer, sheet_name=f"n_components_{i}", header=True))

#### Training Method 2: VariationalAutoEncoder

In [None]:
# Output folder for everything produced by the variational autoencoder
path_VariationalAutoEncoder = path_normalisation / r'VariationalAutoEncoder'
path_VariationalAutoEncoder.mkdir(parents=True, exist_ok=True)

In [None]:
# Two passes: 'eval' (train/validation split, for hyperparameter selection)
# and 'all' (final model); `ex` and `latent` end up referring to the 'all' run.
for testtype in ['eval', 'all']:
    batch_size = 64
    # The seed fixes the (pseudo-random) initial model parameters; a different
    # integer can change the dimensionality reduction and the latent space.
    same_seeds(1)

    # Folder in which the experiment stores the trained model
    path_result_folder = path_VariationalAutoEncoder
    # Experiment definition: model structure, training dataset, etc.
    ex = Experiment(
        descriptor=f'softmax-100-{batch_size}-{testtype}-VariationalAutoEncoder',
        general_results_dir=path_result_folder,
        model=VariationalAutoEncoder,
        model_args={'hidden_layer_sizes':(512,256,128)},
        chosen_dataset=data.normalised_elemental_data,
        save_model_every_epoch=False,
    )
    # Train the model
    ex.run_model(
        num_epochs=100,
        batch_size=batch_size,
        learning_rate=1e-4,
        weight_decay=0.0,
        task=f'train_{testtype}',
        criterion='MSE',
    )
    # Keep the loss curves of each pass
    if ex.task == 'train_eval':
        model_training_loss, model_validation_loss = ex.train_loss, ex.test_loss
    else:
        model_all_data_loss = ex.train_loss

latent = ex.get_latent()
# # (Optional) Load pre-trained Autoencoder
# model_path = r'C:\Users\chengliu\Desktop\Cheng\2024-05-07_Model-softmax\params\Model-softmax_epoch100'  # model path (the model path should be stored in the folder 'result_folder_path')
# ex.load_trained_model(model_path)
# latent = ex.get_latent()

#### Pixel segmentation Method 1: Gaussian mixture modelling (GMM) clustering

In [None]:
# BIC scan over Gaussian-mixture sizes, used to choose the number of clusters
bic_list = PixelSegmenter.bic(
    latent=latent,
    model='GaussianMixture',
    n_components=14,
    model_args=dict(random_state=6, init_params='kmeans'),
)
# Plot the scores to judge roughly how many clusters to use
pd.DataFrame(bic_list).plot()

In [None]:
n_components = 5
# random_state can be changed to another integer (e.g. 10 or 0) to adjust the
# clustering result.
ps = PixelSegmenter(
    latent=latent,
    dataset=data,
    method="GaussianMixture",
    method_args=dict(n_components=n_components, random_state=6, init_params='kmeans'),
)
# visualise the latent space
# gui.check_latent_space(ps=ps, ratio_to_be_shown=1.0, show_map=True)
gui.view_clusters_sum_spectra(ps=ps, normalisation=True, spectra_range=(0, 10))

In [None]:
# Remember to keep this parameter record in sync with the cells above.
# The original literal repeated 'n_components', 'random_state' and
# 'init_params', so the BIC-scan settings were silently overwritten by the
# segmenter settings. The surviving keys keep their previous values and the
# BIC-scan settings are stored under 'bic_*' keys.
BIC_GMM_parameters = {
    'model': 'GaussianMixture',
    'n_components': n_components,
    'random_state': 6,
    'init_params': 'kmeans',
    'method': "GaussianMixture",
    'bic_n_components': 14,
    'bic_random_state': 6,
    'bic_init_params': 'kmeans',
}
spectra_profiles = []
binary_maps = []
binary_map_indicess = []
# One binary map, index set and mean spectrum per cluster
for i in range(ps.n_components):
    binary_map, binary_map_indices, spectra_profile = ps.get_binary_map_spectra_profile(cluster_num=i, use_label=True)
    spectra_profiles.append(spectra_profile["intensity"])
    binary_maps.append(binary_map)
    binary_map_indicess.append(binary_map_indices)
binary_maps = np.asarray(binary_maps)
spectra_profiles = np.vstack(spectra_profiles)
# Index sets may differ in length per cluster, hence the object array
binary_map_indicess = np.asarray(binary_map_indicess, dtype=object)
np.savez(
    path.joinpath(path_VariationalAutoEncoder, r'BIC_GMM.npz'),
    energy=ps.energy_axis,
    raw_data=data.spectra.sum().data,
    normalised_elemental_data=data.normalised_elemental_data,
    peak_list=ps.peak_list,
    peak_dict=list(ps.peak_dict.items()),
    latent=latent,
    bic=bic_list,
    labels=ps.labels,
    model_means=ps.model.means_,
    model_covariances=ps.model.covariances_,
    model_weights=ps.model.weights_,
    model_training_loss=model_training_loss,
    model_validation_loss=model_validation_loss,
    model_all_data_loss=ex.train_loss,
    prob_maps=ps.prob_map,
    spectra_profiles=spectra_profiles,
    binary_map=binary_maps,
    BIC_GMM_parameters=BIC_GMM_parameters,
    binary_map_indices=binary_map_indicess,
)

#### 4. Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# Cluster labels whose spectra profiles are unmixed together with a 2-component NMF.
# NOTE(review): confirm that every label listed here exists in the current segmentation `ps`.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
# Cluster labels fed to the NMF unmixing.
# NOTE(review): confirm that every label listed here exists in the current segmentation `ps`.
number_clusters = [2,3,5,7,9]
# Exclusive upper bound of the NMF component counts to try: no more components than selected
# clusters, and never more than 6. Kept in its own name so the global `n_components`
# (the GMM cluster count recorded in BIC_GMM_parameters) is not overwritten.
nmf_components_stop = min(7, len(number_clusters) + 1)
# One Excel sheet per NMF component count.
with pd.ExcelWriter(path.joinpath(path_VariationalAutoEncoder, r'BIC_GMM_NMF.xlsx')) as writer:
    for nmf_n_components in range(2, nmf_components_stop):
        weights, components = ps.get_unmixed_spectra_profile(
            clusters_to_be_calculated=number_clusters,
            n_components=nmf_n_components,
            normalised=True,
            method='NMF',
            method_args={'init':'nndsvd'},
        )
        # Side-by-side layout: weights | energy axis | components (column labels are renumbered).
        nmf_sheet = pd.concat(
            [weights.reset_index(), pd.DataFrame(ps.energy_axis), components],
            axis=1,
            ignore_index=True,
        )
        nmf_sheet.to_excel(writer, sheet_name=f"n_components_{nmf_n_components}", header=True)

#### Pixel segmentation Method 2: HDBSCAN clustering

In [None]:
# hyperparameter tuning can be found https://scikit-learn.org/stable/auto_examples/cluster/plot_hdbscan.html#hyperparameter-robustness
min_cluster_size, min_samples = 25 , 25
ps = PixelSegmenter(latent=latent,
                    dataset=data,
                    method="HDBSCAN",
                    method_args=dict(min_cluster_size=min_cluster_size, min_samples=min_samples,
                                     max_cluster_size=int(len(latent)/10),
                                     cluster_selection_epsilon=2e-1))
# visualise the latent space
# gui.check_latent_space(ps=ps, ratio_to_be_shown=1.0, show_map=True)
gui.view_clusters_sum_spectra(ps=ps, normalisation=True, spectra_range=(0, 10))

In [None]:
# Remember to update this parameter record whenever the HDBSCAN cell above changes.
HDBSCAN_parameters = {
    'method': "HDBSCAN",
    'min_cluster_size': min_cluster_size,
    'min_samples': min_samples,
    # Record the evaluated limit (same expression as in the segmentation cell) instead of the
    # text of the expression, so the archive holds the number that was actually used.
    'max_cluster_size': int(len(latent)/10),
    'cluster_selection_epsilon': 2e-1,
}

# Gather, for every cluster, its spectra-profile intensity, binary map and binary-map indices.
spectra_profiles = []
binary_maps = []
binary_map_indicess = []
for i in range(ps.n_components):
    binary_map, binary_map_indices, spectra_profile = ps.get_binary_map_spectra_profile(cluster_num=i, use_label=True)
    spectra_profiles.append(spectra_profile["intensity"])
    binary_maps.append(binary_map)
    binary_map_indicess.append(binary_map_indices)
binary_maps = np.asarray(binary_maps)
spectra_profiles = np.vstack(spectra_profiles)
# dtype=object: stored as a generic object array (presumably because the per-cluster
# index arrays can differ in length — confirm).
binary_map_indicess = np.asarray(binary_map_indicess, dtype=object)

# NOTE(review): model_training_loss / model_validation_loss must already exist in the kernel;
# the training cell assigns them only when the task is 'train_eval' — confirm before running.
np.savez(
    path.joinpath(path_VariationalAutoEncoder, r'HDBSCAN.npz'),
    energy=ps.energy_axis,
    raw_data=data.spectra.sum().data,
    normalised_elemental_data=data.normalised_elemental_data,
    peak_list=ps.peak_list,
    peak_dict=list(ps.peak_dict.items()),
    latent=latent,
    bic=bic_list,
    labels=ps.labels,
    HDBSCAN_parameters=HDBSCAN_parameters,
    prob_maps=ps.prob_map,
    model_training_loss=model_training_loss,
    model_validation_loss=model_validation_loss,
    model_all_data_loss=ex.train_loss,
    spectra_profiles=spectra_profiles,
    binary_map=binary_maps,
    binary_map_indices=binary_map_indicess,
)

#### Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# Cluster labels whose spectra profiles are unmixed together with a 2-component NMF.
# NOTE(review): confirm that every label listed here exists in the current segmentation `ps`.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
# Cluster labels fed to the NMF unmixing.
number_clusters = [0,1,2,3]
# Exclusive upper bound of the NMF component counts to try: no more components than selected
# clusters, and never more than 6. Kept in its own name so the global `n_components`
# (the GMM cluster count) is not overwritten.
nmf_components_stop = min(7, len(number_clusters) + 1)
# `ps` here is the HDBSCAN segmentation of the autoencoder latent, so the workbook belongs in
# the autoencoder folder under the HDBSCAN name. The previous target (path_UMAP /
# 'BIC_GMM_NMF.xlsx') used a folder that is only created in the UMAP section further down
# and a file name belonging to the GMM results.
# NOTE(review): the later export cell in this section writes the same file with a different
# cluster selection and will overwrite this one — confirm which selection should be kept.
with pd.ExcelWriter(path.joinpath(path_VariationalAutoEncoder, r'HDBSCAN_NMF.xlsx')) as writer:
    for nmf_n_components in range(2, nmf_components_stop):
        weights, components = ps.get_unmixed_spectra_profile(
            clusters_to_be_calculated=number_clusters,
            n_components=nmf_n_components,
            normalised=True,
            method='NMF',
            method_args={'init':'nndsvd'},
        )
        # Side-by-side layout: weights | energy axis | components (column labels are renumbered).
        nmf_sheet = pd.concat(
            [weights.reset_index(), pd.DataFrame(ps.energy_axis), components],
            axis=1,
            ignore_index=True,
        )
        nmf_sheet.to_excel(writer, sheet_name=f"n_components_{nmf_n_components}", header=True)

#### 4. Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# Cluster labels whose spectra profiles are unmixed together with a 2-component NMF.
# NOTE(review): confirm that every label listed here exists in the current segmentation `ps`.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
# Cluster labels fed to the NMF unmixing.
number_clusters = [0,1,2]
# Exclusive upper bound of the NMF component counts to try: no more components than selected
# clusters, and never more than 6. Kept in its own name so the global `n_components`
# (the GMM cluster count) is not overwritten.
nmf_components_stop = min(7, len(number_clusters) + 1)
# One Excel sheet per NMF component count.
with pd.ExcelWriter(path.joinpath(path_VariationalAutoEncoder, r'HDBSCAN_NMF.xlsx')) as writer:
    for nmf_n_components in range(2, nmf_components_stop):
        weights, components = ps.get_unmixed_spectra_profile(
            clusters_to_be_calculated=number_clusters,
            n_components=nmf_n_components,
            normalised=True,
            method='NMF',
            method_args={'init':'nndsvd'},
        )
        # Side-by-side layout: weights | energy axis | components (column labels are renumbered).
        nmf_sheet = pd.concat(
            [weights.reset_index(), pd.DataFrame(ps.energy_axis), components],
            axis=1,
            ignore_index=True,
        )
        nmf_sheet.to_excel(writer, sheet_name=f"n_components_{nmf_n_components}", header=True)

#### Training Method 3: UMAP

In [None]:
# Output folder for every UMAP-based result; created (with parents) if it does not exist yet.
path_UMAP = path_normalisation / r'UMAP'
path_UMAP.mkdir(exist_ok=True, parents=True)

In [None]:
from umap import UMAP
# Parameter tuning guidance: https://umap-learn.readthedocs.io/en/latest/parameters.html
n_neighbors, min_dist = 50, 0.1
# Flatten the normalised elemental data to 2-D with one column per entry of data.feature_list
# (presumably one row per pixel — confirm).
data_umap = data.normalised_elemental_data.reshape(-1,len(data.feature_list))
# Pass the `min_dist` variable (not a repeated literal) so the value used for the embedding
# always matches the one recorded in the saved parameter dictionaries.
# NOTE(review): no random_state is set, so the embedding may differ between runs — consider
# fixing a seed if reproducibility matters.
umap = UMAP(
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        n_components=2,
        metric='euclidean'
    )
# From here on `latent` holds the 2-D UMAP embedding.
latent = umap.fit_transform(data_umap)

#### Pixel segmentation Method 1: Gaussian mixture modelling (GMM) clustering

In [None]:
# Compute BIC scores for Gaussian-mixture fits of the latent space.
# NOTE(review): n_components=14 is presumably the largest component count scanned —
# confirm against PixelSegmenter.bic.
bic_list = PixelSegmenter.bic(
    latent=latent,
    model='GaussianMixture',
    n_components=14,
    model_args=dict(random_state=6, init_params='kmeans'),
)
# Plot the BIC values to decide roughly how many clusters the data should be split into.
pd.DataFrame(bic_list).plot()

In [None]:
# Number of Gaussian-mixture components requested for the segmentation.
n_components = 5
ps = PixelSegmenter(
    latent=latent,
    dataset=data,
    method="GaussianMixture",
    # random_state may be set to another integer (e.g. 10 or 0) to adjust the clustering result.
    method_args=dict(n_components=n_components, random_state=6, init_params='kmeans'),
)
# Optional: visualise the latent space
# gui.check_latent_space(ps=ps, ratio_to_be_shown=1.0, show_map=True)
gui.view_clusters_sum_spectra(ps=ps, normalisation=True, spectra_range=(0, 10))

In [None]:
# Remember to update this parameter record whenever the UMAP / BIC / segmentation cells change.
# A dict literal keeps only the LAST value of a repeated key: previously 'n_components' was
# listed three times (BIC scan, GMM, UMAP) and only the UMAP value survived. Every stage now
# has its own prefixed entry; the bare 'n_components' key is kept with the value it
# effectively had before (UMAP's 2) so existing readers still work.
BIC_GMM_parameters = {
    # BIC scan (PixelSegmenter.bic)
    'model': 'GaussianMixture',
    'bic_n_components': 14,
    # Segmentation (PixelSegmenter)
    'method': "GaussianMixture",
    'gmm_n_components': n_components,
    # Identical in the BIC scan and the segmentation
    'random_state': 6,
    'init_params': 'kmeans',
    # UMAP embedding
    'n_neighbors': n_neighbors,
    'min_dist': min_dist,
    'umap_n_components': 2,
    'n_components': 2,
    'metric': 'euclidean',
}

# Gather, for every cluster, its spectra-profile intensity, binary map and binary-map indices.
spectra_profiles = []
binary_maps = []
binary_map_indicess = []
for i in range(ps.n_components):
    binary_map, binary_map_indices, spectra_profile = ps.get_binary_map_spectra_profile(cluster_num=i, use_label=True)
    spectra_profiles.append(spectra_profile["intensity"])
    binary_maps.append(binary_map)
    binary_map_indicess.append(binary_map_indices)
binary_maps = np.asarray(binary_maps)
spectra_profiles = np.vstack(spectra_profiles)
# dtype=object: stored as a generic object array (presumably because the per-cluster
# index arrays can differ in length — confirm).
binary_map_indicess = np.asarray(binary_map_indicess, dtype=object)

# NOTE(review): the three loss arrays come from the autoencoder run (`ex` and the training
# cell), not from UMAP — confirm they are wanted in this archive and that they exist.
np.savez(
    path.joinpath(path_UMAP, r'BIC_GMM.npz'),
    energy=ps.energy_axis,
    raw_data=data.spectra.sum().data,
    normalised_elemental_data=data.normalised_elemental_data,
    peak_list=ps.peak_list,
    peak_dict=list(ps.peak_dict.items()),
    latent=latent,
    bic=bic_list,
    labels=ps.labels,
    model_means=ps.model.means_,
    model_covariances=ps.model.covariances_,
    model_weights=ps.model.weights_,
    model_training_loss=model_training_loss,
    model_validation_loss=model_validation_loss,
    model_all_data_loss=ex.train_loss,
    prob_maps=ps.prob_map,
    spectra_profiles=spectra_profiles,
    binary_map=binary_maps,
    BIC_GMM_parameters=BIC_GMM_parameters,
    binary_map_indices=binary_map_indicess,
)

#### Pixel segmentation Method 2: HDBSCAN clustering

In [None]:
# Guidance on tuning these hyperparameters:
# https://scikit-learn.org/stable/auto_examples/cluster/plot_hdbscan.html#hyperparameter-robustness
min_cluster_size, min_samples = 300, 50
ps = PixelSegmenter(
    latent=latent,
    dataset=data,
    method="HDBSCAN",
    method_args={
        'min_cluster_size': min_cluster_size,
        'min_samples': min_samples,
        # Cap a single cluster at one tenth of the number of latent points.
        'max_cluster_size': int(len(latent)/10),
        'cluster_selection_epsilon': 2e-1,
    },
)
# Optional: visualise the latent space
# gui.check_latent_space(ps=ps, ratio_to_be_shown=1.0, show_map=True)
gui.view_clusters_sum_spectra(ps=ps, normalisation=True, spectra_range=(0, 10))

In [None]:
# Remember to update this parameter record whenever the UMAP / HDBSCAN cells above change.
HDBSCAN_parameters = {
    # UMAP embedding
    'n_neighbors': n_neighbors,
    'min_dist': min_dist,
    'n_components': 2,
    'metric': 'euclidean',
    # HDBSCAN segmentation
    'method': "HDBSCAN",
    'min_cluster_size': min_cluster_size,
    'min_samples': min_samples,
    # Record the evaluated limit (same expression as in the segmentation cell) instead of the
    # text of the expression, so the archive holds the number that was actually used.
    'max_cluster_size': int(len(latent)/10),
    'cluster_selection_epsilon': 2e-1,
}

# Gather, for every cluster, its spectra-profile intensity, binary map and binary-map indices.
spectra_profiles = []
binary_maps = []
binary_map_indicess = []
for i in range(ps.n_components):
    binary_map, binary_map_indices, spectra_profile = ps.get_binary_map_spectra_profile(cluster_num=i, use_label=True)
    spectra_profiles.append(spectra_profile["intensity"])
    binary_maps.append(binary_map)
    binary_map_indicess.append(binary_map_indices)
binary_maps = np.asarray(binary_maps)
spectra_profiles = np.vstack(spectra_profiles)
# dtype=object: stored as a generic object array (presumably because the per-cluster
# index arrays can differ in length — confirm).
binary_map_indicess = np.asarray(binary_map_indicess, dtype=object)

# NOTE(review): the three loss arrays come from the autoencoder run (`ex` and the training
# cell), not from UMAP — confirm they are wanted in this archive and that they exist.
np.savez(
    path.joinpath(path_UMAP, r'HDBSCAN.npz'),
    energy=ps.energy_axis,
    raw_data=data.spectra.sum().data,
    normalised_elemental_data=data.normalised_elemental_data,
    peak_list=ps.peak_list,
    peak_dict=list(ps.peak_dict.items()),
    latent=latent,
    bic=bic_list,
    labels=ps.labels,
    HDBSCAN_parameters=HDBSCAN_parameters,
    prob_maps=ps.prob_map,
    model_training_loss=model_training_loss,
    model_validation_loss=model_validation_loss,
    model_all_data_loss=ex.train_loss,
    spectra_profiles=spectra_profiles,
    binary_map=binary_maps,
    binary_map_indices=binary_map_indicess,
)

#### Unmixing cluster spectra using Non-negative Matrix Factorization (NMF)

In [None]:
# Cluster labels whose spectra profiles are unmixed together with a 2-component NMF.
# NOTE(review): confirm that every label listed here exists in the current segmentation `ps`.
number_clusters = [2, 3, 5, 7, 9]
weights, components = ps.get_unmixed_spectra_profile(
    clusters_to_be_calculated=number_clusters,
    n_components=2,
    normalised=True,
    method='NMF',
    method_args=dict(init='nndsvd'),
)
gui.show_unmixed_weights_and_compoments(ps=ps, weights=weights, components=components)

In [None]:
# Cluster labels fed to the NMF unmixing.
number_clusters = [0,1,2,3]
# Exclusive upper bound of the NMF component counts to try: no more components than selected
# clusters, and never more than 6. Kept in its own name so the global `n_components`
# (the GMM cluster count) is not overwritten.
nmf_components_stop = min(7, len(number_clusters) + 1)
# NOTE(review): at this point `ps` is the HDBSCAN segmentation of the UMAP embedding, yet the
# workbook is named 'BIC_GMM_NMF.xlsx' — confirm whether it should be 'HDBSCAN_NMF.xlsx'.
with pd.ExcelWriter(path.joinpath(path_UMAP, r'BIC_GMM_NMF.xlsx')) as writer:
    for nmf_n_components in range(2, nmf_components_stop):
        weights, components = ps.get_unmixed_spectra_profile(
            clusters_to_be_calculated=number_clusters,
            n_components=nmf_n_components,
            normalised=True,
            method='NMF',
            method_args={'init':'nndsvd'},
        )
        # Side-by-side layout: weights | energy axis | components (column labels are renumbered).
        nmf_sheet = pd.concat(
            [weights.reset_index(), pd.DataFrame(ps.energy_axis), components],
            axis=1,
            ignore_index=True,
        )
        nmf_sheet.to_excel(writer, sheet_name=f"n_components_{nmf_n_components}", header=True)