## Kbeta - CLAESS

### Version-5

In [None]:
# 导入相关的包
import sys
from IPython.display import display
import spectrochempy as scp
from spectrochempy import Coord, CoordSet, NDDataset, ur
import numpy as np
import pandas as pd
from pathlib import Path as path
import scipy 
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import matplotlib.gridspec as gridspec
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.colorbar import Colorbar

In [None]:
# Initial plotting set-up: apply the personal Matplotlib style sheet.
plt.style.use(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-python\Figure\liuchzzyy.mplstyle')
# display(plt.style.available)

# Colour set-up: make the local `colors` module importable, then take the
# 'vibrant' colour set as the default line colours.
sys.path.append(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Python\Figure')
from colors import tol_cmap, tol_cset
colors = list(tol_cset('vibrant'))
# Register the extra colormaps only when they are not registered yet
# ('rainbow_PuRd' is a spare option; alternative: plasma).
for _cmap_name in (r'sunset', r'rainbow_PuRd'):
    if _cmap_name not in plt.colormaps():
        plt.colormaps.register(tol_cmap(_cmap_name))

# Folder that receives the exported figures.
path_out = path(r"C:\Users\chengliu\Desktop\Figure")

#### 读取数据并 denoise, 得到平均化

In [None]:
# Folder that holds the raw data (iterated below, one sub-folder per sample)
path_data_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Data')
# Folder that receives the processed results
path_out_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Results\Version-6')

#### 寻找平均谱线以及峰

In [None]:
# Collect the 'Mn' sub-folder of every sample directory.
# Path.iterdir() yields entries in arbitrary order, while the plotting cells
# pair the processed samples with a fixed label list by position, so the list
# is sorted to make the sample order deterministic.
path_filelist = sorted(
    item.joinpath(r'Mn') for item in path_data_folder.iterdir() if item.is_dir()
)
display(path_filelist)

In [None]:
# Baseline correction: polynomial model of linear order, fitted on the two
# x ranges given below (presumably in eV, like the energy axis — confirm).
blc = scp.Baseline(
    log_level="INFO",
    model="polynomial",  # use a polynomial model
    order='linear',  # with linear method
    ranges=([6462., 6463.], [6510., 6511.]),)

In [None]:
%matplotlib inline
# For every sample: merge the raw scans, denoise them, subtract the baseline,
# average the scans, normalise the mean spectrum to unit area and export it.
# A summary table (energy + one column per sample) is written at the end.
mean_columns = []   # one single-column DataFrame (normalised mean spectrum) per sample
energy_axis = None  # energy axis written as the first column of the summary table
for path_ref in path_filelist:
    sample_name = path_ref.parts[-2]  # name of the folder that contains 'Mn'

    # Sorted so that the scan/column order does not depend on the file system;
    # fail early with a clear message instead of pandas' "No objects to concatenate".
    scan_files = sorted(path_ref.glob(r'*.txt'))
    if not scan_files:
        raise FileNotFoundError(f'No *.txt scan found in {path_ref}')

    path_out_ref = path.joinpath(path_out_folder, f'{sample_name}')
    path_out_ref.mkdir(parents=True, exist_ok=True)

    # Put all scans side by side and keep a copy of the merged raw table
    ref_txt_data_merge = pd.concat(
        [pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None) for filetxt in scan_files],
        axis=1, ignore_index=True,)
    ref_txt_data_merge.to_csv(path.joinpath(path_out_ref, f'{sample_name}_Raw_All.csv'), index=None, header=True)

    # Every second column (1, 3, 5, ...) is used as one spectrum; column 0 is the
    # common x axis (assumes each file is an (energy, intensity) pair — confirm).
    ref_data_scp = NDDataset(data=ref_txt_data_merge.iloc[:, 1::2].T.values,
                             author="Cheng Liu",
                             description="Kbeta of Mn, ALBA",
                             history="creation",
                             )
    ref_data_scp.x = Coord(ref_txt_data_merge.iloc[:, 0].values, name='Energy', units=ur.eV,)
    ref_data_scp.y = Coord(np.arange((ref_txt_data_merge.shape[1]//2)), name='numbers',)

    # PCA-based reconstruction (denoising)
    recon_scp_data = scp.denoise(ref_data_scp, ratio=99.8,)
    recon_scp_data.plot(clear=True)

    # Baseline correction
    _ = blc.fit(recon_scp_data)   # fit the baseline
    scp_baseline = blc.baseline
    scp_corrected = blc.corrected  # get the corrected dataset
    # _ = scp_corrected.plot(clear=True)

    # Average over the scans
    recon_ref_data_mean = scp_corrected.mean(dim='y', keepdim=True)
    # recon_ref_data_mean.plot(clear=True)

    # Normalise the mean spectrum by its trapezoidal area
    inttrapz_area = scipy.integrate.trapezoid(y=recon_ref_data_mean.data, x=recon_ref_data_mean.x.data)
    recon_ref_data_mean_corrected_normal = np.divide(recon_ref_data_mean, inttrapz_area)
    recon_ref_data_mean_corrected_normal.plot(clear=True)
    recon_ref_data_mean_corrected_normal.write_csv(path.joinpath(path_out_ref, f'{sample_name}_mean_normal.csv'),)

    # Collected in a list and concatenated once after the loop
    mean_columns.append(pd.DataFrame(recon_ref_data_mean_corrected_normal.data.T))
    # As before, the axis of the last processed sample is exported
    # (assumes all samples share the same energy grid — confirm).
    energy_axis = recon_ref_data_mean.x.data

if mean_columns:
    data_mean_corrected_normal = pd.concat(mean_columns, ignore_index=True, axis=1)
    pd.concat([pd.DataFrame(energy_axis), data_mean_corrected_normal], ignore_index=True, axis=1).to_csv(path.joinpath(path_out_folder, r'all_mean_normal.csv'), header=True, index=None)
else:
    # Nothing was processed: keep the name defined and say why no file was written
    data_mean_corrected_normal = pd.DataFrame()
    print(r'No sample folder found, all_mean_normal.csv was not written')

In [None]:
# Fit definition in the parameter-script syntax described in its own header:
# two asymmetric-Voigt lines. The upper bound of LINE_1 `pos` was 9494.6, far
# outside the measured window; it is corrected to 6494.6 so that the bounds
# bracket the start value 6492.6.
script = """

#-----------------------------------------------------------
# syntax for parameters definition :
# name : value, low_bound,  high_bound
#  * for fixed parameters
#  $ for variable parameters
#  > for reference to a parameter in the COMMON block
#    (> is forbidden in the COMMON block)
# common block parameters should not have a _ in their names
#-----------------------------------------------------------
#
COMMON:
# common parameters ex.
# $ gwidth: 1.0, 0.0, none
# $ gratio: 0.5, 0.0, 1.0
# $ gasym: 0.3, 0.0, 1.0

MODEL: LINE_1
shape: asymmetricvoigtmodel
    $ ampl:  0.12, 0.10, 0.16
    $ pos:   6492.6, 6491.3, 6494.6
    $ ratio: 0.5, 0.0, 1.0
    $ asym: 0.5, 0.0, 1.0
    $ width: 0.5, 0.0, 10

MODEL: LINE_2
shape: asymmetricvoigtmodel
    $ ampl:  0.03, 0.01, 0.05
    $ pos:   6477.0, 6470.0, 6480.0
    $ ratio: 0.5, 0.0, 1.0
    $ asym: 0.5, 0.0, 1.0
    $ width: 0.5, 0.0, 10

"""

# script = """

# #-----------------------------------------------------------
# # syntax for parameters definition :
# # name : value, low_bound,  high_bound
# #  * for fixed parameters
# #  $ for variable parameters
# #  > for reference to a parameter in the COMMON block
# #    (> is forbidden in the COMMON block)
# # common block parameters should not have a _ in their names
# #-----------------------------------------------------------
# #
# COMMON:
# # common parameters ex.

# MODEL: linez
# shape: asymmetricvoigtmodel
# $ ampl: 0.12, 0.10, 0.16
# $ width: 0.5, 0.0, 10
# $ pos: 6492.6, 6492.4,  9493.5
# $ ratio: 0.5, 0.0, 1.0
# $ asym: 0.3, 0.0, 1.0

# """

In [None]:
# Spread (STD) of the peak position and of the area of the absolute difference
# to the reference, evaluated over the individual scans of every sample.
ref_sample = r'R5_MnO2'  # MnO2 is used as the reference standard

# Only folders that hold a '<name>_Raw_All.csv' are samples; other folders created
# inside the result folder (e.g. by the plotting cells) are ignored. Sorted because
# Path.iterdir() gives no ordering guarantee.
path_filelist = sorted(
    item for item in path_out_folder.iterdir()
    if item.is_dir() and item.joinpath(f'{item.name}_Raw_All.csv').is_file()
)
# display(path_filelist)

# Pick the reference by name; fall back to the historical fifth entry if absent
ref_dir = next((item for item in path_filelist if item.name == ref_sample), None)
if ref_dir is None:
    ref_dir = path_filelist[4]
    print(f'{ref_sample} folder not found, using {ref_dir.name} as reference instead')
data = pd.read_csv(path.joinpath(ref_dir, f'{ref_dir.name}_mean_normal.csv'), comment='#', sep=r',', header=0, index_col=None).to_numpy()
ref = NDDataset(data=data[:, 1], title=r'Absorption', name=f'{ref_dir.name}',)
ref.x = Coord(data[:, 0], title='Energy', units=ur.eV,)
# ref.plot()

errorbar_rows = []  # one summary row per sample, concatenated once after the loop
for file in path_filelist:
    data = pd.read_csv(path.joinpath(file, f'{file.name}_Raw_All.csv'), sep=',', header=0, index_col=None, comment="#")
    # Every second column is one scan; column 0 is used as the energy axis
    scp_data = NDDataset(data=data.iloc[:, 1::2].values.T,
                         author="Cheng Liu",
                         description="Kbeta of Mn, ALBA",
                         history="creation",
                         )
    scp_data.x = Coord(data.iloc[:, 0].values, name='Energy', units=ur.eV,)
    scp_data.y = Coord(np.arange((data.iloc[:, 1::2].shape[1])), name='numbers', )
    # scp_data.plot()

    # PCA-based reconstruction (denoising)
    recon_scp_data = scp.denoise(scp_data, ratio=99.8,)
    # print(recon_scp_data.shape)
    # _ = recon_scp_data.plot(clear=True)

    # Baseline correction
    _ = blc.fit(recon_scp_data)   # fit the baseline
    scp_baseline = blc.baseline
    scp_corrected = blc.corrected  # get the corrected dataset
    # _ = scp_corrected.plot(clear=True)
    # _ = scp_baseline.plot(clear=True)

    # Normalise every scan by its own trapezoidal area
    inttrapz_area = scipy.integrate.trapezoid(y=scp_corrected.data, x=scp_corrected.x.data)
    for i in range(scp_corrected.shape[0]):
        scp_corrected[i, :] = np.divide(scp_corrected[i, :], inttrapz_area[i])

    # Peak search in the 6490-6500 window of every scan, and its spread
    peakslist = [s.find_peaks(distance=10)[0].x.data for s in scp_corrected[:, 6490.0:6500.0]]
    peakslist = pd.DataFrame(peakslist)

#     # Peak search and std distribution, fitting-based alternative
#     peakslist = []
#     for i in range(scp_corrected.shape[0]):
#         f1 = scp.Optimize(log_level="WARNING",)
#         f1.script = script
#         f1.max_iter = 2000
#         f1.fit(scp_corrected[i, :])

#         # # Show the result
#         # scp_corrected[i, :].plot()
#         # ax = (f1.components[:]).plot(clear=False)
#         # ax.autoscale(enable=True, axis="y")

#         # # plotmerit
#         # som = f1.inverse_transform()
#         # f1.plotmerit(offset=0, kind="scatter")
#         recon_scp_corrected = f1.inverse_transform()
#         display(recon_scp_corrected)
#         peaks = recon_scp_corrected[6490.0:6500.0].find_peaks(distance=10)[0].x.data
#         peakslist.append(peaks)
#     _ = pd.DataFrame(peakslist).plot()
    # peakstd = pd.DataFrame(peakslist).std(ddof=0)

    peakstd = peakslist.std(ddof=0)
    peakmean = peakslist.mean()
    # _ = peakslist.plot()

    # Area of the absolute difference to the reference, and its spread
    diff = scp_corrected - ref
    # diff.plot(clear=True)
    arealist = scipy.integrate.trapezoid(y=np.abs(diff.data), x=diff.x.data)
    arealist = pd.DataFrame(arealist)
    arealist_std = arealist.std(ddof=0)
    arealist_mean = arealist.mean()
    # _ = inttrapz_area.plot()
    errorbar = pd.concat([pd.Series(f'{file.name}'), peakmean, peakstd, arealist_mean, arealist_std], axis=1, ignore_index=True,)
    errorbar_rows.append(errorbar)

    # Save the per-scan data
    (pd.concat([pd.Series(scp_corrected.x.data), pd.DataFrame(scp_corrected.data).T], axis=1, ignore_index=True,).
     to_csv(path.joinpath(file, f'{file.name}_spectrum_list.csv'), index=None, header=True,))
    # NOTE(review): the two-name header assumes exactly one peak was found per scan
    (pd.concat([peakslist, arealist], axis=1, ignore_index=True,)
     .to_csv(path.joinpath(file, f'{file.name}_peak_area_list.csv'), index=None, header=[r'peaklist', r'arealist']))
    (pd.concat([pd.Series(diff.x.data), pd.DataFrame(diff.data).T], axis=1, ignore_index=True,).
     to_csv(path.joinpath(file, f'{file.name}_diff_list.csv'), index=None, header=True,))

std_out = pd.concat(errorbar_rows, axis=0, ignore_index=True,)
std_out.to_csv(path.joinpath(path_out_folder, r'peak_area_std.csv'), index=False, header=[r'Samples', r'peak_mean', r'peak_std', r'area_mean', r'area_std'])
print(r'Done')

##### Peak + std, area + std

In [None]:
# Three-panel figure: (A) peak energy with its std band, (B) rescaled area with
# its std band, (C) overlay of the mean spectra with a zoomed inset.

# Read the result tables
# NOTE(review): this reads 'peak_area_std1.csv', not the 'peak_area_std.csv' written
# by the statistics cell, and panel B indexes columns 4:6 — presumably a hand-edited
# copy with a different column layout; confirm.
data_merge = pd.read_csv(path.joinpath(path_out_folder, r'peak_area_std1.csv'), comment='#', sep=r',', header=0, index_col=None)
spectrum = pd.read_csv(path.joinpath(path_out_folder, r'all_mean_normal.csv'), comment='#', sep=r',', header=0, index_col=None)
display(data_merge.head(2))

# Plot
fig = plt.figure(figsize=(10.5, 3.3))
gs = gridspec.GridSpec(1, 3, width_ratios=[1, 1, 1], height_ratios=[1],
                       wspace=None, hspace=None, figure=fig)
# Tick/legend labels, applied to the rows/columns by position
labels = [r'R1_MnOOH', r'R2_ZnMn2O4', r'R3_MnO', r'R4_Mn2O3', r'R5_MnO2', r'S1_pristine', r'S2_1stDisch',
          r'S3_1stHCh_1p53V', r'S4_1stHCh_1p63V', r'S5_1stCh', r'S6_2ndDisch_1p3V', r'S7_2ndDisch']

# Panel A: energy peak + std
energy = data_merge.iloc[:, 1:3].copy().dropna()
subfig_a = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig_a.add_axes((0, 0, 0.8, 0.8), zorder=0)

ax.plot(energy.iloc[:, 0], lw=1, ls='-', marker='o', zorder=5, color=colors[0])
# Shaded band: first column +/- second column
ax.fill_between(x=np.arange(energy.shape[0]), y1=(energy.iloc[:, 0] + energy.iloc[:, 1]),
                y2=(energy.iloc[:, 0] - energy.iloc[:, 1]), color=colors[2], alpha=0.3)

# ax.set_ylim(6492.4, 6493.3)  # ax.set_ylim(6492.1, 6493.7)
ax.set_xticks(np.arange(energy.shape[0]), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'Energy (eV)', fontsize=11)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))

ax.tick_params(axis='x', labelsize=9)
ax.tick_params(axis='y', labelsize=9)

# Dashed separator at x = 4 (the last 'R*' entry of `labels`)
ax.vlines(x=4, colors='k', ymin=6492.0, ymax=6493.9, linestyles='dashed', alpha=0.8)
# ax.text(0.02, 0.98, r'1$^{st}$ moment $\mathit{K \beta _{1,3}}$', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.02, 0.1, r'References', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.58, 0.95, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(-0.3, 1.0, r'A', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Panel B: area + std
area = data_merge.iloc[:, 4:6].copy().dropna()
subfig_b = fig.add_subfigure(gs[0, 1], zorder=0)
ax = subfig_b.add_axes((0.01, 0, 0.8, 0.8), zorder=0)

# Linear rescaling of the area: row 4 maps to 3 and row 2 maps to 5
ax.plot((area.iloc[:, 0]-area.iloc[4, 0])*(2/(area.iloc[2, 0] - area.iloc[4, 0]))+3, linewidth=1, linestyle='-', marker='o', zorder=5, color=colors[0])
y1 = (area.iloc[:, 0]-area.iloc[4, 0] + area.iloc[:, 1])*(2/(area.iloc[2, 0] - area.iloc[4, 0]))+3
y2 = (area.iloc[:, 0]-area.iloc[4, 0] - area.iloc[:, 1])*(2/(area.iloc[2, 0] - area.iloc[4, 0]))+3
# display(y1, y2)
ax.fill_between(x=np.arange(area.shape[0]), y1=y1, y2=y2, color=colors[2], alpha=0.3)


ax.set_xticks(np.arange(len(labels)), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'local magnetic moment ($\mathrm{\mu _B}$)', fontsize=11)  # Total Magnetization
# ax.set_ylim(2.7, 5.1)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.4))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.2))

ax.vlines(x=4, colors='k', ymin=1.7, ymax=5.3, linestyles='dashed')
ax.text(0.02, 0.1, r'References', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.58, 0.95, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(-0.22, 1.0, r'B', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Panel C: Spectrum
subfig_c = fig.add_subfigure(gs[0, 2], zorder=0)
ax = subfig_c.add_axes((0.01, 0, 0.8, 0.8), zorder=0)
# One colour per spectrum column, sampled evenly from the 'sunset' colormap
colormap = ListedColormap(mpl.colormaps['sunset'](np.linspace(0, 1.0, spectrum.shape[1]-1)), name=r'colormap')

# Overlay all spectra (column 0 is the x axis, the remaining columns are the curves)
for i in range(spectrum.shape[1]-1):
    ax.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 1+i], lw=1, label=labels[i], color=colormap.colors[i], zorder=5, alpha=1-0.01*i)

ax.set_xlabel(r'Energy (eV)', fontsize=11, labelpad=3)
ax.set_xlim(6460, 6510)
ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))

ax.set_ylabel(ylabel=r'Intensity (a.u.)', fontsize=11, labelpad=3)
ax.set_ylim(0, 0.17)
ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(base=0.04))
ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(base=0.02))

ax.tick_params(axis='x', labelsize=9) 
ax.tick_params(axis='y', labelsize=9)

ax.legend(loc='upper left', bbox_to_anchor=(0.01, 1.0), ncols=1, frameon=False,
          labelcolor='linecolor', fontsize=8, columnspacing=0.5)
ax.text(-0.22, 1.0, r'C', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Inset: the same curves restricted to 6491-6495 on x, drawn without spines or ticks
axins = ax.inset_axes([0.78, 0.32, 0.2, 0.65])
for i in range(spectrum.shape[1]-1):
    axins.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 1+i], lw=1, label=labels[i], color=colormap.colors[i], zorder=0, alpha=1-0.01*i)
axins.set_xlim(6491, 6495)
axins.spines.right.set_visible(False)
axins.spines.bottom.set_visible(False)
axins.spines.top.set_visible(False)
axins.spines.left.set_visible(False)
axins.set(xticks=[], xlabel=None, yticks=[], ylabel=None)

plt.savefig(path.joinpath(path_out_folder, r'aa.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600)
# plt.close()
print(r'Done')

In [None]:
# Single-panel figure of selected spectra read from the 'all_*_<filetype>.csv'
# tables stored in the '<filetype>' sub-folder of the result folder.

# Locate the input tables (the sub-folder is created if it does not exist)
filetype = r'mean'
path_file = path.joinpath(path_out_folder, filetype)
path_file.mkdir(parents=True, exist_ok=True,)
path_filelist = list(path_file.glob(f'all_*_{filetype}.csv'))
path_filelist = path_filelist[0:]  # full slice: keeps every file (edit the bounds to select a subset)
display(path_filelist)

# Concatenate all tables side by side; column names are replaced by a running index
data_merge = pd.DataFrame()
for file in path_filelist:
    data = pd.read_csv(file, comment='#', sep=r',', header=0, index_col=None)
    data_merge = pd.concat([data_merge, data], axis=1, ignore_index=True,)
# display(data_merge.head(13))

# Plot
fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None,
                       wspace=None, hspace=None, figure=fig)
labels = [r'R1_MnOOH', r'R2_ZnMn2O4', r'$\mathrm{Ref.MnO}$', r'$\mathrm{Ref.Mn_2O_3}$', r'$\mathrm{Ref.MnO_2}$', r'Pristine', r'Discharge',
          r'S3_1stHCh_1p53V', r'S4_1stHCh_1p63V', r'S5_1stCh', r'S6_2ndDisch_1p3V', r'S7_2ndDisch']


# Panel C: Spectrum
# Columns from index 4 onward are treated as the spectrum table
# (NOTE(review): the column layout of the input files is not visible here — confirm)
spectrum = data_merge.iloc[:, 4:].copy().dropna()
subfig_c = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig_c.add_axes((0, 0, 1.0, 1.0), zorder=0)
ax.set_box_aspect(0.8)

# NOTE(review): `colormap` is built here but the curves below are coloured from `colors`
colormap = ListedColormap(mpl.colormaps['sunset'](np.linspace(0.0, 0.5, spectrum.shape[1]-1)), name=r'colormap')

# Overlay the curves: starts at spectrum column 3 and at labels[2]
for i in range(spectrum.shape[1]-8):
    ax.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 3+i], lw=1, label=labels[i+2], color=colors[i], zorder=5, alpha=1-0.01*i)

ax.set_xlabel(r'Energy (eV)', fontsize=11, labelpad=3)
ax.set_xlim(6460, 6510)
ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))

ax.set_ylabel(ylabel=r'Intensity (a.u.)', fontsize=11, labelpad=3)
ax.set_ylim(0, 0.17)
ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(base=0.04))
ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(base=0.02))

ax.tick_params(axis='x', labelsize=9) 
ax.tick_params(axis='y', labelsize=9)

ax.legend(loc='upper left', bbox_to_anchor=(0.01, 1.0), ncols=1, frameon=False,
          labelcolor='linecolor', fontsize=8, columnspacing=0.5)
# ax.text(-0.22, 1.0, r'C', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Inset: the same curves restricted to 6491-6495 on x, drawn without spines or ticks
axins = ax.inset_axes([0.78, 0.32, 0.2, 0.65])
for i in range(spectrum.shape[1]-8):
    axins.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 3+i], lw=1, label=labels[i+2], color=colors[i], zorder=0, alpha=1-0.01*i)
axins.set_xlim(6491, 6495)
axins.spines.right.set_visible(False)
axins.spines.bottom.set_visible(False)
axins.spines.top.set_visible(False)
axins.spines.left.set_visible(False)
axins.set(xticks=[], xlabel=None, yticks=[], ylabel=None)

plt.savefig(path.joinpath(path_out, f'all_{filetype}abaa.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600)
# plt.close()
print(r'Done')

#### 单张图

In [None]:
# Single figure with twin y axes: local magnetic moment (left) and average Mn
# oxidation state (right) for four selected states, both obtained from the same
# area values through a two-point linear calibration.
labels = [r'Pristine', r'$1^{st}$ Discharge', r'$1^{st}$ Charge', r'$2^{nd}$ Discharge']
rows = [5, 6, 9, 11]  # positional rows of `area` shown as the four states above

fig = plt.figure(figsize=(3.3, 2.5))
ax = fig.add_subplot()
area = data_merge.iloc[:, 2:4].copy().dropna()  # column 0: value, column 1: its spread

# Calibration anchors: rows 2 and 4 of `area`
# (presumably the MnO and MnO2 references — confirm against the input table)
area_row2 = area.iloc[2, 0]
area_row4 = area.iloc[4, 0]
values = area.iloc[rows, 0]
spread = area.iloc[rows, 1]

# Left axis: row 4 maps to 3, row 2 maps to 5
moment_scale = 2/(area_row2 - area_row4)
y = (values - area_row4)*moment_scale + 3
# Error-bar half-lengths must be non-negative whatever the sign of the slope;
# the former signed difference (y - y1) made Matplotlib raise for a positive slope.
yerror = np.abs(spread*moment_scale)

ax.errorbar(x=np.arange(y.shape[0]), y=y.to_numpy(), yerr=yerror.to_numpy(), linewidth=1, linestyle='-', marker='o', zorder=5, color='k', capsize=6)

ax.set_xticks(np.arange(len(labels)), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'local magnetic moment ($\mathrm{\mu _B}$)', fontsize=11)  # Total Magnetization
ax.set_ylim(2.4, 3.6)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.3))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.15))

ax.text(0.02, 0.07, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)

ax2 = ax.twinx()

# Right axis: row 2 maps to 2, row 4 maps to 4 (opposite slope, hence the inverted limits)
state_scale = 2/(area_row4 - area_row2)
d = (values - area_row2)*state_scale + 2
derror = np.abs(spread*state_scale)
ax2.errorbar(x=np.arange(derror.shape[0]), y=d.to_numpy(), yerr=derror.to_numpy(), linewidth=1, linestyle='-', marker='o', zorder=5, color='k', capsize=6)

ax2.tick_params(axis='x', labelsize=9)
ax2.tick_params(axis='y', labelsize=9, labelcolor='k')

ax2.set_ylim(4.6, 3.4)
ax2.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
ax2.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))
ax2.set_ylabel(r'Average Mn Oxidation State', fontsize=11, color='k')  # Total Magnetization

# plt.tight_layout()
plt.savefig(path.joinpath(path_out, r'Kbeta_3.tif'), transparent=False,
            pad_inches=0.05, bbox_inches='tight', dpi=300)
plt.close()
print(r'Done')

### Version-4

In [None]:
# 导入相关的包
import sys
from IPython.display import display
import spectrochempy as scp
from spectrochempy import Coord, CoordSet, NDDataset, ur
import numpy as np
import pandas as pd
from pathlib import Path as path

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import matplotlib.gridspec as gridspec
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.colorbar import Colorbar

In [None]:
# Initial plotting set-up: apply the personal Matplotlib style sheet.
plt.style.use(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-python\Figure\liuchzzyy.mplstyle')
# display(plt.style.available)

# Colour set-up: make the local `colors` module importable, then take the
# 'vibrant' colour set as the default line colours.
sys.path.append(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Python\Figure')
from colors import tol_cmap, tol_cset
colors = list(tol_cset('vibrant'))
# Register the extra colormaps only when they are not registered yet
# ('rainbow_PuRd' is a spare option; alternative: plasma).
for _cmap_name in (r'sunset', r'rainbow_PuRd'):
    if _cmap_name not in plt.colormaps():
        plt.colormaps.register(tol_cmap(_cmap_name))

# Folder that receives the exported figures.
path_out = path(r"C:\Users\chengliu\Desktop\Figure")

#### 读取数据并 denoise, 得到平均化，NMF 和 MCR 后的标样

In [None]:
# Folder that holds the raw data (iterated below, one sub-folder per sample)
path_data_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Data')
# Folder that receives the processed results
path_out_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Results\Version-3')

#### 标样的处理

In [None]:
# 'Mn' sub-folders of the two reference samples only.
_reference_names = ['R3_MnO', 'R5_MnO2']
path_ref_filelist = [
    entry.joinpath(r'Mn')
    for entry in path_data_folder.iterdir()
    if entry.is_dir() and entry.parts[-1] in _reference_names
]
display(path_ref_filelist)

In [None]:
%matplotlib inline
# Process the reference samples: merge and plot the raw scans, denoise, average,
# subtract the baseline and export the mean spectrum to the 'references' folder.
path_out_ref = path.joinpath(path_out_folder, r'references')
path_out_ref.mkdir(parents=True, exist_ok=True)

# Baseline correction
blc = scp.Baseline(
    log_level="INFO",
    model="polynomial",  # use a polynomial model
    order='linear',  # with linear method
    ranges=([6462., 6463.], [6510., 6511.]),)
    
for path_ref in path_ref_filelist:
    
    # Put all scan files of this reference side by side and save the merged table
    ref_txt_data_merge = []
    for filetxt in path_ref.glob(r'*.txt'):
        ref_txt_data = pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None)
        ref_txt_data_merge.append(ref_txt_data)     
    ref_txt_data_merge = pd.concat(ref_txt_data_merge, axis=1, ignore_index=True,)
    ref_txt_data_merge.to_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_Raw_All.csv'), index=None, header=True)
    
    # Plot every raw scan: column 2*i is used as x and column 2*i+1 as y
    fig = plt.figure(figsize=(3.3, 2.5))
    gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None, wspace=0, hspace=0, figure=fig)
    subfig = fig.add_subfigure(gs[0, 0], zorder=0)
    ax = subfig.add_axes((0, 0, 1, 1),zorder=0)
    colors_map = ListedColormap(mpl.colormaps['Spectral'](np.linspace(0.0, 1.0, ref_txt_data_merge.shape[1]//2)), name='colors_map')
    for i in range(ref_txt_data_merge.shape[1]//2):
        ax.plot(ref_txt_data_merge.iloc[:, 2*i], ref_txt_data_merge.iloc[:, 2*i+1], c=colors_map.colors[i])
    ax.set_xlim(6462, 6512)
    ax.set_xlabel(r'Energy (eV)', fontsize=11)
    ax.xaxis.set_major_locator(ticker.MultipleLocator(base=10, offset=-8))
    ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=5, offset=-8))
    ax.set_ylim(0, 16)
    ax.set_ylabel(r"Absorption (arb.u.)", fontsize=11)
    ax.yaxis.set_major_locator(ticker.MultipleLocator(base=4))
    ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=2))
    
    ax.tick_params(axis='both', which='both', labelsize=9, direction='out', bottom=True, top=False, left=True, right=False,
                  labelbottom=True, labeltop=False, labelleft=True, labelright=False,) 

    # The same figure is saved twice, at 300 and at 600 dpi
    plt.savefig(path.joinpath(path_out_ref, f'1_300_{path_ref.parts[-2]}_all.tif'), transparent=False, pad_inches=0.05, bbox_inches='tight', dpi=300)
    plt.savefig(path.joinpath(path_out_ref, f'1_600_{path_ref.parts[-2]}_all.tif'), transparent=False, pad_inches=0.05, bbox_inches='tight', dpi=600)
    plt.close()
    
    # Every second column (1, 3, 5, ...) becomes one spectrum; column 0 is the x axis
    ref_data_scp = NDDataset(data=ref_txt_data_merge.iloc[:, 1::2].T.values,
                             author="Cheng Liu",
                             description="Kbeta of Mn, ALBA",
                             history="creation",
                             )
    ref_data_scp.x = Coord(ref_txt_data_merge.iloc[:, 0].values, name='Energy', units=ur.eV,)
    ref_data_scp.y = Coord(np.arange((ref_txt_data_merge.shape[1]//2)), name='numbers',)
    
    # PCA-based reconstruction (denoising)
    recon_ref_data_scp = scp.denoise(ref_data_scp, ratio=99.8,)
    # recon_ref_data_scp.plot(clear=True)
    
    # Average over the scans
    recon_ref_data_mean = recon_ref_data_scp.mean(dim='y', keepdim=True)
    # NOTE(review): single-element stack — presumably to hand a 2D dataset to the baseline fit; confirm
    recon_ref_data_mean = scp.stack([recon_ref_data_mean])
    # recon_ref_data_mean.plot(clear=True)
    
    # Background removal
    _ = blc.fit(recon_ref_data_mean)   # fit the baseline
    recon_ref_data_mean_baseline = blc.baseline
    recon_ref_data_mean_corrected = blc.corrected  # get the corrected dataset
    # recon_ref_data_mean_corrected.plot(clear=True)
    recon_ref_data_mean_corrected.write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_mean.csv'),)
    
#     # Normalisation
#     inttrapz_area = mean_corrected.trapezoid(dim="x")
#     recon_ref_data_mean_corrected_normal = np.divide(recon_ref_data_mean_corrected, inttrapz_area)
#     recon_ref_data_mean_corrected_normal.plot(clear=True)
#     recon_ref_data_mean_corrected_normal.write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_mean_normal.csv'),)

In [None]:
# Bare expression: shows the current value of `recon_ref_data_mean` as the cell output
recon_ref_data_mean

In [None]:
# Reference spectra by three routes — EFA component, MCR-ALS component and plain
# mean — each normalised by its trapezoidal area and exported per reference.

# Collect the reference folders
path_ref_filelist = []
for item in path_data_folder.iterdir():
    if (item.is_dir()) and (item.parts[-1] in ['R3_MnO', 'R5_MnO2']):
        file_dir = path.joinpath(item, r'Mn')
        path_ref_filelist.append(file_dir)
path_ref_filelist = path_ref_filelist[0:]  # full slice: keeps every entry (edit the bounds to select a subset)
# display(path_ref_filelist)

for path_ref in path_ref_filelist:
    # Put all scan files side by side; every second column (1, 3, ...) is one spectrum
    ref_txt_data_merge = pd.DataFrame()
    for filetxt in path_ref.glob(r'*.txt'):
        ref_txt_data = pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None)
        ref_txt_data_merge = pd.concat([ref_txt_data_merge, ref_txt_data], axis=1, ignore_index=True,)
    ref_txt_data_merge = ref_txt_data_merge.to_numpy()
    ref_data_scp = NDDataset(data=ref_txt_data_merge[:, 1::2].T,
                             author="Cheng Liu",
                             description="Kbeta of Mn, ALBA",
                             history="creation",
                             )
    ref_data_scp.x = Coord(ref_txt_data_merge[:, 0], name='Energy', units=ur.eV,)
    ref_data_scp.y = Coord(np.arange((ref_txt_data_merge[:, 1::2].shape[1])), name='numbers', )
    # ref_data_scp.plot()

    # PCA-based reconstruction (denoising)
    recon_ref_data_scp = scp.denoise(ref_data_scp, ratio=99.8,)
    # recon_ref_data_scp.plot()

    # Baseline correction (the Baseline object is re-created on every iteration)
    blc = scp.Baseline(
            log_level="INFO",
            model="polynomial",  # use a polynomial model
            order='linear',  # with linear method
            ranges=([6462., 6463.], [6510., 6511.]),
            )

    _ = blc.fit(recon_ref_data_scp)   # fit the baseline
    scp_baseline = blc.baseline
    scp_corrected = blc.corrected  # get the corrected dataset
    # scp_corrected.plot()

    # Evolving Factor Analysis (EFA); n_components is set after the fit
    efa = scp.EFA()
    efa.fit(scp_corrected)
    efa.n_components = 2
    C0 = efa.transform()
    # _ = C0.T.plot()
    St = efa.get_components()
    # _ = St.plot(title="components", legend=St.k.labels)

    # # NMF
    # scp_corrected -= scp_corrected.min()
    # model = scp.NMF(n_components=2, log_level="INFO")
    # _ = model.fit(scp_corrected)
    # C0 = model.transform()
    # _ = C0.T.plot()
    # St = model.components
    # _ = St.plot(title="components", legend=St.k.labels)

    # MCR-ALS, started from the EFA components `St`
    mcr = scp.MCRALS(max_iter=100, normSpec="euclid", tol=0.0001, maxdiv=200,
                     nonnegConc='all', nonnegSpec='all',
                     )
    mcr.fit(scp_corrected, St)
    # _ = mcr.C.T.plot()
    # _ = mcr.St.plot()

    # Normalise every MCR component by its trapezoidal area
    mcr_st = mcr.St
    inttrapz_area = mcr_st.trapezoid(dim="x")
    for i in range(mcr_st.shape[0]):
        mcr_st[i, :] = np.divide(mcr_st[i, :], inttrapz_area[i])

    # Normalise every EFA component by its trapezoidal area
    inttrapz_area = St.trapezoid(dim="x")
    for i in range(St.shape[0]):
        St[i, :] = np.divide(St[i, :], inttrapz_area[i])

    # Mean of the denoised scans, followed by its own baseline fit
    mean = scp.mean(recon_ref_data_scp.T, dim='y', keepdims=True,).T
    _ = blc.fit(mean)   # fit the baseline
    mean_baseline = blc.baseline
    mean_corrected = blc.corrected  # get the corrected dataset
    # mean_corrected.plot()

    # Normalise the mean spectrum by its trapezoidal area
    inttrapz_area = mean_corrected.trapezoid(dim="x")
    for i in range(mean_corrected.shape[0]):
        mean_corrected[i, :] = np.divide(mean_corrected[i, :], inttrapz_area[i])

    # # Plot
    # ax = St[1].plot(c='r')
    # mean_corrected.plot(ax=ax, clear=False, c='b')
    # mcr_st[0].plot(ax=ax, clear=False, c='k')

    # Save the data
    # NOTE(review): EFA component index 1 but MCR component index 0 are exported —
    # presumably chosen by inspecting the plots; confirm the choice holds for both references.
    path_out_ref = path.joinpath(path_out_folder, r'references')
    path_out_ref.mkdir(parents=True, exist_ok=True)
    St[1].write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_ref_efa.csv'),)
    mean_corrected.write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_ref_mean.csv'),)
    mcr_st[0].write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_ref_mcr.csv'),)
print(r'Done')

In [None]:
# Fit definition in the parameter-script syntax described in its own header:
# two asymmetric-Voigt lines. The upper bound of LINE_1 `pos` was 9494.6, far
# outside the measured window; it is corrected to 6494.6 so that the bounds
# bracket the start value 6492.6.
script = """

#-----------------------------------------------------------
# syntax for parameters definition :
# name : value, low_bound,  high_bound
#  * for fixed parameters
#  $ for variable parameters
#  > for reference to a parameter in the COMMON block
#    (> is forbidden in the COMMON block)
# common block parameters should not have a _ in their names
#-----------------------------------------------------------
#
COMMON:
# common parameters ex.
# $ gwidth: 1.0, 0.0, none
# $ gratio: 0.5, 0.0, 1.0
# $ gasym: 0.3, 0.0, 1.0

MODEL: LINE_1
shape: asymmetricvoigtmodel
    $ ampl:  0.12, 0.10, 0.16
    $ pos:   6492.6, 6491.3, 6494.6
    $ ratio: 0.5, 0.0, 1.0
    $ asym: 0.5, 0.0, 1.0
    $ width: 0.5, 0.0, 10

MODEL: LINE_2
shape: asymmetricvoigtmodel
    $ ampl:  0.03, 0.01, 0.05
    $ pos:   6477.0, 6470.0, 6480.0
    $ ratio: 0.5, 0.0, 1.0
    $ asym: 0.5, 0.0, 1.0
    $ width: 0.5, 0.0, 10

"""

In [None]:
# script = """

# #-----------------------------------------------------------
# # syntax for parameters definition :
# # name : value, low_bound,  high_bound
# #  * for fixed parameters
# #  $ for variable parameters
# #  > for reference to a parameter in the COMMON block
# #    (> is forbidden in the COMMON block)
# # common block parameters should not have a _ in their names
# #-----------------------------------------------------------
# #
# COMMON:
# # common parameters ex.

# MODEL: linez
# shape: asymmetricvoigtmodel
# $ ampl: 0.12, 0.10, 0.16
# $ width: 0.5, 0.0, 10
# $ pos: 6492.6, 6492.4,  9493.5
# $ ratio: 0.5, 0.0, 1.0
# $ asym: 0.3, 0.0, 1.0

# """

In [None]:
# Per-sample processing for one spectrum type (`filetype`).
# For every sample folder: merge the repeated scans, denoise, baseline-correct,
# extract EFA / MCR / mean spectra, area-normalise them, then record the fitted
# peak position and the absolute-difference area to the reference, together
# with their scan-to-scan standard deviations.
# NOTE(review): relies on `path_out_folder`, `path_out_ref`, `path_data_folder`
# and `script` being defined by earlier cells.

# Load the normalised reference spectrum (file pattern 'R3_*_ref_<filetype>.csv')
filetype = r'efa'
path_filetype_folder = path.joinpath(path_out_folder, filetype)
path_filetype_folder.mkdir(parents=True, exist_ok=True)

filelist_ref = list(path_out_ref.glob(f'R3_*_ref_{filetype}.csv'))
# print(filelist_ref)
# Column 0 is used as the energy axis, column 1 as the spectrum values.
data = pd.read_csv(filelist_ref[0], comment='#', sep=r',', header=0).to_numpy()
ref = NDDataset(data=data[:, 1], title=r'Absorption', name=f'{filelist_ref[0].stem}',)
ref.x = Coord(data[:, 0], title='Energy', units=ur.eV,)
# ref.plot()

# Collect the 'Mn' sub-folder of every sample directory
filelist1 = []
for item in path_data_folder.iterdir():
    if item.is_dir():
        file_dir = path.joinpath(item, r'Mn')
        filelist1.append(file_dir)
filelist1 = filelist1[0:]  # slice kept as a hook to restrict the processed samples
# print(filelist1)

# One row of (peak std, area std) is appended per sample folder
std_out = pd.DataFrame()
for file in filelist1:
    # Concatenate all scans of this sample side by side
    # (assumes each .txt holds two columns, energy and intensity, on a common grid - TODO confirm)
    data = pd.DataFrame()
    for filetxt in file.glob('*.txt'):
        data_txt = pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None)
        data = pd.concat([data, data_txt], axis=1, ignore_index=True,)
    data = data.to_numpy()
    # Odd columns become the rows (one per scan); column 0 is the energy axis
    scp_data = NDDataset(data=data[:, 1::2].T,
                         author="Cheng Liu",
                         description="Kbeta of Mn, ALBA",
                         history="creation",
                         )
    scp_data.x = Coord(data[:, 0], name='Energy', units=ur.eV,)
    scp_data.y = Coord(np.arange((data[:, 1::2].shape[1])), name='numbers', )
    # scp_data.plot()

    # Denoise by PCA reconstruction
    # (ratio presumably is the percentage of explained variance kept - confirm in the spectrochempy docs)
    recon_scp_data = scp.denoise(scp_data, ratio=99.8,)
    # print(recon_scp_data.shape)
    # recon_scp_data.plot()

    # Baseline correction: linear polynomial fitted on the two edge ranges
    blc = scp.Baseline(
            log_level="INFO",
            model="polynomial",  # use a polynomial model
            order='linear',  # with linear method
            ranges=([6462., 6463.], [6510., 6511.]),
            )

    _ = blc.fit(recon_scp_data)   # fit the baseline
    scp_baseline = blc.baseline
    scp_corrected = blc.corrected  # get the corrected dataset
    # scp_corrected.plot()

    # scp.plot_multiple(
    #     method="scatter",
    #     ms=5,
    #     datasets=[scp_baseline[1], scp_corrected[1], scp_data[1]],
    #     labels=["baseline", "corrected_data", 'average_raw'],
    #     legend="best",
    # )

    # Evolving Factor Analysis (EFA); the number of components is set after fitting
    efa = scp.EFA()
    efa.fit(scp_corrected)
    efa.n_components = 2
    # C0 = efa.transform()
    # _ = C0.T.plot()
    St = efa.get_components()
    # _ = St.plot(title="components", legend=St.k.labels)

    # MCR-ALS, started from the EFA components
    mcr = scp.MCRALS(max_iter=100, normSpec="euclid", tol=0.0001, maxdiv=200,
                     nonnegConc='all', nonnegSpec='all',)
    mcr.fit(scp_corrected, St)
    # _ = mcr.C.T.plot()
    # _ = mcr.St.plot()

    # Normalise each MCR spectrum by its trapezoidal area along x
    mcr_st = mcr.St
    inttrapz_area = mcr_st.trapezoid(dim="x")
    for i in range(mcr_st.shape[0]):
        mcr_st[i, :] = np.divide(mcr_st[i, :], inttrapz_area[i])

    # Normalise each EFA component by its trapezoidal area along x
    inttrapz_EFA_ref = St.trapezoid(dim="x")
    # intsimps_EFA_ref = St.simpson(dim="x")
    for i in range(St.shape[0]):
        St[i, :] = np.divide(St[i, :], inttrapz_EFA_ref[i])

    # Mean over the scans, followed by a second baseline fit on the mean
    # (this refits `blc`, so blc.baseline / blc.corrected now refer to the mean)
    mean = scp.mean(scp_corrected.T, dim='y', keepdims=True,).T
    _ = blc.fit(mean)   # fit the baseline
    mean_baseline = blc.baseline
    mean_corrected = blc.corrected  # get the corrected dataset
    # mean_corrected.plot()

    # Normalise the mean spectrum by its trapezoidal area
    inttrapz_MEAN_ref = mean_corrected.trapezoid(dim="x")
    # intsimps_MEAN_ref = mean_corrected.simpson(dim="x")
    for i in range(mean_corrected.shape[0]):
        mean_corrected[i, :] = np.divide(mean_corrected[i, :], inttrapz_MEAN_ref[i])

    # # Plot
    # ax = St[1].plot(c='r')
    # mean_corrected.plot(ax=ax, clear=False, c='b')
    # mcr_st[0].plot(ax=ax, clear=False, c='k')

    # Normalise every individual scan by its trapezoidal area (in place)
    inttrapz_area = scp_corrected.trapezoid(dim="x")
    # intsimps_area = scp_corrected.simpson(dim="x")
    for i in range(scp_corrected.shape[0]):
        scp_corrected[i, :] = np.divide(scp_corrected[i, :], inttrapz_area[i])
    # _ = scp_corrected.plot(lw=1.0, figure_figsize=(3.3, 2.5), clear=True,)

    # Integrated absolute difference of each scan to the reference, and its
    # population standard deviation (ddof=0) across scans
    # (scp_corrected[:]-ref).plot()
    inttrapz_area = scp.abs(scp_corrected[:]-ref).trapezoid(dim="x")
    inttrapz_area_std = pd.DataFrame(inttrapz_area.data).std(ddof=0)
    # inttrapz_area.plot()

    # # Direct peak search on the data, and its std
    # peakslist = [s.find_peaks(distance=10)[0].x.data for s in scp_corrected[:, 6490.0:6500.0]]
    # peakstd = pd.DataFrame(peakslist).std(ddof=0)
    # # _ = pd.DataFrame(peakslist).plot()

    # Peak position from a line-shape fit: fit each scan with the model in
    # `script`, then search the peak on the fitted curve between 6490 and 6500
    peakslist = []
    for i in range(scp_corrected.shape[0]):
        f1 = scp.Optimize(log_level="WARNING",)
        f1.script = script
        f1.max_iter = 2000
        f1.fit(scp_corrected[i, :])

        # # Show the result
        # scp_corrected[i, :].plot()
        # ax = (f1.components[:]).plot(clear=False)
        # ax.autoscale(enable=True, axis="y")

        # # plotmerit
        # som = f1.inverse_transform()
        # f1.plotmerit(offset=0, kind="scatter")
        recon_scp_corrected = f1.inverse_transform()
        # display(recon_scp_corrected)
        peaks = recon_scp_corrected[6490.0:6500.0].find_peaks(distance=10)[0].x.data
        peakslist.append(peaks)
    # pd.DataFrame(peakslist).plot()
    peakstd = pd.DataFrame(peakslist).std(ddof=0)

    # Append this sample's (peak std, area std) row
    std = pd.concat([peakstd, inttrapz_area_std], axis=1, ignore_index=True,)
    std_out = pd.concat([std_out, std], axis=0, ignore_index=True,)

    # Save the results in a sub-folder named after the sample directory
    path_file = path.joinpath(path_filetype_folder, file.parts[-2])
    path_file.mkdir(parents=True, exist_ok=True)
    # Second EFA component, mean spectrum and first MCR component are exported
    St[1].write_csv(path.joinpath(path_file, f'{file.parts[-2]}_efa.csv'),)
    mean_corrected.write_csv(path.joinpath(path_file, f'{file.parts[-2]}_mean.csv'),)
    mcr_st[0].write_csv(path.joinpath(path_file, f'{file.parts[-2]}_mcr.csv'),)

    # Energy axis followed by all normalised scans, one scan per column
    (pd.concat([pd.Series(data[:, 0]), pd.DataFrame(scp_corrected.data).T], axis=1, ignore_index=True,).
     to_csv(path.joinpath(path_file, f'{file.parts[-2]}_spectrum_norm.csv'), index=None, header=True,))
    # Per-scan peak position and difference area
    # (the two-name header assumes exactly one peak per scan was found)
    (pd.concat([pd.DataFrame(peakslist), pd.DataFrame(inttrapz_area.data)], axis=1, ignore_index=True,)
     .to_csv(path.joinpath(path_file, f'{file.parts[-2]}_peak_area.csv'),
             index=None, header=[f'{file.parts[-2]}_peak', f'{file.parts[-2]}_area']))

# One row per sample folder, in the iteration order of path_data_folder.iterdir()
std_out.to_csv(path.joinpath(path_filetype_folder, f'peak_area_std_{filetype}.csv'), index=False, header=[r'peak_std', r'area_std'],)
print(r'Done')

In [None]:
# Aggregate the per-sample `<sample>_<filetype>.csv` spectra into one dataset,
# baseline-correct and area-normalise them, fit the peak position of each
# spectrum, compute the difference area to the spectrum at row 2, and export
# everything together with the standard deviations written by the previous cell.
# NOTE(review): relies on `path_filetype_folder`, `filetype` and `script`
# defined by earlier cells.

# Read the std file written by the per-sample processing cell.
# The exact file name is used on purpose: a '*_{filetype}.csv' pattern also
# matches the 'all_spectrum_*' / 'all_peak_area_*' files that this cell writes
# into the same folder, so a re-run could silently pick the wrong file.
std_file = list(path_filetype_folder.glob(f'peak_area_std_{filetype}.csv'))
if not std_file:
    raise FileNotFoundError(
        f'peak_area_std_{filetype}.csv not found in {path_filetype_folder}; '
        'run the per-sample processing cell first.')
df_std = pd.read_csv(std_file[0], index_col=None, header=0)

# Collect, for every sample sub-folder, the files of the selected type
filelist2 = []
for item in path_filetype_folder.iterdir():
    if item.is_dir():
        file_dir = list(item.glob(f'*_{filetype}.csv'))
        filelist2.append(file_dir)
filelist2 = filelist2[0:]  # slice kept as a hook to restrict the processed samples
# print(filelist2)
# Merge all spectra side by side and wrap them in an NDDataset
data = pd.DataFrame()
for file in filelist2:
    df = pd.read_csv(file[0], index_col=None, header=0)
    data = pd.concat([data, df], axis=1, ignore_index=True,)
data = data.to_numpy()
# Odd columns are the spectra (one row per sample); column 0 is the energy axis
scp_data = NDDataset(data=data[:, 1::2].T,
                     author="Cheng Liu",
                     description="Kbeta of Mn, ALBA",
                     history="creation",
                     title='Count',
                     )
scp_data.x = Coord(data[:, 0], title='Energy', units=ur.eV,)
scp_data.y = Coord(np.arange((data[:, 1::2].shape[1])), title='numbers', )
# scp_data.plot()

# Baseline correction: linear polynomial fitted on the two edge ranges
blc = scp.Baseline(
        log_level="INFO",
        model="polynomial",  # use a polynomial model
        order='linear',  # with linear method
        # ranges=([6462., 6465.], [6505., 6511.]),
        ranges=([6462., 6463.], [6510., 6511.]),
        )

_ = blc.fit(scp_data)   # fit the baseline
scp_baseline = blc.baseline
scp_corrected = blc.corrected  # get the corrected dataset

# scp.plot_multiple(
#     method="scatter",
#     ms=5,
#     datasets=[scp_baseline[1], scp_corrected[1], scp_data[1]],
#     labels=["baseline", "corrected_data", 'average_raw'],
#     legend="best",
# )

# Trapezoidal area of every spectrum
inttrapz_area = scp_corrected.trapezoid(dim="x")
# intsimps_area = scp_corrected.simpson(dim="x")

# scp.plot_multiple(
#     method="scatter",
#     ms=5,
#     datasets=[inttrapz_area,  intsimps_area],
#     labels=["trapzoidal rule", "simpson' rule"],
#     legend="best",
# )

# Normalise every spectrum to unit area (in place)
for i in range(scp_corrected.shape[0]):
    scp_corrected[i, :] = np.divide(scp_corrected[i, :], inttrapz_area[i])
# _ = scp_corrected.plot(lw=1.0, figure_figsize=(3.3, 2.5), clear=True,)

# # Direct peak search on the data
# peakslist = [s.find_peaks(distance=10, )[0].x.data for s in scp_corrected[:, 6490.0:6500.0]]
# peakslist = pd.DataFrame(peakslist)
# # _ = peakslist.plot(lw=1.0)

# Peak position from a line-shape fit: fit each spectrum with the model in
# `script`, then search the peak on the fitted curve between 6490 and 6500
peakslist = []
for i in range(scp_corrected.shape[0]):
    f1 = scp.Optimize(log_level="WARNING",)
    f1.script = script
    f1.max_iter = 2000
    f1.fit(scp_corrected[i, :])

    # # Show the result
    # scp_corrected[i, :].plot()
    # ax = (f1.components[:]).plot(clear=False)
    # ax.autoscale(enable=True, axis="y")

    # # plotmerit
    # som = f1.inverse_transform()
    # f1.plotmerit(offset=0, kind="scatter")
    recon_scp_corrected = f1.inverse_transform()
    # display(recon_scp_corrected)
    peaks = recon_scp_corrected[6490.0:6500.0].find_peaks(distance=10)[0].x.data
    peakslist.append(peaks)
# pd.DataFrame(peakslist).plot()
peakslist = pd.DataFrame(peakslist)

# Integrated absolute difference of every spectrum to the spectrum at row 2
# (presumably the R3_MnO reference when the sample folders sort by name - verify)
ref = scp_corrected[2].copy()
diff = scp_corrected[:] - ref
diff_area = diff.abs().trapezoid(dim="x")
# _ = diff_area.plot()

# Save the normalised spectra and the (peak, std, area, std) table
scp_corrected.to_xarray().to_pandas().T.to_csv(path.joinpath(path_filetype_folder, f'all_spectrum_{filetype}.csv'), header=True,)
pd.concat([peakslist, df_std['peak_std'], pd.DataFrame(diff_area.data), df_std['area_std']], 
          axis=1, ignore_index=True,).to_csv(path.joinpath(path_filetype_folder, f'all_peak_area_{filetype}.csv'),
                                             index=False, header=[r'peak', r'std', r'area', r'std'],)

print(r'Done')

In [None]:
# Point the output root at the Version-2 results; the plotting cells below build
# their input folder from `path_out_folder` and `filetype`.
path_out_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Results\Version-2')

##### Peak + std, area + std

In [None]:
# Three-panel summary figure for one spectrum type:
#   A - fitted peak energy with a +/- std band,
#   B - difference area rescaled linearly so that row 4 maps to 3 and row 2 maps to 5
#       (labelled as local magnetic moment), with a +/- std band,
#   C - all normalised spectra overlaid, with a zoom inset on 6491-6495 eV.
# NOTE(review): relies on `path_out_folder` and `colors` defined by earlier cells.

# Locate the aggregated CSV files of the selected type.
# The columns are addressed by position below (0:2 peak/std, 2:4 area/std,
# 4: spectra), so the files must be concatenated in a fixed order:
# 'all_peak_area_*' before 'all_spectrum_*'. glob() gives no ordering
# guarantee, hence the explicit sort.
filetype = r'mean'
path_file = path.joinpath(path_out_folder, filetype)
path_file.mkdir(parents=True, exist_ok=True,)
path_filelist = sorted(path_file.glob(f'all_*_{filetype}.csv'))
display(path_filelist)

data_merge = pd.DataFrame()
for file in path_filelist:
    data = pd.read_csv(file, comment='#', sep=r',', header=0, index_col=None)
    data_merge = pd.concat([data_merge, data], axis=1, ignore_index=True,)
# display(data_merge.head(13))

# Figure layout: one row, three equally wide panels
fig = plt.figure(figsize=(10.5, 3.3))
gs = gridspec.GridSpec(1, 3, width_ratios=[1, 1, 1], height_ratios=[1],
                       wspace=None, hspace=None, figure=fig)
# Tick / legend labels; assumed to follow the row order of the CSV files
labels = [r'R1_MnOOH', r'R2_ZnMn2O4', r'R3_MnO', r'R4_Mn2O3', r'R5_MnO2', r'S1_pristine', r'S2_1stDisch',
          r'S3_1stHCh_1p53V', r'S4_1stHCh_1p63V', r'S5_1stCh', r'S6_2ndDisch_1p3V', r'S7_2ndDisch']

# Panel A: energy peak + std
energy = data_merge.iloc[:, 0:2].copy().dropna()
subfig_a = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig_a.add_axes((0, 0, 0.8, 0.8), zorder=0)

ax.plot(energy.iloc[:, 0], lw=1, ls='-', marker='o', zorder=5, color=colors[0])
ax.fill_between(x=np.arange(energy.shape[0]), y1=(energy.iloc[:, 0] + energy.iloc[:, 1]),
                y2=(energy.iloc[:, 0] - energy.iloc[:, 1]), color=colors[2], alpha=0.3)

ax.set_ylim(6492.4, 6493.3)  # ax.set_ylim(6492.1, 6493.7)
ax.set_xticks(np.arange(energy.shape[0]), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'Energy (eV)', fontsize=11)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))

ax.tick_params(axis='x', labelsize=9)
ax.tick_params(axis='y', labelsize=9)

# Dashed separator at x=4 between the reference entries and the sample entries
ax.vlines(x=4, colors='k', ymin=6492.0, ymax=6493.9, linestyles='dashed', alpha=0.8)
# ax.text(0.02, 0.98, r'1$^{st}$ moment $\mathit{K \beta _{1,3}}$', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.02, 0.1, r'References', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.58, 0.95, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(-0.3, 1.0, r'A', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Panel B: area + std
area = data_merge.iloc[:, 2:4].copy().dropna()
subfig_b = fig.add_subfigure(gs[0, 1], zorder=0)
ax = subfig_b.add_axes((0.01, 0, 0.8, 0.8), zorder=0)

# Linear rescaling anchored on rows 4 (-> 3) and 2 (-> 5); y1 / y2 shift the
# area by +/- one std before applying the same rescaling
ax.plot((area.iloc[:, 0]-area.iloc[4, 0])*(2/(area.iloc[2, 0] - area.iloc[4, 0]))+3, linewidth=1, linestyle='-', marker='o', zorder=5, color=colors[0])
y1 = (area.iloc[:, 0]-area.iloc[4, 0] + area.iloc[:, 1])*(2/(area.iloc[2, 0] - area.iloc[4, 0]))+3
y2 = (area.iloc[:, 0]-area.iloc[4, 0] - area.iloc[:, 1])*(2/(area.iloc[2, 0] - area.iloc[4, 0]))+3
# display(y1, y2)
ax.fill_between(x=np.arange(area.shape[0]), y1=y1, y2=y2, color=colors[2], alpha=0.3)


ax.set_xticks(np.arange(len(labels)), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'local magnetic moment ($\mathrm{\mu _B}$)', fontsize=11)  # Total Magnetization
ax.set_ylim(2.7, 5.1)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.4))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.2))

ax.vlines(x=4, colors='k', ymin=1.7, ymax=5.3, linestyles='dashed')
ax.text(0.02, 0.1, r'References', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.58, 0.95, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(-0.22, 1.0, r'B', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Panel C: spectra (first remaining column is the x axis, the others are curves)
spectrum = data_merge.iloc[:, 4:].copy().dropna()
subfig_c = fig.add_subfigure(gs[0, 2], zorder=0)
ax = subfig_c.add_axes((0.01, 0, 0.8, 0.8), zorder=0)
colormap = ListedColormap(mpl.colormaps['sunset'](np.linspace(0, 1.0, spectrum.shape[1]-1)), name=r'colormap')

# Overlay all curves, one colour per curve
for i in range(spectrum.shape[1]-1):
    ax.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 1+i], lw=1, label=labels[i], color=colormap.colors[i], zorder=5, alpha=1-0.01*i)

ax.set_xlabel(r'Energy (eV)', fontsize=11, labelpad=3)
ax.set_xlim(6460, 6510)
ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))

ax.set_ylabel(ylabel=r'Intensity (a.u.)', fontsize=11, labelpad=3)
ax.set_ylim(0, 0.17)
ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(base=0.04))
ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(base=0.02))

ax.tick_params(axis='x', labelsize=9) 
ax.tick_params(axis='y', labelsize=9)

ax.legend(loc='upper left', bbox_to_anchor=(0.01, 1.0), ncols=1, frameon=False,
          labelcolor='linecolor', fontsize=8, columnspacing=0.5)
ax.text(-0.22, 1.0, r'C', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Frameless inset zooming on 6491-6495 eV
axins = ax.inset_axes([0.78, 0.32, 0.2, 0.65])
for i in range(spectrum.shape[1]-1):
    axins.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 1+i], lw=1, label=labels[i], color=colormap.colors[i], zorder=0, alpha=1-0.01*i)
axins.set_xlim(6491, 6495)
axins.spines.right.set_visible(False)
axins.spines.bottom.set_visible(False)
axins.spines.top.set_visible(False)
axins.spines.left.set_visible(False)
axins.set(xticks=[], xlabel=None, yticks=[], ylabel=None)

plt.savefig(path.joinpath(path_file, f'all_{filetype}.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600)
# plt.close()
print(r'Done')

In [None]:
# Single-panel figure: overlay of a subset of the normalised spectra
# (starting at the third curve) with a zoom inset on 6491-6495 eV.
# NOTE(review): relies on `path_out_folder`, `path_out` and `colors` defined by
# earlier cells.

# Locate the aggregated CSV files of the selected type.
# The spectra are addressed by position below (columns 4 and up), so the files
# must be concatenated in a fixed order: 'all_peak_area_*' before
# 'all_spectrum_*'. glob() gives no ordering guarantee, hence the explicit sort.
filetype = r'mean'
path_file = path.joinpath(path_out_folder, filetype)
path_file.mkdir(parents=True, exist_ok=True,)
path_filelist = sorted(path_file.glob(f'all_*_{filetype}.csv'))
display(path_filelist)

data_merge = pd.DataFrame()
for file in path_filelist:
    data = pd.read_csv(file, comment='#', sep=r',', header=0, index_col=None)
    data_merge = pd.concat([data_merge, data], axis=1, ignore_index=True,)
# display(data_merge.head(13))

# Figure layout: a single panel
fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None,
                       wspace=None, hspace=None, figure=fig)
# Legend labels; assumed to follow the column order of the spectrum file
labels = [r'R1_MnOOH', r'R2_ZnMn2O4', r'$\mathrm{Ref.MnO}$', r'$\mathrm{Ref.Mn_2O_3}$', r'$\mathrm{Ref.MnO_2}$', r'Pristine', r'Discharge',
          r'S3_1stHCh_1p53V', r'S4_1stHCh_1p63V', r'S5_1stCh', r'S6_2ndDisch_1p3V', r'S7_2ndDisch']


# Spectra (first remaining column is the x axis, the others are curves)
spectrum = data_merge.iloc[:, 4:].copy().dropna()
subfig_c = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig_c.add_axes((0, 0, 1.0, 1.0), zorder=0)
ax.set_box_aspect(0.8)

# Built but not used by the plots below, which take their colours from `colors`
colormap = ListedColormap(mpl.colormaps['sunset'](np.linspace(0.0, 0.5, spectrum.shape[1]-1)), name=r'colormap')

# Overlay the curves starting at the third one (labels[2]); the last seven
# curves are left out by the `-8` in the range
for i in range(spectrum.shape[1]-8):
    ax.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 3+i], lw=1, label=labels[i+2], color=colors[i], zorder=5, alpha=1-0.01*i)

ax.set_xlabel(r'Energy (eV)', fontsize=11, labelpad=3)
ax.set_xlim(6460, 6510)
ax.xaxis.set_major_locator(mpl.ticker.MultipleLocator(10))
ax.xaxis.set_minor_locator(mpl.ticker.MultipleLocator(5))

ax.set_ylabel(ylabel=r'Intensity (a.u.)', fontsize=11, labelpad=3)
ax.set_ylim(0, 0.17)
ax.yaxis.set_major_locator(mpl.ticker.MultipleLocator(base=0.04))
ax.yaxis.set_minor_locator(mpl.ticker.MultipleLocator(base=0.02))

ax.tick_params(axis='x', labelsize=9) 
ax.tick_params(axis='y', labelsize=9)

ax.legend(loc='upper left', bbox_to_anchor=(0.01, 1.0), ncols=1, frameon=False,
          labelcolor='linecolor', fontsize=8, columnspacing=0.5)
# ax.text(-0.22, 1.0, r'C', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Frameless inset zooming on 6491-6495 eV
axins = ax.inset_axes([0.78, 0.32, 0.2, 0.65])
for i in range(spectrum.shape[1]-8):
    axins.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 3+i], lw=1, label=labels[i+2], color=colors[i], zorder=0, alpha=1-0.01*i)
axins.set_xlim(6491, 6495)
axins.spines.right.set_visible(False)
axins.spines.bottom.set_visible(False)
axins.spines.top.set_visible(False)
axins.spines.left.set_visible(False)
axins.set(xticks=[], xlabel=None, yticks=[], ylabel=None)

plt.savefig(path.joinpath(path_out, f'all_{filetype}abaa.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600)
# plt.close()
print(r'Done')

#### 单张图

In [None]:
# Single figure for four selected states (rows 5, 6, 9 and 11 of the area table):
# left axis  - difference area rescaled so that row 4 maps to 3 and row 2 maps to 5
#              (labelled as local magnetic moment),
# right axis - the same area rescaled so that row 2 maps to 2 and row 4 maps to 4
#              (labelled as average Mn oxidation state), drawn on an inverted axis.
# NOTE(review): relies on `data_merge` built by a previous plotting cell and on
# `path_out`; rows 2 and 4 are presumably the MnO and MnO2 references - verify
# against the row order of the area file.
labels = [r'Pristine', r'$1^{st}$ Discharge', r'$1^{st}$ Charge', r'$2^{nd}$ Discharge']

fig = plt.figure(figsize=(3.3, 2.5))
ax = fig.add_subplot()
area = data_merge.iloc[:, 2:4].copy().dropna()

# Rows to plot and the two anchor values of the linear calibrations
rows = [5, 6, 9, 11]
area_row2 = area.iloc[2, 0]
area_row4 = area.iloc[4, 0]
area_sel = area.iloc[rows, 0]
std_sel = area.iloc[rows, 1]

# Left axis. The error bar is the std propagated through the linear map; it is
# taken as an absolute value because matplotlib rejects negative `yerr`, and
# the sign of the slope depends on which anchor area is larger.
scale_y = 2/(area_row2 - area_row4)
y = (area_sel - area_row4)*scale_y + 3
yerror = (std_sel*scale_y).abs()

ax.errorbar(x=np.arange(y.shape[0]), y=y.to_numpy(), yerr=yerror.to_numpy(), linewidth=1, linestyle='-', marker='o', zorder=5, color='k', capsize=6)

ax.set_xticks(np.arange(len(labels)), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'local magnetic moment ($\mathrm{\mu _B}$)', fontsize=11)  # Total Magnetization
ax.set_ylim(2.4, 3.6)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.3))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.15))

ax.text(0.02, 0.07, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)

ax2 = ax.twinx()

# Right axis: same data through the second linear map
scale_d = 2/(area_row4 - area_row2)
d = (area_sel - area_row2)*scale_d + 2
derror = (std_sel*scale_d).abs()
ax2.errorbar(x=np.arange(derror.shape[0]), y=d.to_numpy(), yerr=derror.to_numpy(), linewidth=1, linestyle='-', marker='o', zorder=5, color='k', capsize=6)

ax2.tick_params(axis='x', labelsize=9) 
ax2.tick_params(axis='y', labelsize=9, labelcolor='k')

# Limits given high-to-low, which inverts the right axis
ax2.set_ylim(4.6, 3.4)
ax2.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
ax2.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))
ax2.set_ylabel(r'Average Mn Oxidation State', fontsize=11, color='k')  # Total Magnetization

# plt.tight_layout()
plt.savefig(path.joinpath(path_out, r'Kbeta_3.tif'), transparent=False,
            pad_inches=0.05, bbox_inches='tight', dpi=300)
plt.close()
print(r'Done')

### Version-3

In [None]:
# 导入相关的包
# %matplotlib ipympl
import sys
from IPython.display import display
import spectrochempy as scp
from spectrochempy import Coord, CoordSet, NDDataset, ur
import numpy as np
import pandas as pd
from pathlib import Path as path

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import matplotlib.gridspec as gridspec
from matplotlib import ticker
from matplotlib.colors import ListedColormap
from matplotlib.colorbar import Colorbar

In [None]:
# Global plotting setup: personal matplotlib style sheet, colour set and
# colormaps from the local `colors` module, and the figure output folder.
plt.style.use(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-python\Figure\liuchzzyy.mplstyle')
# display(plt.style.available)

# Make the local `colors` module importable and load the colour helpers
sys.path.append(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Python\Figure')
from colors import tol_cmap, tol_cset
colors = list(tol_cset('vibrant'))

# Register the custom colormaps once; re-running the cell must not register
# them a second time ('rainbow_PuRd' is a fallback alternative to plasma)
for cmap_name in (r'sunset', r'rainbow_PuRd'):
    if cmap_name not in plt.colormaps():
        plt.colormaps.register(tol_cmap(cmap_name))

# Output folder for figures
path_out = path(r"C:\Users\chengliu\Desktop\Figure")

#### 读取数据并 denoise, 得到平均化，NMF 和 MCR 后的标样

In [None]:
# Input folder with the raw measurement data, and the Version-3 results folder
# that the following cells write into
path_data_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Data')
path_out_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Results\Version-3')

In [None]:
# Build the reference spectra: for the 'R3_MnO' and 'R5_MnO2' folders, merge the
# repeated scans, denoise, baseline-correct, extract EFA / MCR / mean spectra,
# area-normalise them and write them to '<path_out_folder>/references'.
# NOTE(review): `path_out_ref` is assigned inside the loop and is read again by
# a later cell, so this cell must have processed at least one reference folder.

# Collect the 'Mn' sub-folder of the two reference directories
path_ref_filelist = []
for item in path_data_folder.iterdir():
    if (item.is_dir()) and (item.parts[-1] in ['R3_MnO', 'R5_MnO2']):
        file_dir = path.joinpath(item, r'Mn')
        path_ref_filelist.append(file_dir)
path_ref_filelist = path_ref_filelist[0:]  # slice kept as a hook to restrict the processed folders
# display(path_ref_filelist)

for path_ref in path_ref_filelist:
    # Concatenate all scans of this reference side by side
    # (assumes each .txt holds two columns, energy and intensity, on a common grid - TODO confirm)
    ref_txt_data_merge = pd.DataFrame()
    for filetxt in path_ref.glob(r'*.txt'):
        ref_txt_data = pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None)
        ref_txt_data_merge = pd.concat([ref_txt_data_merge, ref_txt_data], axis=1, ignore_index=True,)
    ref_txt_data_merge = ref_txt_data_merge.to_numpy()
    # Odd columns become the rows (one per scan); column 0 is the energy axis
    ref_data_scp = NDDataset(data=ref_txt_data_merge[:, 1::2].T,
                             author="Cheng Liu",
                             description="Kbeta of Mn, ALBA",
                             history="creation",
                             )
    ref_data_scp.x = Coord(ref_txt_data_merge[:, 0], name='Energy', units=ur.eV,)
    ref_data_scp.y = Coord(np.arange((ref_txt_data_merge[:, 1::2].shape[1])), name='numbers', )
    # ref_data_scp.plot()

    # Denoise by PCA reconstruction
    # (ratio presumably is the percentage of explained variance kept - confirm in the spectrochempy docs)
    recon_ref_data_scp = scp.denoise(ref_data_scp, ratio=99.8,)
    # recon_ref_data_scp.plot()

    # Baseline correction: linear polynomial fitted on the two edge ranges
    blc = scp.Baseline(
            log_level="INFO",
            model="polynomial",  # use a polynomial model
            order='linear',  # with linear method
            ranges=([6462., 6463.], [6510., 6511.]),
            )

    _ = blc.fit(recon_ref_data_scp)   # fit the baseline
    scp_baseline = blc.baseline
    scp_corrected = blc.corrected  # get the corrected dataset
    # scp_corrected.plot()

    # Evolving Factor Analysis (EFA); the number of components is set after fitting
    efa = scp.EFA()
    efa.fit(scp_corrected)
    efa.n_components = 2
    C0 = efa.transform()
    # _ = C0.T.plot()
    St = efa.get_components()
    # _ = St.plot(title="components", legend=St.k.labels)

    # # NMF
    # scp_corrected -= scp_corrected.min()
    # model = scp.NMF(n_components=2, log_level="INFO")
    # _ = model.fit(scp_corrected)
    # C0 = model.transform()
    # _ = C0.T.plot()
    # St = model.components
    # _ = St.plot(title="components", legend=St.k.labels)

    # MCR-ALS, started from the EFA components
    mcr = scp.MCRALS(max_iter=100, normSpec="euclid", tol=0.0001, maxdiv=200,
                     nonnegConc='all', nonnegSpec='all',
                     )
    mcr.fit(scp_corrected, St)
    # _ = mcr.C.T.plot()
    # _ = mcr.St.plot()

    # Normalise each MCR spectrum by its trapezoidal area along x
    mcr_st = mcr.St
    inttrapz_area = mcr_st.trapezoid(dim="x")
    for i in range(mcr_st.shape[0]):
        mcr_st[i, :] = np.divide(mcr_st[i, :], inttrapz_area[i])

    # Normalise each EFA component by its trapezoidal area along x
    inttrapz_area = St.trapezoid(dim="x")
    for i in range(St.shape[0]):
        St[i, :] = np.divide(St[i, :], inttrapz_area[i])

    # Mean over the denoised scans (taken before baseline correction), then a
    # baseline fit on the mean (this refits `blc`)
    mean = scp.mean(recon_ref_data_scp.T, dim='y', keepdims=True,).T
    _ = blc.fit(mean)   # fit the baseline
    mean_baseline = blc.baseline
    mean_corrected = blc.corrected  # get the corrected dataset
    # mean_corrected.plot()

    # Normalise the mean spectrum by its trapezoidal area
    inttrapz_area = mean_corrected.trapezoid(dim="x")
    for i in range(mean_corrected.shape[0]):
        mean_corrected[i, :] = np.divide(mean_corrected[i, :], inttrapz_area[i])

    # # Plot
    # ax = St[1].plot(c='r')
    # mean_corrected.plot(ax=ax, clear=False, c='b')
    # mcr_st[0].plot(ax=ax, clear=False, c='k')

    # Save: second EFA component, mean spectrum and first MCR component, with
    # file names prefixed by the reference directory name
    path_out_ref = path.joinpath(path_out_folder, r'references')
    path_out_ref.mkdir(parents=True, exist_ok=True)
    St[1].write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_ref_efa.csv'),)
    mean_corrected.write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_ref_mean.csv'),)
    mcr_st[0].write_csv(path.joinpath(path_out_ref, f'{path_ref.parts[-2]}_ref_mcr.csv'),)
print(r'Done')

In [None]:
# Fit model handed to scp.Optimize (assigned to `f1.script` by the fitting
# cells): two asymmetric Voigt lines, LINE_1 for the main line near 6492.6 eV
# and LINE_2 for the weaker line near 6477 eV. Each parameter line reads
# "value, low_bound, high_bound" (see the syntax notes inside the script).
# The upper bound of LINE_1 `pos` was 9494.6, a typo for 6494.6 that left the
# position effectively unbounded from above.
script = """

#-----------------------------------------------------------
# syntax for parameters definition :
# name : value, low_bound,  high_bound
#  * for fixed parameters
#  $ for variable parameters
#  > for reference to a parameter in the COMMON block
#    (> is forbidden in the COMMON block)
# common block parameters should not have a _ in their names
#-----------------------------------------------------------
#
COMMON:
# common parameters ex.
# $ gwidth: 1.0, 0.0, none
# $ gratio: 0.5, 0.0, 1.0
# $ gasym: 0.3, 0.0, 1.0

MODEL: LINE_1
shape: asymmetricvoigtmodel
    $ ampl:  0.12, 0.10, 0.16
    $ pos:   6492.6, 6491.3, 6494.6
    $ ratio: 0.5, 0.0, 1.0
    $ asym: 0.5, 0.0, 1.0
    $ width: 0.5, 0.0, 10

MODEL: LINE_2
shape: asymmetricvoigtmodel
    $ ampl:  0.03, 0.01, 0.05
    $ pos:   6477.0, 6470.0, 6480.0
    $ ratio: 0.5, 0.0, 1.0
    $ asym: 0.5, 0.0, 1.0
    $ width: 0.5, 0.0, 10

"""

In [None]:
# script = """

# #-----------------------------------------------------------
# # syntax for parameters definition :
# # name : value, low_bound,  high_bound
# #  * for fixed parameters
# #  $ for variable parameters
# #  > for reference to a parameter in the COMMON block
# #    (> is forbidden in the COMMON block)
# # common block parameters should not have a _ in their names
# #-----------------------------------------------------------
# #
# COMMON:
# # common parameters ex.

# MODEL: linez
# shape: asymmetricvoigtmodel
# $ ampl: 0.12, 0.10, 0.16
# $ width: 0.5, 0.0, 10
# $ pos: 6492.6, 6492.4,  9493.5
# $ ratio: 0.5, 0.0, 1.0
# $ asym: 0.3, 0.0, 1.0

# """

In [None]:
# Per-sample processing for one spectrum type (`filetype`).
# For every sample folder: merge the repeated scans, denoise, baseline-correct,
# extract EFA / MCR / mean spectra, area-normalise them, then record the fitted
# peak position and the absolute-difference area to the reference, together
# with their scan-to-scan standard deviations.
# NOTE(review): relies on `path_out_folder`, `path_out_ref`, `path_data_folder`
# and `script` being defined by earlier cells.

# Load the normalised reference spectrum (file pattern 'R3_*_ref_<filetype>.csv')
filetype = r'efa'
path_filetype_folder = path.joinpath(path_out_folder, filetype)
path_filetype_folder.mkdir(parents=True, exist_ok=True)

filelist_ref = list(path_out_ref.glob(f'R3_*_ref_{filetype}.csv'))
# print(filelist_ref)
# Column 0 is used as the energy axis, column 1 as the spectrum values.
data = pd.read_csv(filelist_ref[0], comment='#', sep=r',', header=0).to_numpy()
ref = NDDataset(data=data[:, 1], title=r'Absorption', name=f'{filelist_ref[0].stem}',)
ref.x = Coord(data[:, 0], title='Energy', units=ur.eV,)
# ref.plot()

# Collect the 'Mn' sub-folder of every sample directory
filelist1 = []
for item in path_data_folder.iterdir():
    if item.is_dir():
        file_dir = path.joinpath(item, r'Mn')
        filelist1.append(file_dir)
filelist1 = filelist1[0:]  # slice kept as a hook to restrict the processed samples
# print(filelist1)

# One row of (peak std, area std) is appended per sample folder
std_out = pd.DataFrame()
for file in filelist1:
    # Concatenate all scans of this sample side by side
    # (assumes each .txt holds two columns, energy and intensity, on a common grid - TODO confirm)
    data = pd.DataFrame()
    for filetxt in file.glob('*.txt'):
        data_txt = pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None)
        data = pd.concat([data, data_txt], axis=1, ignore_index=True,)
    data = data.to_numpy()
    # Odd columns become the rows (one per scan); column 0 is the energy axis
    scp_data = NDDataset(data=data[:, 1::2].T,
                         author="Cheng Liu",
                         description="Kbeta of Mn, ALBA",
                         history="creation",
                         )
    scp_data.x = Coord(data[:, 0], name='Energy', units=ur.eV,)
    scp_data.y = Coord(np.arange((data[:, 1::2].shape[1])), name='numbers', )
    # scp_data.plot()

    # Denoise by PCA reconstruction
    # (ratio presumably is the percentage of explained variance kept - confirm in the spectrochempy docs)
    recon_scp_data = scp.denoise(scp_data, ratio=99.8,)
    # print(recon_scp_data.shape)
    # recon_scp_data.plot()

    # Baseline correction: linear polynomial fitted on the two edge ranges
    blc = scp.Baseline(
            log_level="INFO",
            model="polynomial",  # use a polynomial model
            order='linear',  # with linear method
            ranges=([6462., 6463.], [6510., 6511.]),
            )

    _ = blc.fit(recon_scp_data)   # fit the baseline
    scp_baseline = blc.baseline
    scp_corrected = blc.corrected  # get the corrected dataset
    # scp_corrected.plot()

    # scp.plot_multiple(
    #     method="scatter",
    #     ms=5,
    #     datasets=[scp_baseline[1], scp_corrected[1], scp_data[1]],
    #     labels=["baseline", "corrected_data", 'average_raw'],
    #     legend="best",
    # )

    # Evolving Factor Analysis (EFA); the number of components is set after fitting
    efa = scp.EFA()
    efa.fit(scp_corrected)
    efa.n_components = 2
    # C0 = efa.transform()
    # _ = C0.T.plot()
    St = efa.get_components()
    # _ = St.plot(title="components", legend=St.k.labels)

    # MCR-ALS, started from the EFA components
    mcr = scp.MCRALS(max_iter=100, normSpec="euclid", tol=0.0001, maxdiv=200,
                     nonnegConc='all', nonnegSpec='all',)
    mcr.fit(scp_corrected, St)
    # _ = mcr.C.T.plot()
    # _ = mcr.St.plot()

    # Normalise each MCR spectrum by its trapezoidal area along x
    mcr_st = mcr.St
    inttrapz_area = mcr_st.trapezoid(dim="x")
    for i in range(mcr_st.shape[0]):
        mcr_st[i, :] = np.divide(mcr_st[i, :], inttrapz_area[i])

    # Normalise each EFA component by its trapezoidal area along x
    inttrapz_EFA_ref = St.trapezoid(dim="x")
    # intsimps_EFA_ref = St.simpson(dim="x")
    for i in range(St.shape[0]):
        St[i, :] = np.divide(St[i, :], inttrapz_EFA_ref[i])

    # Mean over the scans, followed by a second baseline fit on the mean
    # (this refits `blc`, so blc.baseline / blc.corrected now refer to the mean)
    mean = scp.mean(scp_corrected.T, dim='y', keepdims=True,).T
    _ = blc.fit(mean)   # fit the baseline
    mean_baseline = blc.baseline
    mean_corrected = blc.corrected  # get the corrected dataset
    # mean_corrected.plot()

    # Normalise the mean spectrum by its trapezoidal area
    inttrapz_MEAN_ref = mean_corrected.trapezoid(dim="x")
    # intsimps_MEAN_ref = mean_corrected.simpson(dim="x")
    for i in range(mean_corrected.shape[0]):
        mean_corrected[i, :] = np.divide(mean_corrected[i, :], inttrapz_MEAN_ref[i])

    # # Plot
    # ax = St[1].plot(c='r')
    # mean_corrected.plot(ax=ax, clear=False, c='b')
    # mcr_st[0].plot(ax=ax, clear=False, c='k')

    # Normalise every individual scan by its trapezoidal area (in place)
    inttrapz_area = scp_corrected.trapezoid(dim="x")
    # intsimps_area = scp_corrected.simpson(dim="x")
    for i in range(scp_corrected.shape[0]):
        scp_corrected[i, :] = np.divide(scp_corrected[i, :], inttrapz_area[i])
    # _ = scp_corrected.plot(lw=1.0, figure_figsize=(3.3, 2.5), clear=True,)

    # Integrated absolute difference of each scan to the reference, and its
    # population standard deviation (ddof=0) across scans
    # (scp_corrected[:]-ref).plot()
    inttrapz_area = scp.abs(scp_corrected[:]-ref).trapezoid(dim="x")
    inttrapz_area_std = pd.DataFrame(inttrapz_area.data).std(ddof=0)
    # inttrapz_area.plot()

    # # Direct peak search on the data, and its std
    # peakslist = [s.find_peaks(distance=10)[0].x.data for s in scp_corrected[:, 6490.0:6500.0]]
    # peakstd = pd.DataFrame(peakslist).std(ddof=0)
    # # _ = pd.DataFrame(peakslist).plot()

    # Peak position from a line-shape fit: fit each scan with the model in
    # `script`, then search the peak on the fitted curve between 6490 and 6500
    peakslist = []
    for i in range(scp_corrected.shape[0]):
        f1 = scp.Optimize(log_level="WARNING",)
        f1.script = script
        f1.max_iter = 2000
        f1.fit(scp_corrected[i, :])

        # # Show the result
        # scp_corrected[i, :].plot()
        # ax = (f1.components[:]).plot(clear=False)
        # ax.autoscale(enable=True, axis="y")

        # # plotmerit
        # som = f1.inverse_transform()
        # f1.plotmerit(offset=0, kind="scatter")
        recon_scp_corrected = f1.inverse_transform()
        # display(recon_scp_corrected)
        peaks = recon_scp_corrected[6490.0:6500.0].find_peaks(distance=10)[0].x.data
        peakslist.append(peaks)
    # pd.DataFrame(peakslist).plot()
    peakstd = pd.DataFrame(peakslist).std(ddof=0)

    # Append this sample's (peak std, area std) row
    std = pd.concat([peakstd, inttrapz_area_std], axis=1, ignore_index=True,)
    std_out = pd.concat([std_out, std], axis=0, ignore_index=True,)

    # Save the results in a sub-folder named after the sample directory
    path_file = path.joinpath(path_filetype_folder, file.parts[-2])
    path_file.mkdir(parents=True, exist_ok=True)
    # Second EFA component, mean spectrum and first MCR component are exported
    St[1].write_csv(path.joinpath(path_file, f'{file.parts[-2]}_efa.csv'),)
    mean_corrected.write_csv(path.joinpath(path_file, f'{file.parts[-2]}_mean.csv'),)
    mcr_st[0].write_csv(path.joinpath(path_file, f'{file.parts[-2]}_mcr.csv'),)

    # Energy axis followed by all normalised scans, one scan per column
    (pd.concat([pd.Series(data[:, 0]), pd.DataFrame(scp_corrected.data).T], axis=1, ignore_index=True,).
     to_csv(path.joinpath(path_file, f'{file.parts[-2]}_spectrum_norm.csv'), index=None, header=True,))
    # Per-scan peak position and difference area
    # (the two-name header assumes exactly one peak per scan was found)
    (pd.concat([pd.DataFrame(peakslist), pd.DataFrame(inttrapz_area.data)], axis=1, ignore_index=True,)
     .to_csv(path.joinpath(path_file, f'{file.parts[-2]}_peak_area.csv'),
             index=None, header=[f'{file.parts[-2]}_peak', f'{file.parts[-2]}_area']))

# One row per sample folder, in the iteration order of path_data_folder.iterdir()
std_out.to_csv(path.joinpath(path_filetype_folder, f'peak_area_std_{filetype}.csv'), index=False, header=[r'peak_std', r'area_std'],)
print(r'Done')

In [None]:
# Load the per-sample standard deviations of peak position and area.
# The file is matched by its exact name, `peak_area_std_{filetype}.csv`.
# A loose `*_{filetype}.csv` pattern would also match
# `all_spectrum_{filetype}.csv` and `all_peak_area_{filetype}.csv`, which this
# cell saves into the same folder, so re-running the cell could pick up a table
# that has no `peak_std` / `area_std` columns.
std_file = list(path_filetype_folder.glob(f'peak_area_std_{filetype}.csv'))
df_std = pd.read_csv(std_file[0], index_col=None, header=0)

# For every sample sub-folder, list the CSV files of the requested filetype.
# NOTE(review): the folder order is left exactly as iterdir() yields it, because
# the spectra are later combined by position with df_std and row 2 is used as
# the reference spectrum — confirm it matches the order used to write df_std.
filelist2 = [
    list(item.glob(f'*_{filetype}.csv'))
    for item in path_filetype_folder.iterdir()
    if item.is_dir()
]

# Read the first matching file of each sample and put the tables side by side.
data = pd.concat(
    [pd.read_csv(file[0], index_col=None, header=0) for file in filelist2],
    axis=1, ignore_index=True,
).to_numpy()

# Every second column (1, 3, 5, ...) becomes one spectrum (one row of the
# dataset); column 0 is used as the common energy axis.
# NOTE(review): presumably each file contributes an (energy, intensity) column
# pair on the same energy grid — confirm.
scp_data = NDDataset(data=data[:, 1::2].T,
                     author="Cheng Liu",
                     description="Kbeta of Mn, ALBA",
                     history="creation",
                     title='Count',
                     )
scp_data.x = Coord(data[:, 0], title='Energy', units=ur.eV,)
scp_data.y = Coord(np.arange((data[:, 1::2].shape[1])), title='numbers', )
# scp_data.plot()

# Baseline correction: linear polynomial fitted on the two edge windows.
# Alternative windows kept for reference: ([6462., 6465.], [6505., 6511.])
blc = scp.Baseline(
        log_level="INFO",
        model="polynomial",  # use a polynomial model
        order='linear',  # with linear method
        ranges=([6462., 6463.], [6510., 6511.]),
        )

_ = blc.fit(scp_data)   # fit the baseline
scp_baseline = blc.baseline
scp_corrected = blc.corrected  # get the corrected dataset

# Integrated area of every baseline-corrected spectrum (trapezoidal rule).
inttrapz_area = scp_corrected.trapezoid(dim="x")

# Normalise every spectrum by its own area.
for i in range(scp_corrected.shape[0]):
    scp_corrected[i, :] = np.divide(scp_corrected[i, :], inttrapz_area[i])
# _ = scp_corrected.plot(lw=1.0, figure_figsize=(3.3, 2.5), clear=True,)

# Peak position from a fit: each spectrum is fitted with the model in `script`,
# and the peak is searched on the reconstructed curve between 6490 and 6500
# instead of on the raw points.
peakslist = []
for i in range(scp_corrected.shape[0]):
    f1 = scp.Optimize(log_level="WARNING",)
    f1.script = script
    f1.max_iter = 2000
    f1.fit(scp_corrected[i, :])

    recon_scp_corrected = f1.inverse_transform()
    peaks = recon_scp_corrected[6490.0:6500.0].find_peaks(distance=10)[0].x.data
    peakslist.append(peaks)
peakslist = pd.DataFrame(peakslist)

# Integrated absolute difference of every spectrum with respect to row 2.
ref = scp_corrected[2].copy()
diff = scp_corrected[:] - ref
diff_area = diff.abs().trapezoid(dim="x")
# _ = diff_area.plot()

# Save the normalised spectra and the peak / area table with their std columns.
scp_corrected.to_xarray().to_pandas().T.to_csv(path.joinpath(path_filetype_folder, f'all_spectrum_{filetype}.csv'), header=True,)
pd.concat([peakslist, df_std['peak_std'], pd.DataFrame(diff_area.data), df_std['area_std']],
          axis=1, ignore_index=True,).to_csv(path.joinpath(path_filetype_folder, f'all_peak_area_{filetype}.csv'),
                                             index=False, header=[r'peak', r'std', r'area', r'std'],)

print(r'Done')

In [None]:
# NOTE(review): this rebinds `path_out_folder` to the Version-2 results folder;
# the plotting cells below build their input/output paths from it — confirm
# this is the intended results version before running them.
path_out_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\αMnO2\Kbeta\2023-CLAESS\Results\Version-2')

##### Peak + std, area + std

In [None]:
# Locate the merged result tables for the chosen filetype.
filetype = r'mean'
path_file = path.joinpath(path_out_folder, filetype)
path_file.mkdir(parents=True, exist_ok=True,)
# Path.glob() yields matches in arbitrary, filesystem-dependent order, while the
# column slices below (0:2 peak, 2:4 area, 4: spectra) depend on the order in
# which the tables are concatenated. Sorting makes that order reproducible.
# NOTE(review): assumes the peak/area table sorts before the spectrum table
# (e.g. all_peak_area_* < all_spectrum_*) — confirm against the folder content.
path_filelist = sorted(path_file.glob(f'all_*_{filetype}.csv'))
display(path_filelist)

# Read every table and place them side by side with renumbered columns.
data_merge = pd.concat(
    [pd.read_csv(file, comment='#', sep=r',', header=0, index_col=None) for file in path_filelist],
    axis=1, ignore_index=True,
)
# display(data_merge.head(13))

# Figure layout: one row with three equally wide panels.
fig = plt.figure(figsize=(10.5, 3.3))
gs = gridspec.GridSpec(1, 3, width_ratios=[1, 1, 1], height_ratios=[1],
                       wspace=None, hspace=None, figure=fig)
labels = [r'R1_MnOOH', r'R2_ZnMn2O4', r'R3_MnO', r'R4_Mn2O3', r'R5_MnO2', r'S1_pristine', r'S2_1stDisch',
          r'S3_1stHCh_1p53V', r'S4_1stHCh_1p63V', r'S5_1stCh', r'S6_2ndDisch_1p3V', r'S7_2ndDisch']

# Panel A: peak energy with a +/- std band.
energy = data_merge.iloc[:, 0:2].copy().dropna()
subfig_a = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig_a.add_axes((0, 0, 0.8, 0.8), zorder=0)

ax.plot(energy.iloc[:, 0], lw=1, ls='-', marker='o', zorder=5, color=colors[0])
ax.fill_between(x=np.arange(energy.shape[0]), y1=(energy.iloc[:, 0] + energy.iloc[:, 1]),
                y2=(energy.iloc[:, 0] - energy.iloc[:, 1]), color=colors[2], alpha=0.3)

ax.set_ylim(6492.4, 6493.3)  # ax.set_ylim(6492.1, 6493.7)
ax.set_xticks(np.arange(energy.shape[0]), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'Energy (eV)', fontsize=11)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))

ax.tick_params(axis='x', labelsize=9)
ax.tick_params(axis='y', labelsize=9)

# Dashed separator drawn at x = 4, between the two annotated groups.
ax.vlines(x=4, colors='k', ymin=6492.0, ymax=6493.9, linestyles='dashed', alpha=0.8)
# ax.text(0.02, 0.98, r'1$^{st}$ moment $\mathit{K \beta _{1,3}}$', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.02, 0.1, r'References', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.58, 0.95, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(-0.3, 1.0, r'A', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Panel B: area with a +/- std band, rescaled linearly so that row 4
# (labelled R5_MnO2) maps to 3 and row 2 (labelled R3_MnO) maps to 5.
area = data_merge.iloc[:, 2:4].copy().dropna()
subfig_b = fig.add_subfigure(gs[0, 1], zorder=0)
ax = subfig_b.add_axes((0.01, 0, 0.8, 0.8), zorder=0)

area_ref = area.iloc[4, 0]
scale = 2/(area.iloc[2, 0] - area_ref)
ax.plot((area.iloc[:, 0] - area_ref)*scale + 3, linewidth=1, linestyle='-', marker='o', zorder=5, color=colors[0])
y1 = (area.iloc[:, 0] - area_ref + area.iloc[:, 1])*scale + 3
y2 = (area.iloc[:, 0] - area_ref - area.iloc[:, 1])*scale + 3
# display(y1, y2)
ax.fill_between(x=np.arange(area.shape[0]), y1=y1, y2=y2, color=colors[2], alpha=0.3)

ax.set_xticks(np.arange(len(labels)), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'local magnetic moment ($\mathrm{\mu _B}$)', fontsize=11)  # Total Magnetization
ax.set_ylim(2.7, 5.1)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.4))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.2))

ax.vlines(x=4, colors='k', ymin=1.7, ymax=5.3, linestyles='dashed')
ax.text(0.02, 0.1, r'References', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(0.58, 0.95, r'Charged States', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=11)
ax.text(-0.22, 1.0, r'B', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Panel C: all spectra overlaid; the first remaining column is the energy axis.
spectrum = data_merge.iloc[:, 4:].copy().dropna()
subfig_c = fig.add_subfigure(gs[0, 2], zorder=0)
ax = subfig_c.add_axes((0.01, 0, 0.8, 0.8), zorder=0)
colormap = ListedColormap(mpl.colormaps['sunset'](np.linspace(0, 1.0, spectrum.shape[1]-1)), name=r'colormap')


def _plot_all_spectra(target_ax, zorder):
    """Draw every spectrum column against the energy column on target_ax."""
    for i in range(spectrum.shape[1]-1):
        target_ax.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 1+i], lw=1, label=labels[i],
                       color=colormap.colors[i], zorder=zorder, alpha=1-0.01*i)


_plot_all_spectra(ax, zorder=5)

ax.set_xlabel(r'Energy (eV)', fontsize=11, labelpad=3)
ax.set_xlim(6460, 6510)
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(5))

ax.set_ylabel(ylabel=r'Intensity (a.u.)', fontsize=11, labelpad=3)
ax.set_ylim(0, 0.17)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.04))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.02))

ax.tick_params(axis='x', labelsize=9)
ax.tick_params(axis='y', labelsize=9)

ax.legend(loc='upper left', bbox_to_anchor=(0.01, 1.0), ncols=1, frameon=False,
          labelcolor='linecolor', fontsize=8, columnspacing=0.5)
ax.text(-0.22, 1.0, r'C', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Frameless inset showing the same curves restricted to 6491-6495 on x.
axins = ax.inset_axes([0.78, 0.32, 0.2, 0.65])
_plot_all_spectra(axins, zorder=0)
axins.set_xlim(6491, 6495)
for side in ('right', 'bottom', 'top', 'left'):
    axins.spines[side].set_visible(False)
axins.set(xticks=[], xlabel=None, yticks=[], ylabel=None)

plt.savefig(path.joinpath(path_file, f'all_{filetype}.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600)
# plt.close()
print(r'Done')

In [None]:
# Locate the merged result tables for the chosen filetype.
filetype = r'mean'
path_file = path.joinpath(path_out_folder, filetype)
path_file.mkdir(parents=True, exist_ok=True,)
# Path.glob() yields matches in arbitrary, filesystem-dependent order, while the
# column slice below (4: spectra) depends on the order in which the tables are
# concatenated. Sorting makes that order reproducible.
# NOTE(review): assumes the peak/area table sorts before the spectrum table
# (e.g. all_peak_area_* < all_spectrum_*) — confirm against the folder content.
path_filelist = sorted(path_file.glob(f'all_*_{filetype}.csv'))
display(path_filelist)

# Read every table and place them side by side with renumbered columns.
data_merge = pd.concat(
    [pd.read_csv(file, comment='#', sep=r',', header=0, index_col=None) for file in path_filelist],
    axis=1, ignore_index=True,
)
# display(data_merge.head(13))

# Figure layout: a single panel.
fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None,
                       wspace=None, hspace=None, figure=fig)
labels = [r'R1_MnOOH', r'R2_ZnMn2O4', r'$\mathrm{Ref.MnO}$', r'$\mathrm{Ref.Mn_2O_3}$', r'$\mathrm{Ref.MnO_2}$', r'Pristine', r'Discharge',
          r'S3_1stHCh_1p53V', r'S4_1stHCh_1p63V', r'S5_1stCh', r'S6_2ndDisch_1p3V', r'S7_2ndDisch']

# Spectra: the first remaining column is the energy axis.
spectrum = data_merge.iloc[:, 4:].copy().dropna()
subfig_c = fig.add_subfigure(gs[0, 0], zorder=0)
ax = subfig_c.add_axes((0, 0, 1.0, 1.0), zorder=0)
ax.set_box_aspect(0.8)

# NOTE(review): this colormap is not used by the plotting below (the curves
# take their colours from `colors`); the assignment is kept unchanged.
colormap = ListedColormap(mpl.colormaps['sunset'](np.linspace(0.0, 0.5, spectrum.shape[1]-1)), name=r'colormap')


def _plot_selected_spectra(target_ax, zorder):
    """Draw the subset of spectra starting at column 3, labelled from labels[2]."""
    for i in range(spectrum.shape[1]-8):
        target_ax.plot(spectrum.iloc[:, 0], spectrum.iloc[:, 3+i], lw=1, label=labels[i+2],
                       color=colors[i], zorder=zorder, alpha=1-0.01*i)


_plot_selected_spectra(ax, zorder=5)

ax.set_xlabel(r'Energy (eV)', fontsize=11, labelpad=3)
ax.set_xlim(6460, 6510)
ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(5))

ax.set_ylabel(ylabel=r'Intensity (a.u.)', fontsize=11, labelpad=3)
ax.set_ylim(0, 0.17)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.04))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.02))

ax.tick_params(axis='x', labelsize=9)
ax.tick_params(axis='y', labelsize=9)

ax.legend(loc='upper left', bbox_to_anchor=(0.01, 1.0), ncols=1, frameon=False,
          labelcolor='linecolor', fontsize=8, columnspacing=0.5)
# ax.text(-0.22, 1.0, r'C', weight='bold', horizontalalignment='left', verticalalignment='top', transform=ax.transAxes, fontsize=13)

# Frameless inset showing the same curves restricted to 6491-6495 on x.
axins = ax.inset_axes([0.78, 0.32, 0.2, 0.65])
_plot_selected_spectra(axins, zorder=0)
axins.set_xlim(6491, 6495)
for side in ('right', 'bottom', 'top', 'left'):
    axins.spines[side].set_visible(False)
axins.set(xticks=[], xlabel=None, yticks=[], ylabel=None)

plt.savefig(path.joinpath(path_out, f'all_{filetype}abaa.tif'), pad_inches=0.05, bbox_inches='tight', dpi=600)
# plt.close()
print(r'Done')

#### 单张图

In [None]:
# Tick labels for the four states shown in this figure.
labels = [r'Pristine', r'$1^{st}$ Discharge', r'$1^{st}$ Charge', r'$2^{nd}$ Discharge']

fig = plt.figure(figsize=(3.3, 2.5))
ax = fig.add_subplot()
area = data_merge.iloc[:, 2:4].copy().dropna()

# Positional rows of `area` that are plotted: value column and std column.
rows = [5, 6, 9, 11]
area_sel = area.iloc[rows, 0]
std_sel = area.iloc[rows, 1]
# The values in rows 2 and 4 anchor both linear rescalings below.
area_row2 = area.iloc[2, 0]
area_row4 = area.iloc[4, 0]

# Left axis: linear map in which row 4 corresponds to 3 and row 2 to 5.
to_moment = 2/(area_row2 - area_row4)
y = (area_sel - area_row4)*to_moment + 3
y1 = (area_sel - area_row4 + std_sel)*to_moment + 3
yerror = y - y1

ax.errorbar(x=np.arange(y.shape[0]), y=y.dropna().to_numpy(), yerr=yerror,
            linewidth=1, linestyle='-', marker='o', zorder=5, color='k', capsize=6)

ax.set_xticks(np.arange(len(labels)), labels=labels)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
ax.xaxis.set_minor_locator(ticker.NullLocator())
ax.set_ylabel(r'local magnetic moment ($\mathrm{\mu _B}$)', fontsize=11)  # Total Magnetization
ax.set_ylim(2.4, 3.6)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=0.3))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.15))

ax.text(0.02, 0.07, r'Charged States', horizontalalignment='left', verticalalignment='top',
        transform=ax.transAxes, fontsize=11)

# Right axis: linear map in which row 2 corresponds to 2 and row 4 to 4.
# Algebraically d = 7 - y, and the inverted limits (4.6, 3.4) mirror the left
# axis limits (2.4, 3.6), so both error-bar series coincide on the canvas.
ax2 = ax.twinx()

to_state = 2/(area_row4 - area_row2)
d = (area_sel - area_row2)*to_state + 2
d1 = (area_sel - area_row2 + std_sel)*to_state + 2
derror = (d1 - d).dropna()
ax2.errorbar(x=np.arange(derror.shape[0]), y=d.dropna().to_numpy(), yerr=derror,
             linewidth=1, linestyle='-', marker='o', zorder=5, color='k', capsize=6)

ax2.tick_params(axis='x', labelsize=9)
ax2.tick_params(axis='y', labelsize=9, labelcolor='k')

ax2.set_ylim(4.6, 3.4)
ax2.yaxis.set_major_locator(ticker.MultipleLocator(base=0.2))
ax2.yaxis.set_minor_locator(ticker.MultipleLocator(base=0.1))
ax2.set_ylabel(r'Average Mn Oxidation State', fontsize=11, color='k')  # Total Magnetization

# plt.tight_layout()
plt.savefig(path.joinpath(path_out, r'Kbeta_3.tif'), transparent=False,
            pad_inches=0.05, bbox_inches='tight', dpi=300)
plt.close()
print(r'Done')

### Version-2

#### 读取数据并 denoise, 得到峰和面积的 std, 后面再平均化， NMF 和 MCR

In [None]:
# # 读取数据文件夹
# data_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\Kbeta\2023-CLAESS\Data')
# filelist1 = []
# for item in data_folder.iterdir():
#     if item.is_dir():
#         file_dir = path.joinpath(item, r'Mn')
#         filelist1.append(file_dir)
# filelist1 = filelist1[0:]
# # print(filelist1)
# std_out = pd.DataFrame()
# for file in filelist1:
#     data = pd.DataFrame()
#     for filetxt in file.glob('*.txt'):
#         data_txt = pd.read_csv(filetxt, comment='#', sep=r'\s+', header=None)
#         data = pd.concat([data, data_txt], axis=1, ignore_index=True,)
#     data = data.to_numpy()
#     scp_data = NDDataset(data=data[:, 1::2].T,
#                          author="Cheng Liu",
#                          description="Kbeta of Mn, ALBA",
#                          history="creation",
#                          )
#     scp_data.x = Coord(data[:, 0], name='Energy', units=ur.eV,)
#     scp_data.y = Coord(np.arange((data[:, 1::2].shape[1])), name='numbers', )
#     # scp_data.plot()

#     # PCA 重构数据
#     recon_scp_data = scp.denoise(scp_data, ratio=99.8,)
#     # print(recon_scp_data.shape)
#     # recon_scp_data.plot()

#     # 基线校准
#     blc = scp.Baseline(
#             log_level="INFO",
#             model="polynomial",  # use a polynomial model
#             order='linear',  # with linear method
#             ranges=([6462., 6463.], [6510., 6511.]),
#             )

#     _ = blc.fit(scp_data)   # fit the baseline
#     scp_baseline = blc.baseline
#     scp_corrected = blc.corrected  # get the corrected dataset
#     # scp_corrected.plot()
#     # scp.plot_multiple(
#     #     method="scatter",
#     #     ms=5,
#     #     datasets=[scp_baseline[1], scp_corrected[1], scp_data[1]],
#     #     labels=["baseline", "corrected_data", 'average_raw'],
#     #     legend="best",
#     # )
    
#     # 寻峰，以及 std 分布
#     peakslist = [s.find_peaks(distance=10)[0].x.data for s in scp_corrected[:, 6490.0:6500.0]]
#     pd.DataFrame(peakslist).to_csv(path.joinpath(file, f'{file.parts[-2]}_all_peaks.csv'),index=None, header=[f'{file.parts[-2]}_peaks'])
#     peakstd = pd.DataFrame(peakslist).std(ddof=0)
#     # _ = peakslist.plot()
    
#     # 计算面积，以及 std 分布
#     inttrapz_area = scp_corrected.trapezoid(dim="x")
#     # intsimps_area = scp_corrected.simpson(dim="x")
#     pd.DataFrame(inttrapz_area.data).to_csv(path.joinpath(file, f'{file.parts[-2]}_all_areas.csv'),index=None, header=[f'{file.parts[-2]}_areas'])
#     inttrapz_area_std = pd.DataFrame(inttrapz_area.data).std(ddof=0)/np.mean(inttrapz_area.data)
    
#     # scp.plot_multiple(
#     #     method="scatter",
#     #     ms=5,
#     #     datasets=[inttrapz_area,  intsimps_area],
#     #     labels=["trapzoidal rule", "simpson' rule"],
#     #     legend="best",
#     # )
#     std = pd.concat([peakstd, inttrapz_area_std], axis=1, ignore_index=True,)
#     std_out = pd.concat([std_out, std], axis=0, ignore_index=True,)
    
#     # Evolving Factor Analysis (EFA) 计算
#     efa = scp.EFA()
#     efa.fit(recon_scp_data)
#     efa.n_components = 2
#     # C0 = efa.transform()
#     # _ = C0.T.plot()
#     St = efa.get_components()
#     # _ = St.plot(title="components", legend=St.k.labels)

#     mcr = scp.MCRALS(max_iter=100, normSpec="euclid", tol=0.0001, maxdiv=200,
#                      nonnegConc='all', nonnegSpec='all',
#                      )
#     mcr.fit(recon_scp_data, St)
#     # _ = mcr.C.T.plot()
#     # _ = (mcr.St[1]/mcr.St[1].max()).plot()
#     # _ = (St[1]/St[1].max()).plot(clear=False)
#     # _ = (scp.mean(scp_data.T, dim='y')/scp.mean(scp_data.T, dim='y').max()).plot(clear=False)
#     St[1].write_csv(path.joinpath(file, f'{file.parts[-2]}_NMF.csv'),)
#     scp.mean(scp_data.T, dim='y').write_csv(path.joinpath(file, f'{file.parts[-2]}_MEAN.csv'),)
#     mcr.St[1].write_csv(path.joinpath(file, f'{file.parts[-2]}_MCR.csv'),)
    
# std_out.to_csv(path.joinpath(data_folder, f'all_peak_std.csv'), index=False, header=[r'peak_std', r'area_std'],)
# print(r'Done')

#### 平均化， NMF 和 MCR 数据 去背景，归一化，计算 IDA

In [None]:
# # 读取 std 文件
# # data_folder = path(r'C:\Users\chengliu\OneDrive - UAB\ICMAB-Data\Zn-Mn\Uno\Result\XAS\ExSitu\Kbeta\2023-CLAESS\Data')
# std_file = list(data_folder.glob(f'*_std.csv'))
# df_std = pd.read_csv(std_file[0], index_col=None, header=0)

# # 读取所有的 NMF 文件路径
# filename = r'MEAN'
# filelist2 = []
# for item in data_folder.iterdir():
#     if item.is_dir():
#         file_dir = list(path.joinpath(item, r'Mn').glob(f'*_{filename}.csv'))
#         filelist2.append(file_dir)
# filelist2 = filelist2[0:]
# # print(len(filelist2))
# # 将所有 Raw Data 写入 NDDataset
# data = pd.DataFrame()
# for file in filelist2:
#     df = pd.read_csv(file[0], index_col=None, header=0)
#     data = pd.concat([data, df], axis=1, ignore_index=True,)
# data = data.to_numpy()
# scp_data = NDDataset(data=data[:, 1::2].T,
#                      author="Cheng Liu",
#                      description="Kbeta of Mn, ALBA",
#                      history="creation",
#                      title='Count',
#                      )
# scp_data.x = Coord(data[:, 0], title='Energy', units=ur.eV,)
# scp_data.y = Coord(np.arange((data[:, 1::2].shape[1])), title='numbers', )
# # scp_data.plot()

# # 基线校准
# blc = scp.Baseline(
#         log_level="INFO",
#         model="polynomial",  # use a polynomial model
#         order='linear',  # with linear method
#         # ranges=([6462., 6465.], [6505., 6511.]),
#         ranges=([6462., 6463.], [6510., 6511.]),
#         )

# _ = blc.fit(scp_data)   # fit the baseline
# scp_baseline = blc.baseline
# scp_corrected = blc.corrected  # get the corrected dataset

# # scp.plot_multiple(
# #     method="scatter",
# #     ms=5,
# #     datasets=[scp_baseline[1], scp_corrected[1], scp_data[1]],
# #     labels=["baseline", "corrected_data", 'average_raw'],
# #     legend="best",
# # )

# # 计算面积
# inttrapz_area = scp_corrected.trapezoid(dim="x")
# # intsimps_area = scp_corrected.simpson(dim="x")

# # scp.plot_multiple(
# #     method="scatter",
# #     ms=5,
# #     datasets=[inttrapz_area,  intsimps_area],
# #     labels=["trapzoidal rule", "simpson' rule"],
# #     legend="best",
# # )

# # 归一化
# for i in range(scp_corrected.shape[0]):
#     scp_corrected[i, :] = np.divide(scp_corrected[i, :], inttrapz_area[i])
# scp_corrected.to_xarray().to_pandas().T.to_csv(path.joinpath(data_folder, f'all_spect_{filename}.csv'), header=True,)
# _ = scp_corrected.plot(lw=1.0, figure_figsize=(3.3, 2.5), clear=True,)

# # 寻峰
# peakslist = [s.find_peaks(distance=10, )[0].x.data for s in scp_corrected[:, 6490.0:6500.0]]
# peakslist = pd.DataFrame(peakslist)
# _ = peakslist.plot(lw=1.0)

# pd.concat([peakslist, df_std['peak_std']], 
#           axis=1, ignore_index=True,).to_csv(path.joinpath(data_folder, f'all_peak_std_{filename}.csv'),
#                                              index=False, header=[r'peak', r'std'],)
# # # 面积
# # std = np.divide(df_std['area_std'].to_numpy(), inttrapz_area.data)
# # diff_std = np.sqrt(std[:]**2 + std[2]**2)

# # MNO = scp_corrected[2].copy()
# # diff = scp_corrected[:] - MNO
# # diff_area = diff.abs().trapezoid(dim="x")
# # _ = diff_area.plot()
# print(r'Done')

### Version-1

In [None]:
# import spectrochempy as scp
# from spectrochempy import Coord, ur
# import numpy as np
# import pandas as pd
# import matplotlib.pyplot as plt
# from matplotlib import ticker
# import os
# import glob
# import matplotlib.transforms as transforms
# import matplotlib.gridspec as gridspec

In [None]:
# def get_folder_names(folder_path):
#     folder_names = []
#     for entry in os.scandir(folder_path):
#         if entry.is_dir():
#             folder_names.append(entry.name)
#     return folder_names

# def create_folders(folder_out_path, folder_names):
#     for folder_name in folder_names:
#         folder_path = os.path.join(folder_out_path, folder_name)
#         try:
#             os.mkdir(folder_path)
#             print(f"Created folder: {folder_name}")
#         except OSError as e:
#             print(f"Failed to create folder: {folder_name} - Error: {e}")

In [None]:
# folder_path=r'C:\Users\chengliu\Desktop\Done\Ex Situ\AA\All\Data'
# file_key='*.txt'
# folder_out_path=r'C:\Users\chengliu\Desktop\Done\Ex Situ\AA\All\Done'

# folder_names = get_folder_names(folder_path)
# # create_folders(folder_out_path, folder_names)

In [None]:
# # Kbeta  A 框数据集

# energy_max = pd.Series()
# energy_max_A = pd.Series()
# energy_max_B = pd.Series()

# for foldername in folder_names:

#     filelist = glob.glob(os.path.join(folder_path, foldername, 'Mn', file_key))
#     # scan_out_folder = os.path.join(folder_out_path, foldername)

#     AA = pd.Series()

#     for filetxt in filelist:

#         file_name = os.path.split(filetxt)[-1][:-4]

#         # 读取单一文件
#         data_A_txt = np.loadtxt(filetxt, comments='#')
#         data_A_scp = scp.NDDataset(data=[data_A_txt[:, 1]], title='Intensity (counts)',
#                                    author="Cheng Liu",
#                                    description="Kbeta of Mn, ALBA",
#                                    history="creation")
#         data_A_scp.x = Coord(data_A_txt[:, 0], title="Energy (eV)")
#         data_A_scp.y = Coord([0], title="Spectrum Number")
#         # display(data_A_scp)
#         # _ = data_A_scp.plot(title=file_name)

#         # 基线校准
#         blc = scp.Baseline(
#                             log_level="INFO",
#                             model="polynomial",  # use a polynomial model
#                             order='linear'  # with linear method
#                             )
#         # blc = scp.Baseline(
#         #     log_level="INFO",
#         #     model="polynomial",  # use a polynomial model
#         #     order='linear',  # with linear method
#         #     ranges=([6462., 6463.], [6510., 6511.]),
#         #     )

#         _ = blc.fit(data_A_scp)   # fit the baseline
#         baseline_scp = blc.baseline
#         corrected_scp = blc.corrected  # get the corrected dataset

#         # scp.plot_multiple(
#         #     method="scatter",
#         #     ms=5,
#         #     datasets=[baseline_scp, corrected_scp, data_scp],
#         #     labels=["baseline", "corrected_data", 'data'],
#         #     legend="best",
#         # )

#         # 计算面积
#         inttrapz_A = corrected_scp.trapezoid(dim="x")
#         # intsimps_A = corrected_scp.simpson(dim="x")

#         # scp.plot_multiple(
#         #     method="scatter",
#         #     ms=5,
#         #     datasets=[inttrapz_A,  intsimps_A],
#         #     labels=["trapzoidal rule", "simpson' rule"],
#         #     legend="best",
#         # )
#         # display(inttrapz_A)

#         # 归一化，并检查归一化后的数据面积是否为一
#         data_norm = corrected_scp/inttrapz_A
#         # data_norm = corrected_scp/intsimps_A
#         # area_check = data_norm.simpson(dim="x")
#         # display(area_check)
#         # display(data_norm)

#         # 合并单一测试数据 且输出
#         energy_pd = pd.Series(data=data_A_txt[:, 0], name='energy')
#         data_norm_pd = pd.Series(data=data_norm.data.squeeze(), name='normalized_intensity')

#         # (pd.concat([energy_pd, data_norm_pd], axis=1, ignore_index=False)
#         #  .to_csv(os.path.join(scan_out_folder, f"{file_name}.dat"),
#         #          header=True, index=None, sep=' '))

#         # 合并同一样品的多组测试数据（已经归一化），名字为对应的 _norm_all.txt
#         AA = pd.concat([AA, data_norm_pd], axis=1, ignore_index=False)
#     data_norm_sum = pd.concat([energy_pd, AA.iloc[:, 1:]], axis=1, ignore_index=False)
#     data_norm_sum.to_csv(os.path.join(folder_out_path, 'sample', f"{foldername}_norm_all.dat"), header=True, index=None, sep=',')
#     # display(data_norm_sum)

#     # 将每一个样本数据平均化，然后再归一化
#     data_sum = scp.NDDataset(data=AA.iloc[:, 1:].to_numpy(), title='Intensity (counts)',
#                              name='Normalized Data sum')
#     data_sum.y = Coord(energy_pd.to_numpy(), title="Energy (eV)")
#     data_sum.x = Coord(np.arange(AA.shape[1]-1), title="Spectrum Number")
#     data_sum.swapdims('y', 'x', inplace=True)
#     # display(data_sum)
#     # _ =  data_sum.plot(title=f"{foldername}_data_sum")
    
#     # 单一样品 平均化
#     data_average_A = scp.mean(data_sum, dim='x', keepdims=True)
#     # display(data_average_A)
#     # _ = data_average_A.plot()

#     # 计算面积
#     inttrapz_A2 = data_average_A.trapezoid(dim="y")
#     # intsimps_A2 = scp.simpson(data_average_A, dim="y")

#     # scp.plot_multiple(
#     #     method="scatter",
#     #     ms=5,
#     #     datasets=[inttrapz_A2, intsimps_A2],
#     #     labels=["trapzoidal rule", "simpson' rule"],
#     #     legend="best",
#     # )

#     # 归一化，并检查归一化后的数据面积是否为一
#     data_average_norm = data_average_A/inttrapz_A2
#     # data_average_norm = data_average_A/intsimps_A2
#     # area_check2 = data_average_norm.simpson(dim="y")
#     # display(area_check2)
#     # _ = data_average_norm.plot()

#     # 每个样品单一测试的 峰位置，以及 标准偏差
#     positions = [s.find_peaks(distance=6)[0].y.values for s in data_sum[:, 6490.0: 6496.0]]
#     energy_mean = scp.NDDataset(data=positions).mean()
#     energy_std = scp.NDDataset(data=positions).std()
#     energy = pd.DataFrame({'sample': f'{foldername}', "energy_max": [energy_mean], 'std': [energy_std]})
#     energy_max = pd.concat([energy_max, energy], axis=0, ignore_index=False)

#     # 画出单一样品的峰位置
#     # peakslist = [s.find_peaks(distance=6)[0] for s in data_sum[:, 6490.0: 6496.0]]
#     # ax = data_sum[:, 6490.0: 6496.0].plot()
#     # for peaks in peakslist:
#     #   peaks.plot_scatter(
#     #       ax=ax,
#     #       marker="v",
#     #       ms=3,
#     #       color="red",
#     #       clear=False,
#     #       data_only=True,
#     #       ylim=(-0.01, 0.20),
#     #   )

#     # 对比一下与上述找到峰的 mean 的区别, 结论是有区别
#     positions_A = [s.find_peaks(distance=6)[0].y.values for s in data_average_norm[:, 6490.0: 6496.0]]
#     peak = pd.DataFrame({'sample': f'{foldername}', "energy_average": positions_A})
#     energy_max_A = pd.concat([energy_max_A, peak], axis=0, ignore_index=False)

#     # scp.plot_multiple(
#     #     method="scatter",
#     #     ms=5,
#     #     datasets=[[positions_A], energy_mean],
#     #     labels=["average_peak", "data_mean_peak"],
#     #     legend="best",
#     # )

#     # PCA 计算
#     pca = scp.PCA()
#     pca.fit(data_sum.T)
#     # pca.printev()
#     # _ = pca.screeplot()
#     scores = pca.transform()
#     # _ = pca.loadings.plot()
#     # _ = pca.scores.T.plot()

#     # Evolving Factor Analysis (EFA) 计算
#     efa = scp.EFA()
#     efa.fit(data_sum.T)
#     efa.n_components = 2
#     C0 = efa.transform()
#     # _ = C0.T.plot()
#     mcr = scp.MCRALS(max_iter=100, normSpec="euclid", tol=0.0001, maxdiv=200)
#     mcr.fit(data_sum.T, C0)

#     # 输出 mcr als 成分的图像
#     ax = mcr.C.T.plot()
#     # _ = mcr.St.plot()
#     plt.legend(loc='upper left', frameon=False, labelcolor='linecolor', labels=['component1', 'component2'])
#     plt.savefig(os.path.join(folder_out_path, 'sample', f"{foldername}_mcr_n=2.tif"), dpi=600)
#     plt.close()

#     # MCR_ALS 结果归一化
#     # 计算面积
#     inttrapz_A3 = mcr.C.T[1, :].trapezoid(dim="y")
#     # intsimps_A3 = mcr.C.T[1, :].simpson(dim='y')

#     # scp.plot_multiple(
#     #     method="scatter",
#     #     ms=5,
#     #     datasets=[inttrapz_A3, intsimps_A3],
#     #     labels=["trapzoidal rule", "simpson' rule"],
#     #     legend="best",
#     # )

#     # 检查积分面积是否为归一化结果
#     mcr_norm = mcr.C.T[1, :]/inttrapz_A3
#     # mcr_norm = mcr.C.T[1, :]/intsimps_A3
#     # area_check3 = mcr_norm.simpson(dim = 'y')
#     # display(area_check3)
#     # _ = mcr_norm.plot()

#     # 比较归一化后的 平均化 和 MCR_ALS_norm 的结果
#     ax = mcr_norm.plot(label='mcr_norm', color='blue', linewidth=1.5)
#     ax.set_xlabel('Energy (eV)')
#     ax.set_ylabel('Intensity(a.u.)')
#     ax.set_title(f"Kbeta, {foldername}")
#     _ = data_average_norm.plot(ax=ax, clear=False, data_only=True, label='average_norm', color='red', linewidth=1.5)
#     plt.legend(loc='upper left', frameon=False, labelcolor='linecolor')
#     plt.savefig(os.path.join(folder_out_path, 'sample', f"{foldername}_average+mcr_norm.tif"), dpi=600)
#     plt.close()
    
#     # 将 mcr 的结果找到它的 peak
#     positions_B = [s.find_peaks(distance=6)[0].y.values for s in  mcr_norm[:, 6490.0: 6496.0]]
#     peak = pd.DataFrame({'sample': f'{foldername}', "energy_mcr": positions_B})
#     energy_max_B = pd.concat([energy_max_B, peak], axis=0, ignore_index=False)

#     # 输出结果

#     # 输出每个样本平均化，归一化后的谱线数据， 以及 MCR-ALS 后 归一化的数据
#     (pd.DataFrame({"enegry": energy_pd.to_numpy(), "average_norm": data_average_norm.data.squeeze(), "mcr_norm": mcr_norm.data.squeeze()})
#      .to_csv(os.path.join(folder_out_path, 'sample',f"{foldername}_average+mcr_norm.dat"), header=True, index=None, sep = ' '))

#     # 输出 MCR-ALS 成分的谱线数据
#     (pd.concat([energy_pd, pd.DataFrame(mcr.C.data)], axis=1, ignore_index=False)
#      .to_csv(os.path.join(folder_out_path, 'sample', f"{foldername}_mcr_n=2.dat"), header=['energy', 'n1', 'mcr_n2'], index=None, sep = ' '))

# # 输出每个样品的峰位置 以及 标准误差，不基于 平均化的谱线
# energy_max.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "energy_average_std.dat"), header=True, index=None, sep = ' ')
# # 输出每个样品的平均化的峰位置
# energy_max_A.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "energy_average.dat"), header=True, index=None, sep = ' ')
# energy_max_B.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "energy_mcr.dat"), header=True, index=None, sep = ' ')
# print('Done')

In [None]:
# # HL and standard deviation for the averaged spectra
# # NOTE(review): this whole cell is commented out (disabled). It uses `os`, `glob` and
# # `folder_out_path`, none of which appear in the notebook's visible import/setup cells;
# # confirm they are defined before re-enabling.

# average_path_list = glob.glob(os.path.join(folder_out_path, 'sample', r'*_average+mcr_norm.dat'))
# spectrum_list = glob.glob(os.path.join(folder_out_path, 'sample', r'*_norm_all.dat'))
# # display(average_path_list)
# # display(spectrum_list)

# # Seeds for the pd.concat accumulation below. Presumably each seed contributes a leading
# # 'empty' column, which would explain the `.iloc[:, 1:]` used when writing the results -- confirm.
# BB = pd.Series(name='empty')
# area_sum = pd.Series(name='empty')

# for average_path in average_path_list:

#     # Read a single file
#     # [:-21] strips the 21-character suffix '_average+mcr_norm.dat', leaving the sample name.
#     file_name = os.path.split(average_path)[-1][:-21]
#     # display(file_name)

#     # NOTE(review): '\s+' is not a raw string; consider r'\s+' when re-enabling.
#     # NOTE(review): the third column is labelled 'std_norm' here, but the writer earlier in the
#     # notebook stores a column called "mcr_norm" in these files -- verify the label.
#     average_pd = pd.read_csv(average_path, comment='#', header=None, skiprows=1,
#                              names=['energy', file_name, 'std_norm'], sep='\s+')
#     # display(average_pd.head())

#     # Merge: append this file's three columns to the right of BB
#     BB = pd.concat([BB, average_pd], axis=1, ignore_index=False)
# # display(BB.head())

# # Column 0 of BB is the 'empty' seed, so 2::3 picks the second column of every file
# # (the one named after the sample) and column 1 is the first file's energy axis.
# average_sum_B = scp.NDDataset(data=BB.iloc[:, 2::3].to_numpy(), title='Intensity (counts)', name='average_sum')
# average_sum_B.y = Coord(BB.iloc[:, 1].to_numpy(), title="Energy (eV)")
# average_sum_B.x = Coord(np.arange(BB.shape[1]/3-1), title="Spectrum Number")
# average_sum_B.swapdims('y', 'x', inplace=True)
# # display(average_sum_B)
# # _ = average_sum_B.plot(title=r'creay!')

# # NOTE(review): index 4 is hard-coded as the MnO2 reference spectrum; it presumably depends
# # on the order returned by glob -- confirm before relying on it.
# average_MnO2 = average_sum_B[4, :].copy()
# # display(average_MnO2)
# # _ = average_MnO2.plot()

# # Compute the difference of the averaged spectra (subtract the index-4 data from all of them)
# average_sum_B.data -= average_sum_B.data[4]
# # display(average_sum_B)
# # ax = average_sum_B.plot()
# # plt.legend(loc='upper left', ncols=2, frameon=False, labelcolor='linecolor')
# # plt.savefig(os.path.join(folder_out_path, "average_HL_difference.tif"), dpi=600)
# # # plt.show()
# # plt.close()

# # Compute the difference area of the averaged spectra (integral of the absolute difference)
# inttrapz_B = (scp.abs(average_sum_B)).trapezoid(dim="y")
# intsimps_B = (scp.abs(average_sum_B)).simpson(dim='y')

# # ax = scp.plot_multiple(
# #     method="scatter",
# #     ms=5,
# #     datasets=[inttrapz_B, intsimps_B],
# #     labels=["trapzoidal rule", "simpson' rule"],
# #     legend="best",
# #     title="averaged spectrum difference area"
# # )
# # plt.savefig(os.path.join(folder_out_path, "average_HL_area.tif"), dpi=600)
# # # plt.show()
# # plt.close()

# for data_list in spectrum_list:

#     # [:-4] strips the '.dat' extension from the file name.
#     data_list_name = os.path.split(data_list)[-1][:-4]
#     data_pd = pd.read_csv(data_list, comment='#', header=0, sep=',')
#     # Column 0 is used as the energy axis; the remaining columns are the individual spectra.
#     spectrum_sum = scp.NDDataset(data=data_pd.iloc[:, 1:].to_numpy(), title='Intensity (counts)', name='spectrum_sum')
#     spectrum_sum.y = Coord(data_pd.iloc[:, 0].to_numpy(), title="Energy (eV)")
#     spectrum_sum.x = Coord(np.arange(data_pd.shape[1]-1), title="Spectrum Number")
#     spectrum_sum.swapdims('y', 'x', inplace=True)
#     # display(spectrum_sum)

#     # Subtract the averaged LS MnO2 spectrum from every individual measurement of each sample
#     # to obtain one difference spectrum per measurement
#     spectrum_sum.data = spectrum_sum.data - average_MnO2.data
#     inttrapz_B2 = (scp.abs(spectrum_sum)).trapezoid(dim="y")
#     intsimps_B2 = (scp.abs(spectrum_sum)).simpson(dim='y')

#     ax = scp.plot_multiple(
#         method="scatter",
#         ms=5,
#         datasets=[inttrapz_B2, intsimps_B2],
#         labels=["trapzoidal rule", "simpson' rule"],
#         legend="best",
#         title=f"{data_list_name}_all_difference_area"
#     )
#     plt.savefig(os.path.join(folder_out_path, 'sample', f"{data_list_name}_all_HL_area.tif"), dpi=600)
#     plt.close()

#     # Normalization / area: mean and standard deviation of the trapezoid-rule areas
#     area_average = inttrapz_B2.mean()
#     area_std = inttrapz_B2.std()
#     # area_average = intsimps_B2.mean()
#     # area_std = intsimps_B2.std()
#     area = pd.DataFrame({'sample': f'{data_list_name}', "area_average": [area_average], 'std': [area_std]})
#     area_sum = pd.concat([area_sum, area], axis=0, ignore_index=False)

# # Write out the results

# # Averaged spectra of all samples, kept for plotting
# BB.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "spectrum_averaged_all.dat"), header=True, index=None, sep = ' ')

# # Difference areas of all averaged spectra (Simpson-rule values; trapezoid alternative kept below)
# (pd.DataFrame({"area_HL": intsimps_B.data.squeeze()})
#  .to_csv(os.path.join(folder_out_path, "area_average_HL.dat"), header=True, index=None, sep=' '))
# # (pd.DataFrame({"area_HL": inttrapz_B.data.squeeze()})
# #  .to_csv(os.path.join(folder_out_path, "area_average_HL.dat"), header=True, index=None, sep=' '))

# # Difference areas of all samples, including the standard deviation
# (area_sum.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "area_average_std_HL.dat"), header=True, index=None, sep=' '))

# print('Done')

In [None]:
# # HL and standard deviation for the MCR spectra
# # NOTE(review): this whole cell is commented out (disabled). It uses `os`, `glob` and
# # `folder_out_path`, none of which appear in the notebook's visible import/setup cells;
# # confirm they are defined before re-enabling.

# # NOTE(review): the folder is spelled 'sample' on the first line and 'Sample' on the second;
# # this only resolves to the same directory on a case-insensitive file system.
# average_path_list = glob.glob(os.path.join(folder_out_path, 'sample', r'*_average+mcr_norm.dat'))
# spectrum_list = glob.glob(os.path.join(folder_out_path, 'Sample', r'*_norm_all.dat'))
# # display(average_path_list)
# # display(spectrum_list)

# # Seeds for the pd.concat accumulation below. Presumably each seed contributes a leading
# # 'empty' column, which would explain the `.iloc[:, 1:]` used when writing the results -- confirm.
# BB = pd.Series(name='empty')
# area_sum = pd.Series(name='empty')

# for average_path in average_path_list:

#     # Read a single file
#     # [:-21] strips the 21-character suffix '_average+mcr_norm.dat', leaving the sample name.
#     file_name = os.path.split(average_path)[-1][:-21]
#     # display(file_name)

#     # NOTE(review): '\s+' is not a raw string; consider r'\s+' when re-enabling.
#     # NOTE(review): the third column is labelled 'std_norm' here, but the writer earlier in the
#     # notebook stores a column called "mcr_norm" in these files -- verify the label.
#     average_pd = pd.read_csv(average_path, comment='#', header=None, skiprows=1,
#                              names=['energy', file_name, 'std_norm'], sep='\s+')
#     # display(average_pd.head())

#     # Merge: append this file's three columns to the right of BB
#     BB = pd.concat([BB, average_pd], axis=1, ignore_index=False)
# # display(BB.head())

# # Column 0 of BB is the 'empty' seed, so 3::3 picks the third column of every file
# # (labelled 'std_norm' above) and column 1 is the first file's energy axis.
# average_sum_B = scp.NDDataset(data=BB.iloc[:, 3::3].to_numpy(), title='Intensity (counts)', name='mcr_sum')
# average_sum_B.y = Coord(BB.iloc[:, 1].to_numpy(), title="Energy (eV)")
# average_sum_B.x = Coord(np.arange(BB.shape[1]/3-1), title="Spectrum Number")
# average_sum_B.swapdims('y', 'x', inplace=True)
# # display(average_sum_B)
# # _ = average_sum_B.plot(title=r'creay!')

# # NOTE(review): index 4 is hard-coded as the MnO2 reference spectrum; it presumably depends
# # on the order returned by glob -- confirm before relying on it.
# average_MnO2 = average_sum_B[4, :].copy()
# # display(average_MnO2)
# # _ = average_MnO2.plot()

# # Compute the difference of the spectra (subtract the index-4 data from all of them)
# average_sum_B.data -= average_sum_B.data[4]
# # display(average_sum_B)
# # ax = average_sum_B.plot()
# # plt.legend(loc='upper left', ncols=2, frameon=False, labelcolor='linecolor')
# # plt.savefig(os.path.join(folder_out_path, "average_HL_difference.tif"), dpi=600)
# # # plt.show()
# # plt.close()

# # Compute the difference area of the spectra (integral of the absolute difference)
# inttrapz_B = (scp.abs(average_sum_B)).trapezoid(dim="y")
# intsimps_B = (scp.abs(average_sum_B)).simpson(dim='y')

# # ax = scp.plot_multiple(
# #     method="scatter",
# #     ms=5,
# #     datasets=[inttrapz_B, intsimps_B],
# #     labels=["trapzoidal rule", "simpson' rule"],
# #     legend="best",
# #     title="averaged spectrum difference area"
# # )
# # plt.savefig(os.path.join(folder_out_path, "average_HL_area.tif"), dpi=600)
# # # plt.show()
# # plt.close()

# for data_list in spectrum_list:

#     # [:-4] strips the '.dat' extension from the file name.
#     data_list_name = os.path.split(data_list)[-1][:-4]
#     data_pd = pd.read_csv(data_list, comment='#', header=0, sep=',')
#     # Column 0 is used as the energy axis; the remaining columns are the individual spectra.
#     spectrum_sum = scp.NDDataset(data=data_pd.iloc[:, 1:].to_numpy(), title='Intensity (counts)', name='spectrum_sum')
#     spectrum_sum.y = Coord(data_pd.iloc[:, 0].to_numpy(), title="Energy (eV)")
#     spectrum_sum.x = Coord(np.arange(data_pd.shape[1]-1), title="Spectrum Number")
#     spectrum_sum.swapdims('y', 'x', inplace=True)
#     # display(spectrum_sum)

#     # Subtract the LS MnO2 reference spectrum from every individual measurement of each sample
#     # to obtain one difference spectrum per measurement
#     spectrum_sum.data = spectrum_sum.data - average_MnO2.data
#     inttrapz_B2 = (scp.abs(spectrum_sum)).trapezoid(dim="y")
#     intsimps_B2 = (scp.abs(spectrum_sum)).simpson(dim='y')

#     ax = scp.plot_multiple(
#         method="scatter",
#         ms=5,
#         datasets=[inttrapz_B2, intsimps_B2],
#         labels=["trapzoidal rule", "simpson' rule"],
#         legend="best",
#         title=f"{data_list_name}_all_mcr_difference_area"
#     )
#     plt.savefig(os.path.join(folder_out_path, 'sample', f"{data_list_name}_all_mcr_HL_area.tif"), dpi=600)
#     plt.close()

#     # Normalization / area: mean and standard deviation of the trapezoid-rule areas
#     area_average = inttrapz_B2.mean()
#     area_std = inttrapz_B2.std()
#     # area_average = intsimps_B2.mean()
#     # area_std = intsimps_B2.std()
#     area = pd.DataFrame({'sample': f'{data_list_name}', "area_average": [area_average], 'std': [area_std]})
#     area_sum = pd.concat([area_sum, area], axis=0, ignore_index=False)

# # Write out the results

# # Spectra of all samples as accumulated in BB, kept for plotting
# BB.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "spectrum_mcr_all.dat"), header=True, index=None, sep = ' ')

# # Difference areas of all spectra (Simpson-rule values; trapezoid alternative kept below)
# # NOTE(review): the disabled alternative below still writes to "area_average_HL.dat",
# # not "area_mcr_HL.dat" -- check the target before switching to it.
# (pd.DataFrame({"area_HL": intsimps_B.data.squeeze()})
#  .to_csv(os.path.join(folder_out_path, "area_mcr_HL.dat"), header=True, index=None, sep=' '))
# # (pd.DataFrame({"area_HL": inttrapz_B.data.squeeze()})
# #  .to_csv(os.path.join(folder_out_path, "area_average_HL.dat"), header=True, index=None, sep=' '))

# # Difference areas of all samples, including the standard deviation
# (area_sum.iloc[:, 1:].to_csv(os.path.join(folder_out_path, "area_mcr_std_HL.dat"), header=True, index=None, sep=' '))

# print('Done')