# XRD: MSPD 2025

In [None]:
%matplotlib inline
import re
import sys
from pathlib import Path

import hyperspy.api as hs
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import openpyxl
import pandas as pd
import xarray as xr
from matplotlib import gridspec, patches, ticker
from matplotlib.colorbar import Colorbar
from matplotlib.colors import LinearSegmentedColormap, ListedColormap, to_hex, to_rgba
from matplotlib.lines import Line2D
from matplotlib.transforms import Bbox
from mpl_toolkits.mplot3d import Axes3D
from scipy import ndimage


In [None]:
# Make the personal figure-helper folder importable before importing from it.
sys.path.append(r"D:\CHENG\OneDrive - UAB\ICMAB-Python\Figure")
from colors import tol_cmap, tol_cset  # type: ignore

# Global plotting defaults come from a personal matplotlib style sheet.
plt.style.use(r"D:\CHENG\OneDrive - UAB\ICMAB-Python\Figure\liuchzzyy.mplstyle")
# print(plt.style.available)

# xarray setting
xr.set_options(
    cmap_sequential="viridis",
    cmap_divergent="viridis",
    display_width=150,
)  # viridis, gray

# Colour palettes.
# All three palettes are bound on every path, so a later cell never hits a
# NameError when tol_cset() returns None.
colors_opt = ["#b0a3d1", "#8bd0d5", "#a8e0ee", "#c5e1a3", "#ffe48b", "#f5a37d", "#e88db1"]
_vibrant = tol_cset("vibrant")
if _vibrant is not None:
    colors = list(_vibrant)
else:
    # Fallback colors in case tol_cset returns None
    colors = ["#0077BB", "#33BBEE", "#009988", "#EE7733", "#CC3311", "#EE3377", "#BBBBBB"]
_bright = tol_cset("bright")
colors_opt2 = list(_bright) if _bright is not None else list(colors)

# Register the extra colormaps once; re-running the cell must not register
# the same name twice.
for _cmap_name in ("sunset", "rainbow_PuRd"):  # spare alternative: plasma
    if _cmap_name not in plt.colormaps():
        cmap = tol_cmap(_cmap_name)
        if isinstance(cmap, LinearSegmentedColormap):
            plt.colormaps.register(cmap)

# Output folder for figures and exported tables.
path_out = Path(r"C:\Users\chengliu\Desktop\Figure")

# Set math font
mpl.rcParams["mathtext.fontset"] = "custom"
mpl.rcParams["mathtext.rm"] = "Arial"
mpl.rcParams["mathtext.it"] = "Arial:italic"
mpl.rcParams["mathtext.bf"] = "Arial:bold"
mpl.rcParams["mathtext.sf"] = "Arial"
mpl.rcParams["mathtext.tt"] = "Arial"
mpl.rcParams["mathtext.cal"] = "Arial"
mpl.rcParams["mathtext.default"] = "regular"

## Dino 图， Cell D

In [None]:
# Load the electrochemistry exports: every *.txt file below the Echem\B folder.
path_filelist = list(Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Echem\B").glob(r"**\*.txt"))
echem_all = []
for path_file in path_filelist:
    # Each export states its own header length in a "Nb header lines : N" entry.
    # Reset per file so a value left over from the previous file is never reused.
    line_skip = None
    with open(path_file, "r", encoding="latin_1") as file:
        for line in file:
            if line.startswith("Nb header lines"):
                line_skip = int(line.split(":")[1].strip())
                break
    if line_skip is None:
        raise ValueError(f"'Nb header lines' entry not found in {path_file}")
    df = pd.read_csv(path_file, sep="\t", comment="#", skiprows=line_skip - 1, encoding="latin_1", index_col=None, decimal=".").dropna(axis=1, how="all")
    df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]] = df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]].apply(pd.to_numeric, errors="coerce")
    df["time/s"] = df["time/s"].apply(pd.to_datetime, format="mixed", errors="coerce")
    df["cycle number"] = df["cycle number"].astype(float).astype(np.int16)
    # Cell voltage = working-electrode potential minus counter-electrode potential.
    df["Voltage/V"] = df["Ewe/V"] - df["Ece/V"]
    echem_all.append(df)
# Merge all electrochemistry files into one table ordered by timestamp.
echem_all = pd.concat(echem_all, axis=0, ignore_index=True).sort_values(by="time/s").reset_index(drop=True)


# Acquisition time of every diffraction pattern, read from the .xye headers.
filelist = list(Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Data\IS17_D").glob(r"*.xye"))
range_index, wave_length, time_processed = [], [], []
for path_file in filelist:
    # Exactly one (index, time, wavelength) triple is recorded per file so the
    # three lists can never drift out of step.
    wave_value, recored_time = np.nan, None
    with open(path_file, "r", encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("# Wave"):
                if np.isnan(wave_value):
                    wave_value = float(line.split()[3])
            elif line.startswith("# Date"):
                if recored_time is None:
                    recored_time = str(line.split()[3])
            # Stop reading as soon as both header values are known.
            if recored_time is not None and not np.isnan(wave_value):
                break
    if recored_time is None:
        # Without a timestamp the pattern cannot be matched to the electrochemistry.
        raise ValueError(f"'# Date' header entry not found in {path_file}")
    # Keep the last five characters of the final "_"-separated part of the file stem.
    file_id = path_file.stem.split("_")[-1].split(".")[0][-5:]
    range_index.append(file_id)
    wave_length.append(wave_value)
    time_processed.append(recored_time)
spectrum_time_all = pd.DataFrame({
    "Range_Index": range_index,
    "time/s": time_processed,
    "Wave_Length": wave_length,
})
spectrum_time_all["time/s"] = pd.to_datetime(spectrum_time_all["time/s"], format=r"%Y-%m-%d_%H:%M:%S")
spectrum_time_all["Range_Index"] = pd.to_numeric(spectrum_time_all["Range_Index"])
spectrum_time_all["Wave_Length"] = pd.to_numeric(spectrum_time_all["Wave_Length"])

# Load the pivoted XRD table (rows: d spacing, columns: pattern index).
path_xrd = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Data")

spectrum_all = pd.read_csv(path_xrd.joinpath(r"spectrum_all_d_spacing.csv"), index_col=0, header=0)
spectrum_all.index = pd.to_numeric(spectrum_all.index)
spectrum_all.columns = pd.to_numeric(spectrum_all.columns)

# Reference pattern exported from PDF card 00-030-0820.
pdf = pd.read_csv(path_xrd.joinpath(r"PDF Card - 00-030-0820.csv"), index_col=None, header=0, comment="#")

In [None]:
# Keep cycles 0, 1 and 2 only, and discard every point below 0.8 V.
selected_echem = echem_all.loc[
    echem_all["cycle number"].isin([0, 1, 2]) & (echem_all["Voltage/V"] >= 0.8)
].copy()

# selected_echem = selected_echem.iloc[:-5, :].copy()
# Elapsed time in hours, counted from the first retained data point.
selected_echem["charge_time"] = (
    (selected_echem["time/s"] - selected_echem["time/s"].iloc[0]).dt.total_seconds() / 3600
)

# Attach to every electrochemistry point the pattern recorded closest in time
# (at most 5 s away), then keep one row per matched pattern.
selected_spectrum_time = pd.merge_asof(
    selected_echem.sort_values(by="time/s"),
    spectrum_time_all.sort_values(by="time/s"),
    on="time/s",
    direction="nearest",
    tolerance=pd.Timedelta("5s"),
)
selected_spectrum_time = selected_spectrum_time.dropna(subset=["Range_Index"])
selected_spectrum_time = selected_spectrum_time.drop_duplicates(subset=["Range_Index"], keep="first")
selected_spectrum_time = selected_spectrum_time.reset_index(drop=False)

# Restrict the patterns to the d-spacing window of interest (inclusive bounds).
d_spacing_range = (0.5, 15.0)
spectrum_all = spectrum_all.loc[
    (spectrum_all.index >= d_spacing_range[0]) & (spectrum_all.index <= d_spacing_range[1])
]

# Keep only the columns whose numeric label is one of the matched pattern indices.
range_index_list = selected_spectrum_time["Range_Index"].tolist()
selected_spectrum = spectrum_all.loc[:, spectrum_all.columns.isin(range_index_list)]

In [None]:
%matplotlib inline
plt.close("all")

# Build the figure.
# gridspec inside gridspec
fig = plt.figure(figsize=(3.3, 2.5))
gs = gridspec.GridSpec(1, 1, width_ratios=None, height_ratios=None, wspace=0, hspace=0, figure=fig)

# Single panel: selected patterns plus reference peak markers.
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(0.8)

# Range_Index values of the patterns to draw and their legend labels (same order).
index_labels = [1, 46, 75]
labels=[r'OCV', r'Full Charge', r'Full Discharge']
for i in range(len(index_labels)):
    idx = index_labels[i]
    # Only draw a pattern that was matched to an electrochemistry time point.
    if idx in selected_spectrum_time["Range_Index"].values:
        value = selected_spectrum_time.loc[selected_spectrum_time["Range_Index"] == idx, ["Range_Index"]]
        ax.plot(
            selected_spectrum.index,
            # Each curve is shifted upwards by 200 intensity units per index value.
            selected_spectrum.loc[:, value["Range_Index"].values[0]]+idx*200,
            color=colors[i],
            lw=1.0,
            alpha=0.8,
            label=labels[i],
        )


# Overlay the reference card: one vertical stick per row of `pdf`, starting at
# y = 6000 (the lower y-limit set below) with a height of intensity_counts * 50.
for i in range(pdf.shape[0]):
    temp = pdf.iloc[i, :]
    ax.vlines(x=temp['d_spacing'], ymin=6000, ymax=6000+temp['intensity_counts']*50, lw=1, color=colors[3], label=r'PDF#00-030-0820' if i == 0 else None, zorder=0)

# Peak positions per material are kept in a dict so the lists may differ in length.
xrd_data = {
    r"MnO2_Exp.": {
        "positions": [2.15, 6.93, 4.89, 3.10, 2.39],
        "intensities": [50, 50, 50, 50, 50]
    },
    r"HRTEM_Exp.": {
        "positions": [2.40, 2.42, 4.75, 2.38, 2.04, 1.92, 1.84, 1.91, 2.13, 4.45, 5.24, 2.73, 2.78],
        "intensities": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
    }
}

# Lower end of the tick marks, one entry per material in xrd_data
# (axvline interprets ymin/ymax as fractions of the axes height).
XRD_y = [0.08, 0.15]
XRDcolors = [colors[5], colors[6]]

# Draw the tick marks of every material.
for i, (material, data) in enumerate(xrd_data.items()):
    positions = data["positions"]
    intensities = data["intensities"]

    # Truncate both lists to a common length.
    min_length = min(len(positions), len(intensities))
    positions = positions[:min_length]
    intensities = intensities[:min_length]

    # Scale the intensities to axes-fraction tick heights (50 -> 0.05).
    normalized_intensities = [intensity / 1000.0 for intensity in intensities]

    for j in range(len(positions)):
        ax.axvline(
            x=positions[j],
            ymin=XRD_y[i],
            ymax=XRD_y[i] + normalized_intensities[j],
            lw=1,
            c=XRDcolors[i],
            # Label only the first tick so each material appears once in the legend.
            label=material if j == 0 else None,
        )

# Axis limits, labels and tick locators.
ax.set_xlim(1.0, 8.0)
ax.set_xlabel(r"$\mathrm{d \ Spacing \ (\AA)}$", fontsize=11,)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=2, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=1, offset=0))

ax.set_yticks([])
ax.set_ylim(6000, 60000)
ax.set_ylabel(r"$\mathrm{Intensity}$", fontsize=11,)

ax.tick_params(axis="x", which="both", direction="out", labelsize=9, top=False)

# The legend is anchored outside the axes, to the right.
ax.legend(fontsize=9, loc="upper right", frameon=False, bbox_to_anchor=(1.5, 1.05), ncol=1)

# Save the figure as an LZW-compressed TIFF.
plt.savefig(
    Path.joinpath(path_out, r"opXRD_MSPD_04_dpi300.tif"),
    pad_inches=0.05,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# NOTE(review): the face colour is set after savefig, so it applies to the
# inline display only, not to the file written above.
plt.gcf().set_facecolor("white")
plt.show()


In [None]:
%matplotlib inline
plt.close("all")

# Build the figure.
# gridspec inside gridspec
fig = plt.figure(figsize=(7.0, 2.5))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 4], height_ratios=None, wspace=0, hspace=0, figure=fig)

# Panel A: voltage (bottom x axis) and current (top x axis) against elapsed time (y axis).
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(3.0)

ax.plot(selected_echem["Voltage/V"], selected_echem["charge_time"], ls="-", lw=1.0, c=colors[0], label=r"Voltage")

# Pattern indices that are drawn as highlighted markers.
index_labels = [15, 25, 37, 46, 50, 61, 70, 75, 87, 99, 109, 115, 126, 137]

# One marker per matched pattern: large and coloured when highlighted, small and grey otherwise.
for i, idx in enumerate(selected_spectrum_time["Range_Index"].tolist()):
    if idx in index_labels:
        value = selected_spectrum_time.loc[selected_spectrum_time["Range_Index"] == idx, ["Voltage/V", "charge_time"]]
        ax.scatter(
            value["Voltage/V"],
            value["charge_time"],
            c=colors[0],
            edgecolors="face",
            s=50,
            zorder=5,
        )
    else:
        # Row i is looked up by its positional index label.
        value = selected_spectrum_time.loc[selected_spectrum_time.index == i, ["Voltage/V", "charge_time"]]
        ax.scatter(
            value["Voltage/V"],
            value["charge_time"],
            c="lightgrey",
            edgecolors="face",
            s=30,
            zorder=1,
        )
ax.set_xlabel(
    r"Voltage (V vs. Zn/Zn$\mathrm{^{2\!+}\!)}$",
    fontsize=11,
)
ax.set_xlim(0.8, 2.0)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=0.4, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.2, offset=0))

ax.set_ylabel(r"Duration Time (hour)", fontsize=11)
ax.set_ylim(-0.5, 36)
ax.yaxis.set_major_locator(ticker.MultipleLocator(base=6, offset=0))
ax.yaxis.set_minor_locator(ticker.MultipleLocator(base=3, offset=0))


# Second x axis sharing the time axis, used for the current trace.
ax2 = ax.twiny()
ax2.set_position((0, 0, 1, 1))
ax2.set_box_aspect(3.0)

# The "<I>/mA" column is multiplied by 1000 to plot microamperes.
ax2.plot(selected_echem["<I>/mA"] * 1000, selected_echem["charge_time"], ls="--", lw=1.0, c=colors[3], label=r"Current")

ax2.set_xlabel(
    r"$\mathrm{Current  \ (\mu A)}$",
    fontsize=11,
)
ax2.set_xlim(-60, 60)
ax2.xaxis.set_major_locator(ticker.MultipleLocator(base=60, offset=0))
ax2.xaxis.set_minor_locator(ticker.MultipleLocator(base=30, offset=0))
ax2.tick_params(axis="both", which="both", labelsize=9, right=True, labelright=True)
ax.text(-0.6, 1.0, r"A", transform=ax.transAxes, fontsize=13, va="center", ha="right", fontfamily="Arial", fontweight="bold")

# Panel B: the diffraction patterns, coloured segment by segment.
subfig = fig.add_subfigure(gs[0, 1])
ax = subfig.add_axes((-0.25, 0, 1.0, 1.0))
ax.set_box_aspect(1.0)

index_labels = [15, 25, 37, 46, 50, 61, 70, 75, 87, 99, 109, 115, 126, 137]
# Segment boundaries; consecutive pairs delimit one colour gradient.
index_labels_b = [15, 46, 75, 109, 137]
for i in range(1, len(index_labels_b)):
    intervals = index_labels_b[i] - index_labels_b[i - 1]
    # The gradient direction within the "Spectral" map alternates between segments.
    color_use = ListedColormap(
    mpl.colormaps["Spectral"](np.linspace(0.15, 0.05, intervals)),
) if i%2==0 else ListedColormap(
    mpl.colormaps["Spectral"](np.linspace(0.05, 0.15, intervals)),
)
    # NOTE(review): idx stops one short of index_labels_b[i], so the final
    # boundary (137) is never drawn; confirm this is intended.
    for j in range(intervals):
        idx = index_labels_b[i - 1] + j
        if idx in selected_spectrum_time["Range_Index"].values:
            value = selected_spectrum_time.loc[selected_spectrum_time["Range_Index"] == idx, ["Range_Index"]]
            ax.plot(
                selected_spectrum.index,
                # The multiplier 000 equals 0, so no vertical offset is applied here.
                selected_spectrum.loc[:, value["Range_Index"].values[0]]+idx*000,
                # NOTE(review): divides by zero if a segment ever has intervals == 1.
                color=color_use(j / (intervals - 1)),
                lw=1.0,
                alpha=0.8,
            )

ax.set_yticks([])
ax.set_xlim(0.5, 8.0)
ax.set_xlabel(
    r"$\mathrm{d \ Spacing \ (\AA)}$",
    fontsize=11,
)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=2, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=1, offset=0))
ax.tick_params(axis="x", which="both", direction="out", labelsize=9, top=False)

# Peak positions per material are kept in a dict so the lists may differ in length.
xrd_data = {
    "MnO2": {
        "positions": [2.15, 6.93, 4.89, 3.10, 2.39],
        "intensities": [50, 50, 50, 50, 50]
    },
    "EMD": {
        "positions": [8.01, 5.98, 4.62, 2.48, 1.43],
        "intensities": [50, 50, 50, 50, 50]
    },
    "HRTEM": {
        "positions": [2.40, 2.42, 4.75, 2.38, 2.04, 1.92, 1.84, 1.91, 2.13, 4.45, 5.24, 2.73, 2.78],
        "intensities": [50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50]
    }
}

# Lower end of the tick marks for the three materials
# (axvline interprets ymin/ymax as fractions of the axes height).
XRD_y = [0, 0.03, 0.03]
XRDcolors = ["blue", "k", "orange"]

# Draw the tick marks of every material.
for i, (material, data) in enumerate(xrd_data.items()):
    positions = data["positions"]
    intensities = data["intensities"]

    # Truncate both lists to a common length.
    min_length = min(len(positions), len(intensities))
    positions = positions[:min_length]
    intensities = intensities[:min_length]

    # Scale the intensities to axes-fraction tick heights (50 -> 0.05).
    normalized_intensities = [intensity / 1000.0 for intensity in intensities]

    for j in range(len(positions)):
        ax.axvline(
            x=positions[j],
            ymin=XRD_y[i],
            ymax=XRD_y[i] + normalized_intensities[j], 
            lw=1,
            c=XRDcolors[i],
            # Label only the first tick so each material appears once in the legend.
            label=material if j == 0 else None,
        )
ax.legend(fontsize=9, loc="upper right", frameon=False, bbox_to_anchor=(1.0, 1.15))
ax.text(-0.05, 1.0, r"B", transform=ax.transAxes, fontsize=13, va="center", ha="right", fontfamily="Arial", fontweight="bold")

# Save the figure as an LZW-compressed TIFF.
plt.savefig(
    Path.joinpath(path_out, r"Chapter_IV_Figure_XRD_05_V2_300.tif"),
    pad_inches=0.05,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# NOTE(review): the face colour is set after savefig, so it applies to the
# inline display only, not to the file written above.
plt.gcf().set_facecolor("white")
plt.show()

## Operando XRD, MSPD 数据的清洗

In [None]:
# XRD 角度转换为 d spacing 的函数
def angle_to_d_spacing(two_theta_degrees, wavelength_angstrom=1.5406):
    """Convert XRD 2-theta angles to d spacings with Bragg's law.

    Parameters
    ----------
    two_theta_degrees : array-like or float
        2-theta angle(s) in degrees. Scalars, lists, tuples, NumPy arrays and
        pandas Series are all accepted.
    wavelength_angstrom : float, default=1.5406
        X-ray wavelength in angstrom; the default is the Cu K-alpha wavelength.

    Returns
    -------
    d_spacing : numpy.ndarray or numpy.float64
        d spacing(s) in angstrom, ``d = wavelength / (2 * sin(theta))`` with
        ``theta = two_theta / 2``. Entries whose ``sin(theta)`` is exactly zero
        are returned as NaN.
    """
    # Coerce first so plain Python sequences support the arithmetic below.
    two_theta = np.asarray(two_theta_degrees, dtype=float)

    # Bragg's law works on theta (half of 2-theta), in radians.
    theta_radians = np.radians(two_theta / 2.0)

    # Replace an exact zero by NaN to avoid a division by zero.
    sin_theta = np.sin(theta_radians)
    sin_theta = np.where(sin_theta == 0, np.nan, sin_theta)

    d_spacing = wavelength_angstrom / (2.0 * sin_theta)

    return d_spacing


def d_spacing_to_angle(d_spacing_angstrom, wavelength_angstrom=1.5406):
    """Convert d spacings to XRD 2-theta angles with Bragg's law.

    Parameters
    ----------
    d_spacing_angstrom : array-like or float
        d spacing(s) in angstrom. Scalars, lists, tuples, NumPy arrays and
        pandas Series are all accepted.
    wavelength_angstrom : float, default=1.5406
        X-ray wavelength in angstrom; the default is the Cu K-alpha wavelength.

    Returns
    -------
    two_theta_degrees : numpy.ndarray or numpy.float64
        2-theta angle(s) in degrees. NaN is returned for non-positive d
        spacings and for d spacings for which ``|sin(theta)|`` would exceed 1.
    """
    # Coerce first so plain Python sequences support the comparison below.
    d_spacing = np.asarray(d_spacing_angstrom, dtype=float)

    # Non-positive d spacings are not meaningful; mask them out.
    d_spacing = np.where(d_spacing <= 0, np.nan, d_spacing)

    # sin(theta) = wavelength / (2 d)
    sin_theta = wavelength_angstrom / (2.0 * d_spacing)

    # arcsin is only defined on [-1, 1]; anything outside becomes NaN.
    sin_theta = np.where(np.abs(sin_theta) > 1, np.nan, sin_theta)

    theta_radians = np.arcsin(sin_theta)
    two_theta_degrees = 2.0 * np.degrees(theta_radians)

    return two_theta_degrees

In [None]:
# 处理 XRD 的 xye 文件 - 改进版：同时生成角度和 d spacing 数据
# 1) 读取单个文件，提取波长与数据
def extract_wave_and_data(file_path: Path):
    """Read one ``.xye`` file and return its header metadata and data table.

    Header lines start with ``#``. A header line containing "wave" (any case)
    supplies the wavelength as the first number found on it; otherwise a header
    line containing "date" supplies the timestamp (the token after ``Date =``,
    or the whole line without ``#`` when that pattern is absent). Every other
    non-empty line is split on whitespace and kept when its first token is a
    finite number.

    Returns
    -------
    tuple
        ``(recorded_time, wave_value, df)`` where ``recorded_time`` is a string
        or None, ``wave_value`` is a float (NaN when no wavelength was found)
        and ``df`` has the columns ``2THETA``, ``Intensity1``, ``error`` and
        ``d_spacing`` (all NaN when the wavelength is unknown).
    """

    def _as_float(token):
        # Unparseable tokens become NaN instead of aborting the whole file.
        try:
            return float(token)
        except ValueError:
            return np.nan

    wave_value = np.nan
    recorded_time = None
    data_rows = []

    with open(file_path, "r", encoding="utf-8", errors="ignore") as f:
        for line in map(str.strip, f):
            if not line:
                continue

            # Data line: keep it when the leading 2-theta value is usable.
            if not line.startswith("#"):
                nums = [_as_float(token) for token in re.split(r"\s+", line)]
                if np.isfinite(nums[0]):
                    data_rows.append(nums)
                continue

            # Header line: the wavelength check takes precedence over the date.
            lowered = line.lower()
            if "wave" in lowered:
                m = re.search(r"([-+]?\d*\.?\d+)", line)
                if m:
                    wave_value = float(m.group(1))
            elif "date" in lowered:
                date_match = re.search(r'Date\s*=\s*(\S+)', line)
                if date_match:
                    recorded_time = date_match.group(1)
                else:
                    recorded_time = line.replace("#", "").strip()

    # Normalise every row to exactly three values, padding short rows with NaN.
    padding = [np.nan, np.nan]
    cleaned = [(row + padding)[:3] for row in data_rows]

    df = pd.DataFrame(cleaned, columns=["2THETA", "Intensity1", "error"])

    # d spacing can only be derived when the header supplied a wavelength.
    if np.isnan(wave_value):
        df["d_spacing"] = np.nan
    else:
        df["d_spacing"] = angle_to_d_spacing(df["2THETA"], wave_value)

    return recorded_time, wave_value, df


# 2) 处理目录下所有文件，生成长表（同时包含角度和 d spacing）
def process_files(directory: Path):
    """Parse every ``*.xye`` file in *directory* into one long-format table.

    Each file is read with ``extract_wave_and_data``; the trailing digits of the
    file stem become the numeric ``Index`` of that pattern (NaN when the stem
    does not end in digits).

    Parameters
    ----------
    directory : Path
        Folder that is searched (non-recursively) for ``*.xye`` files.

    Returns
    -------
    pandas.DataFrame
        Columns ``Index``, ``recorded_time``, ``wave_value``, ``2THETA``,
        ``d_spacing``, ``Intensity1``, ``error`` and ``Cnt2_D1``, with rows
        lacking ``Index``, ``d_spacing`` or ``Cnt2_D1`` removed and the rest
        sorted by ``Index`` then ``d_spacing``. An empty directory yields an
        empty table with the same columns.
    """
    columns = ["Index", "recorded_time", "wave_value", "2THETA", "d_spacing", "Intensity1", "error"]
    numeric_columns = ["2THETA", "d_spacing", "Intensity1", "error"]

    frames = []  # one frame per file; concatenated once at the end
    file_count = 0
    for file_path in sorted(directory.glob("*.xye")):
        file_count += 1
        recorded_time, wave_value, df = extract_wave_and_data(file_path)

        # Trailing digits of the stem (e.g. ..._f00024 / ..._123 / ..._idx-007).
        m = re.search(r"(\d+)$", file_path.stem)
        file_id = float(m.group(1)) if m else np.nan

        df = df.dropna(subset=["2THETA"])
        if df.empty:
            continue

        # Whole-column operations replace the former per-row iterrows() loop.
        block = df[numeric_columns].astype(float).reset_index(drop=True)
        block.insert(0, "wave_value", wave_value)
        block.insert(0, "recorded_time", recorded_time)
        block.insert(0, "Index", file_id)
        frames.append(block)

    if frames:
        spectrum_all = pd.concat(frames, ignore_index=True)
    else:
        # Keep the schema and float dtypes so the summary below still works.
        float_columns = ["Index", "wave_value", *numeric_columns]
        spectrum_all = pd.DataFrame(columns=columns).astype(dict.fromkeys(float_columns, float))

    # Type coercion and cleaning.
    spectrum_all["Index"] = pd.to_numeric(spectrum_all["Index"], errors="coerce")
    spectrum_all["2THETA"] = pd.to_numeric(spectrum_all["2THETA"], errors="coerce")
    spectrum_all["d_spacing"] = pd.to_numeric(spectrum_all["d_spacing"], errors="coerce")
    # NOTE(review): this adds the third column (named "error") to the intensity.
    # If that column is the intensity uncertainty, the sum is probably not what
    # is wanted; behaviour is left unchanged pending confirmation.
    spectrum_all["Cnt2_D1"] = spectrum_all[["Intensity1", "error"]].sum(axis=1, min_count=1)

    spectrum_all = spectrum_all.dropna(subset=["Index", "d_spacing", "Cnt2_D1"])
    spectrum_all = spectrum_all.sort_values(["Index", "d_spacing"]).reset_index(drop=True)

    # Summary; the file count is the number of files actually parsed above.
    print(f"✓ 处理了 {file_count} 个文件")
    print(f"✓ 提取了 {len(spectrum_all)} 个数据点")
    print(f"✓ 角度范围: {spectrum_all['2THETA'].min():.2f}° - {spectrum_all['2THETA'].max():.2f}°")

    if not spectrum_all["d_spacing"].isna().all():
        print(f"✓ d spacing 范围: {spectrum_all['d_spacing'].min():.3f} - {spectrum_all['d_spacing'].max():.3f} Å")
    else:
        print("⚠ 未找到波长信息，d spacing 未计算")

    return spectrum_all

In [None]:
# Parse every .xye file of the IS17_D run into one long table.
file_list = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Data\IS17_D")
spectrum_all_long = process_files(file_list)

# Wide table indexed by 2-theta: duplicates are averaged per (Index, 2THETA)
# first, then one column is created per pattern index.
_mean_by_angle = spectrum_all_long.groupby(["Index", "2THETA"], as_index=False)["Cnt2_D1"].mean()
spectrum_all_2theta = _mean_by_angle.pivot_table(index="2THETA", columns="Index", values="Cnt2_D1")
spectrum_all_2theta = spectrum_all_2theta.sort_index()

# Same layout indexed by d spacing, sorted in descending order.
_mean_by_d = (
    spectrum_all_long.dropna(subset=["d_spacing"])
    .groupby(["Index", "d_spacing"], as_index=False)["Cnt2_D1"]
    .mean()
)
spectrum_all_d_spacing = _mean_by_d.pivot_table(index="d_spacing", columns="Index", values="Cnt2_D1")
spectrum_all_d_spacing = spectrum_all_d_spacing.sort_index(ascending=False)

print("\n数据结构:")
print(f"- 原始长表: {spectrum_all_long.shape[0]} 行 × {spectrum_all_long.shape[1]} 列")
print(f"- 角度索引表: {spectrum_all_2theta.shape[0]} 行 × {spectrum_all_2theta.shape[1]} 列")
print(f"- d spacing 索引表: {spectrum_all_d_spacing.shape[0]} 行 × {spectrum_all_d_spacing.shape[1]} 列")

# Export the two wide tables (the long-format export stays disabled).
# NOTE(review): the files are written to path_out, whereas other cells read
# spectrum_all_d_spacing.csv from the run's Data folder; confirm the copy step.
# spectrum_all_long.to_csv(path_out.joinpath("spectrum_all_long_format.csv"), index=False, header=True)
spectrum_all_2theta.to_csv(path_out / "spectrum_all_2theta.csv")
spectrum_all_d_spacing.to_csv(path_out / "spectrum_all_d_spacing.csv")

## 峰位置的判断和检测, EMD

In [None]:
# Pivoted XRD table: the first CSV column (d spacing) becomes the index.
path_data = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Data")
data = pd.read_csv(path_data / "spectrum_all_d_spacing.csv", index_col=0)

### EMD

In [None]:
# Libraries needed for peak detection.
import matplotlib.pyplot as plt
from scipy.signal import find_peaks, peak_widths

# Pattern to analyse: the first column of the pivoted table.
x_data = data.index  # d-spacing values
y_data = data.iloc[:, 0].values  # intensity values

# Peak-detection thresholds:
# height: minimum peak height
# distance: minimum separation between peaks, in data points
# prominence: minimum peak prominence
# width: minimum peak width
peaks, properties = find_peaks(
    y_data,
    height=1000,    # minimum peak height
    distance=25,    # minimum separation between peaks
    prominence=700,   # minimum prominence
    width=1,        # minimum width
)  # `peaks` holds positions into y_data; `properties` holds the per-peak measures

# Position and intensity of every detected peak.
peak_positions = x_data[peaks]  # d-spacing positions
peak_intensities = y_data[peaks]  # intensity values

# Order the peaks from strongest to weakest.
sorted_indices = np.argsort(peak_intensities)[::-1]
sorted_positions = peak_positions[sorted_indices]
sorted_intensities = peak_intensities[sorted_indices]

# Print a ranked table of the detected peaks.
print(f"找到 {len(peaks)} 个峰值:")
print("=" * 50)
print(f"{'序号':<4} {'d-spacing (Å)':<15} {'强度 (a.u.)':<15}")
print("=" * 50)

for i, (pos, intensity) in enumerate(zip(sorted_positions, sorted_intensities, strict=False)):
    print(f"{i + 1:<4} {pos:<15.6f} {intensity:<15.2f}")

In [None]:
%matplotlib inline
plt.close("all")
# gridspec inside gridspec
fig = plt.figure(figsize=(7.0, 5.0))
gs = gridspec.GridSpec(2, 1, width_ratios=None, height_ratios=[1, 1], wspace=0, hspace=0, figure=fig)

# Panel A: the full pattern with all detected peaks marked.
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1.0, 1.0))
ax.set_box_aspect(0.3)

ax.plot(x_data, y_data, "b-", linewidth=1, alpha=0.8, label="Initial XRD Pattern")
ax.plot(peak_positions, peak_intensities, "ro", markersize=6, alpha=0.5, label=f"Peaks ({len(peaks)} detected)")

# Annotate top 20 strongest peaks
for i in range(min(20, len(sorted_positions))):
    ax.annotate(
        f"{sorted_positions[i]:.3f}Å",
        xy=(sorted_positions[i], sorted_intensities[i]),
        xytext=(-5, 20),
        textcoords="offset points",
        fontsize=8,
        ha="left",
        bbox={"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.7},
        arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0"},
    )

ax.set_xlabel("d Spacing (Å)", fontsize=11)
ax.set_ylabel("Intensity (arb. u.)", fontsize=11)
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xlim(0, 20)

# Panel B
# Plot zoomed region (main peak region)
subfig = fig.add_subfigure(gs[1, 0])
ax = subfig.add_axes((0, -0.15, 1.0, 1.0))
ax.set_box_aspect(0.3)

mask = (x_data >= 1) & (x_data <= 5)  # 1-5 Å region
x_zoom = x_data[mask]
y_zoom = y_data[mask]

# Peaks in this region
mask_peaks = (peak_positions >= 1) & (peak_positions <= 5)
peaks_zoom_pos = peak_positions[mask_peaks]
peaks_zoom_int = peak_intensities[mask_peaks]

ax.plot(x_zoom, y_zoom, "b-", linewidth=1.5, label="Zoomed XRD Pattern")
ax.plot(peaks_zoom_pos, peaks_zoom_int, "ro", markersize=8, alpha=0.5, label="Peaks")

# Annotate all peaks in this region
for pos, intensity in zip(peaks_zoom_pos, peaks_zoom_int, strict=False):
    ax.annotate(
        f"{pos:.3f}Å",
        xy=(pos, intensity),
        xytext=(5, 10),
        textcoords="offset points",
        fontsize=9,
        ha="left",
        bbox={"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.7},
        arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0"},
    )

ax.set_xlabel("d Spacing (Å)")
ax.set_ylabel("Intensity (arb. u.)")
ax.legend()
ax.grid(True, alpha=0.3)

# Save the figure as an LZW-compressed TIFF.
plt.savefig(
    Path.joinpath(path_out, r"XRD_MnO2_300.tif"),
    pad_inches=0.05,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# NOTE(review): the face colour is set after savefig, so it applies to the
# inline display only, not to the file written above.
plt.gcf().set_facecolor("white")
plt.show()

In [None]:
# Build a detailed peak table.
# Peak widths measured at half of each peak's prominence (rel_height=0.5).
widths = peak_widths(y_data, peaks, rel_height=0.5)

# Collect the per-peak information in a DataFrame (English column names).
peak_info = pd.DataFrame({
    "d-spacing (Å)": peak_positions,
    "Intensity (a.u.)": peak_intensities,
    "Peak Width (points)": widths[0],
    # widths[2] / widths[3] are fractional sample positions; they are truncated
    # to integers before being used to look up the d-spacing value.
    "Left Boundary": x_data[widths[2].astype(int)],
    "Right Boundary": x_data[widths[3].astype(int)],
    "Prominence": properties["prominences"],
})

# Sort by intensity, strongest first.
peak_info_sorted = peak_info.sort_values("Intensity (a.u.)", ascending=False)

# Show the 20 strongest peaks.
print("前20个最强峰值的详细信息:")
print("=" * 90)
print(peak_info_sorted.head(20).to_string(index=False, float_format="%.3f"))

# Save the peak table as CSV in the output folder.
output_file = path_out.joinpath("detected_peaks_mno2.csv")
peak_info_sorted.to_csv(output_file, index=False, float_format="%.6f")
print(f"\n峰值分析结果已保存到: {output_file}")

# Summary statistics.
print("\n统计总结:")
print(f"检测到的峰值总数: {len(peaks)}")
print(f"平均峰强度: {peak_intensities.mean():.2f}")
print(f"最强峰位置: {peak_positions[np.argmax(peak_intensities)]:.6f} Å")
print(f"最强峰强度: {peak_intensities.max():.2f}")
print(f"强度范围: {peak_intensities.min():.2f} - {peak_intensities.max():.2f}")

### ZSH

In [None]:
# Libraries needed for peak detection.
import matplotlib.pyplot as plt
from scipy.signal import find_peaks, peak_widths

# Pattern to analyse: the column at position 15 of the pivoted table.
x_data = data.index  # d-spacing values
y_data = data.iloc[:, 15].values  # intensity values

# Peak-detection thresholds:
# height: minimum peak height
# distance: minimum separation between peaks, in data points
# prominence: minimum peak prominence
# width: minimum peak width
peaks, properties = find_peaks(
    y_data,
    height=5000,    # minimum peak height
    distance=25,    # minimum separation between peaks
    prominence=3000, # minimum prominence
    width=1,        # minimum width
)  # `peaks` holds positions into y_data; `properties` holds the per-peak measures

# Position and intensity of every detected peak.
peak_positions = x_data[peaks]  # d-spacing positions
peak_intensities = y_data[peaks]  # intensity values

# Order the peaks from strongest to weakest.
sorted_indices = np.argsort(peak_intensities)[::-1]
sorted_positions = peak_positions[sorted_indices]
sorted_intensities = peak_intensities[sorted_indices]

# Print a ranked table of the detected peaks.
print(f"找到 {len(peaks)} 个峰值:")
print("=" * 50)
print(f"{'序号':<4} {'d-spacing (Å)':<15} {'强度 (a.u.)':<15}")
print("=" * 50)

for i, (pos, intensity) in enumerate(zip(sorted_positions, sorted_intensities, strict=False)):
    print(f"{i + 1:<4} {pos:<15.6f} {intensity:<15.2f}")

In [None]:
%matplotlib inline
plt.close("all")
# gridspec inside gridspec
fig = plt.figure(figsize=(7.0, 5.0))
gs = gridspec.GridSpec(2, 1, width_ratios=None, height_ratios=[1, 1], wspace=0, hspace=0, figure=fig)

# Panel A: the full pattern with all detected peaks marked.
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1.0, 1.0))
ax.set_box_aspect(0.3)

ax.plot(x_data, y_data, "b-", linewidth=1, alpha=0.8, label="Discharged XRD Pattern")
ax.plot(peak_positions, peak_intensities, "ro", markersize=6, alpha=0.5, label=f"Peaks ({len(peaks)} detected)")

# Annotate top 20 strongest peaks
for i in range(min(20, len(sorted_positions))):
    ax.annotate(
        f"{sorted_positions[i]:.3f}Å",
        xy=(sorted_positions[i], sorted_intensities[i]),
        xytext=(-5, 20),
        textcoords="offset points",
        fontsize=8,
        ha="left",
        bbox={"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.7},
        arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0"},
    )

ax.set_xlabel("d Spacing (Å)", fontsize=11)
ax.set_ylabel("Intensity (arb. u.)", fontsize=11)
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xlim(0, 20)

# Panel B
# Plot zoomed region (main peak region)
subfig = fig.add_subfigure(gs[1, 0])
ax = subfig.add_axes((0, -0.15, 1.0, 1.0))
ax.set_box_aspect(0.3)

mask = (x_data >= 1) & (x_data <= 5)  # 1-5 Å region
x_zoom = x_data[mask]
y_zoom = y_data[mask]

# Peaks in this region
mask_peaks = (peak_positions >= 1) & (peak_positions <= 5)
peaks_zoom_pos = peak_positions[mask_peaks]
peaks_zoom_int = peak_intensities[mask_peaks]

ax.plot(x_zoom, y_zoom, "b-", linewidth=1.5, label="Zoomed XRD Pattern")
ax.plot(peaks_zoom_pos, peaks_zoom_int, "ro", markersize=8, alpha=0.5, label="Peaks")

# Annotate all peaks in this region
for pos, intensity in zip(peaks_zoom_pos, peaks_zoom_int, strict=False):
    ax.annotate(
        f"{pos:.3f}Å",
        xy=(pos, intensity),
        xytext=(5, 10),
        textcoords="offset points",
        fontsize=9,
        ha="left",
        bbox={"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.7},
        arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0"},
    )

ax.set_xlabel("d Spacing (Å)")
ax.set_ylabel("Intensity (arb. u.)")
ax.legend()
ax.grid(True, alpha=0.3)

# Save the figure as an LZW-compressed TIFF.
plt.savefig(
    Path.joinpath(path_out, r"XRD_ZSH_300.tif"),
    pad_inches=0.05,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# NOTE(review): the face colour is set after savefig, so it applies to the
# inline display only, not to the file written above.
plt.gcf().set_facecolor("white")
plt.show()

In [None]:
# Build a detailed peak table.
# Peak widths measured at half of each peak's prominence (rel_height=0.5).
widths = peak_widths(y_data, peaks, rel_height=0.5)

# Collect the per-peak information in a DataFrame (English column names).
peak_info = pd.DataFrame({
    "d-spacing (Å)": peak_positions,
    "Intensity (a.u.)": peak_intensities,
    "Peak Width (points)": widths[0],
    # widths[2] / widths[3] are fractional sample positions; they are truncated
    # to integers before being used to look up the d-spacing value.
    "Left Boundary": x_data[widths[2].astype(int)],
    "Right Boundary": x_data[widths[3].astype(int)],
    "Prominence": properties["prominences"],
})

# Sort by intensity, strongest first.
peak_info_sorted = peak_info.sort_values("Intensity (a.u.)", ascending=False)

# Show the 20 strongest peaks.
print("前20个最强峰值的详细信息:")
print("=" * 90)
print(peak_info_sorted.head(20).to_string(index=False, float_format="%.3f"))

# Save the peak table as CSV in the output folder.
output_file = path_out.joinpath("detected_peaks_zsh.csv")
peak_info_sorted.to_csv(output_file, index=False, float_format="%.6f")
print(f"\n峰值分析结果已保存到: {output_file}")

# Summary statistics.
print("\n统计总结:")
print(f"检测到的峰值总数: {len(peaks)}")
print(f"平均峰强度: {peak_intensities.mean():.2f}")
print(f"最强峰位置: {peak_positions[np.argmax(peak_intensities)]:.6f} Å")
print(f"最强峰强度: {peak_intensities.max():.2f}")
print(f"强度范围: {peak_intensities.min():.2f} - {peak_intensities.max():.2f}")

## 特征峰位置范围内的强度变化

In [None]:
# Load the electrochemistry export. Only the first file of the glob result is read.
path_filelist = list(Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Echem\B").glob(r"**\*.txt"))
echem_all = []
for path_file in path_filelist[0:1]:
    # Each export states its own header length in a "Nb header lines : N" entry.
    # Reset per file so a value left over from an earlier run is never reused.
    line_skip = None
    with open(path_file, "r", encoding="latin_1") as file:
        for line in file:
            if line.startswith("Nb header lines"):
                line_skip = int(line.split(":")[1].strip())
                break
    if line_skip is None:
        raise ValueError(f"'Nb header lines' entry not found in {path_file}")
    df = pd.read_csv(path_file, sep="\t", comment="#", skiprows=line_skip - 1, encoding="latin_1", index_col=None, decimal=".").dropna(axis=1, how="all")
    df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]] = df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]].apply(pd.to_numeric, errors="coerce")
    df["time/s"] = df["time/s"].apply(pd.to_datetime, format="mixed", errors="coerce")
    df["cycle number"] = df["cycle number"].astype(float).astype(np.int16)
    # Cell voltage = working-electrode potential minus counter-electrode potential.
    df["Voltage/V"] = df["Ewe/V"] - df["Ece/V"]
    echem_all.append(df)
# Merge the electrochemistry data into one table ordered by timestamp.
echem_all = pd.concat(echem_all, axis=0, ignore_index=True).sort_values(by="time/s").reset_index(drop=True)


# Acquisition time of every diffraction pattern, read from the .xye headers.
filelist = list(Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Data\IS17_D").glob(r"*.xye"))
range_index, wave_length, time_processed = [], [], []
for path_file in filelist:
    # Exactly one (index, time, wavelength) triple is recorded per file so the
    # three lists can never drift out of step.
    wave_value, recored_time = np.nan, None
    with open(path_file, "r", encoding="utf-8", errors="ignore") as file:
        for line in file:
            if line.startswith("# Wave"):
                if np.isnan(wave_value):
                    wave_value = float(line.split()[3])
            elif line.startswith("# Date"):
                if recored_time is None:
                    recored_time = str(line.split()[3])
            # Stop reading as soon as both header values are known.
            if recored_time is not None and not np.isnan(wave_value):
                break
    if recored_time is None:
        # Without a timestamp the pattern cannot be matched to the electrochemistry.
        raise ValueError(f"'# Date' header entry not found in {path_file}")
    # Keep the last five characters of the final "_"-separated part of the file stem.
    file_id = path_file.stem.split("_")[-1].split(".")[0][-5:]
    range_index.append(file_id)
    wave_length.append(wave_value)
    time_processed.append(recored_time)
spectrum_time_all = pd.DataFrame({
    "Range_Index": range_index,
    "time/s": time_processed,
    "Wave_Length": wave_length,
})
spectrum_time_all["time/s"] = pd.to_datetime(spectrum_time_all["time/s"], format=r"%Y-%m-%d_%H:%M:%S")
spectrum_time_all["Range_Index"] = pd.to_numeric(spectrum_time_all["Range_Index"])
spectrum_time_all["Wave_Length"] = pd.to_numeric(spectrum_time_all["Wave_Length"])

# Load the pivoted XRD table (rows: d spacing, columns: pattern index).
path_xrd = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\EMD\2025-MSPD\Results\IS17_D\Data")

spectrum_all = pd.read_csv(path_xrd.joinpath(r"spectrum_all_d_spacing.csv"), index_col=0, header=0)
spectrum_all.index = pd.to_numeric(spectrum_all.index)
spectrum_all.columns = pd.to_numeric(spectrum_all.columns)

In [None]:
# Keep only cycles 1 and 2, and only the rows whose cell voltage is at least 0.8 V
in_selected_cycles = echem_all["cycle number"].isin([1, 2])
above_cutoff = echem_all["Voltage/V"] >= 0.8
# selected_echem = selected_echem.iloc[:-5, :].copy()
selected_echem = echem_all[in_selected_cycles & above_cutoff].copy()

# Elapsed time since the first retained row, in hours
elapsed = selected_echem["time/s"] - selected_echem["time/s"].iloc[0]
selected_echem["charge_time"] = elapsed.dt.total_seconds() / 3600

# Pair every electrochemistry row with the pattern recorded closest in time
matched = pd.merge_asof(
    selected_echem.sort_values(by="time/s"),
    spectrum_time_all.sort_values(by="time/s"),
    on="time/s",
    direction="nearest",  # closest time stamp on either side
    tolerance=pd.Timedelta("5s"),  # largest accepted time offset
)
# Drop rows without a partner pattern, then keep one row per pattern
matched = matched.dropna(subset=["Range_Index"])
matched = matched.drop_duplicates(subset=["Range_Index"], keep="first")
selected_spectrum_time = matched.reset_index(drop=False)

# Restrict the patterns to the d-spacing window of interest (bounds included)
d_spacing_range = (0.5, 15.0)
d_min, d_max = d_spacing_range
spectrum_all = spectrum_all[(spectrum_all.index >= d_min) & (spectrum_all.index <= d_max)]

# Pick the columns whose label appears among the matched pattern numbers
range_index_list = selected_spectrum_time["Range_Index"].tolist()
column_mask = spectrum_all.columns.isin(range_index_list)
selected_spectrum = spectrum_all.loc[:, column_mask]

### EMD

In [None]:
# Patterns (columns) evaluated for the EMD phase; the longer list is kept for reference
# index_labels = [15, 25, 37, 46, 50, 61, 70, 75, 87, 99, 109, 115, 126, 137]
index_labels = [15, 25, 37, 46, 61, 75]
label_mask = selected_spectrum.columns.isin(index_labels)
selected_spectrum = selected_spectrum.loc[:, label_mask]

# Reference d-spacings and, per reference, the (lower, upper) half-width of the search window
opxrd_ref_d_spacing = [1.414916, 2.44884, 3.365342, 5.967425, 8.042391]
peak_width = [(0.05, 0.05), (0.05, 0.05), (0.4, 0.4), (0.4, 0.4), (0.4, 0.4)]

# For every reference: maximum intensity inside the window and the d-spacing where it occurs
intensity_dict = {}
position_dict = {}
for i, d_spacing_value in enumerate(opxrd_ref_d_spacing):
    half_left, half_right = peak_width[i]
    in_window = (selected_spectrum.index >= d_spacing_value - half_left) & (selected_spectrum.index <= d_spacing_value + half_right)
    temp = selected_spectrum.loc[in_window]
    intensity_dict[d_spacing_value] = temp.max(axis=0, numeric_only=True)
    position_dict[d_spacing_value] = temp.idxmax(axis=0, numeric_only=True)

data_intensity = pd.DataFrame(intensity_dict)
data_position = pd.DataFrame(position_dict)

# Write both tables into one workbook, one sheet each
output_file = path_out.joinpath("XRD_MSPD_2025_PeakIntensity_EMD.xlsx")
with pd.ExcelWriter(output_file, engine="openpyxl", mode="w") as writer:
    for sheet_name, frame in (("Intensity", data_intensity), ("Position", data_position)):
        frame.to_excel(writer, sheet_name=sheet_name, index=True)

### ZSH

In [None]:
# Patterns (columns) evaluated for the ZSH phase; the longer list is kept for reference
# index_labels = [15, 25, 37, 46, 50, 61, 70, 75, 87, 99, 109, 115, 126, 137]
index_labels = [15, 25, 37, 46, 61, 75]
label_mask = selected_spectrum.columns.isin(index_labels)
selected_spectrum = selected_spectrum.loc[:, label_mask]

# Reference d-spacings; every window is +/-0.1 except the last one, which is +/-0.3
opxrd_ref_d_spacing = [1.578523, 1.821636, 2.32571, 2.473, 2.536224, 2.671445, 2.714108, 3.128303, 3.230361, 3.408803, 3.51733, 3.644164, 4.172575, 5.465435, 10.9373]
peak_width = [(0.1, 0.1)] * 14 + [(0.3, 0.3)]

# For every reference: maximum intensity inside the window and the d-spacing where it occurs
intensity_dict = {}
position_dict = {}
for i, d_spacing_value in enumerate(opxrd_ref_d_spacing):
    half_left, half_right = peak_width[i]
    in_window = (selected_spectrum.index >= d_spacing_value - half_left) & (selected_spectrum.index <= d_spacing_value + half_right)
    temp = selected_spectrum.loc[in_window]
    intensity_dict[d_spacing_value] = temp.max(axis=0, numeric_only=True)
    position_dict[d_spacing_value] = temp.idxmax(axis=0, numeric_only=True)

data_intensity = pd.DataFrame(intensity_dict)
data_position = pd.DataFrame(position_dict)

# Write both tables into one workbook, one sheet each
output_file = path_out.joinpath("XRD_MSPD_2025_PeakIntensity_ZSH.xlsx")
with pd.ExcelWriter(output_file, engine="openpyxl", mode="w") as writer:
    for sheet_name, frame in (("Intensity", data_intensity), ("Position", data_position)):
        frame.to_excel(writer, sheet_name=sheet_name, index=True)

## 峰位置的判断和检测， MnO2

In [None]:
# Folder containing the d-spacing spectra table used by the peak-detection cells
path_data = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\αMnO2\2022-ICMAB\Results\Coin1\Overivew")

# The first CSV column is used as the row index
data = pd.read_csv(path_data / "Spectum_all_d spacing.csv", index_col=0)

### MnO2

In [None]:
# Libraries needed for peak detection
import matplotlib.pyplot as plt
from scipy.signal import find_peaks, peak_widths

# x axis: the d_spacing column; y axis: the intensities stored at column position 1
x_data = data["d_spacing"].values
y_data = data.iloc[:, 1].values

# Detection thresholds passed to find_peaks:
#   height     - minimum peak height
#   distance   - minimum separation between neighbouring peaks, in data points
#   prominence - minimum peak prominence
#   width      - minimum peak width
peak_settings = {"height": 500, "distance": 2, "prominence": 170, "width": 1}
peaks, properties = find_peaks(y_data, **peak_settings)

# d-spacing and intensity at every detected peak
peak_positions = x_data[peaks]
peak_intensities = y_data[peaks]

# Rank the peaks from strongest to weakest
sorted_indices = np.argsort(peak_intensities)[::-1]
sorted_positions = peak_positions[sorted_indices]
sorted_intensities = peak_intensities[sorted_indices]

# Print the ranked list as a fixed-width table
separator = "=" * 50
print(f"找到 {len(peaks)} 个峰值:")
print(separator)
print(f"{'序号':<4} {'d-spacing (Å)':<15} {'强度 (a.u.)':<15}")
print(separator)

for rank, (pos, intensity) in enumerate(zip(sorted_positions, sorted_intensities, strict=False), start=1):
    print(f"{rank:<4} {pos:<15.6f} {intensity:<15.2f}")

In [None]:
%matplotlib inline
plt.close("all")
# One figure holding a 1x1 grid with a single sub-figure
fig = plt.figure(figsize=(7.0, 2.5))
gs = gridspec.GridSpec(1, 1, wspace=0, hspace=0, figure=fig)

# Panel A: axes fill the whole sub-figure
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1.0, 1.0))
ax.set_box_aspect(0.3)

# Pattern as a line, detected peaks as markers
ax.plot(x_data, y_data, "b-", linewidth=1, alpha=0.8, label="Initial XRD Pattern")
ax.plot(peak_positions, peak_intensities, "ro", markersize=6, alpha=0.5, label=f"Peaks ({len(peaks)} detected)")

# Label at most the 20 strongest peaks with their d-spacing
for pos, intensity in zip(sorted_positions[:20], sorted_intensities[:20], strict=False):
    ax.annotate(
        f"{pos:.3f}Å",
        xy=(pos, intensity),
        xytext=(-5, 20),
        textcoords="offset points",
        fontsize=8,
        ha="left",
        bbox={"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.7},
        arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0"},
    )

ax.set_xlabel("d Spacing (Å)", fontsize=11)
ax.set_ylabel("Intensity (arb. u.)", fontsize=11)
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xlim(2, 8)

# Export as LZW-compressed TIFF at 300 dpi
plt.savefig(
    path_out / "XRD_MnO2_300.tif",
    pad_inches=0.05,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# Applied after saving, so it only affects the inline display
plt.gcf().set_facecolor("white")
plt.show()

In [None]:
# Detailed peak table
# Widths measured at half of each peak's prominence (rel_height=0.5)
widths = peak_widths(y_data, peaks, rel_height=0.5)

# Interpolated boundary positions are truncated to whole sample indices before the x lookup
left_boundary = x_data[widths[2].astype(int)]
right_boundary = x_data[widths[3].astype(int)]

# One row per detected peak
peak_info = pd.DataFrame({
    "d-spacing (Å)": peak_positions,
    "Intensity (a.u.)": peak_intensities,
    "Peak Width (points)": widths[0],
    "Left Boundary": left_boundary,
    "Right Boundary": right_boundary,
    "Prominence": properties["prominences"],
})

# Strongest peaks first
peak_info_sorted = peak_info.sort_values("Intensity (a.u.)", ascending=False)

# Show the 20 strongest peaks
print("前20个最强峰值的详细信息:")
print("=" * 90)
top_rows = peak_info_sorted.head(20)
print(top_rows.to_string(index=False, float_format="%.3f"))

# Store the complete sorted table as CSV
output_file = path_out / "detected_peaks_mno2.csv"
peak_info_sorted.to_csv(output_file, index=False, float_format="%.6f")
print(f"\n峰值分析结果已保存到: {output_file}")

# Summary statistics
print("\n统计总结:")
print(f"检测到的峰值总数: {len(peaks)}")
print(f"平均峰强度: {peak_intensities.mean():.2f}")
strongest = np.argmax(peak_intensities)
print(f"最强峰位置: {peak_positions[strongest]:.6f} Å")
top_intensity = peak_intensities.max()
print(f"最强峰强度: {top_intensity:.2f}")
print(f"强度范围: {peak_intensities.min():.2f} - {top_intensity:.2f}")

### ZSH

In [None]:
# Libraries needed for peak detection
import matplotlib.pyplot as plt
from scipy.signal import find_peaks, peak_widths

# x axis: the d_spacing column; y axis: the intensities stored at column position 18
x_data = data["d_spacing"].values
y_data = data.iloc[:, 18].values

# Detection thresholds passed to find_peaks:
#   height     - minimum peak height
#   distance   - minimum separation between neighbouring peaks, in data points
#   prominence - minimum peak prominence
#   width      - minimum peak width
peak_settings = {"height": 500, "distance": 2, "prominence": 150, "width": 1}
peaks, properties = find_peaks(y_data, **peak_settings)

# d-spacing and intensity at every detected peak
peak_positions = x_data[peaks]
peak_intensities = y_data[peaks]

# Rank the peaks from strongest to weakest
sorted_indices = np.argsort(peak_intensities)[::-1]
sorted_positions = peak_positions[sorted_indices]
sorted_intensities = peak_intensities[sorted_indices]

# Print the ranked list as a fixed-width table
separator = "=" * 50
print(f"找到 {len(peaks)} 个峰值:")
print(separator)
print(f"{'序号':<4} {'d-spacing (Å)':<15} {'强度 (a.u.)':<15}")
print(separator)

for rank, (pos, intensity) in enumerate(zip(sorted_positions, sorted_intensities, strict=False), start=1):
    print(f"{rank:<4} {pos:<15.6f} {intensity:<15.2f}")

In [None]:
%matplotlib inline
plt.close("all")
# One figure holding a 1x1 grid with a single sub-figure
fig = plt.figure(figsize=(7.0, 2.5))
gs = gridspec.GridSpec(1, 1, wspace=0, hspace=0, figure=fig)

# Panel A: axes fill the whole sub-figure
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1.0, 1.0))
ax.set_box_aspect(0.3)

# Pattern as a line, detected peaks as markers
ax.plot(x_data, y_data, "b-", linewidth=1, alpha=0.8, label="Discharged XRD Pattern")
ax.plot(peak_positions, peak_intensities, "ro", markersize=6, alpha=0.5, label=f"Peaks ({len(peaks)} detected)")

# Label at most the 21 strongest peaks with their d-spacing
for pos, intensity in zip(sorted_positions[:21], sorted_intensities[:21], strict=False):
    ax.annotate(
        f"{pos:.3f}Å",
        xy=(pos, intensity),
        xytext=(-5, 20),
        textcoords="offset points",
        fontsize=8,
        ha="left",
        bbox={"boxstyle": "round,pad=0.3", "fc": "lightblue", "alpha": 0.7},
        arrowprops={"arrowstyle": "->", "connectionstyle": "arc3,rad=0"},
    )

ax.set_xlabel("d Spacing (Å)", fontsize=11)
ax.set_ylabel("Intensity (arb. u.)", fontsize=11)
ax.legend()
ax.grid(True, alpha=0.3)
ax.set_xlim(2, 8)

# Export as LZW-compressed TIFF at 300 dpi
plt.savefig(
    path_out / "XRD_ZSH_300.tif",
    pad_inches=0.05,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# Applied after saving, so it only affects the inline display
plt.gcf().set_facecolor("white")
plt.show()

In [None]:
# Detailed peak table
# Widths measured at half of each peak's prominence (rel_height=0.5)
widths = peak_widths(y_data, peaks, rel_height=0.5)

# Interpolated boundary positions are truncated to whole sample indices before the x lookup
left_boundary = x_data[widths[2].astype(int)]
right_boundary = x_data[widths[3].astype(int)]

# One row per detected peak
peak_info = pd.DataFrame({
    "d-spacing (Å)": peak_positions,
    "Intensity (a.u.)": peak_intensities,
    "Peak Width (points)": widths[0],
    "Left Boundary": left_boundary,
    "Right Boundary": right_boundary,
    "Prominence": properties["prominences"],
})

# Strongest peaks first
peak_info_sorted = peak_info.sort_values("Intensity (a.u.)", ascending=False)

# Show the 20 strongest peaks
print("前20个最强峰值的详细信息:")
print("=" * 90)
top_rows = peak_info_sorted.head(20)
print(top_rows.to_string(index=False, float_format="%.3f"))

# Store the complete sorted table as CSV
# NOTE(review): an earlier peak-table cell of this notebook writes a CSV with this same
# name; if `path_out` is unchanged that file is overwritten here - confirm this is intended.
output_file = path_out / "detected_peaks_zsh.csv"
peak_info_sorted.to_csv(output_file, index=False, float_format="%.6f")
print(f"\n峰值分析结果已保存到: {output_file}")

# Summary statistics
print("\n统计总结:")
print(f"检测到的峰值总数: {len(peaks)}")
print(f"平均峰强度: {peak_intensities.mean():.2f}")
strongest = np.argmax(peak_intensities)
print(f"最强峰位置: {peak_positions[strongest]:.6f} Å")
top_intensity = peak_intensities.max()
print(f"最强峰强度: {top_intensity:.2f}")
print(f"强度范围: {peak_intensities.min():.2f} - {top_intensity:.2f}")

## 特征峰位置范围内的强度变化, MnO2

In [None]:
path_xrd = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\αMnO2\2022-ICMAB\Results")
# Spectra, electrochemistry and time-index files all sit in this sub-folder
path_coin = path_xrd / "Coin1" / "1stCharge+2ndDischarge"
path_echem = path_coin / "EchemOperando1_c10_1544mg_C02.txt"

# Load the PDF-card table
pdfmo = pd.read_csv(
    path_xrd / "PDFCards" / "PDF_1stDischarge-d-spacing.csv",
    sep=",",
    index_col=None,
    header=0,
    skiprows=1,
    comment="#",
)

# Load the spectra; rows are indexed by d-spacing, the 2THETA column is discarded
spectrum_all = pd.read_csv(
    path_coin / "Spectum_all_d spacing.csv",
    sep=r",",
    index_col=None,
    header=0,
    comment="#",
)
spectrum_all.drop(columns=["2THETA"], inplace=True)
spectrum_all.set_index(keys=["d_spacing"], inplace=True)
spectrum_all.index = pd.to_numeric(spectrum_all.index, errors="coerce")
spectrum_all.columns = pd.to_numeric(spectrum_all.columns, errors="coerce")

# Electrochemistry: the header length is announced by a "Nb header lines : N" line.
# The scan uses the same latin_1 encoding as the read_csv call below and stops at
# the first match instead of loading the whole file.
with open(path_echem, "r", encoding="latin_1") as file:
    for line in file:
        if line.startswith("Nb header lines"):
            line_skip = int(line.split(":")[1].strip())
            break
    else:
        # Fail with a clear message rather than a NameError / stale value further down
        raise ValueError(f"'Nb header lines' entry not found in {path_echem}")

echem_all = pd.read_csv(
    path_echem,
    sep="\t",
    index_col=None,
    header=0,
    comment="#",
    skiprows=line_skip - 1,
    encoding="latin_1",
    date_format="%m/%d/%y %H:%M:%S.%f",
    parse_dates=[1, 2],
).dropna(axis=1, how="all", inplace=False)
echem_all["time/s"] = pd.to_datetime(echem_all["time/s"])
echem_all["Ewe/V"] = pd.to_numeric(echem_all["Ewe/V"])
echem_all["<I>/mA"] = pd.to_numeric(echem_all["<I>/mA"])
# echem_all.info()

# Time stamp of every spectrum
spectrum_time_all = pd.read_csv(
    path_coin / "Time_index_spectrum.csv",
    sep=",",
    index_col=0,
    header=0,
    comment="#",
    date_format="%m/%d/%y %H:%M:%S.%f",
    parse_dates=[1],
)
spectrum_time_all["Time"] = pd.to_datetime(spectrum_time_all["Time"])
# spectrum_time_all.info()

# For every spectrum, find the electrochemistry row closest in time ...
spectrum_time = [abs(echem_all["time/s"] - t).idxmin() for t in spectrum_time_all["Time"]]
# ... then collect potential and current of those rows, ordered by potential.
# The two reset_index calls keep the echem row label ("index") and the spectrum order ("level_0").
spectrum_time = (echem_all.loc[spectrum_time, [r"Ewe/V", r"<I>/mA"]].reset_index(drop=False).sort_values(by="Ewe/V", ascending=True)).reset_index(drop=False, inplace=False)

In [None]:
# Choose the spectra of interest and the matching stretch of electrochemistry data
index_labels = [17, 26, 31, 37, 40, 45, 50, 54]
first_label, last_label = index_labels[0], index_labels[-1]
in_label_window = (spectrum_time["level_0"] <= last_label + 1) & (spectrum_time["level_0"] >= first_label)
selected_spectrum_time = spectrum_time[in_label_window].sort_values(by="level_0", ascending=True).reset_index(drop=True)

# Electrochemistry rows between the first and last selected spectrum (bounds included)
last_row = selected_spectrum_time["index"].iloc[-1]
first_row = selected_spectrum_time["index"].iloc[0]
selected_echem = echem_all[(echem_all.index <= last_row) & (echem_all.index >= first_row)].copy()

# Elapsed time since the first selected row, in hours
elapsed = selected_echem["time/s"] - selected_echem["time/s"].iloc[0]
selected_echem["FD1st_time"] = elapsed.dt.total_seconds() / 3600

# Translate the spectrum order numbers into spectrum column labels.
# NOTE(review): the +1 presumably maps zero-based order numbers onto one-based column labels - confirm.
column_range_start = selected_spectrum_time["level_0"].iloc[0] + 1
column_range_end = selected_spectrum_time["level_0"].iloc[-1] + 1

# Keep every column whose label lies inside that range
selected_columns = [col for col in spectrum_all.columns if column_range_start <= col <= column_range_end]
selected_spectrum = spectrum_all[selected_columns]

### αMnO2

In [None]:
# Spectra (columns) evaluated for the αMnO2 phase
index_labels = [17, 26, 31, 37, 40, 45, 54]
selected_spectrum = selected_spectrum.loc[:, selected_spectrum.columns.isin(index_labels)]

# Reference d-spacings and, per reference, the (lower, upper) half-width of the search window.
# The list is called `peak_width` (the name the MSPD cells use) on purpose: calling it
# `peak_widths` would overwrite the scipy.signal.peak_widths function imported by the
# peak-detection cells and break them on re-run.
opxrd_ref_d_spacing = [2.150451, 2.390493, 3.099262, 4.895775, 6.93]
peak_width = [(0.02, 0.02), (0.02, 0.02), (0.05, 0.05), (0.05, 0.05), (0.1, 0.1)]

# For every reference: maximum intensity inside the window and the d-spacing where it occurs
intensity_dict = {}
position_dict = {}
# strict=True: a reference without a window (or vice versa) is an error, not silently skipped
for d_spacing_value, (half_left, half_right) in zip(opxrd_ref_d_spacing, peak_width, strict=True):
    in_window = (selected_spectrum.index >= d_spacing_value - half_left) & (selected_spectrum.index <= d_spacing_value + half_right)
    temp = selected_spectrum.loc[in_window]
    intensity_temp = temp.max(axis=0, numeric_only=True)
    position_temp = temp.idxmax(axis=0, numeric_only=True)
    intensity_dict[d_spacing_value] = intensity_temp
    position_dict[d_spacing_value] = position_temp

data_intensity = pd.DataFrame(intensity_dict)
data_position = pd.DataFrame(position_dict)

# Write both tables into one workbook, one sheet each
output_file = path_out.joinpath("XRD_ICMAB_MnO2_PeakIntensity_MnO2.xlsx")
with pd.ExcelWriter(output_file, engine="openpyxl", mode="w") as writer:
    data_intensity.to_excel(writer, sheet_name="Intensity", index=True)
    data_position.to_excel(writer, sheet_name="Position", index=True)

### ZSH

In [None]:
# Spectra (columns) evaluated for the ZSH phase
index_labels = [17, 26, 31, 37, 40, 45, 54]
selected_spectrum = selected_spectrum.loc[:, selected_spectrum.columns.isin(index_labels)]

# Reference d-spacings; every search window is +/-0.1 around its reference.
# The list is called `peak_width` (the name the MSPD cells use) on purpose: calling it
# `peak_widths` would overwrite the scipy.signal.peak_widths function imported by the
# peak-detection cells and break them on re-run.
opxrd_ref_d_spacing = [2.212224, 2.282709, 2.31839, 2.46728, 2.530539, 2.558665, 2.663994, 2.705785, 2.73078, 3.190244, 3.235243, 3.40052, 3.510488, 3.634241, 4.162956, 5.454092]
peak_width = [(0.1, 0.1)] * len(opxrd_ref_d_spacing)

# For every reference: maximum intensity inside the window and the d-spacing where it occurs
intensity_dict = {}
position_dict = {}
# strict=True: a reference without a window (or vice versa) is an error, not silently skipped
for d_spacing_value, (half_left, half_right) in zip(opxrd_ref_d_spacing, peak_width, strict=True):
    in_window = (selected_spectrum.index >= d_spacing_value - half_left) & (selected_spectrum.index <= d_spacing_value + half_right)
    temp = selected_spectrum.loc[in_window]
    intensity_temp = temp.max(axis=0, numeric_only=True)
    position_temp = temp.idxmax(axis=0, numeric_only=True)
    intensity_dict[d_spacing_value] = intensity_temp
    position_dict[d_spacing_value] = position_temp

data_intensity = pd.DataFrame(intensity_dict)
data_position = pd.DataFrame(position_dict)

# Write both tables into one workbook, one sheet each
output_file = path_out.joinpath("XRD_ICMAB_MnO2_PeakIntensity_ZSH.xlsx")
with pd.ExcelWriter(output_file, engine="openpyxl", mode="w") as writer:
    data_intensity.to_excel(writer, sheet_name="Intensity", index=True)
    data_position.to_excel(writer, sheet_name="Position", index=True)