# XRD Operando

In [None]:
# -*- coding: utf-8 -*-
import sys
from pathlib import Path

import matplotlib as mpl
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
from matplotlib import dates, gridspec, ticker, transforms
from matplotlib.colorbar import Colorbar
from matplotlib.colors import LinearSegmentedColormap, ListedColormap

In [None]:
# Make the folder holding the local `colors` helper importable before importing from it
sys.path.append(r"D:\CHENG\OneDrive - UAB\ICMAB-Python\Figure")
from colors import tol_cmap, tol_cset  # type: ignore

# Baseline plotting style
plt.style.use(r"D:\CHENG\OneDrive - UAB\ICMAB-Python\Figure\liuchzzyy.mplstyle")
# print(plt.style.available)  # noqa: ERA001

# xarray options (alternatives tried: viridis, gray)
xr.set_options(cmap_sequential="viridis", cmap_divergent="viridis", display_width=150)

# Line colours: the "vibrant" set, or a hard-coded palette when tol_cset returns None
colors = tol_cset("vibrant")
colors = (
    ["#b0a3d1", "#8bd0d5", "#a8e0ee", "#c5e1a3", "#ffe48b", "#f5a37d", "#e88db1"]
    if colors is None
    else list(colors)
)

# Register the extra colormaps unless a colormap with that name already exists
# (fallback option: plasma)
for cmap_name in ("sunset", "rainbow_PuRd"):
    if cmap_name in plt.colormaps():
        continue
    cmap = tol_cmap(cmap_name)
    if isinstance(cmap, LinearSegmentedColormap):
        plt.colormaps.register(cmap)

# Output folder for figures and intermediate CSV files
path_out = Path(r"C:\Users\chengliu\Desktop\Figure")

# Use Arial for every mathtext font family
mpl.rcParams.update(
    {
        "mathtext.fontset": "custom",
        "mathtext.rm": "Arial",
        "mathtext.it": "Arial:italic",
        "mathtext.bf": "Arial:bold",
        "mathtext.sf": "Arial",
        "mathtext.tt": "Arial",
        "mathtext.cal": "Arial",
        "mathtext.default": "regular",
    }
)

## ICMAB 2022

In [None]:
# Collect every .xy pattern file of the ICMAB 2022 operando run
xy_dir = Path(r"D:\CHENG\OneDrive - UAB\ICMAB-Data\Zn-Mn\PaperDos\XRD\Operando\αMnO2\2022-ICMAB\Data\Coin1\Data\Operando")
filelist = list(xy_dir.glob("*.xy"))
filelist  # bare expression: shown as the cell output

In [None]:

# Build one wide table: a shared "2THETA" column plus one intensity column per file.
# The angle column has to be called "2THETA": it is the merge key used below and the
# column name the plotting cell reads.
spectrum_all = None  # reset explicitly so a table left over from an earlier run is never merged into
for file in filelist:
    # Column label: second "_"-separated field of the file stem, without anything after a "."
    label = file.stem.split("_")[1].split(".")[0]
    df = pd.read_csv(
        file,
        index_col=None,
        header=None,
        sep="\t",
        names=["2THETA", label],
        dtype={"2THETA": float, label: float},
        engine="python",
        skiprows=0,
    )
    # Outer merge keeps angles that are absent from some of the files
    spectrum_all = df if spectrum_all is None else pd.merge(spectrum_all, df, how="outer", on="2THETA")

In [None]:
%matplotlib inline
Letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# Two-panel figure: panel A (left, 1/3 of the width) and panel B (right, 2/3 of the width).
# NOTE(review): `echem` and `echem_spectrum` are not created in this section; they are
# presumably left over from cells run earlier in the session - confirm.
fig = plt.figure(figsize=(7.0, 5.0))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 2], height_ratios=None, wspace=0, hspace=0, figure=fig)

# Panel A: voltage (bottom x-axis) and current (top x-axis) against time on the y-axis
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(3.0)

ax.plot(echem["Voltage/V"], echem["time/s"], ls="-", lw=1.0, c=colors[0], label=r"Voltage")
ax.scatter(echem_spectrum["Voltage/V"], echem_spectrum["time/s"], c=colors[1], marker="o", s=15, label=None)

# Star and letter the rows whose Range_Index is listed in `values`
values = [1, 18, 34, 54, 72, 89, 109, 126, 142, 152]
row_index = echem_spectrum.index[echem_spectrum["Range_Index"].isin(values)].tolist()
for i, idx in enumerate(row_index):
    # idx is an index label used positionally - assumes echem_spectrum has a 0..n-1 index
    ax.scatter(echem_spectrum["Voltage/V"].iloc[idx], echem_spectrum["time/s"].iloc[idx], c='k', marker="*", s=50)
    ax.text(
        echem_spectrum["Voltage/V"].iloc[idx]-0.1,
        echem_spectrum["time/s"].iloc[idx]-pd.Timedelta(minutes=10),
        f'{Letters[i]}',
        fontsize=12,
        verticalalignment="bottom",
        horizontalalignment="center",
        rotation=90,
    )

ax.set_xlabel(
    r"Voltage (V vs. Zn/Zn$\mathrm{^{2\!+}\!)}$",
    fontsize=11,
)
ax.set_xlim(0.75, 1.95)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=0.4, offset=-0.05))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.2, offset=-0.05))

# Start the time axis 20 minutes before the first data point
ax.set_ylim(echem["time/s"].min() - pd.Timedelta(minutes=20), echem["time/s"].max())
ax.set_ylabel(r"Duration Time (hour)", fontsize=11, labelpad=5)
ax.yaxis.set_major_formatter(mdates.DateFormatter("%b-%d %H:%M"))  # date format of the tick labels
ax.yaxis.set_major_locator(mdates.HourLocator(byhour=range(0, 24, 2)))
ax.yaxis.set_minor_locator(mdates.HourLocator(interval=1))

plt.xticks(rotation=0, horizontalalignment="center")

# Second x-axis on top of panel A, sharing the time axis, for the current
ax2 = ax.twiny()
ax2.set_position((0, 0, 1, 1))
ax2.set_box_aspect(3.0)

ax2.plot(echem["<I>/mA"], echem["time/s"], ls="--", lw=1.0, c=colors[3], label=r"Current")

ax2.set_xlabel(
    r"Current (mA)",
    fontsize=11,
)
ax2.set_xlim(-0.15, 0.15)
ax2.xaxis.set_major_locator(ticker.MultipleLocator(base=0.06, offset=0))
ax2.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.03, offset=0))
ax2.tick_params(axis="both", which="both", labelsize=9, right=True, labelright=True)

# Panel B: all patterns stacked with a constant vertical offset per column
subfig = fig.add_subfigure(gs[0, 1])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(1.0)

# Row whose 2THETA is closest to target_theta; identical for every curve, so look it up once
target_theta = 24
idx = (spectrum_all["2THETA"] - target_theta).abs().idxmin()

for i, col in enumerate(spectrum_all.columns[1:], start=0):
    # A curve is "selected" when its column label (not its position) is listed in `values`
    selected = float(col) in values
    offset = 2000 * i
    ax.plot(
        spectrum_all.iloc[:, 0],
        spectrum_all[col] + offset,
        ls="-", lw=1.0,
        c=colors[3] if selected else 'grey',
        label=None, alpha=1.0 if selected else 0.4, zorder=5 if selected else 0,
    )
    if selected:
        # Letter the highlighted curve itself. The letter is chosen by the position of the
        # column label in `values`, so highlight and letter always refer to the same curve.
        ax.text(
            target_theta + 0.3,
            spectrum_all.loc[idx, col] + offset,
            f'{Letters[values.index(float(col))]}',
            va="center", ha="left", fontsize=10,
        )

ax.set_ylabel(
    r"Intensity (arb.u.)",
    fontsize=11,
)
ax.set_ylim(6000, 350000)

ax.set_xlabel(
    r"2Theta ($ \mathrm{\theta}$)",
    fontsize=11,
)
ax.set_xlim(4, 24)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=4, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=2, offset=0))
ax.tick_params(axis="both", which="both", labelsize=9, top=False, right=False, left=False, labelleft=False)


# NOTE(review): the other plotting cells of this notebook save to the same file name,
# so each of them overwrites this figure - confirm that is intended.
plt.savefig(
    Path.joinpath(path_out, r"opXRD_MSDP_XRD_01_300.tif"),
    pad_inches=0.01,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# Set after savefig, so this only affects what plt.show() displays
plt.gcf().set_facecolor('white')
plt.show()

## IS11_A

### Echem data

In [None]:
# Recursively gather every .txt file below the IS11_A Echem folder
echem_dir = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS11_A\Echem")
path_filelist = list(echem_dir.glob(r"**\*.txt"))
path_filelist  # bare expression: shown as the cell output

In [None]:
# Read every electrochemistry export into a DataFrame and collect them in `echem`
echem = []
for path_file in path_filelist:
    # Each file states its header length on a line starting with "Nb header lines".
    # Reset per file so a value found in a previous file is never reused silently.
    line_skip = None
    with open(path_file, "r", encoding="latin_1") as file:
        for line in file:
            if line.startswith("Nb header lines"):
                line_skip = int(line.split(":")[1].strip())
                break  # stop scanning as soon as the line is found
    if line_skip is None:
        raise ValueError(f"'Nb header lines' not found in {path_file}")

    # Skip the header so that its last line becomes the column header row
    df = pd.read_csv(
        path_file, sep="\t", comment="#", skiprows=line_skip - 1, encoding="latin_1", index_col=None, decimal="."
    ).dropna(axis=1, how="all")
    # Convert column types; values that cannot be parsed become NaN / NaT
    df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]] = df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]].apply(
            pd.to_numeric, errors="coerce"
        )
    df["time/s"] = df["time/s"].apply(pd.to_datetime, format="mixed", errors="coerce")
    df["cycle number"] = df["cycle number"].astype(float).astype(np.int16)
    # Cell voltage = working-electrode potential minus counter-electrode potential
    df["Voltage/V"] = df["Ewe/V"] - df["Ece/V"]
    echem.append(df)

In [None]:
# Stack all electrochemistry tables into one, ordered by time, and save it as CSV.
# NOTE(review): the CSV is written to path_out, while the merge cell further down reads
# echem_all.csv from the Echem data folder - presumably it is moved by hand; confirm.
echem_all = pd.concat(echem, axis=0, ignore_index=True)
echem_all = echem_all.sort_values(by="time/s").reset_index(drop=True)
echem_all.to_csv(path_out / r"echem_all.csv", index=False, header=True)

### XRD data

#### 谱线上的时间

In [None]:
# Read the timestamp and wavelength stored in the header of every .xye file
filelist = list(
    Path(
        r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS11_A\Data\IS11_A"
    ).glob(r"*.xye")
)

range_index, wave_length, time_processed = [], [], []

for path_file in filelist:
    # Exactly one entry per file is appended to each list so the three lists stay aligned
    # even when a header line is missing (None) or repeated (the last occurrence wins).
    wave_value = None
    recored_time = None
    with open(path_file, "r") as file:
        for line in file:
            if line.startswith("# Wave"):
                wave_value = float(line.split()[3])
            elif line.startswith('# Date'):
                recored_time = str(line.split()[3])

    # Pattern number: last five characters of the final "_"-separated field of the stem
    file_id = path_file.stem.split("_")[-1].split(".")[0][-5:]
    range_index.append(file_id)
    wave_length.append(wave_value)
    time_processed.append(recored_time)

df = pd.DataFrame({
    "Range_Index": range_index,
    'Time': time_processed,
    "Wave_Length": wave_length,
})
df['Time'] = pd.to_datetime(df['Time'], format=r'%Y-%m-%d_%H:%M:%S')
df["Range_Index"] = pd.to_numeric(df["Range_Index"])
df["Wave_Length"] = pd.to_numeric(df["Wave_Length"])
# The "Time" column is written under the header "time/s" (same name as the
# electrochemistry time column, which is the key the later merge uses)
df.to_csv(
    path_out.joinpath("Time_index_spectrum.csv"),
    sep=",",
    header=["Range_Index", "Wave_Length", "time/s"],
    columns=["Range_Index", "Wave_Length", "Time"],
    index=False,
)

#### 读取 XRD 数据

In [None]:
def extract_wave_and_data(file_path):
    """Read one .xye file and return its header values and numeric rows.

    Lines starting with "# Wave" and "# Date" supply the wavelength and the
    timestamp (the fourth whitespace-separated token of the line); if such a
    line occurs more than once, the last one wins. Every other line that does
    not start with "#" is parsed as a row of floats. Blank or whitespace-only
    lines are skipped so that they do not yield empty rows.

    Args:
        file_path: Path of the file to read.

    Returns:
        Tuple ``(recored_time, wave_value, data)``: the timestamp string (or
        None), the wavelength as a float (or None), and a list of rows, each a
        list of floats.

    Raises:
        ValueError: If a data token or the wavelength is not a valid float.
        IndexError: If a "# Wave" / "# Date" line has fewer than four tokens.
    """
    wave_value = None
    recored_time = None
    data = []

    with open(file_path, "r") as file:
        for line in file:
            if line.startswith("# Wave"):
                wave_value = float(line.split()[3])
            elif line.startswith("# Date"):
                recored_time = line.split()[3]
            elif not line.startswith("#"):
                tokens = line.split()
                if tokens:  # ignore blank lines instead of appending []
                    data.append([float(x) for x in tokens])

    return recored_time, wave_value, data


def process_files(directory):
    """Flatten every ``*.xye`` file in *directory* into one list of rows.

    Each output row is ``[file_id, recored_time, wave_value, *values]``, where
    ``file_id`` is the float made from the last five characters of the final
    "_"-separated field of the file stem and ``values`` is one data row of the
    file as returned by ``extract_wave_and_data``.
    """
    results = []

    for file_path in Path(directory).glob("*.xye"):
        recored_time, wave_value, data = extract_wave_and_data(file_path)
        file_id = float(file_path.stem.split("_")[-1][-5:])
        results.extend([file_id, recored_time, wave_value, *row] for row in data)

    return results

# Load all XRD patterns of IS11_A as long-format rows
file_list = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS11_A\Data\IS11_A")
spectrum_all = pd.DataFrame(
    process_files(file_list),
    columns=["Index", "recored_time", "wave_value", "2THETA", "Intensity1", "Intensity2"],
)

# Sum of the two numeric columns that follow 2THETA.
# NOTE(review): in the usual .xye layout the third column is an uncertainty, not a
# second intensity - confirm that adding the two columns is intended.
spectrum_all["Cnt2_D1"] = spectrum_all["Intensity1"] + spectrum_all["Intensity2"]
spectrum_all = spectrum_all[["Index", "2THETA", "Cnt2_D1"]]

# Long -> wide: one column per pattern (named by its Index), aligned on 2THETA
per_pattern = [
    group.set_index("2THETA")["Cnt2_D1"].rename(pattern_id)
    for pattern_id, group in spectrum_all.groupby("Index")
]
spectrum_all = pd.concat(per_pattern, axis=1).reset_index(drop=False)

spectrum_all.to_csv(path_out / "spectrum_all.csv", sep=",", header=True, index=False)

#### 合并图

In [None]:
# Reload the electrochemistry table and the pattern timestamps from the data folders.
# NOTE(review): both CSV files are written to path_out by the cells above but read from
# the data folders here - presumably they are moved by hand; confirm.
echem_file = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS11_A\Echem")
path_file = Path(r'C:\Users\chengliu\Desktop\XRDdATA\CLData\IS11_A\Data')

# Column 1 is parsed as dates; columns that are completely empty are dropped
echem = pd.read_csv(echem_file / r'echem_all.csv', sep=r",", comment="#", parse_dates=[1], index_col=None)
echem = echem.dropna(axis=1, how="all")

# echem.info()

# Acquisition time of every pattern (column 2 is parsed as dates)
time_spectrum = pd.read_csv(
    path_file / r"Time_index_spectrum.csv", sep=",", index_col=None, header=0, comment="#", parse_dates=[2]
)
# time_spectrum.info()

# Both tables are sorted on the merge key before the as-of merge
df_echem = echem.sort_values(by="time/s")
df_spectrum = time_spectrum.sort_values(by="time/s")

# For each electrochemistry row take the pattern closest in time, at most 5 s away
nearest = pd.merge_asof(
    df_echem,
    df_spectrum,
    on="time/s",
    direction="nearest",
    tolerance=pd.Timedelta("5s"),
)
# Keep matched rows only, one (the first) per pattern, and renumber the rows from 0
echem_spectrum = (
    nearest.dropna(subset=['Range_Index'])
    .drop_duplicates(subset=['Range_Index'], keep='first')
    .reset_index(drop=False)
)

In [None]:
# Reload the wide XRD table, order it by angle and turn 2THETA back into the first column
spectrum_all = (
    pd.read_csv(path_file / "spectrum_all.csv", index_col=0, header=0, sep=",")
    .sort_values(by="2THETA")
    .reset_index(drop=False)
)

In [None]:
%matplotlib inline
Letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# Two-panel figure: panel A (left, 1/3 of the width) and panel B (right, 2/3 of the width)
fig = plt.figure(figsize=(7.0, 5.0))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 2], height_ratios=None, wspace=0, hspace=0, figure=fig)

# Panel A: voltage (bottom x-axis) and current (top x-axis) against time on the y-axis
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(3.0)

ax.plot(echem["Voltage/V"], echem["time/s"], ls="-", lw=1.0, c=colors[0], label=r"Voltage")
# One dot per electrochemistry row that was matched to an XRD pattern
ax.scatter(echem_spectrum["Voltage/V"], echem_spectrum["time/s"], c=colors[1], marker="o", s=15, label=None)

# Star and letter the rows whose Range_Index is listed in `values`
values = [1, 18, 34, 54, 72, 89, 109, 126, 142, 152]
row_index = echem_spectrum.index[echem_spectrum["Range_Index"].isin(values)].tolist()
for i, idx in enumerate(row_index):
    # idx is an index label used positionally; valid because echem_spectrum was re-indexed from 0
    ax.scatter(echem_spectrum["Voltage/V"].iloc[idx], echem_spectrum["time/s"].iloc[idx], c='k', marker="*", s=50)
    ax.text(
        echem_spectrum["Voltage/V"].iloc[idx]-0.1,
        echem_spectrum["time/s"].iloc[idx]-pd.Timedelta(minutes=10),
        f'{Letters[i]}',
        fontsize=12,
        verticalalignment="bottom",
        horizontalalignment="center",
        rotation=90,
    )

ax.set_xlabel(
    r"Voltage (V vs. Zn/Zn$\mathrm{^{2\!+}\!)}$",
    fontsize=11,
)
ax.set_xlim(0.75, 1.95)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=0.4, offset=-0.05))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.2, offset=-0.05))

# Start the time axis 20 minutes before the first data point
ax.set_ylim(echem["time/s"].min() - pd.Timedelta(minutes=20), echem["time/s"].max())
ax.set_ylabel(r"Duration Time (hour)", fontsize=11, labelpad=5)
ax.yaxis.set_major_formatter(mdates.DateFormatter("%b-%d %H:%M"))  # date format of the tick labels
ax.yaxis.set_major_locator(mdates.HourLocator(byhour=range(0, 24, 2)))
ax.yaxis.set_minor_locator(mdates.HourLocator(interval=1))

plt.xticks(rotation=0, horizontalalignment="center")

# Second x-axis on top of panel A, sharing the time axis, for the current
ax2 = ax.twiny()
ax2.set_position((0, 0, 1, 1))
ax2.set_box_aspect(3.0)

ax2.plot(echem["<I>/mA"], echem["time/s"], ls="--", lw=1.0, c=colors[3], label=r"Current")

ax2.set_xlabel(
    r"Current (mA)",
    fontsize=11,
)
ax2.set_xlim(-0.15, 0.15)
ax2.xaxis.set_major_locator(ticker.MultipleLocator(base=0.06, offset=0))
ax2.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.03, offset=0))
ax2.tick_params(axis="both", which="both", labelsize=9, right=True, labelright=True)

# Panel B: all patterns stacked with a constant vertical offset per column
subfig = fig.add_subfigure(gs[0, 1])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(1.0)

# Row whose 2THETA is closest to target_theta; identical for every curve, so look it up once
target_theta = 24
idx = (spectrum_all["2THETA"] - target_theta).abs().idxmin()

for i, col in enumerate(spectrum_all.columns[1:], start=0):
    # A curve is "selected" when its column label (not its position) is listed in `values`
    selected = float(col) in values
    offset = 2000 * i
    ax.plot(
        spectrum_all.iloc[:, 0],
        spectrum_all[col] + offset,
        ls="-", lw=1.0,
        c=colors[3] if selected else 'grey',
        label=None, alpha=1.0 if selected else 0.4, zorder=5 if selected else 0,
    )
    if selected:
        # Letter the highlighted curve itself. The letter is chosen by the position of the
        # column label in `values`, so highlight and letter always refer to the same curve.
        ax.text(
            target_theta + 0.3,
            spectrum_all.loc[idx, col] + offset,
            f'{Letters[values.index(float(col))]}',
            va="center", ha="left", fontsize=10,
        )

ax.set_ylabel(
    r"Intensity (arb.u.)",
    fontsize=11,
)
ax.set_ylim(6000, 350000)

ax.set_xlabel(
    r"2Theta ($ \mathrm{\theta}$)",
    fontsize=11,
)
ax.set_xlim(4, 24)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=4, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=2, offset=0))
ax.tick_params(axis="both", which="both", labelsize=9, top=False, right=False, left=False, labelleft=False)


# NOTE(review): the other plotting cells of this notebook save to the same file name,
# so each of them overwrites this figure - confirm that is intended.
plt.savefig(
    Path.joinpath(path_out, r"opXRD_MSDP_XRD_01_300.tif"),
    pad_inches=0.01,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# Set after savefig, so this only affects what plt.show() displays
plt.gcf().set_facecolor('white')
plt.show()

## IS16_C

### Echem data

In [None]:
# Recursively gather every .txt file below the IS16_C Echem folder
echem_dir = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS16_C\Echem")
path_filelist = list(echem_dir.glob(r"**\*.txt"))
path_filelist  # bare expression: shown as the cell output

In [None]:
# Read every electrochemistry export into a DataFrame and collect them in `echem`
echem = []
for path_file in path_filelist:
    # Each file states its header length on a line starting with "Nb header lines".
    # Reset per file so a value found in a previous file is never reused silently.
    line_skip = None
    with open(path_file, "r", encoding="latin_1") as file:
        for line in file:
            if line.startswith("Nb header lines"):
                line_skip = int(line.split(":")[1].strip())
                break  # stop scanning as soon as the line is found
    if line_skip is None:
        raise ValueError(f"'Nb header lines' not found in {path_file}")

    # Skip the header so that its last line becomes the column header row
    df = pd.read_csv(
        path_file, sep="\t", comment="#", skiprows=line_skip - 1, encoding="latin_1", index_col=None, decimal="."
    ).dropna(axis=1, how="all")
    # Convert column types; values that cannot be parsed become NaN / NaT
    df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]] = df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]].apply(
            pd.to_numeric, errors="coerce"
        )
    df["time/s"] = df["time/s"].apply(pd.to_datetime, format="mixed", errors="coerce")
    df["cycle number"] = df["cycle number"].astype(float).astype(np.int16)
    # Cell voltage = working-electrode potential minus counter-electrode potential
    df["Voltage/V"] = df["Ewe/V"] - df["Ece/V"]
    echem.append(df)

In [None]:
# Stack all electrochemistry tables into one, ordered by time, and save it as CSV.
# NOTE(review): the CSV is written to path_out, while the merge cell further down reads
# echem_all.csv from the Echem data folder - presumably it is moved by hand; confirm.
echem_all = pd.concat(echem, axis=0, ignore_index=True)
echem_all = echem_all.sort_values(by="time/s").reset_index(drop=True)
echem_all.to_csv(path_out / r"echem_all.csv", index=False, header=True)

### XRD data

#### 谱线上的时间

In [None]:
# Read the timestamp and wavelength stored in the header of every .xye file
filelist = list(
    Path(
        r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS16_C\Data\IS16_C"
    ).glob(r"*.xye")
)

range_index, wave_length, time_processed = [], [], []

for path_file in filelist:
    # Exactly one entry per file is appended to each list so the three lists stay aligned
    # even when a header line is missing (None) or repeated (the last occurrence wins).
    wave_value = None
    recored_time = None
    with open(path_file, "r") as file:
        for line in file:
            if line.startswith("# Wave"):
                wave_value = float(line.split()[3])
            elif line.startswith('# Date'):
                recored_time = str(line.split()[3])

    # Pattern number: last five characters of the final "_"-separated field of the stem
    file_id = path_file.stem.split("_")[-1].split(".")[0][-5:]
    range_index.append(file_id)
    wave_length.append(wave_value)
    time_processed.append(recored_time)

df = pd.DataFrame({
    "Range_Index": range_index,
    'Time': time_processed,
    "Wave_Length": wave_length,
})
df['Time'] = pd.to_datetime(df['Time'], format=r'%Y-%m-%d_%H:%M:%S')
df["Range_Index"] = pd.to_numeric(df["Range_Index"])
df["Wave_Length"] = pd.to_numeric(df["Wave_Length"])
# The "Time" column is written under the header "time/s" (same name as the
# electrochemistry time column, which is the key the later merge uses)
df.to_csv(
    path_out.joinpath("Time_index_spectrum.csv"),
    sep=",",
    header=["Range_Index", "Wave_Length", "time/s"],
    columns=["Range_Index", "Wave_Length", "Time"],
    index=False,
)

#### 读取 XRD 数据

In [None]:
def extract_wave_and_data(file_path):
    """Read one .xye file and return its header values and numeric rows.

    Lines starting with "# Wave" and "# Date" supply the wavelength and the
    timestamp (the fourth whitespace-separated token of the line); if such a
    line occurs more than once, the last one wins. Every other line that does
    not start with "#" is parsed as a row of floats. Blank or whitespace-only
    lines are skipped so that they do not yield empty rows.

    Args:
        file_path: Path of the file to read.

    Returns:
        Tuple ``(recored_time, wave_value, data)``: the timestamp string (or
        None), the wavelength as a float (or None), and a list of rows, each a
        list of floats.

    Raises:
        ValueError: If a data token or the wavelength is not a valid float.
        IndexError: If a "# Wave" / "# Date" line has fewer than four tokens.
    """
    wave_value = None
    recored_time = None
    data = []

    with open(file_path, "r") as file:
        for line in file:
            if line.startswith("# Wave"):
                wave_value = float(line.split()[3])
            elif line.startswith("# Date"):
                recored_time = line.split()[3]
            elif not line.startswith("#"):
                tokens = line.split()
                if tokens:  # ignore blank lines instead of appending []
                    data.append([float(x) for x in tokens])

    return recored_time, wave_value, data


def process_files(directory):
    """Flatten every ``*.xye`` file in *directory* into one list of rows.

    Each output row is ``[file_id, recored_time, wave_value, *values]``, where
    ``file_id`` is the float made from the last five characters of the final
    "_"-separated field of the file stem and ``values`` is one data row of the
    file as returned by ``extract_wave_and_data``.
    """
    results = []

    for file_path in Path(directory).glob("*.xye"):
        recored_time, wave_value, data = extract_wave_and_data(file_path)
        file_id = float(file_path.stem.split("_")[-1][-5:])
        results.extend([file_id, recored_time, wave_value, *row] for row in data)

    return results

# Load all XRD patterns of IS16_C as long-format rows
file_list = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS16_C\Data\IS16_C")
spectrum_all = pd.DataFrame(
    process_files(file_list),
    columns=["Index", "recored_time", "wave_value", "2THETA", "Intensity1", "Intensity2"],
)

# Sum of the two numeric columns that follow 2THETA.
# NOTE(review): in the usual .xye layout the third column is an uncertainty, not a
# second intensity - confirm that adding the two columns is intended.
spectrum_all["Cnt2_D1"] = spectrum_all["Intensity1"] + spectrum_all["Intensity2"]
spectrum_all = spectrum_all[["Index", "2THETA", "Cnt2_D1"]]

# Long -> wide: one column per pattern (named by its Index), aligned on 2THETA
per_pattern = [
    group.set_index("2THETA")["Cnt2_D1"].rename(pattern_id)
    for pattern_id, group in spectrum_all.groupby("Index")
]
spectrum_all = pd.concat(per_pattern, axis=1).reset_index(drop=False)

spectrum_all.to_csv(path_out / "spectrum_all.csv", sep=",", header=True, index=False)

#### 合并图

In [None]:
# Reload the electrochemistry table and the pattern timestamps from the data folders.
# NOTE(review): both CSV files are written to path_out by the cells above but read from
# the data folders here - presumably they are moved by hand; confirm.
echem_file = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS16_C\Echem")
path_file = Path(r'C:\Users\chengliu\Desktop\XRDdATA\CLData\IS16_C\Data')

# Column 1 is parsed as dates; columns that are completely empty are dropped
echem = pd.read_csv(echem_file / r'echem_all.csv', sep=r",", comment="#", parse_dates=[1], index_col=None)
echem = echem.dropna(axis=1, how="all")

# echem.info()

# Acquisition time of every pattern (column 2 is parsed as dates)
time_spectrum = pd.read_csv(
    path_file / r"Time_index_spectrum.csv", sep=",", index_col=None, header=0, comment="#", parse_dates=[2]
)
# time_spectrum.info()

# Both tables are sorted on the merge key before the as-of merge
df_echem = echem.sort_values(by="time/s")
df_spectrum = time_spectrum.sort_values(by="time/s")

# For each electrochemistry row take the pattern closest in time, at most 5 s away
nearest = pd.merge_asof(
    df_echem,
    df_spectrum,
    on="time/s",
    direction="nearest",
    tolerance=pd.Timedelta("5s"),
)
# Keep matched rows only, one (the first) per pattern, and renumber the rows from 0
echem_spectrum = (
    nearest.dropna(subset=['Range_Index'])
    .drop_duplicates(subset=['Range_Index'], keep='first')
    .reset_index(drop=False)
)

In [None]:
# Reload the wide XRD table, order it by angle and turn 2THETA back into the first column
spectrum_all = (
    pd.read_csv(path_file / "spectrum_all.csv", index_col=0, header=0, sep=",")
    .sort_values(by="2THETA")
    .reset_index(drop=False)
)

In [None]:
%matplotlib inline
Letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# Two-panel figure: panel A (left, 1/3 of the width) and panel B (right, 2/3 of the width)
fig = plt.figure(figsize=(7.0, 5.0))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 2], height_ratios=None, wspace=0, hspace=0, figure=fig)

# Panel A: voltage (bottom x-axis) and current (top x-axis) against time on the y-axis
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(3.0)

ax.plot(echem["Voltage/V"], echem["time/s"], ls="-", lw=1.0, c=colors[0], label=r"Voltage")
# One dot per electrochemistry row that was matched to an XRD pattern
ax.scatter(echem_spectrum["Voltage/V"], echem_spectrum["time/s"], c=colors[1], marker="o", s=15, label=None)

# Star and letter the rows whose Range_Index is listed in `values`
values = [1, 18, 34, 54, 72, 89, 109, 126, 142, 152]
row_index = echem_spectrum.index[echem_spectrum["Range_Index"].isin(values)].tolist()
for i, idx in enumerate(row_index):
    # idx is an index label used positionally; valid because echem_spectrum was re-indexed from 0
    ax.scatter(echem_spectrum["Voltage/V"].iloc[idx], echem_spectrum["time/s"].iloc[idx], c='k', marker="*", s=50)
    ax.text(
        echem_spectrum["Voltage/V"].iloc[idx]-0.1,
        echem_spectrum["time/s"].iloc[idx]-pd.Timedelta(minutes=10),
        f'{Letters[i]}',
        fontsize=12,
        verticalalignment="bottom",
        horizontalalignment="center",
        rotation=90,
    )

ax.set_xlabel(
    r"Voltage (V vs. Zn/Zn$\mathrm{^{2\!+}\!)}$",
    fontsize=11,
)
ax.set_xlim(0.75, 1.95)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=0.4, offset=-0.05))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.2, offset=-0.05))

# Start the time axis 20 minutes before the first data point
ax.set_ylim(echem["time/s"].min() - pd.Timedelta(minutes=20), echem["time/s"].max())
ax.set_ylabel(r"Duration Time (hour)", fontsize=11, labelpad=5)
ax.yaxis.set_major_formatter(mdates.DateFormatter("%b-%d %H:%M"))  # date format of the tick labels
ax.yaxis.set_major_locator(mdates.HourLocator(byhour=range(0, 24, 2)))
ax.yaxis.set_minor_locator(mdates.HourLocator(interval=1))

plt.xticks(rotation=0, horizontalalignment="center")

# Second x-axis on top of panel A, sharing the time axis, for the current
ax2 = ax.twiny()
ax2.set_position((0, 0, 1, 1))
ax2.set_box_aspect(3.0)

ax2.plot(echem["<I>/mA"], echem["time/s"], ls="--", lw=1.0, c=colors[3], label=r"Current")

ax2.set_xlabel(
    r"Current (mA)",
    fontsize=11,
)
ax2.set_xlim(-0.15, 0.15)
ax2.xaxis.set_major_locator(ticker.MultipleLocator(base=0.06, offset=0))
ax2.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.03, offset=0))
ax2.tick_params(axis="both", which="both", labelsize=9, right=True, labelright=True)

# Panel B: all patterns stacked with a constant vertical offset per column
subfig = fig.add_subfigure(gs[0, 1])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(1.0)

# Row whose 2THETA is closest to target_theta; identical for every curve, so look it up once
target_theta = 24
idx = (spectrum_all["2THETA"] - target_theta).abs().idxmin()

for i, col in enumerate(spectrum_all.columns[1:], start=0):
    # A curve is "selected" when its column label (not its position) is listed in `values`
    selected = float(col) in values
    offset = 2000 * i
    ax.plot(
        spectrum_all.iloc[:, 0],
        spectrum_all[col] + offset,
        ls="-", lw=1.0,
        c=colors[3] if selected else 'grey',
        label=None, alpha=1.0 if selected else 0.4, zorder=5 if selected else 0,
    )
    if selected:
        # Letter the highlighted curve itself. The letter is chosen by the position of the
        # column label in `values`, so highlight and letter always refer to the same curve.
        ax.text(
            target_theta + 0.3,
            spectrum_all.loc[idx, col] + offset,
            f'{Letters[values.index(float(col))]}',
            va="center", ha="left", fontsize=10,
        )

ax.set_ylabel(
    r"Intensity (arb.u.)",
    fontsize=11,
)
ax.set_ylim(6000, 350000)

ax.set_xlabel(
    r"2Theta ($ \mathrm{\theta}$)",
    fontsize=11,
)
ax.set_xlim(4, 24)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=4, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=2, offset=0))
ax.tick_params(axis="both", which="both", labelsize=9, top=False, right=False, left=False, labelleft=False)


# NOTE(review): the other plotting cells of this notebook save to the same file name,
# so each of them overwrites this figure - confirm that is intended.
plt.savefig(
    Path.joinpath(path_out, r"opXRD_MSDP_XRD_01_300.tif"),
    pad_inches=0.01,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# Set after savefig, so this only affects what plt.show() displays
plt.gcf().set_facecolor('white')
plt.show()

## IS17_D

### Echem data

In [None]:
# Recursively gather every .txt file below the IS17_D Echem folder
echem_dir = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS17_D\Echem")
path_filelist = list(echem_dir.glob(r"**\*.txt"))
path_filelist  # bare expression: shown as the cell output

In [None]:
# Read every electrochemistry export into a DataFrame and collect them in `echem`
echem = []
for path_file in path_filelist:
    # Each file states its header length on a line starting with "Nb header lines".
    # Reset per file so a value found in a previous file is never reused silently.
    line_skip = None
    with open(path_file, "r", encoding="latin_1") as file:
        for line in file:
            if line.startswith("Nb header lines"):
                line_skip = int(line.split(":")[1].strip())
                break  # stop scanning as soon as the line is found
    if line_skip is None:
        raise ValueError(f"'Nb header lines' not found in {path_file}")

    # Skip the header so that its last line becomes the column header row
    df = pd.read_csv(
        path_file, sep="\t", comment="#", skiprows=line_skip - 1, encoding="latin_1", index_col=None, decimal="."
    ).dropna(axis=1, how="all")
    # Convert column types; values that cannot be parsed become NaN / NaT
    df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]] = df[["Ewe/V", "Ece/V", "<I>/mA", "Capacity/mA.h"]].apply(
            pd.to_numeric, errors="coerce"
        )
    df["time/s"] = df["time/s"].apply(pd.to_datetime, format="mixed", errors="coerce")
    df["cycle number"] = df["cycle number"].astype(float).astype(np.int16)
    # Cell voltage = working-electrode potential minus counter-electrode potential
    df["Voltage/V"] = df["Ewe/V"] - df["Ece/V"]
    echem.append(df)

In [None]:
# Stack all electrochemistry tables into one, ordered by time, and save it as CSV.
# NOTE(review): the CSV is written to path_out, while the merge cell further down reads
# echem_all.csv from the Echem data folder - presumably it is moved by hand; confirm.
echem_all = pd.concat(echem, axis=0, ignore_index=True)
echem_all = echem_all.sort_values(by="time/s").reset_index(drop=True)
echem_all.to_csv(path_out / r"echem_all.csv", index=False, header=True)

### XRD data

#### 谱线上的时间

In [None]:
# Read the timestamp and wavelength stored in the header of every .xye file
filelist = list(
    Path(
        r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS17_D\Data\IS17_D"
    ).glob(r"*.xye")
)

range_index, wave_length, time_processed = [], [], []

for path_file in filelist:
    # Exactly one entry per file is appended to each list so the three lists stay aligned
    # even when a header line is missing (None) or repeated (the last occurrence wins).
    wave_value = None
    recored_time = None
    with open(path_file, "r") as file:
        for line in file:
            if line.startswith("# Wave"):
                wave_value = float(line.split()[3])
            elif line.startswith('# Date'):
                recored_time = str(line.split()[3])

    # Pattern number: last five characters of the final "_"-separated field of the stem
    file_id = path_file.stem.split("_")[-1].split(".")[0][-5:]
    range_index.append(file_id)
    wave_length.append(wave_value)
    time_processed.append(recored_time)

df = pd.DataFrame({
    "Range_Index": range_index,
    'Time': time_processed,
    "Wave_Length": wave_length,
})
df['Time'] = pd.to_datetime(df['Time'], format=r'%Y-%m-%d_%H:%M:%S')
df["Range_Index"] = pd.to_numeric(df["Range_Index"])
df["Wave_Length"] = pd.to_numeric(df["Wave_Length"])
# The "Time" column is written under the header "time/s" (same name as the
# electrochemistry time column, which is the key the later merge uses)
df.to_csv(
    path_out.joinpath("Time_index_spectrum.csv"),
    sep=",",
    header=["Range_Index", "Wave_Length", "time/s"],
    columns=["Range_Index", "Wave_Length", "Time"],
    index=False,
)

#### 读取 XRD 数据

In [None]:
def extract_wave_and_data(file_path):
    """Read one .xye file and return its header values and numeric rows.

    Lines starting with "# Wave" and "# Date" supply the wavelength and the
    timestamp (the fourth whitespace-separated token of the line); if such a
    line occurs more than once, the last one wins. Every other line that does
    not start with "#" is parsed as a row of floats. Blank or whitespace-only
    lines are skipped so that they do not yield empty rows.

    Args:
        file_path: Path of the file to read.

    Returns:
        Tuple ``(recored_time, wave_value, data)``: the timestamp string (or
        None), the wavelength as a float (or None), and a list of rows, each a
        list of floats.

    Raises:
        ValueError: If a data token or the wavelength is not a valid float.
        IndexError: If a "# Wave" / "# Date" line has fewer than four tokens.
    """
    wave_value = None
    recored_time = None
    data = []

    with open(file_path, "r") as file:
        for line in file:
            if line.startswith("# Wave"):
                wave_value = float(line.split()[3])
            elif line.startswith("# Date"):
                recored_time = line.split()[3]
            elif not line.startswith("#"):
                tokens = line.split()
                if tokens:  # ignore blank lines instead of appending []
                    data.append([float(x) for x in tokens])

    return recored_time, wave_value, data


def process_files(directory):
    """Flatten every ``*.xye`` file in *directory* into one list of rows.

    Each output row is ``[file_id, recored_time, wave_value, *values]``, where
    ``file_id`` is the float made from the last five characters of the final
    "_"-separated field of the file stem and ``values`` is one data row of the
    file as returned by ``extract_wave_and_data``.
    """
    results = []

    for file_path in Path(directory).glob("*.xye"):
        recored_time, wave_value, data = extract_wave_and_data(file_path)
        file_id = float(file_path.stem.split("_")[-1][-5:])
        results.extend([file_id, recored_time, wave_value, *row] for row in data)

    return results

# Load all XRD patterns of IS17_D as long-format rows
file_list = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS17_D\Data\IS17_D")
spectrum_all = pd.DataFrame(
    process_files(file_list),
    columns=["Index", "recored_time", "wave_value", "2THETA", "Intensity1", "Intensity2"],
)

# Sum of the two numeric columns that follow 2THETA.
# NOTE(review): in the usual .xye layout the third column is an uncertainty, not a
# second intensity - confirm that adding the two columns is intended.
spectrum_all["Cnt2_D1"] = spectrum_all["Intensity1"] + spectrum_all["Intensity2"]
spectrum_all = spectrum_all[["Index", "2THETA", "Cnt2_D1"]]

# Long -> wide: one column per pattern (named by its Index), aligned on 2THETA
per_pattern = [
    group.set_index("2THETA")["Cnt2_D1"].rename(pattern_id)
    for pattern_id, group in spectrum_all.groupby("Index")
]
spectrum_all = pd.concat(per_pattern, axis=1).reset_index(drop=False)

spectrum_all.to_csv(path_out / "spectrum_all.csv", sep=",", header=True, index=False)

#### 合并图

In [None]:
# Reload the electrochemistry table and the pattern timestamps from the data folders.
# NOTE(review): both CSV files are written to path_out by the cells above but read from
# the data folders here - presumably they are moved by hand; confirm.
echem_file = Path(r"C:\Users\chengliu\Desktop\XRDdATA\CLData\IS17_D\Echem")
path_file = Path(r'C:\Users\chengliu\Desktop\XRDdATA\CLData\IS17_D\Data')

# Column 1 is parsed as dates; columns that are completely empty are dropped
echem = pd.read_csv(echem_file / r'echem_all.csv', sep=r",", comment="#", parse_dates=[1], index_col=None)
echem = echem.dropna(axis=1, how="all")

# echem.info()

# Acquisition time of every pattern (column 2 is parsed as dates)
time_spectrum = pd.read_csv(
    path_file / r"Time_index_spectrum.csv", sep=",", index_col=None, header=0, comment="#", parse_dates=[2]
)
# time_spectrum.info()

# Both tables are sorted on the merge key before the as-of merge
df_echem = echem.sort_values(by="time/s")
df_spectrum = time_spectrum.sort_values(by="time/s")

# For each electrochemistry row take the pattern closest in time, at most 5 s away
nearest = pd.merge_asof(
    df_echem,
    df_spectrum,
    on="time/s",
    direction="nearest",
    tolerance=pd.Timedelta("5s"),
)
# Keep matched rows only, one (the first) per pattern, and renumber the rows from 0
echem_spectrum = (
    nearest.dropna(subset=['Range_Index'])
    .drop_duplicates(subset=['Range_Index'], keep='first')
    .reset_index(drop=False)
)

In [None]:
# Reload the wide XRD table, order it by angle and turn 2THETA back into the first column
spectrum_all = (
    pd.read_csv(path_file / "spectrum_all.csv", index_col=0, header=0, sep=",")
    .sort_values(by="2THETA")
    .reset_index(drop=False)
)

In [None]:
%matplotlib inline
Letters = list("ABCDEFGHIJKLMNOPQRSTUVWXYZ")
# Two-panel figure: panel A (left, 1/3 of the width) and panel B (right, 2/3 of the width)
fig = plt.figure(figsize=(7.0, 5.0))
gs = gridspec.GridSpec(1, 2, width_ratios=[1, 2], height_ratios=None, wspace=0, hspace=0, figure=fig)

# Panel A: voltage (bottom x-axis) and current (top x-axis) against time on the y-axis
subfig = fig.add_subfigure(gs[0, 0])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(3.0)

ax.plot(echem["Voltage/V"], echem["time/s"], ls="-", lw=1.0, c=colors[0], label=r"Voltage")
# One dot per electrochemistry row that was matched to an XRD pattern
ax.scatter(echem_spectrum["Voltage/V"], echem_spectrum["time/s"], c=colors[1], marker="o", s=15, label=None)

# Star and letter the rows whose Range_Index is listed in `values`
values = [1, 15, 27, 37, 46, 61, 75, 89, 99, 109, 126, 137, 148, 153]
row_index = echem_spectrum.index[echem_spectrum["Range_Index"].isin(values)].tolist()
for i, idx in enumerate(row_index):
    # idx is an index label used positionally; valid because echem_spectrum was re-indexed from 0
    ax.scatter(echem_spectrum["Voltage/V"].iloc[idx], echem_spectrum["time/s"].iloc[idx], c='k', marker="*", s=50)
    ax.text(
        echem_spectrum["Voltage/V"].iloc[idx]-0.1,
        echem_spectrum["time/s"].iloc[idx]-pd.Timedelta(minutes=10),
        f'{Letters[i]}',
        fontsize=12,
        verticalalignment="bottom",
        horizontalalignment="center",
        rotation=90,
    )

ax.set_xlabel(
    r"Voltage (V vs. Zn/Zn$\mathrm{^{2\!+}\!)}$",
    fontsize=11,
)
ax.set_xlim(0.75, 1.95)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=0.4, offset=-0.05))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.2, offset=-0.05))

# Start the time axis 20 minutes before the first data point
ax.set_ylim(echem["time/s"].min() - pd.Timedelta(minutes=20), echem["time/s"].max())
ax.set_ylabel(r"Duration Time (hour)", fontsize=11, labelpad=5)
ax.yaxis.set_major_formatter(mdates.DateFormatter("%b-%d %H:%M"))  # date format of the tick labels
ax.yaxis.set_major_locator(mdates.HourLocator(byhour=range(0, 24, 2)))
ax.yaxis.set_minor_locator(mdates.HourLocator(interval=1))

plt.xticks(rotation=0, horizontalalignment="center")

# Second x-axis on top of panel A, sharing the time axis, for the current
ax2 = ax.twiny()
ax2.set_position((0, 0, 1, 1))
ax2.set_box_aspect(3.0)

ax2.plot(echem["<I>/mA"], echem["time/s"], ls="--", lw=1.0, c=colors[3], label=r"Current")

ax2.set_xlabel(
    r"Current (mA)",
    fontsize=11,
)
ax2.set_xlim(-0.15, 0.15)
ax2.xaxis.set_major_locator(ticker.MultipleLocator(base=0.06, offset=0))
ax2.xaxis.set_minor_locator(ticker.MultipleLocator(base=0.03, offset=0))
ax2.tick_params(axis="both", which="both", labelsize=9, right=True, labelright=True)

# Panel B: all patterns stacked with a constant vertical offset per column
subfig = fig.add_subfigure(gs[0, 1])
ax = subfig.add_axes((0, 0, 1, 1))
ax.set_box_aspect(1.0)

# Row whose 2THETA is closest to target_theta; identical for every curve, so look it up once
target_theta = 24
idx = (spectrum_all["2THETA"] - target_theta).abs().idxmin()

for i, col in enumerate(spectrum_all.columns[1:], start=0):
    # A curve is "selected" when its column label (not its position) is listed in `values`
    selected = float(col) in values
    offset = 2000 * i
    ax.plot(
        spectrum_all.iloc[:, 0],
        spectrum_all[col] + offset,
        ls="-", lw=1.0,
        c=colors[3] if selected else 'grey',
        label=None, alpha=1.0 if selected else 0.4, zorder=5 if selected else 0,
    )
    if selected:
        # Letter the highlighted curve itself. The letter is chosen by the position of the
        # column label in `values`, so highlight and letter always refer to the same curve.
        ax.text(
            target_theta + 0.3,
            spectrum_all.loc[idx, col] + offset,
            f'{Letters[values.index(float(col))]}',
            va="center", ha="left", fontsize=10,
        )

ax.set_ylabel(
    r"Intensity (arb.u.)",
    fontsize=11,
)
ax.set_ylim(6000, 350000)

ax.set_xlabel(
    r"2Theta ($ \mathrm{\theta}$)",
    fontsize=11,
)
ax.set_xlim(4, 24)
ax.xaxis.set_major_locator(ticker.MultipleLocator(base=4, offset=0))
ax.xaxis.set_minor_locator(ticker.MultipleLocator(base=2, offset=0))
ax.tick_params(axis="both", which="both", labelsize=9, top=False, right=False, left=False, labelleft=False)


# NOTE(review): the other plotting cells of this notebook save to the same file name,
# so each of them overwrites this figure - confirm that is intended.
plt.savefig(
    Path.joinpath(path_out, r"opXRD_MSDP_XRD_01_300.tif"),
    pad_inches=0.01,
    bbox_inches="tight",
    dpi=300,
    transparent=False,
    pil_kwargs={"compression": "tiff_lzw"},
)

# Set after savefig, so this only affects what plt.show() displays
plt.gcf().set_facecolor('white')
plt.show()