In [1]:
import pandas as pd
import numpy as np
from scipy.io import loadmat


In [3]:
# Read the whole MATLAB file into a dict.
data = loadmat(file_name='Oxford_Battery_Degradation_Dataset_1.mat')
# List the top-level keys to see how the file is organised and what its
# variables are called.
print(data.keys())


dict_keys(['__header__', '__version__', '__globals__', 'Cell1', 'Cell2', 'Cell3', 'Cell4', 'Cell5', 'Cell6', 'Cell7', 'Cell8'])


In [4]:
import os
import pandas as pd
from scipy.io import loadmat
import numpy as np

def records_to_frame(struct_holder):
    """Flatten one sub-field of a cycle into a single DataFrame.

    Parameters
    ----------
    struct_holder : array-like
        The value obtained from ``element[sub_field]``; expected to be an
        object array whose entries are struct (record) arrays.

    Returns
    -------
    pandas.DataFrame or None
        One frame with the columns ``Time``, ``Voltage``, ``Charge`` and
        ``Temperature``, built by stacking every record of every struct array
        in order, or ``None`` when no record could be read.

    Notes
    -----
    Records are read by position: fields 0-3 are taken as time, voltage,
    charge and temperature (assumed to be the dataset's ``t``, ``v``, ``q``,
    ``T`` order -- TODO confirm against the .mat file).
    """
    frames = []
    # Walk every entry rather than only ``flatten()[0]``: an empty holder then
    # yields no frames instead of raising IndexError, and a holder with several
    # struct arrays is not silently truncated to the first one.
    for struct_array in np.asarray(struct_holder).ravel():
        struct_array = np.asarray(struct_array)
        # Entries without named fields (e.g. an empty placeholder matrix)
        # carry no records to extract.
        if struct_array.dtype.names is None:
            continue
        for record in struct_array.ravel():
            frames.append(pd.DataFrame({
                "Time": record[0].flatten(),
                "Voltage": record[1].flatten(),
                "Charge": record[2].flatten(),
                "Temperature": record[3].flatten(),
            }))
    if not frames:
        return None
    return pd.concat(frames, ignore_index=True)


# Jupyter executes cells in the ``__main__`` namespace, so this section runs
# normally inside the notebook. The guard only keeps the file I/O from firing
# when the code is imported, which lets ``records_to_frame`` be exercised on
# its own.
if __name__ == "__main__":
    # Read the .mat file.
    data = loadmat('Oxford_Battery_Degradation_Dataset_1.mat')

    # Keep only the dataset's own variables (drop the reserved '__*' keys).
    keys = [key for key in data.keys() if not key.startswith('__')]
    print("Keys in the dataset:", keys)

    # Top-level output folder; CSVs land in <base_dir>/<cell>/<field>/<sub_field>.csv
    base_dir = "data"
    os.makedirs(base_dir, exist_ok=True)

    # One pass per cell (e.g. Cell1, Cell2, ...).
    for cell_key in keys:
        print(f"Processing {cell_key}...")
        cell_data = data[cell_key]

        # Folder for this cell.
        cell_dir = os.path.join(base_dir, cell_key)
        os.makedirs(cell_dir, exist_ok=True)

        # Field names of the cell's struct (e.g. cyc0000, cyc0100, ...).
        fields = cell_data.dtype.names
        print(f"Fields in {cell_key}:", fields)

        for field in fields:
            print(f"Processing field: {field}")
            field_data = cell_data[field]

            # Sub-folder for this field (e.g. data/Cell1/cyc0000/).
            field_dir = os.path.join(cell_dir, field)
            os.makedirs(field_dir, exist_ok=True)

            # ravel() visits every stored element whatever the array's shape,
            # instead of assuming a (1, N) layout via field_data[0, i].
            for element in field_data.ravel():
                # Only elements that are themselves structs have sub-fields.
                if not element.dtype.names:
                    continue
                sub_fields = element.dtype.names
                print(f"Sub-fields in element: {sub_fields}")

                # One CSV per sub-field (C1ch, C1dc, OCVch, OCVdc).
                for sub_field in sub_fields:
                    print(f"Processing sub-field: {sub_field}")
                    final_df = records_to_frame(element[sub_field])

                    if final_df is None:
                        print(f"No data extracted for {sub_field} in {field}")
                        continue

                    # Save the combined measurements as CSV.
                    file_path = os.path.join(field_dir, f"{sub_field}.csv")
                    final_df.to_csv(file_path, index=False)
                    print(f"{sub_field} data saved to '{file_path}'")

    print("Data extraction and saving complete.")


Keys in the dataset: ['Cell1', 'Cell2', 'Cell3', 'Cell4', 'Cell5', 'Cell6', 'Cell7', 'Cell8']
Processing Cell1...
Fields in Cell1: ('cyc0000', 'cyc0100', 'cyc0200', 'cyc0300', 'cyc0400', 'cyc0500', 'cyc0600', 'cyc0700', 'cyc0800', 'cyc0900', 'cyc1000', 'cyc1100', 'cyc1200', 'cyc1300', 'cyc1400', 'cyc1600', 'cyc1800', 'cyc1900', 'cyc2000', 'cyc2100', 'cyc2200', 'cyc2300', 'cyc2400', 'cyc2500', 'cyc2600', 'cyc2700', 'cyc2800', 'cyc2900', 'cyc3000', 'cyc3100', 'cyc3200', 'cyc3300', 'cyc3500', 'cyc3600', 'cyc3700', 'cyc3800', 'cyc3900', 'cyc4000', 'cyc4100', 'cyc4200', 'cyc4300', 'cyc4400', 'cyc4500', 'cyc4600', 'cyc4800', 'cyc5000', 'cyc5100', 'cyc5200', 'cyc5300', 'cyc5400', 'cyc5500', 'cyc5600', 'cyc5700', 'cyc5800', 'cyc5900', 'cyc6000', 'cyc6100', 'cyc6200', 'cyc6300', 'cyc6400', 'cyc6500', 'cyc6600', 'cyc6700', 'cyc6800', 'cyc6900', 'cyc7000', 'cyc7100', 'cyc7200', 'cyc7300', 'cyc7400', 'cyc7500', 'cyc7600', 'cyc7700', 'cyc7800', 'cyc7900', 'cyc8000', 'cyc8100', 'cyc8200')
Processing