# 一、配置基本环境

## 下载ZIP模式

In [1]:
import os
import sys

# 1. 确认数据集路径
dataset_path = '/kaggle/input/batteryml-main/BatteryML-main'
print("数据集内容:")
!ls -l {dataset_path}

# 2. 直接使用解压后的目录（无需解压）
extract_path = dataset_path  # 数据集已经是解压后的目录

# 3. 添加到 Python 路径
if extract_path not in sys.path:
    sys.path.insert(0, extract_path)
    print(f"\n已添加路径: {extract_path}")

# 4. 验证导入
try:
    # 尝试导入主包
    import batteryml
    print("\n✅ 成功导入 batteryml!")
    
    # 尝试导入特定模块
    from batteryml.builders import data_builder
    print("✅ 成功导入 data_builder!")
    
    # 测试使用
    db = data_builder.DataBuilder()
    data = db.load_data()
    print("\n示例数据:")
    print(data.head() if hasattr(data, 'head') else data[:5])
    
except ImportError as e:
    print(f"\n❌ 导入失败: {e}")
    print("尝试备用导入方法...")
    
    # 备用方法：直接导入子模块
    try:
        import sys
        # 添加子目录路径
        sys.path.append(f"{extract_path}/batteryml")
        
        from builders import data_builder
        print("✅ 通过子路径导入成功!")
        
    except Exception as e:
        print(f"❌ 最终导入失败: {e}")
        print("\n您可以手动导航到目录:")
        print(f"!ls {extract_path}/batteryml")

数据集内容:
total 700
-rw-r--r--  1 nobody nogroup 642332 Aug  2 04:02 baseline.ipynb
drwxr-xr-x 10 nobody nogroup      0 Aug  2 04:02 batteryml
drwxr-xr-x  2 nobody nogroup      0 Aug  2 04:02 bin
-rw-r--r--  1 nobody nogroup    444 Aug  2 04:02 CODE_OF_CONDUCT.md
drwxr-xr-x  5 nobody nogroup      0 Aug  2 04:02 configs
-rw-r--r--  1 nobody nogroup   4903 Aug  2 04:02 dataprepare.md
drwxr-xr-x  2 nobody nogroup      0 Aug  2 04:02 image
-rw-r--r--  1 nobody nogroup   1141 Aug  2 04:02 LICENSE
-rw-r--r--  1 nobody nogroup   9366 Aug  2 04:02 README.md
-rw-r--r--  1 nobody nogroup    116 Aug  2 04:02 requirements.txt
-rw-r--r--  1 nobody nogroup  13682 Aug  2 04:02 result.ipynb
-rw-r--r--  1 nobody nogroup   1123 Aug  2 04:02 run_all_rul_baseline.sh
-rw-r--r--  1 nobody nogroup   2757 Aug  2 04:02 SECURITY.md
-rw-r--r--  1 nobody nogroup   1073 Aug  2 04:02 setup.py
-rw-r--r--  1 nobody nogroup   9971 Aug  2 04:02 soh_example.ipynb

已添加路径: /kaggle/input/batteryml-main/Battery

## 导入模式

In [2]:
# 克隆仓库
!git clone https://github.com/microsoft/BatteryML.git

# 查看文件结构
!ls BatteryML/

# 安装依赖
!cd BatteryML && pip install -r requirements.txt

# 安装BatteryML包
!cd BatteryML && pip install .

# 验证安装（检查是否有batteryml命令）
!which batteryml

Cloning into 'BatteryML'...
fatal: unable to access 'https://github.com/microsoft/BatteryML.git/': Could not resolve host: github.com
ls: cannot access 'BatteryML/': No such file or directory
/bin/bash: line 1: cd: BatteryML: No such file or directory
/bin/bash: line 1: cd: BatteryML: No such file or directory


## 运行

In [3]:
import pickle
import pandas as pd
import os
import numpy as np
import torch
import matplotlib.pyplot as plt
import inspect
import glob

from scipy import signal
from scipy.interpolate import interp1d
from scipy.signal import medfilt
from typing import List, Tuple, Optional, Dict, Any


from batteryml.builders import FEATURE_EXTRACTORS
from batteryml.data.battery_data import BatteryData
from batteryml.feature.base import BaseFeatureExtractor
from batteryml.visualization import plot_helper
from batteryml.visualization.plot_helper import inner_plot_capacity_degradation, plot_capacity_degradation

import copy
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides deprecation and numerical warnings raised by
# later cells; narrow the filter if those are needed while debugging.
warnings.filterwarnings('ignore')

In [4]:
import pprint

# Purpose of this cell: load one battery pickle file and print a compact
# overview of its structure (keys/attributes and the first few cycles).

# Path of the single battery file inspected here.
file_path = '/kaggle/input/nasa-battery-data/B0005.pkl'


def _print_field_summary(name, value):
    """Print one cycle field.

    A non-empty list is shown as its length plus its first three items;
    anything else is printed as-is.
    """
    if isinstance(value, list) and len(value) > 0:
        print(f"  {name}: 长度={len(value)}, 前3个值={value[:3]}")
    else:
        print(f"  {name}: {value}")


def _public_data_attrs(obj):
    """Yield (name, value) for every attribute from dir(obj) that does not
    start with an underscore and is not callable.

    Each attribute is read exactly once.
    """
    for name in dir(obj):
        if name.startswith('_'):
            continue
        value = getattr(obj, name)
        if not callable(value):
            yield name, value


# Load this one pickle file into battery_data.
# SECURITY: pickle.load can execute arbitrary code during unpickling; only
# load files that come from a trusted source.
with open(file_path, 'rb') as f:
    battery_data = pickle.load(f)

# Report the type of the loaded object.
print(f"数据类型: {type(battery_data)}")

if isinstance(battery_data, dict):
    # Dictionary layout: list the keys first.
    print("\n字典的键:")
    print(list(battery_data.keys()))

    # Summarize the first five cycles when cycle data is present and non-empty.
    if 'cycle_data' in battery_data and battery_data['cycle_data']:
        print("\n前5个循环数据:")
        for i, cycle in enumerate(battery_data['cycle_data'][:5]):
            print(f"\n循环 #{i+1}:")
            if isinstance(cycle, dict):
                cycle_keys = list(cycle.keys())
                print(f"  循环数据键: {cycle_keys}")

                # One summary line per field of the cycle.
                for key in cycle_keys:
                    _print_field_summary(key, cycle[key])
            else:
                print(f"  {type(cycle)}")

    # Basic battery info: every entry except cycle_data and list-valued entries.
    print("\n电池基本信息:")
    basic_info = {k: v for k, v in battery_data.items() if k != 'cycle_data' and not isinstance(v, list)}
    pprint.pprint(basic_info)

else:
    # Non-dict object: inspect it through its public, non-callable attributes.
    print("\n电池ID:", getattr(battery_data, 'cell_id', 'N/A'))

    print("\n电池对象属性:")
    for attr, value in _public_data_attrs(battery_data):
        if isinstance(value, list) and attr == 'cycle_data' and len(value) > 0:
            print(f"{attr}: {len(value)}个循环")

            # Summarize the first ten cycles.
            print("\n前10个循环数据:")
            for i, cycle in enumerate(value[:10]):
                print(f"\n循环 #{i+1} (循环编号: {getattr(cycle, 'cycle_number', 'N/A')}):")
                for cycle_attr, cycle_value in _public_data_attrs(cycle):
                    _print_field_summary(cycle_attr, cycle_value)
        elif not isinstance(value, list) or len(value) < 10:
            # Scalars and short lists are printed in full.
            print(f"{attr}: {value}")
        else:
            # Long lists are reduced to their length and element type.
            print(f"{attr}: 长度={len(value)}, 类型={type(value[0])}")

数据类型: <class 'dict'>

字典的键:
['cell_id', 'cycle_data', 'form_factor', 'anode_material', 'cathode_material', 'electrolyte_material', 'nominal_capacity_in_Ah', 'depth_of_charge', 'depth_of_discharge', 'already_spent_cycles', 'max_voltage_limit_in_V', 'min_voltage_limit_in_V', 'max_current_limit_in_A', 'min_current_limit_in_A', 'reference', 'description', 'charge_protocol', 'discharge_protocol']

前5个循环数据:

循环 #1:
  循环数据键: ['cycle_number', 'current_in_A', 'voltage_in_V', 'charge_capacity_in_Ah', 'discharge_capacity_in_Ah', 'time_in_s', 'temperature_in_C', 'internal_resistance_in_ohm']
  cycle_number: 1
  current_in_A: 长度=197, 前3个值=[-0.0049015892074626, -0.0014780055516425, -2.0125283240860368]
  voltage_in_V: 长度=197, 前3个值=[4.191491807505295, 4.190749067776103, 3.9748709122299895]
  charge_capacity_in_Ah: None
  discharge_capacity_in_Ah: 长度=197, 前3个值=[0.0, 1.4868886062853149e-05, 0.00530778940959121]
  time_in_s: 长度=197, 前3个值=[0.0, 16.781, 35.703]
  temperature_in_C: 长度=197, 前3个值=[24.3300338