In [1]:
# --- Cell 1: Imports and Setup ---
import pandas as pd
import numpy as np
import os
from IPython.display import display
import matplotlib.pyplot as plt
import seaborn as sns

# --- Import functions from your process_data.py file ---
# Every later statement in this notebook uses these names, so a failed import
# must stop execution here instead of surfacing as a NameError further down.
try:
    from process_data import (
        load_battery_data,
        segment_discharge_cycles,
        filter_invalid_cycles,
        extract_cycle_features, # Includes core and new statistical features
        calculate_q_initial_and_soh,
        add_health_indicators,
        process_battery_dataset, # Simplified orchestrator
        DATASET_ROOT_DIRECTORY,
        FOLDERS_TO_PROCESS,
        NOMINAL_CAPACITY_AH
    )
except ImportError as e:
    print(f"Error importing from process_data.py: {e}")
    # If process_data.py is not importable from the notebook's working
    # directory, add its folder to sys.path above this block and re-run.
    raise
else:
    print("Successfully imported functions from process_data.py (simplified version)")

# --- Configuration for this Notebook Run ---
# Battery ID to process on its own (the CSV file name without ".csv",
# e.g. "battery00"), or None to process every battery in FOLDERS_TO_PROCESS.
SINGLE_BATTERY_TO_PROCESS = None

# Sub-folder of DATASET_ROOT_DIRECTORY containing the selected battery's CSV.
# Stays None when no single battery is selected or its file cannot be found.
TARGET_BATTERY_SUBFOLDER = None
if SINGLE_BATTERY_TO_PROCESS:
    for folder in FOLDERS_TO_PROCESS:
        potential_path = os.path.join(DATASET_ROOT_DIRECTORY, folder, f"{SINGLE_BATTERY_TO_PROCESS}.csv")
        if os.path.exists(potential_path):
            TARGET_BATTERY_SUBFOLDER = folder
            print(f"Target battery '{SINGLE_BATTERY_TO_PROCESS}' found in: '{TARGET_BATTERY_SUBFOLDER}'")
            break
    # Still None after the search means no folder contained the requested CSV.
    if TARGET_BATTERY_SUBFOLDER is None:
        print(f"WARNING: Target '{SINGLE_BATTERY_TO_PROCESS}.csv' not found in specified folders.")

NOTEBOOK_DATASET_ROOT = DATASET_ROOT_DIRECTORY
print(f"Dataset root (from process_data.py): {NOTEBOOK_DATASET_ROOT}")

# Widen pandas' display limits so wide feature frames are readable in output.
pd.set_option('display.max_columns', 100)
pd.set_option('display.width', 1000)

Successfully imported functions from process_data.py (simplified version)
Dataset root (from process_data.py): battery_alt_dataset


In [2]:
# --- Cell 2: Detailed Test of extract_cycle_features for ONE cycle ---
# Smoke test: run load -> segment -> filter -> feature extraction for the one
# battery chosen in the configuration cell, and print the features of its
# first valid cycle. Each stage reports its own failure and stops the test.
if not SINGLE_BATTERY_TO_PROCESS:
    print("Skipping detailed test (SINGLE_BATTERY_TO_PROCESS not set).")
elif not TARGET_BATTERY_SUBFOLDER:
    print(f"Cannot run detailed test for '{SINGLE_BATTERY_TO_PROCESS}', subfolder not identified or load failed.")
else:
    test_battery_id = SINGLE_BATTERY_TO_PROCESS
    test_file_path = os.path.join(
        NOTEBOOK_DATASET_ROOT,
        TARGET_BATTERY_SUBFOLDER,
        f"{test_battery_id}.csv",
    )
    print(f"--- Testing extract_cycle_features for battery: {test_battery_id} ---")

    # Stage 1: load the raw CSV.
    raw_df_single = load_battery_data(test_file_path)
    if raw_df_single is None or raw_df_single.empty:
        print(f"Failed to load raw data for {test_battery_id}.")
    else:
        # Stage 2: split the raw frame into discharge cycles.
        segmented_cycles_single = segment_discharge_cycles(raw_df_single, test_battery_id)
        if not segmented_cycles_single:
            print("No segmented cycles for test battery.")
        else:
            # Stage 3: discard cycles the filter rejects.
            valid_cycles_single = filter_invalid_cycles(segmented_cycles_single, test_battery_id)
            if not valid_cycles_single:
                print("No valid cycles for test battery.")
            else:
                # Stage 4: extract and list features for the first valid cycle.
                cycle_to_test_info = valid_cycles_single[0]
                print(f"\nTesting feature extraction for cycle: {cycle_to_test_info['cycle_number']}")
                extracted_features_dict = extract_cycle_features(cycle_to_test_info)
                print("\n--- Extracted Features for Test Cycle (Simplified) ---")
                for key, value in extracted_features_dict.items():
                    print(f"  {key}: {value}")

Skipping detailed test (SINGLE_BATTERY_TO_PROCESS not set).


In [3]:
# --- Cell 3: Process Dataset (Simplified Features) ---
# Runs the orchestrator from process_data.py over either the single configured
# battery or every battery in FOLDERS_TO_PROCESS, then prints quick diagnostics
# on the resulting per-cycle feature table.
print("\n--- Starting Main Dataset Processing (Simplified Features) ---")
if SINGLE_BATTERY_TO_PROCESS:
    print(f"Mode: Processing SINGLE battery ID: '{SINGLE_BATTERY_TO_PROCESS}'")
else:
    print(f"Mode: Processing ALL batteries in folders: {FOLDERS_TO_PROCESS}")

# Call the simplified main processing function
master_df_simplified = process_battery_dataset(
    root_dir=DATASET_ROOT_DIRECTORY,
    folders_list=FOLDERS_TO_PROCESS,
    single_battery_id=SINGLE_BATTERY_TO_PROCESS
)

# --- Diagnostics for Cell 3 (Simplified) ---
# NOTE(review): what process_battery_dataset returns on failure is not visible
# from this notebook; None is guarded here the same way Cell 2 guards
# load_battery_data, so a failed run reports cleanly instead of raising
# AttributeError on `.empty`.
if master_df_simplified is None or master_df_simplified.empty:
    print("\nMaster DataFrame (Simplified) is empty. No data processed or error.")
else:
    print("\n--- Master DataFrame Diagnostics (Simplified Features) ---")
    print(f"Master DataFrame shape: {master_df_simplified.shape}")
    print(f"Unique batteries processed: {master_df_simplified['battery_id'].nunique()}")
    print("\nMaster DataFrame info():")
    master_df_simplified.info(verbose=True, show_counts=True)
    print("\nMaster DataFrame head:")
    display(master_df_simplified.head(10))

    # Check for some of the new statistical features: report, per column,
    # whether it exists and how many rows hold a non-NaN value.
    new_stat_features_to_check = [
        'voltage_std_V', 'voltage_skewness', 'voltage_kurtosis', 'voltage_p50_V',
        'current_std_A', 'current_skewness', 'current_kurtosis', 'current_p50_A',
        'temp_std_C', 'temp_skewness', 'temp_kurtosis', 'temp_p50_C'
    ]
    total_rows = len(master_df_simplified)
    print("\nChecking new statistical features:")
    for feat in new_stat_features_to_check:
        if feat in master_df_simplified.columns:
            non_nan = master_df_simplified[feat].notna().sum()
            print(f"  Feature '{feat}': Present (Non-NaN: {non_nan} / {total_rows})")
        else:
            print(f"  Feature '{feat}': MISSING")


--- Starting Main Dataset Processing (Simplified Features) ---
Mode: Processing ALL batteries in folders: ['regular_alt_batteries', 'recommissioned_batteries']
Scanning folder: battery_alt_dataset/regular_alt_batteries

Processing battery: battery00 from file battery00.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery01 from file battery01.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery10 from file battery10.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery11 from file battery11.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery20 from file battery20.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery21 from file battery21.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery22 from file battery22.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery23 from file battery23.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery30 from file battery30.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery31 from file battery31.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery40 from file battery40.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery41 from file battery41.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery50 from file battery50.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery51 from file battery51.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery52 from file battery52.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).

Scanning folder: battery_alt_dataset/recommissioned_batteries

Processing battery: battery02 from file battery02.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery03 from file battery03.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery12 from file battery12.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery24 from file battery24.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery25 from file battery25.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery32 from file battery32.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery33 from file battery33.csv


  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).


Processing battery: battery53 from file battery53.csv

Concatenating data from all processed batteries...
Feature extraction and SOH calculation complete.

--- Master DataFrame Diagnostics (Simplified Features) ---
Master DataFrame shape: (8220, 200)
Unique batteries processed: 23

Master DataFrame info():
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8220 entries, 0 to 8219
Data columns (total 200 columns):
 #    Column                                Non-Null Count  Dtype  
---   ------                                --------------  -----  
 0    battery_id                            8220 non-null   object 
 1    cycle_number                          8220 non-null   int64  
 2    is_reference_cycle                    8220 non-null   bool   
 3    discharge_duration_s                  8220 non-null   float64
 4    capacity_Ah                           8220 non-null   float64
 5    energy_Wh                             8220 non-null   float64
 6    avg_current_A                    

  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).mean()
  df_out[f'{col}_roll_std_{w}'] = df_out[col].rolling(window=w, min_periods=1).std()
  df_out[f'{col}_diff_{w}'] = df_out[col].diff(periods=w)
  df_out[f'{col}_roll_mean_{w}'] = df_out[col].rolling(window=w, min_periods=1).

Unnamed: 0,battery_id,cycle_number,is_reference_cycle,discharge_duration_s,capacity_Ah,energy_Wh,avg_current_A,avg_voltage_V,start_voltage_V,end_voltage_V,delta_voltage_V,avg_power_W,avg_temp_C,start_temp_C,end_temp_C,delta_temp_C,max_temp_C,internal_resistance_ohm,voltage_std_V,voltage_variance_V2,voltage_skewness,voltage_kurtosis,voltage_p10_V,voltage_p25_V,voltage_p50_V,voltage_p75_V,voltage_p90_V,current_std_A,current_variance_A2,current_skewness,current_kurtosis,current_p10_A,current_p25_A,current_p50_A,current_p75_A,current_p90_A,temp_std_C,temp_variance_C2,temp_skewness,temp_kurtosis,temp_p10_C,temp_p25_C,temp_p50_C,temp_p75_C,temp_p90_C,dVdQ_mean_V_mAh,dVdQ_std_V_mAh,dVdQ_min_V_mAh,dVdQ_max_V_mAh,dVdQ_skewness,...,current_skewness_roll_std_5,current_skewness_diff_5,current_skewness_roll_mean_10,current_skewness_roll_std_10,current_skewness_diff_10,current_kurtosis_roll_mean_3,current_kurtosis_roll_std_3,current_kurtosis_diff_3,current_kurtosis_roll_mean_5,current_kurtosis_roll_std_5,current_kurtosis_diff_5,current_kurtosis_roll_mean_10,current_kurtosis_roll_std_10,current_kurtosis_diff_10,temp_skewness_roll_mean_3,temp_skewness_roll_std_3,temp_skewness_diff_3,temp_skewness_roll_mean_5,temp_skewness_roll_std_5,temp_skewness_diff_5,temp_skewness_roll_mean_10,temp_skewness_roll_std_10,temp_skewness_diff_10,temp_kurtosis_roll_mean_3,temp_kurtosis_roll_std_3,temp_kurtosis_diff_3,temp_kurtosis_roll_mean_5,temp_kurtosis_roll_std_5,temp_kurtosis_diff_5,temp_kurtosis_roll_mean_10,temp_kurtosis_roll_std_10,temp_kurtosis_diff_10,dVdQ_mean_V_mAh_roll_mean_3,dVdQ_mean_V_mAh_roll_std_3,dVdQ_mean_V_mAh_diff_3,dVdQ_mean_V_mAh_roll_mean_5,dVdQ_mean_V_mAh_roll_std_5,dVdQ_mean_V_mAh_diff_5,dVdQ_mean_V_mAh_roll_mean_10,dVdQ_mean_V_mAh_roll_std_10,dVdQ_mean_V_mAh_diff_10,V_slope_seg2_V_s_roll_mean_3,V_slope_seg2_V_s_roll_std_3,V_slope_seg2_V_s_diff_3,V_slope_seg2_V_s_roll_mean_5,V_slope_seg2_V_s_roll_std_5,V_slope_seg2_V_s_diff_5,V_slope_seg2_V_s_roll_mean_10,V_slope_seg2_V_s_ro
ll_std_10,V_slope_seg2_V_s_diff_10
0,battery00,0,True,3502.292,2.452221,17.799597,2.520635,7.259734,8.329,4.814,3.515,18.299208,25.917209,23.155,29.161,6.006,29.161,0.014277,0.583853,0.340884,-0.586329,0.44892,6.561,6.851,7.281,7.747,7.989,0.000584,3.407039e-07,0.778159,2.500026,2.52,2.52,2.521,2.521,2.521,1.053858,1.110617,-0.074045,0.197747,24.3624,25.35,26.04,26.538,27.179,-0.001445,0.003581,-0.039693,0.001521,-6.611756,...,,,0.778159,,,2.500026,,,2.500026,,,2.500026,,,-0.074045,,,-0.074045,,,-0.074045,,,0.197747,,,0.197747,,,0.197747,,,-0.001445,,,-0.001445,,,-0.001445,,,-0.000516,,,-0.000516,,,-0.000516,,
1,battery00,1,False,537.423,2.432716,15.685112,16.281695,6.451526,8.657,4.921,3.736,104.977693,64.833192,22.35,94.618,72.268,94.618,0.107324,0.485558,0.235767,-0.315447,0.235715,5.8378,6.106,6.472,6.8815,7.031,0.66216,0.4384563,-23.621412,556.647479,16.286718,16.298038,16.307927,16.324139,16.336095,19.297919,372.409687,-0.366493,-0.974483,36.1252,49.9145,67.513,82.3365,88.9124,-0.002052,0.025265,-0.598146,0.004871,-23.369586,...,17.253102,,-11.421627,17.253102,,279.573753,391.841422,,279.573753,391.841422,,279.573753,391.841422,,-0.220269,0.206792,,-0.220269,0.206792,,-0.220269,0.206792,,-0.388368,0.828891,,-0.388368,0.828891,,-0.388368,0.828891,,-0.001748,0.000429,,-0.001748,0.000429,,-0.001748,0.000429,,-0.001757,0.001755,,-0.001757,0.001755,,-0.001757,0.001755,
2,battery00,2,False,539.307,2.440109,15.797369,16.274049,6.476988,8.703,4.914,3.789,105.341179,67.258282,26.872,96.305,69.433,96.305,0.100823,0.508305,0.258374,-0.233617,0.0799,5.8464,6.1135,6.485,6.91175,7.1062,0.663641,0.4404188,-23.549647,553.389937,16.27728,16.287931,16.301396,16.31907,16.32965,18.547374,344.005092,-0.308959,-0.997889,40.4253,52.777,69.2865,82.449,91.7131,-0.002028,0.023328,-0.550627,0.004129,-23.265318,...,14.066428,,-15.4643,14.066428,,370.845814,319.000968,,370.845814,319.000968,,370.845814,319.000968,,-0.249833,0.15493,,-0.249833,0.15493,,-0.249833,0.15493,,-0.591542,0.683644,,-0.591542,0.683644,,-0.591542,0.683644,,-0.001841,0.000344,,-0.001841,0.000344,,-0.001841,0.000344,,-0.002192,0.001452,,-0.002192,0.001452,,-0.002192,0.001452,
3,battery00,3,False,538.141,2.435604,15.759755,16.278854,6.47588,8.694,4.916,3.778,105.35212,67.717884,27.179,97.005,69.826,97.022,0.100951,0.505258,0.255285,-0.225844,0.095573,5.8439,6.10925,6.4855,6.91,7.0989,0.6748,0.4553547,-23.170907,535.594586,16.282823,16.294834,16.306894,16.32176,16.33786,18.648074,347.750666,-0.303099,-1.00203,40.6002,53.14275,69.6505,84.402,91.1269,-0.002014,0.022954,-0.533127,0.003778,-22.899129,...,12.114353,,-17.390952,12.114353,,548.544001,11.332184,533.09456,412.033007,273.178729,,412.033007,273.178729,,-0.326184,0.035032,-0.229053,-0.263149,0.129273,,-0.263149,0.129273,,-0.991467,0.014854,-1.199777,-0.694164,0.594731,,-0.694164,0.594731,,-0.002031,1.9e-05,-0.000569,-0.001885,0.000294,,-0.001885,0.000294,,-0.003044,3.9e-05,-0.002554,-0.002412,0.001264,,-0.002412,0.001264,
4,battery00,4,False,493.872,2.42177,15.352919,17.635017,6.332612,8.632,4.924,3.708,111.778393,70.938025,23.201,102.171,78.97,102.308,0.107995,0.443397,0.196601,-0.050101,1.108483,5.8191,6.018,6.332,6.6195,6.9573,1.359718,1.848834,-3.535282,45.368936,16.32404,16.523991,17.0787,18.865229,19.346378,21.64121,468.341988,-0.534372,-0.897791,37.4003,53.79825,76.752,89.383,95.535,-0.002149,0.026935,-0.609652,0.03655,-22.190629,...,12.184584,,-14.619818,12.184584,,378.11782,288.306318,-511.278543,338.700193,287.851475,,338.700193,287.851475,,-0.382143,0.131866,-0.167879,-0.317394,0.165064,,-0.317394,0.165064,,-0.965904,0.059023,0.076692,-0.734889,0.523041,,-0.734889,0.523041,,-0.002064,7.4e-05,-9.8e-05,-0.001938,0.00028,,-0.001938,0.00028,,-0.00277,0.000513,0.00082,-0.002365,0.0011,,-0.002365,0.0011,
5,battery00,5,False,533.243,2.415025,15.578081,16.289831,6.453694,8.664,4.93,3.734,105.064583,67.67809,25.923,97.005,71.082,97.005,0.103036,0.491889,0.241955,-0.260488,0.179526,5.8385,6.1025,6.4745,6.87475,7.0506,0.665654,0.4430948,-23.512076,551.545511,16.292339,16.305164,16.316541,16.332861,16.346027,19.091942,364.50226,-0.327952,-0.990085,39.9475,52.39625,69.87,84.746,91.9955,-0.002043,0.024128,-0.568899,0.003485,-23.280964,...,8.913862,-24.290235,-16.101861,11.486948,,377.503011,287.747095,-1.844425,448.50929,225.508467,549.045485,374.174413,271.730195,,-0.388474,0.126961,-0.018993,-0.368175,0.096155,-0.253907,-0.319153,0.1477,,-0.963302,0.057048,0.007804,-0.972456,0.043044,-1.187832,-0.777422,0.479282,,-0.002069,7.1e-05,-1.5e-05,-0.002057,5.4e-05,-0.000598,-0.001955,0.000255,,-0.002755,0.0005,4.6e-05,-0.002865,0.000385,-0.002501,-0.002473,0.001019,
6,battery00,6,False,533.946,2.419074,15.632017,16.29566,6.466291,8.674,4.92,3.754,105.307397,68.703045,27.404,98.856,71.452,98.856,0.1016,0.500178,0.250178,-0.251527,0.115844,5.839,6.105,6.478,6.895,7.0818,0.668898,0.447425,-23.407173,546.596583,16.301457,16.312864,16.322117,16.337035,16.350502,19.07534,363.868615,-0.277896,-1.000629,41.3402,53.555,69.935,86.334,93.3524,-0.002026,0.023466,-0.550771,0.004088,-23.166994,...,8.889447,0.214239,-17.145477,10.843535,,381.170344,290.823077,11.001997,446.499111,224.345336,-10.050896,398.806151,256.472494,,-0.380073,0.13595,0.025203,-0.350455,0.104357,0.088597,-0.313259,0.13573,,-0.962835,0.056576,0.001402,-0.977685,0.0449,-0.026146,-0.809309,0.445582,,-0.002073,6.7e-05,-1.2e-05,-0.002052,5.5e-05,2.6e-05,-0.001965,0.000234,,-0.00276,0.000505,-1.6e-05,-0.002882,0.000395,-8.8e-05,-0.002561,0.000959,
7,battery00,7,False,532.485,2.413071,15.584998,16.299306,6.461557,8.667,4.925,3.742,105.252368,68.59887,27.416,98.694,71.278,98.694,0.101557,0.499485,0.249485,-0.240696,0.119704,5.8368,6.103,6.475,6.888,7.0734,0.677661,0.4592239,-23.10652,532.606535,16.303472,16.316324,16.328439,16.342661,16.355434,19.090183,364.435087,-0.288157,-1.013631,40.8224,53.303,70.529,84.0395,93.1644,-0.002044,0.023572,-0.546022,0.004441,-22.845508,...,8.840243,0.443127,-17.890607,10.257999,,543.582877,9.822577,487.237599,442.34243,222.05054,-20.783402,415.531199,242.113777,,-0.298002,0.02644,0.246214,-0.346295,0.10681,0.020802,-0.310122,0.125975,,-1.001448,0.011794,-0.11584,-0.980833,0.047166,-0.015742,-0.834849,0.418806,,-0.002038,1e-05,0.000105,-0.002055,5.4e-05,-1.7e-05,-0.001975,0.000218,,-0.00306,3.8e-05,-0.000899,-0.002885,0.000396,-1.4e-05,-0.002625,0.000906,
8,battery00,8,False,530.997,2.40686,15.537347,16.302917,6.45848,8.661,4.942,3.719,105.224819,68.836159,27.373,99.037,71.664,99.037,0.101871,0.496723,0.246734,-0.235451,0.122391,5.8362,6.097,6.468,6.884,7.0652,0.680355,0.4628825,-23.024809,528.761618,16.306947,16.321246,16.331949,16.346903,16.358272,19.285546,371.932302,-0.290441,-1.03025,40.973,53.4425,70.82,85.326,93.44,-0.002034,0.023482,-0.542132,0.00779,-22.786091,...,8.824669,0.146098,-18.461074,9.746903,,535.988245,9.386079,-22.783894,440.975837,221.353079,-6.832969,428.112357,229.60023,,-0.285498,0.006682,0.037512,-0.343763,0.108231,0.012658,-0.307935,0.118021,,-1.014837,0.014848,-0.040165,-0.986477,0.051798,-0.02822,-0.85656,0.397135,,-0.002035,9e-06,8e-06,-0.002059,5.1e-05,-2e-05,-0.001982,0.000205,,-0.003078,7e-06,-5.5e-05,-0.002886,0.000397,-1e-06,-0.002675,0.00086,
9,battery00,9,False,529.941,2.402294,15.497477,16.304387,6.454008,8.654,4.932,3.722,105.160951,68.801629,27.408,98.64,71.232,98.64,0.104272,0.493977,0.244014,-0.244253,0.175672,5.8349,6.10075,6.467,6.875,7.057,0.684963,0.4691738,-22.868798,521.654658,16.306157,16.323347,16.334782,16.349047,16.361146,19.073454,363.796641,-0.344551,-1.017824,40.7675,53.6295,71.5215,85.59475,91.9258,-0.002024,0.02305,-0.52879,0.003557,-22.648142,...,0.268392,-19.333516,-18.901846,9.294575,,527.67427,5.556316,-24.941925,536.232981,12.483767,476.285722,437.466587,218.480935,,-0.307716,0.03192,-0.066655,-0.305799,0.028802,0.189821,-0.311596,0.111872,,-1.020569,0.008643,-0.017195,-1.010484,0.015554,-0.120033,-0.872687,0.377879,,-0.002034,1e-05,2e-06,-0.002034,9e-06,0.000125,-0.001986,0.000194,,-0.003071,6e-06,2.1e-05,-0.003063,2.7e-05,-0.000887,-0.002714,0.000821,



Checking new statistical features:
  Feature 'voltage_std_V': Present (Non-NaN: 8220 / 8220)
  Feature 'voltage_skewness': Present (Non-NaN: 8220 / 8220)
  Feature 'voltage_kurtosis': Present (Non-NaN: 8220 / 8220)
  Feature 'voltage_p50_V': Present (Non-NaN: 8220 / 8220)
  Feature 'current_std_A': Present (Non-NaN: 8220 / 8220)
  Feature 'current_skewness': Present (Non-NaN: 8220 / 8220)
  Feature 'current_kurtosis': Present (Non-NaN: 8220 / 8220)
  Feature 'current_p50_A': Present (Non-NaN: 8220 / 8220)
  Feature 'temp_std_C': Present (Non-NaN: 8220 / 8220)
  Feature 'temp_skewness': Present (Non-NaN: 8219 / 8220)
  Feature 'temp_kurtosis': Present (Non-NaN: 8219 / 8220)
  Feature 'temp_p50_C': Present (Non-NaN: 8220 / 8220)


In [4]:
# --- Cell 4: Save Simplified Processed DataFrame ---
# Persist the simplified master DataFrame as a CSV file (relative path), or
# report that there is nothing to write when the frame has no rows/columns.
if master_df_simplified.empty:
    print("\nMaster DataFrame (Simplified) is empty. Nothing to save.")
else:
    # The filename suffix is the selected battery id when a single battery was
    # configured, otherwise the fixed "_ALL" marker.
    if SINGLE_BATTERY_TO_PROCESS:
        # '.' and '/' in the id are swapped for '_' before it is embedded in
        # the filename (presumably to avoid stray extensions / path separators).
        sanitized_id = SINGLE_BATTERY_TO_PROCESS.replace('.', '_').replace('/', '_')
        name_suffix = f"_{sanitized_id}"
    else:
        name_suffix = "_ALL"
    output_filename = "".join(("New_Features_Added", name_suffix, ".csv"))

    # Best-effort save: any failure is reported on stdout instead of aborting
    # the notebook run.
    try:
        # index=False leaves the DataFrame's row index out of the CSV.
        master_df_simplified.to_csv(output_filename, index=False)
        print(f"\nSaved simplified data to: {output_filename} (Shape: {master_df_simplified.shape})")
    except Exception as e:
        print(f"\nError saving simplified DataFrame: {e}")


Saved simplified data to: New_Features_Added_ALL.csv (Shape: (8220, 200))


In [13]:
# List every column name of the simplified master DataFrame.
# print() is used (rather than a bare expression) so the full list is emitted
# as plain text under a banner line.
columns_banner = "\n--- Master DataFrame (Simplified) Columns ---"
print(columns_banner)
master_column_names = master_df_simplified.columns.tolist()
print(master_column_names)


--- Master DataFrame (Simplified) Columns ---
['battery_id', 'cycle_number', 'is_reference_cycle', 'discharge_duration_s', 'capacity_Ah', 'energy_Wh', 'avg_current_A', 'avg_voltage_V', 'start_voltage_V', 'end_voltage_V', 'delta_voltage_V', 'avg_power_W', 'avg_temp_C', 'start_temp_C', 'end_temp_C', 'delta_temp_C', 'max_temp_C', 'internal_resistance_ohm', 'voltage_std_V', 'voltage_variance_V2', 'voltage_skewness', 'voltage_kurtosis', 'voltage_p10_V', 'voltage_p25_V', 'voltage_p50_V', 'voltage_p75_V', 'voltage_p90_V', 'current_std_A', 'current_variance_A2', 'current_skewness', 'current_kurtosis', 'current_p10_A', 'current_p25_A', 'current_p50_A', 'current_p75_A', 'current_p90_A', 'temp_std_C', 'temp_variance_C2', 'temp_skewness', 'temp_kurtosis', 'temp_p10_C', 'temp_p25_C', 'temp_p50_C', 'temp_p75_C', 'temp_p90_C', 'dVdQ_mean_V_mAh', 'dVdQ_std_V_mAh', 'dVdQ_min_V_mAh', 'dVdQ_max_V_mAh', 'dVdQ_skewness', 'dVdQ_kurtosis', 'V_slope_seg1_V_s', 'V_slope_seg2_V_s', 'V_slope_seg3_V_s', 'q_initi

In [12]:
import platform

# Show the machine/hardware type string of the host running this kernel.
# uname().machine is the same value platform.machine() returns; leaving it as
# the cell's last expression makes the notebook display it.
platform.uname().machine

'arm64'