In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/split-soh/canonical_checkpoint_features_with_soh_and_split.parquet
/kaggle/input/b-18-series/B0018_numeric_raw.csv


In [2]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy.stats import spearmanr


In [3]:
DATA_DIR = "/kaggle/input/split-soh"
FILE_NAME = os.listdir(DATA_DIR)[0]

df = pd.read_parquet(f"{DATA_DIR}/{FILE_NAME}")

df = df.sort_values(["asset_id", "cycle_id"]).reset_index(drop=True)
print(df.shape)


(118770, 27)


In [4]:
soh_df = df[df["split"] == "test"][[
    "cycle_id", "SOH_proxy"
]].copy()

soh_df = soh_df.sort_values("cycle_id").reset_index(drop=True)
soh_df.head()


Unnamed: 0,cycle_id,SOH_proxy
0,44,0.500859
1,44,0.51181
2,44,0.524252
3,45,0.521471
4,45,0.503095


In [5]:
print(eis_df.columns.tolist())
print(eis_df.shape)


NameError: name 'eis_df' is not defined

In [None]:
EIS_DIR = "/kaggle/input/b-18-series"
EIS_FILE = os.listdir(EIS_DIR)[0]

eis_df = pd.read_csv(f"{EIS_DIR}/{EIS_FILE}")
eis_df.head()


In [None]:
# Assume last 5 rows = lowest frequency region
low_freq_block = eis_df.tail(5)

eis_severity_value = low_freq_block["Rectified_Impedance_real"].mean()

print("EIS severity:", eis_severity_value)


In [None]:
print(eis_df.shape)
print(eis_df.isna().sum())
print(eis_df.dtypes)


In [None]:
low_freq_block = eis_df.tail(5)
low_freq_block


In [None]:
# Force numeric conversion
eis_df["Rectified_Impedance_real"] = pd.to_numeric(
    eis_df["Rectified_Impedance_real"],
    errors="coerce"
)

# Recompute low-frequency block
low_freq_block = eis_df.tail(5)

# Drop NaNs explicitly
low_freq_block = low_freq_block.dropna(
    subset=["Rectified_Impedance_real"]
)

eis_severity_value = low_freq_block["Rectified_Impedance_real"].mean()

print("EIS severity:", eis_severity_value)


In [None]:
# Take last 20% of sweep as low-frequency region
n = len(eis_df)
low_freq_block = eis_df.iloc[int(0.8 * n):]

low_freq_block


In [None]:
eis_severity_value = low_freq_block["Battery_impedance_real"].mean()

print("EIS severity (Battery_impedance_real):", eis_severity_value)


In [None]:
# 1. Check EIS severity validity
assert not np.isnan(eis_severity_value), "EIS severity is NaN ❌"
assert eis_severity_value > 0, "EIS severity must be positive ❌"

# 2. Check impedance trend (aging should increase impedance)
impedance_tail = eis_df["Battery_impedance_real"].iloc[int(0.8*len(eis_df)):]
assert impedance_tail.is_monotonic_increasing or impedance_tail.mean() > eis_df["Battery_impedance_real"].mean(), \
    "Impedance does not show degradation trend ❌"

# 3. Check SOH range sanity
assert soh_df["SOH_proxy"].between(0,1).all(), "SOH out of bounds ❌"

# 4. Physics consistency check
mean_soh = soh_df["SOH_proxy"].mean()
print("Mean SOH near EIS:", mean_soh)
print("EIS severity:", eis_severity_value)

assert mean_soh < 0.7 and eis_severity_value > 0.15, \
    "SOH–EIS relationship not physically consistent ❌"

print("✅ Phase 6 sanity check PASSED — physics consistent")
