In [1]:
import os
import joblib
import torch
import torch.nn as nn
import pandas as pd
import numpy as np

print("CUDA available:", torch.cuda.is_available())


CUDA available: False


In [2]:
for root, dirs, files in os.walk("/kaggle/input"):
    print(root)
    for f in files:
        print("  ", f)


/kaggle/input
/kaggle/input/baseline-xgb-soh-model-jobl
/kaggle/input/baseline-xgb-soh-model-jobl/other
/kaggle/input/baseline-xgb-soh-model-jobl/other/default
/kaggle/input/baseline-xgb-soh-model-jobl/other/default/1
   lstm_scaler.joblib
   soh_lstm_model.pt
   baseline_xgb_soh_model.joblib
/kaggle/input/split-data
   canonical_checkpoint_features_with_soh_and_split.parquet


In [3]:

df = pd.read_parquet("/kaggle/input/split-data/canonical_checkpoint_features_with_soh_and_split.parquet")

df = df.sort_values(["asset_id", "cycle_id"]).reset_index(drop=True)
print(df.shape)


(118770, 27)


In [4]:
FEATURES = [
    "V_mean","V_std","V_min","V_max","V_range",
    "dV_dt_mean","dV_dt_max",
    "T_mean","T_max","T_delta",
    "duration_s"
]

TARGET = "SOH_proxy"
WINDOW = 20


In [5]:
df_val = df[df["split"] == "val"].copy()
print("Validation rows:", len(df_val))


Validation rows: 17816


In [6]:
X_tab = df_val[FEATURES]
soh_xgb = xgb_model.predict(X_tab)


NameError: name 'xgb_model' is not defined

In [None]:
feature_medians = df[df["split"] == "train"][FEATURES].median()


In [None]:
def lstm_predict(df, window, medians, scaler, model):
    preds, targets = [], []

    for asset_id, g in df.groupby("asset_id"):
        g = g.reset_index(drop=True)

        if len(g) < window:
            continue

        X = g[FEATURES].fillna(medians).values.astype("float32")
        y = g[TARGET].values.astype("float32")

        X2d = scaler.transform(X)
        X_seq = X2d.reshape(len(X), -1)

        for i in range(len(g) - window + 1):
            x = torch.tensor(X_seq[i:i+window]).unsqueeze(0)

            with torch.no_grad():
                pred = model(x).item()

            preds.append(pred)
            targets.append(y[i+window-1])

    return np.array(preds), np.array(targets)


In [None]:
soh_lstm, soh_true = lstm_predict(
    df_val, WINDOW, feature_medians, lstm_scaler, lstm
)

print(soh_lstm.shape, soh_true.shape)


In [None]:
soh_xgb_aligned = soh_xgb[-len(soh_lstm):]


In [None]:
X_meta = np.column_stack([soh_xgb_aligned, soh_lstm])
y_meta = soh_true


In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

meta_model = Ridge(alpha=1.0)
meta_model.fit(X_meta, y_meta)


In [None]:
mse_xgb  = mean_squared_error(y_meta, X_meta[:,0])
mse_lstm = mean_squared_error(y_meta, X_meta[:,1])
mse_meta = mean_squared_error(y_meta, meta_model.predict(X_meta))

print("XGB MSE :", mse_xgb)
print("LSTM MSE:", mse_lstm)
print("META MSE:", mse_meta)


In [None]:
joblib.dump(meta_model, "meta_soh_model.pkl")
