In [1]:
from tensorflow import keras
import pandas as pd
import shap
import numpy as np

# ==========================
# paths
# ==========================
model_path = r"exp\keras\2025_12_31_00_06_25\e256__ep20__oa_sigmoid__779994\model"
data_path  = r"exp\keras\2025_12_31_00_06_25\e256__ep20__oa_sigmoid__779994\normalized_df-e256__ep20__oa_sigmoid__779994.csv"

# ==========================
# load
# ==========================
model = keras.models.load_model(model_path)
df = pd.read_csv(data_path)

# ==========================
# numeric features only
# ==========================
drop_cols = ["itemId","didBuy_target","date","source","item"]
numeric_feature_cols = [c for c in df.columns if c not in drop_cols]

X_num = df[numeric_feature_cols].astype(float).values
X_id  = df["itemId"].astype(int).values.reshape(-1,1)

print("numeric_feature_count:", len(numeric_feature_cols))

# ==========================
# background + samples
# ==========================
numeric_background = X_num[:200]
numeric_samples    = X_num[:100]   # <= change freely

print("numeric_samples shape:", numeric_samples.shape)

# ==========================
# wrapper
# ==========================
def model_numeric_only(x):
    fixed_ids = X_id[:len(x)]
    return model([x, fixed_ids]).numpy()

# ==========================
# SHAP compute
# ==========================
explainer = shap.KernelExplainer(model_numeric_only, numeric_background)
raw_sv    = explainer.shap_values(numeric_samples, nsamples=200)[0]

print("raw_sv shape:", raw_sv.shape)

# ==========================
# normalize shapes
# ==========================
sv = raw_sv
n_samples  = numeric_samples.shape[0]
n_features = len(numeric_feature_cols)

# --- trim bias
if sv.shape[1] == n_features + 1:
    sv = sv[:, :-1]
if sv.shape[0] == n_features + 1:
    sv = sv[:-1, :]

# --- handle collapses
# case 1: (n_features,1) => broadcast to (n_samples,n_features)
if sv.shape == (n_features, 1):
    sv = np.repeat(sv.T, n_samples, axis=0)

# case 2: transpose if needed
elif sv.shape == (n_features, n_samples):
    sv = sv.T

# case 3: already correct
elif sv.shape == (n_samples, n_features):
    pass

else:
    raise RuntimeError(f"Unhandled SHAP shape: {sv.shape}")

print("final sv shape:", sv.shape, "expected:", numeric_samples.shape)
assert sv.shape == numeric_samples.shape

# ==========================
# plot
# ==========================
shap.summary_plot(sv, numeric_samples, feature_names=numeric_feature_cols)


numeric_feature_count: 39
numeric_samples shape: (100, 39)
Provided model function fails when applied to the provided data set.


ValueError: Input 0 of layer "model_7" is incompatible with the layer: expected shape=(None, 38), found shape=(200, 39)

In [3]:
from tensorflow import keras
import pandas as pd
import numpy as np

# ===== paths =====
model_path = r"exp\keras\2025_12_29_12_14_33\e32__l1024-1024-512-256-64-8__ep15__oa_sigmoid__214212\model"
data_path  = r"exp\keras\2025_12_29_12_14_33\e32__l1024-1024-512-256-64-8__ep15__oa_sigmoid__214212\normalized_df-e32__l1024-1024-512-256-64-8__ep15__oa_sigmoid__214212.csv"

# ===== load =====
model = keras.models.load_model(model_path)
df = pd.read_csv(data_path)

# ===== extract embedding layer =====
emb_layer = None
for layer in model.layers:
    if "Embedding" in layer.__class__.__name__:
        emb_layer = layer
        break

if emb_layer is None:
    raise RuntimeError("No embedding layer found")

emb_matrix = emb_layer.get_weights()[0]  # (num_items, embed_dim)

print("embedding matrix shape:", emb_matrix.shape)

# ===== compute per-item norm =====
item_strength = np.linalg.norm(emb_matrix, axis=1)  # L2 norm

# ===== map back to item names =====
# we assume df["itemId"] aligns with embedding index
id_to_item = df.drop_duplicates("itemId").set_index("itemId")["item"]

results = pd.DataFrame({
    "itemId": range(len(item_strength)),
    "embedding_strength": item_strength
}).merge(id_to_item, on="itemId", how="left").sort_values("embedding_strength", ascending=False)

print(results.head(25))


embedding matrix shape: (730, 32)
     itemId  embedding_strength  \
639     639            3.851353   
374     374            3.128069   
427     427            3.085204   
340     340            2.982383   
447     447            2.960709   
561     561            2.950172   
710     710            2.942443   
26       26            2.910446   
562     562            2.896022   
605     605            2.868653   
662     662            2.868228   
22       22            2.863777   
239     239            2.859609   
3         3            2.854246   
421     421            2.816644   
480     480            2.780931   
77       77            2.776706   
123     123            2.748934   
403     403            2.716920   
314     314            2.684021   
530     530            2.682203   
83       83            2.681296   
426     426            2.645577   
315     315            2.630602   
64       64            2.621546   

                                                  item 

Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\ProgramData\miniconda3\envs\grocery-ml\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "C:\ProgramData\miniconda3\envs\grocery-ml\lib\asyncio\selector_events.py", line 115, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
Exception in callback BaseSelectorEventLoop._read_from_self()
handle: <Handle BaseSelectorEventLoop._read_from_self()>
Traceback (most recent call last):
  File "C:\ProgramData\miniconda3\envs\grocery-ml\lib\asyncio\events.py", line 80, in _run
    self._context.run(self._callback, *self._args)
  File "C:\ProgramData\miniconda3\envs\grocery-ml\lib\asyncio\selector_events.py", line 115, in _read_from_self
    data = self._ssock.recv(4096)
ConnectionReset