In [2]:
import pandas as pd

# Read the results table from disk.
df = pd.read_csv("result/mse_engression_losses.csv")

# Keep the rows whose `mse` value is greater than 1; `.copy()` makes `bad`
# independent of `df`.
mse_above_one = df["mse"] > 1
bad = df[mse_above_one].copy()

print(f"Total rows with MSE > 1: {len(bad)}")

# Count the high-MSE rows for every combination of the setting columns and
# list the most frequent combinations first.
setting_cols = ["model", "dx", "dy", "k", "h", "noise_dist", "noise_std", "loss_phi"]
rows_per_setting = bad.groupby(setting_cols).size()
summary = rows_per_setting.reset_index(name="count").sort_values("count", ascending=False)
print(summary)



Total rows with MSE > 1: 224
      model  dx  dy  k       h noise_dist  noise_std loss_phi  count
74   preANM   2   2  2   cubic   gaussian        2.0  power_1      8
53   preANM   2   2  1   cubic   gaussian        2.0  power_1      7
84   preANM   2   2  2   cubic    uniform        2.0  power_1      6
14  postANM   2   2  1   cubic    uniform        2.0  power_1      5
79   preANM   2   2  2   cubic    uniform        1.0  power_1      5
..      ...  ..  .. ..     ...        ...        ...      ...    ...
67   preANM   2   2  1  square    uniform        1.0  power_1      1
33   preANM   1   1  1   cubic   gaussian        2.0    log1p      1
32   preANM   1   1  1   cubic   gaussian        2.0     frac      1
31   preANM   1   1  1   cubic   gaussian        2.0      exp      1
46   preANM   1   1  1  square   gaussian        1.0  power_1      1

[93 rows x 9 columns]


In [3]:
# Rows with MSE > 1 whose loss_phi is not 'power_1'.
# The frame is rebuilt from `df` rather than by narrowing the current contents
# of `bad`, so this cell needs only `df` and yields the same rows however many
# times it is re-run or whatever `bad` held beforehand. `.copy()` keeps the
# result independent of `df`.
bad = df[(df["mse"] > 1) & (df["loss_phi"] != "power_1")].copy()

print(f"Total rows with MSE > 1 (excluding 'power_1'): {len(bad)}")

# Summarize how often each setting causes large error
summary = (
    bad.groupby(["model", "dx", "dy", "k", "h", "noise_dist", "noise_std", "loss_phi"])
        .size()
        .reset_index(name="count")
        .sort_values("count", ascending=False)
)

# Only the 20 most frequent settings are printed; the full table is saved below.
print(summary.head(20))

# Save the full list (the row index is not written to the file).
summary.to_csv("result/high_mse_cases.csv", index=False)
print("Saved: result/high_mse_cases.csv")


Total rows with MSE > 1 (excluding 'power_1'): 62
     model  dx  dy  k      h noise_dist  noise_std   loss_phi  count
17  preANM   2   2  1  cubic    uniform        2.0        exp      3
29  preANM   2   2  2  cubic    uniform        2.0        exp      3
19  preANM   2   2  1  cubic    uniform        2.0      log1p      3
23  preANM   2   2  2  cubic   gaussian        2.0      log1p      3
11  preANM   2   2  1  cubic   gaussian        2.0        exp      3
21  preANM   2   2  2  cubic   gaussian        2.0        exp      3
20  preANM   2   2  1  cubic    uniform        2.0  power_0.5      3
31  preANM   2   2  2  cubic    uniform        2.0      log1p      3
32  preANM   2   2  2  cubic    uniform        2.0  power_0.5      3
28  preANM   2   2  2  cubic    uniform        1.0  power_0.5      2
27  preANM   2   2  2  cubic    uniform        1.0      log1p      2
9   preANM   1   1  1  cubic    uniform        2.0      log1p      2
30  preANM   2   2  2  cubic    uniform        2.0   

In [4]:


# Select rows with MSE above 1 whose loss is anything other than 'power_1'.
mse_above_one = df["mse"] > 1
not_power_1 = df["loss_phi"] != "power_1"
bad = df[mse_above_one & not_power_1].copy()

print(f"Total rows with MSE > 1 (excluding 'power_1'): {len(bad)}")

# Per-setting statistics of the selected rows: how many there are and how
# large their MSE values get.
setting_cols = ["model", "dx", "dy", "k", "h", "noise_dist", "noise_std", "loss_phi"]
mse_stats = {
    "count": ("mse", "size"),     # number of selected rows in the group
    "mse_mean": ("mse", "mean"),  # mean MSE over those rows
    "mse_max": ("mse", "max"),    # largest MSE in the group
    "mse_min": ("mse", "min"),    # smallest MSE in the group (still above 1)
}
per_setting = bad.groupby(setting_cols).agg(**mse_stats)
summary = per_setting.reset_index().sort_values("count", ascending=False)

# Show the first 20 rows as plain text without the index column.
top_rows = summary.head(20)
print(top_rows.to_string(index=False))

# Write the complete table to disk.
summary.to_csv("result/high_mse_cases.csv", index=False)
print("Saved: result/high_mse_cases.csv")


Total rows with MSE > 1 (excluding 'power_1'): 62
 model  dx  dy  k     h noise_dist  noise_std  loss_phi  count  mse_mean   mse_max  mse_min
preANM   2   2  1 cubic    uniform        2.0       exp      3  7.141188 17.359615 1.562499
preANM   2   2  2 cubic    uniform        2.0       exp      3  8.879680 19.355223 1.362544
preANM   2   2  1 cubic    uniform        2.0     log1p      3 12.021448 30.465862 1.423477
preANM   2   2  2 cubic   gaussian        2.0     log1p      3  2.248705  3.316519 1.021222
preANM   2   2  1 cubic   gaussian        2.0       exp      3  1.993759  2.972634 1.342486
preANM   2   2  2 cubic   gaussian        2.0       exp      3  3.665657  4.662938 1.707406
preANM   2   2  1 cubic    uniform        2.0 power_0.5      3  8.258326 21.899113 1.399566
preANM   2   2  2 cubic    uniform        2.0     log1p      3 18.495193 33.600685 2.617011
preANM   2   2  2 cubic    uniform        2.0 power_0.5      3 22.038718 49.859886 2.169361
preANM   2   2  2 cubic    uni