In [199]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from artemis.interactions_methods.model_agnostic import FriedmanHStatisticMethod, GreenwellMethod, SejongOhMethod
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from artemis.utilities.domain import InteractionMethod
from sklearn.neural_network import MLPRegressor
from artemis.additivity import AdditivityMeter

$$
Y = \beta_1 X_1 + \beta_2 X_2 + \beta_3 X_3 + \beta_4 (X_1 X_2) + \epsilon
$$

In [182]:
# Synthetic regression problem with exactly one true interaction (x1 * x2).
N = 100
lower = -5
upper = 5
SEED = 42  # fixed so that Restart & Run All regenerates the same dataset

# Coefficients for x1, x2, x3 and the x1*x2 interaction term, in that order.
betas = np.array([3, -7, 10, 2])

rng = np.random.default_rng(SEED)
X = pd.DataFrame(rng.uniform(lower, upper, size=(N, 3)), columns=["x1", "x2", "x3"])
# Noise is drawn from U[0, 1), i.e. it has a non-zero mean of 0.5.
eps = rng.uniform(size=(N,))

# Vectorised target: append the interaction column to the features and take a
# single matrix-vector product instead of a row-wise `apply`.
design = np.column_stack([X.to_numpy(), (X["x1"] * X["x2"]).to_numpy()])
y = pd.Series(design @ betas, index=X.index) + eps


In [191]:
# Seed for the stochastic estimators so that refitting reproduces the same
# models (and therefore the same interaction values) on every run.
MODEL_SEED = 42

# (name, estimator) pairs; the names are used later to select rows of `result`.
models = [
    ("linear", LinearRegression()),
    ("random_forest", RandomForestRegressor(random_state=MODEL_SEED)),
    (
        "neural_network",
        MLPRegressor(hidden_layer_sizes=(5, 2), max_iter=20000, random_state=MODEL_SEED),
    ),
]

In [192]:
# Fit every estimator in place on the full synthetic dataset.
for _model_name, estimator in models:
    estimator.fit(X, y)

In [193]:
# One record per (model, interaction method) pair.
result = []
for name, model in tqdm(models):
    # New method objects are created for every model; their results are read
    # back from the fitted object's `ovo` attribute below.
    methods_model_agnostic = [FriedmanHStatisticMethod(), GreenwellMethod(), SejongOhMethod()]

    for method in tqdm(methods_model_agnostic):
        # Only the performance-based method is given the true targets.
        needs_targets = method.method == InteractionMethod.PERFORMANCE_BASED
        fit_kwargs = {"y_true": y} if needs_targets else {}
        method.fit(model, X, **fit_kwargs)

        result.append(dict(model=name, method=method.method, ovo=method.ovo))


  0%|          | 0/3 [00:00<?, ?it/s]
  0%|          | 0/3 [00:00<?, ?it/s][A
 33%|███▎      | 1/3 [00:00<00:01,  1.54it/s][A
 67%|██████▋   | 2/3 [00:21<00:12, 12.41s/it][A
100%|██████████| 3/3 [00:21<00:00,  7.16s/it][A
 33%|███▎      | 1/3 [00:21<00:42, 21.48s/it]
  0%|          | 0/3 [00:00<?, ?it/s][A
 33%|███▎      | 1/3 [00:01<00:02,  1.00s/it][A
 67%|██████▋   | 2/3 [00:38<00:22, 22.35s/it][A
100%|██████████| 3/3 [00:39<00:00, 13.21s/it][A
 67%|██████▋   | 2/3 [01:01<00:32, 32.16s/it]
  0%|          | 0/3 [00:00<?, ?it/s][A
 33%|███▎      | 1/3 [00:00<00:01,  1.62it/s][A
 67%|██████▋   | 2/3 [00:21<00:12, 12.78s/it][A
100%|██████████| 3/3 [00:22<00:00,  7.37s/it][A
100%|██████████| 3/3 [01:23<00:00, 27.75s/it]


In [194]:
# Collect the records into a frame with columns "model", "method" and "ovo".
# The plain constructor accepts both the list of dicts and an existing
# DataFrame, so re-running this cell is harmless (passing a DataFrame to
# `from_records` is deprecated in recent pandas).
result = pd.DataFrame(result)

A linear model fitted on the raw features is purely additive, so it cannot represent feature interactions.
Therefore, as a sanity check of the interaction methods, we verify that every method reports interaction values close to 0 for the linear model.

In [195]:
# Absolute interaction values below this are treated as "no interaction".
THRESHOLD = 10.0 ** -6

In [196]:
# Sanity check on the linear model: flag, for every method, which feature
# pairs have an absolute interaction value below THRESHOLD.
methods_to_check = [
    InteractionMethod.H_STATISTIC,
    InteractionMethod.VARIABLE_INTERACTION,
    InteractionMethod.PERFORMANCE_BASED,
]
linear_rows = result["model"] == "linear"

for method in methods_to_check:
    print(f"\n\nMethod: {method}")
    # The one-vs-one table's value column is named after the method itself.
    ovo = result.loc[linear_rows & (result["method"] == method), "ovo"].iloc[0]
    df = ovo.assign(is_below_threshold=ovo[method].abs() < THRESHOLD)
    print(df[["Feature 1", "Feature 2", "is_below_threshold"]])
    print("-" * 50)



Method: Friedman H-statistic Interaction Measure
  Feature 1 Feature 2  is_below_threshold
0        x1        x2                True
1        x1        x3                True
2        x2        x3                True
--------------------------------------------------


Method: Greenwell Variable Interaction Measure
  Feature 1 Feature 2  is_below_threshold
0        x1        x2                True
1        x2        x3                True
2        x1        x3                True
--------------------------------------------------


Method: Sejong Oh Performance Based Interaction Measure
  Feature 1 Feature 2  is_below_threshold
0        x2        x3               False
1        x1        x2               False
2        x1        x3               False
--------------------------------------------------


As we can see, `Sejong Oh Performance Based Interaction` falsely detects interactions in the linear model, which points to serious limitations of this method.

In [197]:
# Print the one-vs-one interaction table of every method for the random forest.
random_forest_rows = result[result["model"] == "random_forest"]

for method in (
    InteractionMethod.H_STATISTIC,
    InteractionMethod.VARIABLE_INTERACTION,
    InteractionMethod.PERFORMANCE_BASED,
):
    print(f"\n\nMethod: {method}")
    matching = random_forest_rows.loc[random_forest_rows["method"] == method, "ovo"]
    df = matching.iloc[0].copy()
    print(df)
    print("-" * 50)



Method: Friedman H-statistic Interaction Measure
  Feature 1 Feature 2  Friedman H-statistic Interaction Measure
0        x1        x2                                  0.167411
1        x2        x3                                  0.010174
2        x1        x3                                  0.003444
--------------------------------------------------


Method: Greenwell Variable Interaction Measure
  Feature 1 Feature 2  Greenwell Variable Interaction Measure
0        x1        x2                                7.798291
1        x2        x3                                2.054487
2        x1        x3                                1.346485
--------------------------------------------------


Method: Sejong Oh Performance Based Interaction Measure
  Feature 1 Feature 2  Sejong Oh Performance Based Interaction Measure
0        x1        x3                                        11.629588
1        x2        x3                                        11.547316
2        x1        x2  

In the case of `RandomForest`, both `Friedman H-statistic` and `Greenwell Variable interaction` seem to correctly capture the non-additive influence (interaction) of `x1 - x2`. Again, `Sejong Oh Performance Based Interaction` falsely claims that the `x1 - x3` interaction is the strongest. What's more, it puts `x1 - x2` lowest in the interaction ranking. This again points to its serious limitations in correctly detecting feature interactions.

In [198]:
# Print the one-vs-one interaction table of every method for the MLP.
is_neural_network = result["model"] == "neural_network"
separator = "-" * 50

for method in [
    InteractionMethod.H_STATISTIC,
    InteractionMethod.VARIABLE_INTERACTION,
    InteractionMethod.PERFORMANCE_BASED,
]:
    print(f"\n\nMethod: {method}")
    is_method = result["method"] == method
    df = result.loc[is_neural_network & is_method, "ovo"].iloc[0].copy()
    print(df)
    print(separator)



Method: Friedman H-statistic Interaction Measure
  Feature 1 Feature 2  Friedman H-statistic Interaction Measure
0        x1        x2                                  0.313712
1        x1        x3                                  0.000631
2        x2        x3                                  0.000348
--------------------------------------------------


Method: Greenwell Variable Interaction Measure
  Feature 1 Feature 2  Greenwell Variable Interaction Measure
0        x1        x2                               11.961972
1        x1        x3                                0.438776
2        x2        x3                                0.368430
--------------------------------------------------


Method: Sejong Oh Performance Based Interaction Measure
  Feature 1 Feature 2  Sejong Oh Performance Based Interaction Measure
0        x1        x2                                        21.175629
1        x1        x3                                        17.295130
2        x2        x3  

In the case of `MLP`, all of the methods correctly rank `x1 - x2` as the strongest interaction. `Friedman H-statistic` and `Greenwell Variable interaction` values are negligible for every pair other than the actual interaction (`x1 - x2`), which suggests that they are capable of correctly capturing feature interactions. `Sejong Oh Performance Based Interaction` also correctly indicates `x1 - x2` as the most relevant pair, which may suggest that performance-based methods produce good results for well-performing models such as neural networks. Nevertheless, it still assigns high values to the non-existent `x1 - x3` and `x2 - x3` interactions.

In [200]:
# Single AdditivityMeter instance; it is re-fitted on each model in the next
# cell and its `additivity_index` is read after every fit.
additivity_meter = AdditivityMeter()

In [201]:
# One record per model holding its additivity index.
additivity = []
for name, model in tqdm(models):
    # The shared meter is re-fitted for each model, so the index has to be
    # read immediately after the fit.
    additivity_meter.fit(model, X)
    additivity.append(dict(model=name, additivity=additivity_meter.additivity_index))

100%|██████████| 3/3 [00:00<00:00,  4.88it/s]


In [202]:
# Show the per-model additivity indices as a table (columns: model, additivity).
pd.DataFrame(additivity)

Unnamed: 0,model,additivity
0,linear,1.0
1,random_forest,0.913495
2,neural_network,0.867561


As we can see, the more complex the model is, the less additive it is (with the linear model having a perfect additivity of 1.0). The additivity index indicates how much of each model's prediction variance is captured by additive (main-effect) components; the remainder is attributable to interactions between features.