In [57]:
from game import GlobalFeatureImportance, LocalFeatureImportance, UnsupervisedFeatureImportance
from environment import Environment
from algorithms.SVARM import StratSVARM
from algorithms.CMCS import CMCS, CMCS_Dependent, CMCS_Independent, CMCS_Length, Selective_CMCS, Variance_CMCS, CMCS_at_K
from algorithms.KernelSHAP import KernelSHAP
from algorithms.ApproShapley import ApproShapley
from algorithms.shap_k import SHAP_K
import pandas as pd
import os
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [58]:
# Experiment configuration shared by the cells below.
n = 14              # number of players; given to Environment and, as num_players, to the game
k = 3               # third argument of env.evaluate and part of the results path (presumably the k of "top-k" -- confirm)
budget = 1500       # given to Environment; also part of the results path
rounds = 1000       # last argument of env.evaluate; also part of the results path
step_interval = 50  # fourth argument of env.evaluate
metric = "ratio"    # given to Environment as `metric`

env = Environment(n=n, budget=budget, metric=metric)

In [59]:
# Select the cooperative game that every algorithm is evaluated on.
# Exactly one `game = ...` line should be active; the others are kept as
# commented-out alternatives.
# Only `directory_local` is used by the active line; the two file paths are
# referenced solely by the commented-out alternatives.
filepath_global = "datasets/Global feature importance/Bike sharing regression random forest.csv"
filepath_unsupervised = "datasets/Unsupervised Feature Importance/vf_Fifa.csv"
directory_local = "datasets/adult_classification"
# game = GlobalFeatureImportance(filepath=filepath_global, num_players=n, use_cached=True)
game = LocalFeatureImportance(directory=directory_local, num_players=n)
# game = UnsupervisedFeatureImportance(filepath=filepath_unsupervised, num_players=n)
# NOTE(review): SumUnanimityGames is not among the names imported from `game`
# in the import cell; it must be imported before the next line is enabled.
# game = SumUnanimityGames()

In [60]:
# (label, algorithm instance) pairs, evaluated in this order.
# The label is printed while the evaluation runs and later becomes the CSV file name.
algorithms = [
    ("Selective_CMCS", Selective_CMCS()),
    ("ApproShapley", ApproShapley(optimize=True)),
    ("CMCS", CMCS()),
    ("KernelSHAP", KernelSHAP()),
    (
        "StratSVARM(False,False)",
        StratSVARM(start_exact=False, theoretical_distribution=False),
    ),
    ("SHAPatK_30_001", SHAP_K(t_min=30, delta=0.01)),
    ("CMCSatK_30_001", CMCS_at_K(t_min=30, delta=0.01)),
    ("Variance_CMCS(T_min=30)", Variance_CMCS(t_min=30)),
]

In [61]:
# Disabled: uncommenting the lines below appends three further CMCS variants
# (all imported from algorithms.CMCS in the import cell) to `algorithms`.
# algorithms += [("CMCS_Dependent", CMCS_Dependent()),
#               ("CMCS_Independent", CMCS_Independent()),
#               ("CMCS_Length", CMCS_Length())]

In [62]:
# Evaluate every configured algorithm on the selected game and collect
# (label, evaluation output) pairs in the order the algorithms are listed.
results = []
for name, algorithm in algorithms:
    print(name)  # progress marker
    evaluation = env.evaluate(game, algorithm, k, step_interval, rounds)
    results.append((name, evaluation))

# Column names used by the export cell: entry i of an evaluation output
# becomes the column columns[i].
columns = [
    "budget",
    "topk_ratio", "topk_ratio_SE",
    "mse", "mse_SE",
    "topk_sum", "topk_sum_SE",
    "epsilon", "epsilon_SE",
]

Selective_CMCS
ApproShapley
CMCS
KernelSHAP
StratSVARM(False,False)
SHAPatK_30_001
CMCSatK_30_001
Variance_CMCS(T_min=30)


  cdf_values = scipy.stats.norm.cdf(sqrt_mij * (-diff_ij) / sigma_ij, loc=0, scale=1) # the probability that P(phi_i < phi_j), i.e. current estimation is wrong


In [63]:
# Export one CSV per algorithm. Each file holds the budget column plus, for
# every metric, its value and the value +/- its `_SE` column (presumably the
# standard error -- confirm against Environment.evaluate).

# The target directory is identical for all algorithms, so it is built once.
# It is deliberately not called `dir`, which would shadow the builtin dir()
# for the remainder of the kernel session.
output_dir = f"results/data/{game.__class__.__name__}/{game.name}/budget={budget}_rounds={rounds}/k={k}"
# exist_ok=True avoids the check-then-create race of isdir() + makedirs().
os.makedirs(output_dir, exist_ok=True)

# Metrics in the order they appear in the exported file (this differs from
# their order in `columns`).
export_metrics = ["topk_ratio", "mse", "epsilon", "topk_sum"]
export_columns = ["budget"]
for metric_name in export_metrics:
    export_columns += [metric_name, f"{metric_name}_SE_plus", f"{metric_name}_SE_minus"]

for algorithm, result in results:
    # Entry i of the evaluation output becomes the column columns[i].
    df = pd.DataFrame(data={column: result[index] for index, column in enumerate(columns)})
    print(output_dir, algorithm)

    # Upper and lower band for every metric. The loop variable is named
    # `metric_name` so the global `metric` setting is not overwritten.
    for metric_name in export_metrics:
        df[f"{metric_name}_SE_plus"] = df[metric_name] + df[f"{metric_name}_SE"]
        df[f"{metric_name}_SE_minus"] = df[metric_name] - df[f"{metric_name}_SE"]

    # Keep only the exported columns (the raw `_SE` columns are dropped).
    df = df[export_columns]
    df.to_csv(f"{output_dir}/{algorithm}.csv", index=False)

results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 Selective_CMCS
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 ApproShapley
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 CMCS
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 KernelSHAP
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 StratSVARM(False,False)
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 SHAPatK_30_001
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 CMCSatK_30_001
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=3 Variance_CMCS(T_min=30)
