In [16]:
from game import GlobalFeatureImportance, LocalFeatureImportance, UnsupervisedFeatureImportance
from environment import FixedBudgetEnvironment
from algorithms.SVARM import StratSVARM
from algorithms.CMCS import CMCS, CMCS_Dependent, CMCS_Independent, CMCS_Length, Selective_CMCS, CMCS_at_K, Greedy_CMCS
from algorithms.KernelSHAP import KernelSHAP
from algorithms.ApproShapley import ApproShapley
from algorithms.shap_k import SHAP_K
from algorithms.BUS import BUS
import numpy as np
import pandas as pd
import os
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [17]:
# Experiment configuration shared by the cells below.
n = 10                 # player count, given to both the environment and the game
budget = 1000          # forwarded to FixedBudgetEnvironment as `budget`
rounds = 1000          # forwarded to env.evaluate as `rounds`
K = np.arange(1, n)    # the integers 1, 2, ..., n-1
env = FixedBudgetEnvironment(budget=budget, n=n)

In [18]:
# Dataset locations. Only `filepath_global` feeds the active game below; the
# other two are referenced solely by the commented-out alternatives.
directory_local = "datasets/adult_classification"
filepath_unsupervised = "datasets/Unsupervised Feature Importance/vf_Fifa.csv"
filepath_global = "datasets/Global feature importance/Diabetes regression random forest.csv"

# Active game. To switch, comment this constructor out and enable one of the
# alternatives underneath.
game = GlobalFeatureImportance(
    filepath=filepath_global,
    num_players=n,
    use_cached=True,
)
# game = LocalFeatureImportance(directory=directory_local, num_players=n)
# game = UnsupervisedFeatureImportance(filepath=filepath_unsupervised, num_players=n)
# game = SumUnanimityGames()  # NOTE(review): SumUnanimityGames is not among the visible imports

[   0.           40.77553468  -31.96247587 ... 1498.45186977 1512.2198209
 1523.92305398]
[  56.78153812   28.18850965  638.67726364   73.49364434 -125.71211038
 -126.20984618  150.15987258  179.12531564  479.52837032  169.89049626] 1523.9230539837763


In [19]:
# (label, estimator) pairs to evaluate. The label is printed during evaluation
# and reused as the CSV file name when results are saved. Uncomment entries to
# include them in the run.
algorithms = [
    ("BUS", BUS()),
    # ("ApproShapley", ApproShapley(optimize=True)),
    # ("CMCS", CMCS()),
    # ("KernelSHAP", KernelSHAP()),
    # ("StratSVARM(False,False)", StratSVARM(start_exact=False, theoretical_distribution=False)),
    # ("SHAPatK_30_001", SHAP_K(t_min=30, delta=0.01)),
    # ("CMCSatK_30_001", CMCS_at_K(t_min=30, delta=0.01)),
    # ("Greedy_CMCS(T_min=30)", Greedy_CMCS(t_min=30)),
]

In [20]:
# Column labels for the positional entries of each evaluation result; the
# export cell pairs columns[i] with result[i].
columns = ["K", "topk_ratio", "topk_ratio_SE", "epsilon", "epsilon_SE"]

# Run every configured algorithm on the game and collect (label, result) pairs.
results = []
for name, algorithm in algorithms:
    print(name)  # progress marker: shows which algorithm is being evaluated
    results.append((name, env.evaluate(game, algorithm, K=K, rounds=rounds)))

BUS


In [21]:
# Write one CSV per evaluated algorithm, adding "<metric> +/- <metric>_SE"
# band columns next to each metric.
for name, result in results:
    # `result` is read positionally: entry i becomes the column columns[i].
    df = pd.DataFrame(data={column: result[index] for index, column in enumerate(columns)})

    # Upper and lower bands for both metrics.
    for metric in ("topk_ratio", "epsilon"):
        df[f"{metric}_SE_plus"] = df[metric] + df[f"{metric}_SE"]
        df[f"{metric}_SE_minus"] = df[metric] - df[f"{metric}_SE"]

    # Keep only the exported columns (the raw *_SE columns are dropped), in a fixed order.
    df = df[["K", "topk_ratio", "topk_ratio_SE_plus", "topk_ratio_SE_minus", "epsilon", "epsilon_SE_plus", "epsilon_SE_minus"]]

    # Named `out_dir` rather than `dir` so the builtin dir() is not shadowed
    # in the notebook's global namespace.
    out_dir = f"results/data/{game.__class__.__name__}/{game.name}/budget={budget}_rounds={rounds}/k={K[0]}-{K[-1]}"
    # exist_ok=True replaces the separate isdir() check and avoids the
    # check-then-create race.
    os.makedirs(out_dir, exist_ok=True)

    csv_path = f"{out_dir}/{name}.csv"
    print(csv_path)
    df.to_csv(csv_path, index=False)

results/data/GlobalFeatureImportance/Diabetes regression random forest/budget=1000_rounds=1000/k=1-9/BUS.csv
