In [48]:
from game import GlobalFeatureImportance, LocalFeatureImportance, UnsupervisedFeatureImportance
from environment import FixedBudgetEnvironment
from algorithms.SVARM import StratSVARM
from algorithms.CMCS import CMCS, CMCS_Dependent, CMCS_Independent, CMCS_Length, Selective_CMCS, CMCS_at_K, Variance_CMCS
from algorithms.KernelSHAP import KernelSHAP
from algorithms.ApproShapley import ApproShapley
from algorithms.shap_k import SHAP_K
import numpy as np
import pandas as pd
import os
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [49]:
# Experiment configuration shared by the cells below.
n = 14                # player count; also passed to the game as num_players
K = np.arange(1, n)   # every k from 1 to n-1 inclusive
budget = 1500         # handed to FixedBudgetEnvironment
rounds = 1000         # handed to env.evaluate
env = FixedBudgetEnvironment(n=n, budget=budget)

In [50]:
# Choose the game that every algorithm is evaluated on.
# Exactly one `game = ...` line is active; the commented lines are the
# alternative game classes and the dataset locations they read from.
# All dataset paths are relative to the notebook's working directory.
filepath_global = "datasets/Global feature importance/Bike sharing regression random forest.csv"
filepath_unsupervised = "datasets/Unsupervised Feature Importance/vf_Fifa.csv"
directory_local = "datasets/adult_classification"
# game = GlobalFeatureImportance(filepath=filepath_global, num_players=n, use_cached=True)
game = LocalFeatureImportance(directory=directory_local, num_players=n)
# game = UnsupervisedFeatureImportance(filepath=filepath_unsupervised, num_players=n)
# NOTE(review): SumUnanimityGames is not among the names imported from `game`
# in the import cell; add that import before enabling the line below.
# game = SumUnanimityGames()

In [51]:
# Registry of (label, estimator) pairs to benchmark.
# The label is printed during evaluation and reused as the CSV file stem,
# so each label must be unique.
algorithms = [
    ("Selective_CMCS", Selective_CMCS()),
    ("ApproShapley", ApproShapley(optimize=True)),
    ("CMCS", CMCS()),
    ("KernelSHAP", KernelSHAP()),
    (
        "StratSVARM(False,False)",
        StratSVARM(start_exact=False, theoretical_distribution=False),
    ),
    ("SHAPatK_30_001", SHAP_K(t_min=30, delta=0.01)),
    ("CMCSatK_30_001", CMCS_at_K(t_min=30, delta=0.01)),
    ("Variance_CMCS(T_min=30)", Variance_CMCS(t_min=30)),
]

In [52]:
# Names given to the positional entries of each evaluation result when it is
# turned into a DataFrame by the export cell.
# NOTE(review): assumes env.evaluate returns its metrics in this order — confirm.
columns = ["K", "topk_ratio", "topk_ratio_SE", "epsilon", "epsilon_SE"]

# Evaluate every registered algorithm on the selected game, collecting
# (label, evaluation result) pairs in registry order.
results = []
for name, algorithm in algorithms:
    print(name)  # progress marker: one line per algorithm
    evaluation = env.evaluate(game, algorithm, K=K, rounds=rounds)
    results.append((name, evaluation))

Selective_CMCS
ApproShapley
CMCS
KernelSHAP
StratSVARM(False,False)
SHAPatK_30_001
CMCSatK_30_001
Variance_CMCS(T_min=30)


  cdf_values = scipy.stats.norm.cdf(sqrt_mij * (-diff_ij) / sigma_ij, loc=0, scale=1) # the probability that P(phi_i < phi_j), i.e. current estimation is wrong


In [53]:
# Write one CSV per algorithm containing, for every K, the top-k ratio and
# epsilon together with their +/- one-standard-error bands.
#
# The destination directory depends only on the game and the experiment
# configuration, so it is built and created once, before the loop.
# `exist_ok=True` replaces the former isdir()-then-makedirs() pair, which could
# race with a concurrent run. The path variable is named `output_dir` rather
# than `dir` so the builtin `dir()` is not shadowed for the rest of the session.
output_dir = f"results/data/{game.__class__.__name__}/{game.name}/budget={budget}_rounds={rounds}/k={K[0]}-{K[-1]}"
os.makedirs(output_dir, exist_ok=True)

for name, result in results:
    # Label the positional entries of the evaluation result with `columns`.
    df = pd.DataFrame(data={column: result[index] for index, column in enumerate(columns)})

    # Upper/lower bands: estimate +/- its standard error.
    df["topk_ratio_SE_plus"] = df["topk_ratio"] + df["topk_ratio_SE"]
    df["topk_ratio_SE_minus"] = df["topk_ratio"] - df["topk_ratio_SE"]

    df["epsilon_SE_plus"] = df["epsilon"] + df["epsilon_SE"]
    df["epsilon_SE_minus"] = df["epsilon"] - df["epsilon_SE"]

    # Keep only the estimate and its bands; the raw SE columns are dropped.
    df = df[["K", "topk_ratio", "topk_ratio_SE_plus", "topk_ratio_SE_minus", "epsilon", "epsilon_SE_plus", "epsilon_SE_minus"]]

    csv_path = f"{output_dir}/{name}.csv"
    print(csv_path)
    df.to_csv(csv_path, index=False)

results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/Selective_CMCS.csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/ApproShapley.csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/CMCS.csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/KernelSHAP.csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/StratSVARM(False,False).csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/SHAPatK_30_001.csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/CMCSatK_30_001.csv
results/data/LocalFeatureImportance/adult_classification/budget=1500_rounds=1000/k=1-13/Variance_CMCS(T_min=30).csv
