In [263]:
from pathlib import Path

import numpy as np
import pandas as pd

from environment import Environment
from game import SumUnanimityGames, GlobalFeatureImportance, LocalFeatureImportance, UnsupervisedFeatureImportance
from plot import plot

In [264]:
from algorithms.CMCS import *
from algorithms.GapE import GapE
from algorithms.SAR import SAR
from algorithms.ApproShapley import ApproShapley
from algorithms.BUS import BUS
from algorithms.HybridApproBUS import SmartHybridApproBUS
from algorithms.SVARM import SVARM, StratSVARM
from algorithms.ShapleySort import ShapleySort
from algorithms.KernelSHAP import KernelSHAP
from algorithms.shap_k import SHAP_K
%load_ext autoreload
%autoreload 2
%matplotlib inline

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [265]:
# Accumulator for (game name, method label, evaluate_PAC result) tuples;
# each evaluation cell below appends one entry.
results = list()

In [266]:
# Experiment configuration.
#   n      - number of players, given to both the environment and the game
#   k      - passed as the `k` argument of evaluate_PAC
#   rounds - passed as the last argument of evaluate_PAC
n, k, rounds = 16, 5, 200
metric = "ratio"

# NOTE(review): budget=-1 presumably means "no fixed budget" - confirm in Environment.
env = Environment(
    n=n,
    budget=-1,
    metric=metric,
)


In [267]:
# Print every float inside NumPy arrays with exactly three decimal places.
np.set_printoptions(formatter=dict(float="{0:0.3f}".format))

In [268]:
# Dataset locations for the available game types; only the global one is used
# by the active game below.
filepath_global = "datasets/Global feature importance/Bank marketing classification random forest.csv"
filepath_unsupervised = "datasets/Unsupervised Feature Importance/vf_BigFive.csv"
directory_local = "datasets/nlp_sentiment"

# Active game. Alternatives kept for switching experiments:
#   game = LocalFeatureImportance(directory=directory_local, num_players=n)
#   game = UnsupervisedFeatureImportance(filepath=filepath_unsupervised, num_players=n)
#   game = SumUnanimityGames()
game = GlobalFeatureImportance(
    filepath=filepath_global,
    num_players=n,
    use_cached=True,
)

[0.000 -0.006 0.000 ... 0.011 0.006 0.008]
[-0.000 -0.000 0.001 0.000 -0.000 -0.010 -0.001 0.000 0.001 0.004 0.002
 0.004 -0.000 0.002 0.001 0.005] 0.008106116433308063


In [269]:
# Parameters handed to every algorithm constructor below; epsilon is also
# passed to evaluate_PAC.
# NOTE(review): presumably delta/epsilon are the PAC confidence and tolerance
# and t_min a minimum sample count - confirm against the algorithm classes.
t_min, delta, epsilon = 30, 0.01, 0.0005

In [270]:
# Run the PAC evaluation for SHAP@K and record it under that label.
results.append((
    game.name,
    "SHAP@K",
    env.evaluate_PAC(game, SHAP_K(t_min, delta, epsilon), k, epsilon, rounds),
))

100%|██████████| 200/200 [01:07<00:00,  2.95it/s, accuracy=0.99, func_calls=18706, topk_approx=[ 9 10 11 13 15], topk_real=[ 9 10 11 13 15]]              


In [271]:
# Run the PAC evaluation for CMCS and record it under that label.
results.append((
    game.name,
    "CMCS",
    env.evaluate_PAC(game, CMCS(t_min, delta, epsilon), k, epsilon, rounds),
))

100%|██████████| 200/200 [02:17<00:00,  1.45it/s, accuracy=1.0, func_calls=34831, topk_approx=[ 9 10 11 13 15], topk_real=[ 9 10 11 13 15]]


In [272]:
# Run the PAC evaluation for CMCS@K and record it under that label.
results.append((
    game.name,
    "CMCS@K",
    env.evaluate_PAC(game, CMCS_at_K(t_min, delta, epsilon), k, epsilon, rounds),
))

100%|██████████| 200/200 [01:09<00:00,  2.88it/s, accuracy=0.995, func_calls=10914, topk_approx=[ 9 10 11 13 15], topk_real=[ 9 10 11 13 15]]             


In [273]:
# Run the PAC evaluation for Greedy CMCS and record it under that label.
results.append((
    game.name,
    "Greedy CMCS",
    env.evaluate_PAC(game, Greedy_CMCS(t_min, delta, epsilon), k, epsilon, rounds),
))


100%|██████████| 200/200 [02:36<00:00,  1.28it/s, accuracy=1.0, func_calls=13489, topk_approx=[ 9 10 11 13 15], topk_real=[ 9 10 11 13 15]]


In [274]:
# Display the accumulated (game name, method label, evaluate_PAC result) tuples.
results

[('Bank marketing classification random forest',
  'SHAP@K',
  (15194.51, 281.2213859918836, 0.99)),
 ('Bank marketing classification random forest',
  'CMCS',
  (38466.195, 913.7680816927659, 1.0)),
 ('Bank marketing classification random forest',
  'CMCS@K',
  (12381.185, 188.0056148361655, 0.995)),
 ('Bank marketing classification random forest',
  'Greedy CMCS',
  (16150.945, 274.7223850619853, 1.0))]

In [None]:
# Pivot the flat result tuples into a table: one row per game holding the first
# element of each evaluate_PAC result, and a companion "<game> - SE" row holding
# the second element; columns are the method labels. The third element (`acc`)
# is unpacked but not tabulated.
#
# The loop variable is deliberately NOT named `game`: that global holds the game
# object used by the evaluation cells, and rebinding it to a string here would
# make `game.name` fail if any of those cells were re-run after this one.
df_dict = {}
for game_name, method, (value, se, acc) in results:
    # setdefault creates the per-row dict on first sight of a game, keeping the
    # value row ahead of its "- SE" row in insertion order.
    df_dict.setdefault(game_name, {})[method] = value
    df_dict.setdefault(game_name + " - SE", {})[method] = se

# Create DataFrame
df = pd.DataFrame.from_dict(df_dict, orient='index')
df

Unnamed: 0,SHAP@K,CMCS,CMCS@K,Greedy CMCS
Bank marketing classification random forest,15194.51,38466.195,12381.185,16150.945
Bank marketing classification random forest - SE,281.221386,913.768082,188.005615,274.722385


In [277]:
# Persist the summary table. The file name encodes k and the PAC parameters so
# runs with different settings land in different files.
out_path = Path(f"results/data/PAC/pac_k={k}_tmin={t_min}_delta={delta}_epsilon={epsilon}.csv")
# to_csv does not create missing directories; make sure the target exists so a
# fresh checkout does not fail after the long evaluation cells have finished.
out_path.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(out_path)