# Notebook for parallel evaluation of the value function

In [None]:
# papermill parameter: notebook id.
# Reused below as `job_id` to select which batch of subsets this notebook evaluates.
aid = 0

In [None]:
import numpy as np
import os

import nshap

import paperutil

from itertools import product

import datasets

%load_ext autoreload
%autoreload 2

### The different compute jobs

In [None]:
# Enumerate every subset of the feature indices 0..9; each subset is one unit of work.
feature_indices = list(range(10))
subsets = list(nshap.powerset(feature_indices))

# Independent copy of the subset list, used as the list of compute jobs.
all_jobs = subsets[:]
print(len(all_jobs), 'different compute jobs')

### The current job

In [None]:
# Configuration of this run. Only the job id changes between notebooks;
# it is taken from the papermill parameter `aid`.
dataset, classifier = 'folk_travel', 'knn'
i_datapoint = 0
random_seed = i_datapoint
job_id = aid

print(job_id, dataset, classifier, i_datapoint, random_seed)

### Load the dataset

In [None]:
# Load the train/test split and the feature names for the configured dataset
# via the project-local `datasets` helper.
X_train, X_test, Y_train, Y_test, feature_names = datasets.load_dataset(dataset)

In [None]:
# Flag used below to choose between explaining class probabilities ('proba')
# and plain predictions ('predict').
is_classification = datasets.is_classification(dataset)

### Choose which model output to explain: predict, proba or decision

In [None]:
# Pick the model output that is explained:
#   'decision' for the 'gam' classifier (takes precedence),
#   'proba'    for any other classifier on a classification task,
#   'predict'  otherwise.
if classifier == 'gam':
    method = 'decision'
elif is_classification:
    method = 'proba'
else:
    method = 'predict'

### The number of samples is limited by the size of the data set

In [None]:
# Upper bound on the number of samples; never request more samples
# than there are rows in the training data.
max_samples = 1_000_000
n_train_rows = X_train.shape[0]
num_samples = min(max_samples, n_train_rows)

### Create output dir structure, if it does not already exist

In [None]:
# Output directory for this run. The path encodes the dataset, the classifier,
# the index of the explained observation, the output method and the sample count.
froot = f'../../results/n_shapley_values/{dataset}/{classifier}/observation_{i_datapoint}_{method}_{num_samples}/'

In [None]:
# Directory levels leading down to the output directory of this run.
# NOTE: every entry needs its trailing comma. Without it, Python silently
# concatenates adjacent string literals into one (wrong) path.
paths = [
    '../../results/',
    '../../results/n_shapley_values/',
    f'../../results/n_shapley_values/{dataset}/',
    f'../../results/n_shapley_values/{dataset}/{classifier}/',
    froot,
]
for p in paths:
    # exist_ok=True makes this safe when many notebooks run in parallel and
    # race to create the same directories (a separate exists() check is not atomic).
    os.makedirs(p, exist_ok=True)

### Train the classifier

In [None]:
# Obtain the model for the configured dataset/classifier pair from the project-local
# `paperutil` helper (whether it fits from scratch or loads a cached model is not
# visible here — TODO confirm).
clf = paperutil.train_classifier(dataset, classifier)

### The value function

In [None]:
# Build the interventional SHAP value function for the selected model output.
# The training data serves as the background sample; random_state is fixed for reproducibility.
if method == 'predict':
    vfunc = nshap.vfunc.interventional_shap(clf.predict, X_train, num_samples=num_samples, random_state=0)
elif method == 'proba':
    # Explain the probability of the class that the model predicts for this observation.
    x_query = X_test[i_datapoint, :].reshape((1, -1))
    # .item() extracts the single predicted label; calling int() directly on a
    # 1-element array is deprecated since NumPy 1.25.
    prediction = int(np.asarray(clf.predict(x_query)).item())
    vfunc = nshap.vfunc.interventional_shap(clf.predict_proba, X_train, num_samples=num_samples, random_state=0, target=prediction)
elif method == 'decision':
    vfunc = nshap.vfunc.interventional_shap(clf.decision_function, X_train, num_samples=num_samples, random_state=0)
else:
    # Fail here rather than with a NameError on `vfunc` further down.
    raise ValueError(f'unknown method: {method!r}')

### Evaluate the value function

In [None]:
# Each notebook evaluates one contiguous batch of subsets.
jobs_per_notebook = 10  # 10 jobs per notebook
first_job = jobs_per_notebook * job_id

# The explained observation as a single-row 2-D array; identical for every subset.
x_query = X_test[i_datapoint, :].reshape((1, -1))

# Slicing (rather than indexing) lets the last notebook stop at the end of `subsets`
# instead of raising IndexError when len(subsets) is not a multiple of the batch size.
for S in subsets[first_job:first_job + jobs_per_notebook]:
    # Skip subsets that refer to feature indices the data set does not have.
    if len(S) > 0 and np.max(S) >= X_train.shape[1]:
        continue
    fname = froot + f'v{S}.txt'
    # Evaluate the value function and save the result, unless a previous run already did.
    if not os.path.exists(fname):
        result = vfunc(x_query, S)
        with open(fname, 'w') as f:
            f.write(f'{result}')