In [1]:
from sklearn.linear_model import LinearRegression, LogisticRegressionCV
from aeon.datasets import  load_from_tsfile
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from aeon.transformations.collection.convolution_based import MiniRocketMultivariate
import numpy as np

In [2]:
# Raw CMJ splits: column 0 is used as the label, the remaining columns as the series values.
train = np.load("tsCaptum/data/train.npy")
test = np.load("tsCaptum/data/test.npy")
print(train.shape, test.shape)

# Repackage each split: X gets an extra axis inserted at position 1,
# y is the first column cast to int8.
data = {
    split: {
        "X": raw[:, 1:][:, np.newaxis],
        "y": raw[:, 0].astype(np.int8),
    }
    for split, raw in (("train", train), ("test", test))
}
print(
    data["train"]["X"].shape,
    data["train"]["y"].dtype,
    data["test"]["X"].shape,
    data["test"]["y"].dtype,
)

# `data` is a plain dict, so np.save stores it as a pickled object array;
# the loading cell therefore reads it back with allow_pickle=True and .item().
np.save("tsCaptum/data/CMJ_univariate.npy", data)

(419, 501) (179, 501)
(419, 1, 500) int8 (179, 1, 500) int8


In [3]:
from tsCaptum.explainers import Feature_Ablation, Feature_Permutation, LIME, Kernel_Shap


# load regression and classification datasets

In [2]:
# --- Regression data (aeon .ts files) ---
X_train_reg, y_train_reg = load_from_tsfile("./tsCaptum/data/AppliancesEnergy_TRAIN.ts")
X_test_reg, y_test_reg = load_from_tsfile("./tsCaptum/data/AppliancesEnergy_TEST.ts")
# Only the test split's shapes are reported here.
print("regression train and test", X_test_reg.shape, y_test_reg.shape)

# --- Univariate classification data ---
# The .npy file holds a pickled dict ({"train"/"test": {"X", "y"}}), hence
# allow_pickle=True and .item(). Unpickling can execute arbitrary code, so
# only load files from a trusted source.
CMJ = np.load("tsCaptum/data/CMJ_univariate.npy", allow_pickle=True).item()
CMJ_X_train, CMJ_y_train = CMJ["train"]["X"], CMJ["train"]["y"]
CMJ_X_test, CMJ_y_test = CMJ["test"]["X"], CMJ["test"]["y"]
print("univariate classification", CMJ_X_train.shape, CMJ_X_test.shape)

# --- Multivariate classification data (same dict layout and pickle caveat) ---
MP = np.load("./tsCaptum/data/MP_centered.npy", allow_pickle=True).item()
MP_X_train, MP_y_train = MP["train"]["X"], MP["train"]["y"]
MP_X_test, MP_y_test = MP["test"]["X"], MP["test"]["y"]
print("multivariate classification", MP_X_train.shape, MP_X_test.shape)

regression train and test (42, 24, 144) (42,)
univariate classification (419, 1, 500) (179, 1, 500)
multivariate classification (1426, 8, 161) (595, 8, 161)


# train some classifiers and check out how the library works
Once you have a trained model and the samples you want to explain, producing the explanation is just a two-step process.

In [3]:
# Regression model: MiniRocket features -> standardisation -> linear regression.
regressor = make_pipeline(
    MiniRocketMultivariate(n_jobs=1),
    StandardScaler(),
    LinearRegression(n_jobs=-1),
)
regressor.fit(X_train_reg, y_train_reg)

# Report the pipeline's own score() on the held-out regression split.
print("metric is", regressor.score(X_test_reg, y_test_reg))

metric is 0.5655307537711616


we're explaining only 20 samples as a demo

In [4]:
# Keep only the first n_to_explain regression test samples for the demo.
# NOTE: this overwrites X_test_reg / y_test_reg for every later cell.
n_to_explain = 20
X_test_reg = X_test_reg[:n_to_explain]
y_test_reg = y_test_reg[:n_to_explain]

# Feature Ablation

In [5]:
from tsCaptum.explainers import Feature_Ablation

# Step 1: wrap the fitted model in an explainer.
myFA = Feature_Ablation(regressor)
# Step 2: explain the samples using the default arguments.
exp = myFA.explain(samples=X_test_reg)

# Show the attribution shape and the top-left 5x5 corner of the first sample's attribution.
print(exp.shape, "\n", exp[0, :5, :5])

24it [00:07,  3.42it/s]                        

(20, 24, 144) 
 [[ 0.8797369   0.8797369   0.8797369   0.8797369   0.8797369 ]
 [-1.9094248  -1.9094248  -1.9094248  -1.9094248  -1.9094248 ]
 [ 0.8222017   0.8222017   0.8222017   0.8222017   0.8222017 ]
 [ 0.3316269   0.3316269   0.3316269   0.3316269   0.3316269 ]
 [-0.60728645 -0.60728645 -0.60728645 -0.60728645 -0.60728645]]





Apart from `samples`, the `explain` method has some additional parameters:

		:param labels:      labels associated with the samples, in case of classification
		:param batch_size:  the batch size to be used, i.e. the number of samples to be explained at the same time
		:param n_segments:  number of segments the time series is divided into. If you want a point-wise explanation, provide -1 as value
		:param normalise:   whether or not to normalise the result
		:param baseline:    the baseline which will substitute the time series' values when ablated. It can be either a scalar (each value of the time series is substituted by this scalar) or a single time series

#TODO add option for normalisation?

In [6]:
# Raw attributions: 5 segments per series, 10 samples per batch.
exp = myFA.explain(samples=X_test_reg, batch_size=10, n_segments=5)
for i in range(5):
    sample_attr = exp[i]
    print(i, ": min and max values", sample_attr.min(), sample_attr.max())

# Same call with normalise=True, to compare the per-sample value ranges.
exp_normalized = myFA.explain(
    samples=X_test_reg,
    batch_size=10,
    n_segments=5,
    normalise=True,
)
for i in range(5):
    sample_attr = exp_normalized[i]
    print(i, "min and max values", sample_attr.min(), sample_attr.max())

100%|██████████| 20/20 [00:04<00:00,  4.47it/s]


0 : min and max values -6.681754 1.9838591
1 : min and max values -6.785715 2.026412
2 : min and max values -6.5918465 1.8398409
3 : min and max values -6.761695 2.0367975
4 : min and max values -6.5479803 2.2704687


100%|██████████| 20/20 [00:04<00:00,  4.51it/s]

0 min and max values -1.0 0.29690692
1 min and max values -1.0 0.2986291
2 min and max values -1.0 0.27910858
3 min and max values -0.99999994 0.30122587
4 min and max values -1.0 0.34674335





The `labels` parameter only makes sense if you're using a classifier.

In [7]:
from aeon.classification.dictionary_based import WEASEL

# Univariate classifier used by the explainer demos in the following cells.
# NOTE(review): support_probabilities=True is presumably needed so the
# explainers can query class probabilities — confirm against the aeon docs.
clf = WEASEL(window_inc=4, support_probabilities=True)
clf.fit(CMJ_X_train, CMJ_y_train)

# The model fitted here is WEASEL, so the printed label now says so
# (it previously read "QUANT", which is a different classifier).
print("WEASEL accuracy is", clf.score(CMJ_X_test, CMJ_y_test))


QUANT accuracy is 0.9720670391061452


In [8]:
# Keep only the first n_to_explain CMJ test samples (and their labels) for the demo.
# NOTE: this overwrites CMJ_X_test / CMJ_y_test for every later cell.
n_to_explain = 20
CMJ_X_test = CMJ_X_test[:n_to_explain]
CMJ_y_test = CMJ_y_test[:n_to_explain]

# SHAP

In [9]:
from tsCaptum.explainers import Shapley_Value_Sampling as SHAP

# Wrap the fitted classifier, then explain the samples with respect to the given labels.
mySHAP = SHAP(clf)
exp = mySHAP.explain(
    CMJ_X_test,
    labels=CMJ_y_test,
)

24it [00:16,  1.44it/s]                        


# Kernel SHAP and LIME
For Kernel SHAP and LIME, the Captum framework suggests using a batch size of 1; we enforce this property.

In [10]:
from tsCaptum.explainers import Kernel_Shap

myKernelSHAP = Kernel_Shap(clf)
# NOTE(review): the accompanying text says a batch size of 1 is enforced for
# this explainer, so batch_size=4 is presumably overridden — confirm.
exp = myKernelSHAP.explain(
    CMJ_X_test,
    labels=CMJ_y_test,
    batch_size=4,
)


100%|██████████| 20/20 [00:06<00:00,  3.23it/s]


In [11]:
from tsCaptum.explainers import LIME

myLIME = LIME(clf)
# NOTE(review): the accompanying text says a batch size of 1 is enforced for
# this explainer, so batch_size=6 is presumably overridden — confirm.
exp = myLIME.explain(
    CMJ_X_test,
    labels=CMJ_y_test,
    batch_size=6,
)


100%|██████████| 20/20 [00:06<00:00,  3.22it/s]


Another important optional argument is `baseline`, i.e. the value(s) that replace the time series' values when they are ablated by the attribution method.
It can be given in two formats: the first is a scalar, i.e. a single number replacing each value to be ablated (the default value is 0).

In [12]:
# Scalar baseline: a single number is passed as the baseline for the explanation.
mySHAP = SHAP(clf)
exp = mySHAP.explain(
    CMJ_X_test,
    labels=CMJ_y_test,
    baseline=0,
)

24it [00:16,  1.44it/s]                        


second option is to provide a time series having the same shape as the one to be explained, usually one item from the train set



In [13]:
# Series baseline: the first training series is used; the 0:1 slice keeps the leading axis.
exp = mySHAP.explain(
    CMJ_X_test,
    labels=CMJ_y_test,
    baseline=CMJ_X_train[0:1],
)

24it [00:17,  1.39it/s]                        


# Feature Permutation 
This is the last explainer, and it is the only one that does not accept a baseline.

In [14]:
from tsCaptum.explainers import Feature_Permutation

myFP = Feature_Permutation(clf, clf_type="classifier")
# NOTE(review): a baseline is passed here although the accompanying text says
# this explainer does not accept one — presumably the value is ignored; confirm.
exp = myFP.explain(
    CMJ_X_test,
    labels=CMJ_y_test,
    baseline=42,
)

24it [00:00, 29.20it/s]                        


finally we explain a multivariate dataset, first of all using the default arguments

In [15]:
# Multivariate classifier: MiniRocket features -> standardisation -> cross-validated logistic regression.
clf_MTS = make_pipeline(
    MiniRocketMultivariate(n_jobs=1),
    StandardScaler(),
    LogisticRegressionCV(max_iter=200, n_jobs=-1),
)
clf_MTS.fit(MP_X_train, MP_y_train)

# Score the fitted pipeline on the MP test split.
print("accuracy is", clf_MTS.score(MP_X_test, MP_y_test))

accuracy is 0.7394957983193278


In [16]:
# Keep only the first n_to_explain MP test samples (and their labels).
# NOTE: this overwrites MP_X_test / MP_y_test, so any score computed on them
# in later cells refers to this subset only.
MP_X_test = MP_X_test[:n_to_explain]
MP_y_test = MP_y_test[:n_to_explain]

In [17]:
# Feature ablation on the multivariate classifier, with every optional argument spelled out.
myFA_MTS = Feature_Ablation(clf_MTS, clf_type="classifier")
exp = myFA_MTS.explain(
    samples=MP_X_test,
    labels=MP_y_test,
    batch_size=8,
    n_segments=10,
    normalise=False,
    baseline=0,
)

24it [00:03,  7.66it/s]                        


# TODO what to do with RidgeClassifier??


In [18]:
from aeon.classification.dictionary_based import MUSE

# Replace the multivariate classifier with a MUSE model.
clf_MTS = MUSE(
    window_inc=4,
    use_first_order_differences=False,
    support_probabilities=True,
)
clf_MTS.fit(MP_X_train, MP_y_train)

# MP_X_test / MP_y_test were already cut down to the first n_to_explain samples
# in an earlier cell, so this accuracy is computed on that subset only.
print("accuracy is", clf_MTS.score(MP_X_test, MP_y_test))

accuracy is 0.75


In [19]:
my_explainer = Kernel_Shap(clf_MTS)

# Explain with 10 segments, then report how many distinct attribution values
# each of the first five explanations contains.
exp = my_explainer.explain(samples=MP_X_test, labels=MP_y_test, n_segments=10)
for i in range(5):
    distinct_shape = np.unique(exp[i]).shape
    print(i, distinct_shape)

# Same report with 5 segments.
exp = my_explainer.explain(samples=MP_X_test, labels=MP_y_test, n_segments=5)
for i in range(5):
    distinct_shape = np.unique(exp[i]).shape
    print(i, distinct_shape)

100%|██████████| 20/20 [00:40<00:00,  2.01s/it]


0 (72,)
1 (78,)
2 (79,)
3 (67,)
4 (79,)


100%|██████████| 20/20 [00:32<00:00,  1.64s/it]

0 (39,)
1 (40,)
2 (40,)
3 (39,)
4 (40,)





QUANT (interval-based classifier)

In [20]:
from aeon.classification.interval_based import QUANTClassifier

# Replace the multivariate classifier with a QUANT model built with default arguments.
clf_MTS = QUANTClassifier()
clf_MTS.fit(MP_X_train, MP_y_train)

# MP_X_test / MP_y_test were already cut down to the first n_to_explain samples
# in an earlier cell, so this accuracy is computed on that subset only.
print("accuracy is", clf_MTS.score(MP_X_test, MP_y_test))

accuracy is 0.6


In [22]:
my_explainer = Feature_Permutation(clf_MTS)

# Without normalisation: per-sample attribution range for the first five samples.
exp = my_explainer.explain(
    samples=MP_X_test,
    labels=MP_y_test,
    n_segments=5,
    normalise=False,
)
print(" min and max attribution without normalisation :")
for i in range(5):
    sample_attr = exp[i]
    print(i, sample_attr.min(), sample_attr.max())

# With normalisation: overall range first, then the same per-sample report.
exp = my_explainer.explain(
    samples=MP_X_test,
    labels=MP_y_test,
    n_segments=5,
    normalise=True,
)
print(" min and max attribution with normalisation ", exp.min(), exp.max())
for i in range(5):
    sample_attr = exp[i]
    print(i, sample_attr.min(), sample_attr.max())


24it [00:05,  4.64it/s]                        


 min and max attribution without normalisation :
0 -0.009999999999999953 0.10000000000000003
1 -0.065 0.04999999999999999
2 -0.020000000000000018 0.09500000000000008
3 -0.07499999999999996 0.10000000000000003
4 -0.05999999999999994 0.2


24it [00:05,  4.67it/s]                        

 min and max attribution with normalisation  -1.0 1.0
0 -0.09999999999999949 1.0
1 -1.0 0.3103448275862068
2 -0.31578947368420934 1.0
3 -0.9999999999999999 0.8888888888888893
4 -0.39473684210526294 1.0



