In [97]:
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("../code")
import importlib, os, json
import numpy as np
import pandas as pd
import shap

import minirocket_multivariate_variable as mmv
importlib.reload(mmv)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm
from utils import get_cognitive_circles_data, get_cognitive_circles_data_for_classification, prepare_cognitive_circles_data_for_minirocket, medoid_per_class

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Fetching data

In [98]:
# Load the train/test frames and their labels from the cognitive-circles data directory
# (path is relative to the notebook's working directory).
df_train_class, y_train, df_test_class, y_test = get_cognitive_circles_data_for_classification('../data/cognitive-circles')
# Preview the first rows of the training frame.
df_train_class.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,DR7865,DR7866,DR7867,DR7868,DR7869,DR7870,DR7871,DR7872,DR7873,DR7874
0,378.950623,390.868329,404.769576,422.604988,443.42394,467.209975,482.904738,498.319701,512.78404,527.204489,...,-117.633838,-123.265863,-131.875027,-140.484191,-148.399901,-154.133713,-159.867525,-165.502344,-166.982869,-168.463395
1,454.875312,465.725686,476.588529,488.401496,500.25187,511.177057,522.0399,532.902743,537.995012,542.438903,...,-390.684958,-371.215367,-341.454023,-293.714498,-235.224907,-163.523044,-7.187791,159.877049,216.619759,276.920364
2,462.274314,442.548628,423.781796,405.987531,390.097257,376.15212,363.24813,357.103491,351.678928,343.234414,...,27.476051,20.187588,12.899125,5.610662,-1.677801,-8.966264,-16.254727,-23.54319,-30.262986,-36.886924
3,621.979426,622.958853,623.0,620.246883,613.720075,605.987531,597.296135,585.975062,573.407107,565.043017,...,10.431053,13.030254,15.629456,18.228658,20.827859,23.427061,26.026263,28.187702,25.183548,22.179395
4,612.955611,613.0,613.0,610.266334,605.554863,599.799002,593.087531,584.598005,576.598005,568.775561,...,43.396504,34.576607,25.756711,16.936815,8.116918,-0.702978,-9.522875,-18.342771,-27.162667,-35.982564


In [99]:
# Turn each frame + label pair into the (X, y) inputs consumed by the MiniROCKET cells below.
# NOTE(review): y_train / y_test are rebound to the helper's output, so re-running this cell on its
# own feeds already-prepared labels back into the helper — confirm that is harmless.
X_train, y_train = prepare_cognitive_circles_data_for_minirocket(df_train_class, y_train)
X_test, y_test = prepare_cognitive_circles_data_for_minirocket(df_test_class, y_test)

## Training MiniROCKET

In [100]:
# Fit the MiniROCKET parameters on the training split only; the test split is transformed
# afterwards with the same module-level `mmv` state.
mmv.fit_minirocket_parameters(X_train)
# transform_prime returns a mapping; its "phi" entry is the feature matrix fed to the classifier.
out_train = mmv.transform_prime(X_train)
Phi_train = out_train["phi"]
print("Phi_train:", Phi_train.shape)
# Same transform for the held-out split (its shape is not printed).
out_test = mmv.transform_prime(X_test)
Phi_test = out_test["phi"]

Phi_train: (240, 9996)


## Training a classifier

In [101]:
from sklearn.metrics import accuracy_score

# Fit a logistic-regression classifier on the MiniROCKET features.
# `multi_class` is intentionally left at its default: "auto" was already the default, and
# passing the argument explicitly triggers a FutureWarning on scikit-learn >= 1.5, where the
# parameter is deprecated and scheduled for removal.
clf_phi = LogisticRegression(max_iter=2000, solver="lbfgs", n_jobs=-1).fit(Phi_train, y_train)

# Report accuracy on the held-out split.
print('Accuracy score:', accuracy_score(y_test, clf_phi.predict(Phi_test)))



Accuracy score: 0.7708333333333334


## Obtain explanations for classifier

### Compute reference

In [102]:
def invert_class(y: pd.Series) -> pd.Series:
    """Return, for every label in ``y``, the label of the opposite class.

    For a binary problem the two observed labels are swapped, whatever their
    encoding (0/1, 1/2, "a"/"b", ...). With 0/1 labels this yields exactly
    ``1 - y``.

    Parameters
    ----------
    y : pd.Series
        Class labels.

    Returns
    -------
    pd.Series
        A new Series with the two labels swapped when ``y`` holds exactly two
        distinct labels; otherwise ``y`` itself, unchanged.
    """
    labels = pd.unique(y)
    if len(labels) != 2:
        ## TODO: Support multi-class
        return y
    first, second = labels
    # Swap the two observed labels instead of assuming they are encoded as 0 and 1.
    return y.map(lambda label: second if label == first else first)

def get_opposite_medoid_ids(df: pd.DataFrame, y: pd.Series, medoids_per_class: dict):
    """Look up, for every label in ``y``, the medoid registered for the inverted label.

    Parameters
    ----------
    df : pd.DataFrame
        Not read by this function; kept so existing calls keep working.
    y : pd.Series
        Class labels, one per instance.
    medoids_per_class : dict
        Mapping from class label to that class's medoid entry.

    Returns
    -------
    list
        One ``medoids_per_class`` value per element of ``y``, in the same order.
    """
    flipped_labels = invert_class(y).values
    medoid_ids = []
    for label in flipped_labels:
        medoid_ids.append(medoids_per_class[label])
    return medoid_ids

def _ensure_TC(x):
    x = np.asarray(x)
    if x.ndim == 3:
        assert x.shape[0] == 1
        x = x[0]
    if x.ndim == 2:
        return x.T if x.shape[0] < x.shape[1] else x
    raise ValueError(f"Forma no soportada: {x.shape}")

def _flatten_tc(x_tc):
    """Normalise ``x_tc`` with ``_ensure_TC`` and return it flattened to one dimension."""
    normalised = _ensure_TC(x_tc)
    flat = normalised.reshape(-1)
    return flat
    
# Store the per-class medoid lookup under its own name. Assigning the result back to
# `medoid_per_class` would replace the imported function with its return value, and a second
# run of this cell would then fail when trying to call that value.
medoids_by_class = medoid_per_class(df_train_class, y_train)
# For every training instance, the medoid of the opposite class (used as the reference).
opposite_medoid_ids = get_opposite_medoid_ids(df_train_class, y_train, medoids_by_class)

### Explain one instance

In [103]:
IDX_TO_EXPLAIN = 0  # position of the training instance to explain; indexes X_train, Phi_train and opposite_medoid_ids below

In [104]:
# logistic_gradient is first needed by the next cell. Importing medoid_per_class here also
# (re)binds that name to the function from utils, whatever it was bound to before.
from utils import logistic_gradient, medoid_per_class
# Classifier predictions on the training features.
y_train_pred = clf_phi.predict(Phi_train)

In [105]:
# Output of logistic_gradient for the fitted classifier at the selected instance's feature row
# (presumably per-feature gradient weights — confirm against utils.logistic_gradient).
alphas = logistic_gradient(clf_phi, Phi_train[IDX_TO_EXPLAIN])
# Transform of the instance being explained; its "traces" entry is consumed in the next cell.
out_x  = mmv.transform_prime(X_train[IDX_TO_EXPLAIN])
# Transform of the reference series: the training row selected by this instance's opposite-class medoid id.
out_x0  = mmv.transform_prime(X_train[opposite_medoid_ids[IDX_TO_EXPLAIN]])

In [106]:
# Propagate the feature-level weights (alphas) through the instance's traces, passing the raw
# instance and the raw opposite-class medoid series as the two inputs.
# NOTE(review): out_x0 computed in the previous cell is not used here; only the raw reference
# series is passed — confirm that is intended.
beta = mmv.propagate_luis(alphas, out_x["traces"], X_train[IDX_TO_EXPLAIN], X_train[opposite_medoid_ids[IDX_TO_EXPLAIN]], mode="channel_energy")
print(beta)

[[-6.14688190e-07]
 [-3.14597598e-06]
 [-3.35342911e-06]
 ...
 [-1.57541274e-06]
 [-1.46583100e-06]
 [-1.34281746e-06]]
