In [91]:
from code.utils import medoid_per_class
%load_ext autoreload
%autoreload 2
import sys
sys.path.append("../code")
import importlib, os, json
import numpy as np
import pandas as pd
import shap

import minirocket_multivariate_variable as mmv
importlib.reload(mmv)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from collections import Counter
from tqdm import tqdm
from utils import get_cognitive_circles_data, get_cognitive_circles_data_for_classification, prepare_cognitive_circles_data_for_minirocket

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Fetching data

In [101]:
df_train_class, y_train, df_test_class, y_test = get_cognitive_circles_data_for_classification('../data/cognitive-circles')


In [102]:
df_train_class.head()

Unnamed: 0,X1,X2,X3,X4,X5,X6,X7,X8,X9,X10,...,DR7865,DR7866,DR7867,DR7868,DR7869,DR7870,DR7871,DR7872,DR7873,DR7874
0,378.950623,390.868329,404.769576,422.604988,443.42394,467.209975,482.904738,498.319701,512.78404,527.204489,...,-117.633838,-123.265863,-131.875027,-140.484191,-148.399901,-154.133713,-159.867525,-165.502344,-166.982869,-168.463395
1,454.875312,465.725686,476.588529,488.401496,500.25187,511.177057,522.0399,532.902743,537.995012,542.438903,...,-390.684958,-371.215367,-341.454023,-293.714498,-235.224907,-163.523044,-7.187791,159.877049,216.619759,276.920364
2,462.274314,442.548628,423.781796,405.987531,390.097257,376.15212,363.24813,357.103491,351.678928,343.234414,...,27.476051,20.187588,12.899125,5.610662,-1.677801,-8.966264,-16.254727,-23.54319,-30.262986,-36.886924
3,621.979426,622.958853,623.0,620.246883,613.720075,605.987531,597.296135,585.975062,573.407107,565.043017,...,10.431053,13.030254,15.629456,18.228658,20.827859,23.427061,26.026263,28.187702,25.183548,22.179395
4,612.955611,613.0,613.0,610.266334,605.554863,599.799002,593.087531,584.598005,576.598005,568.775561,...,43.396504,34.576607,25.756711,16.936815,8.116918,-0.702978,-9.522875,-18.342771,-27.162667,-35.982564


In [103]:
X_train, y_train = prepare_cognitive_circles_data_for_minirocket(df_train_class, y_train)
X_test, y_test = prepare_cognitive_circles_data_for_minirocket(df_test_class, y_test)

0      1
1      0
2      1
3      0
4      1
      ..
235    0
236    1
237    0
238    1
239    0
Name: RealDifficulty, Length: 240, dtype: int64 0     1
1     0
2     1
3     0
4     1
5     0
6     1
7     0
8     1
9     0
10    1
11    0
12    1
13    0
14    1
15    0
16    1
17    0
18    1
19    0
20    1
21    0
22    1
23    0
24    1
25    0
26    1
27    0
28    1
29    0
30    1
31    0
32    1
33    0
34    1
35    0
36    1
37    0
38    1
39    0
40    1
41    0
42    1
43    0
44    1
45    0
46    1
47    0
Name: RealDifficulty, dtype: int64


## Training MiniROCKET

In [104]:
mmv.fit_minirocket_parameters(X_train)
out_train = mmv.transform_prime(X_train)
Phi_train = out_train["phi"]
print("Phi_train:", Phi_train.shape)
out_test = mmv.transform_prime(X_test)
Phi_test = out_test["phi"]

Phi_train: (240, 9996)


## Training a classifier

In [106]:
clf_phi = make_pipeline(
    LogisticRegression(max_iter=2000, solver="lbfgs", multi_class="auto", n_jobs=-1)
).fit(Phi_train, y_train)
from sklearn.metrics import accuracy_score
print('Accuracy score:', accuracy_score(y_test, clf_phi.predict(Phi_test)))



Accuracy score: 0.75


## Obtain explanations for classifier

### Compute reference

In [None]:
def invert_class(y: pd.Series):
    if len(pd.unique(y)) == 2:
        return y.map(lambda x: 1 if x == 0 else 0)
    else:
        ## TODO: Support multi-class
        return y

def get_opposite_medoid_ids(df: pd.DataFrame, y: pd.Series, medoids_per_class: dict):
    y_opposite = invert_class(y)
    opposite_medoids = [medoids_per_class[i] for i in y_opposite.values]
    return opposite_medoids
medoid_per_class = medoid_per_class(df_train_class, y_train)
opposite_medoid_ids = get_opposite_medoid_ids(df_train_class, y_train, medoid_per_class)

In [129]:
print(out_train)

{'phi': array([[2.1120143e-01, 8.9547879e-01, 5.2324105e-02, ..., 3.1115063e-02,
        9.9961901e-01, 0.0000000e+00],
       [4.6088392e-01, 6.3741428e-01, 3.2791466e-01, ..., 3.7274575e-01,
        7.2821945e-01, 1.2077724e-01],
       [1.5201931e-01, 9.2875284e-01, 3.9243080e-02, ..., 1.0198120e-01,
        1.0000000e+00, 0.0000000e+00],
       ...,
       [1.7970537e-01, 8.8239777e-01, 7.6200150e-02, ..., 7.4803151e-02,
        9.3662685e-01, 9.2710182e-03],
       [1.5341631e-01, 9.1541785e-01, 5.0038099e-02, ..., 3.5941072e-02,
        9.9974602e-01, 0.0000000e+00],
       [1.5938532e-01, 8.8252479e-01, 8.0264159e-02, ..., 6.4770128e-03,
        9.9580902e-01, 3.8100078e-04]], dtype=float32), 'traces': {'num_channels_per_combination': array([3, 1, 5, ..., 2, 1, 2], dtype=int32), 'channel_indices': array([4, 0, 5, ..., 2, 5, 2], dtype=int32), 'dilations': array([  1,   2,   3,   4,   5,   7,   9,  11,  14,  17,  22,  28,  35,
        43,  54,  68,  85, 106, 133, 166, 207, 259, 32

Explain one instance

In [131]:
out_train

{'phi': array([[2.1120143e-01, 8.9547879e-01, 5.2324105e-02, ..., 3.1115063e-02,
         9.9961901e-01, 0.0000000e+00],
        [4.6088392e-01, 6.3741428e-01, 3.2791466e-01, ..., 3.7274575e-01,
         7.2821945e-01, 1.2077724e-01],
        [1.5201931e-01, 9.2875284e-01, 3.9243080e-02, ..., 1.0198120e-01,
         1.0000000e+00, 0.0000000e+00],
        ...,
        [1.7970537e-01, 8.8239777e-01, 7.6200150e-02, ..., 7.4803151e-02,
         9.3662685e-01, 9.2710182e-03],
        [1.5341631e-01, 9.1541785e-01, 5.0038099e-02, ..., 3.5941072e-02,
         9.9974602e-01, 0.0000000e+00],
        [1.5938532e-01, 8.8252479e-01, 8.0264159e-02, ..., 6.4770128e-03,
         9.9580902e-01, 3.8100078e-04]], dtype=float32),
 'traces': {'num_channels_per_combination': array([3, 1, 5, ..., 2, 1, 2], dtype=int32),
  'channel_indices': array([4, 0, 5, ..., 2, 5, 2], dtype=int32),
  'dilations': array([  1,   2,   3,   4,   5,   7,   9,  11,  14,  17,  22,  28,  35,
          43,  54,  68,  85, 106, 133

In [134]:
from utils import logistic_gradient, medoid_per_class
alphas = logistic_gradient(clf_phi[0], Phi_train[0])
y_train_pred = clf_phi.predict(Phi_train)
traces_X  = out_train["traces"]
out_x0  = mmv.transform_prime(X_train[opposite_medoid_ids[0]])
print(out_x0['traces'])

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [None]:

beta = mmv.propagate_luis(alphas, traces, x_tc, x0_tc, sigma_ref=sigma_ref, mode="channel_energy")
