In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# third party modules
import os
import h5py
import math
import pandas as pd
import numpy as np
import time
from tensorflow.keras.utils import Sequence
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, Dropout, BatchNormalization, Activation, Add, Flatten, Dense)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import (ModelCheckpoint, TensorBoard, ReduceLROnPlateau,
                                        CSVLogger, EarlyStopping)
from tensorflow.keras.models import load_model
from sklearn.pipeline import Pipeline

# project modules
import datasets as datasets


# define environmental variables to prevent overuse of CPU Cores
# Access and modify environmental variables
os.environ['TF_NUM_INTRAOP_THREADS'] = '1' #set to 1
os.environ['TF_NUM_INTEROP_THREADS'] = '3' #set to 1 less than # of requested cores
print(f"TF_NUM_INTRAOP_THREADS is {os.getenv('TF_NUM_INTRAOP_THREADS')}")
print(f"TF_NUM_INTEROP_THREADS is {os.getenv('TF_NUM_INTEROP_THREADS')}")



2025-02-02 22:48:24.238234: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-02 22:48:24.967649: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-02-02 22:48:25.153988: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1738554505.602389  362527 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1738554505.656808  362527 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-02 22:48:26.315739: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

TF_NUM_INTRAOP_THREADS is 1
TF_NUM_INTEROP_THREADS is 3


Process Raw Training Data

In [4]:
# Create testing and validation set. This is done by combining hdf5 files from the CODE 15% dataset

# define base paths
base_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/exams_part"
combined_hdf5_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/combined.hdf5"
# user define how many of the 18 files total to incorporate
num_files = 1         

# use a loop to create the list of strings for each hdf5 part
hdf5_paths = []
for i in range(num_files): # range(17) = 0 --> 16
    hdf5_paths.append(f"{base_path}{i}.hdf5")

# define dataset names for within each hdf5
hdf5_datasets = ['tracings', 'exam_id']


# load dataset and create combined file (create if there is none present)
#combined_hdf5 = h5py.File(combined_hdf5_path,'w')
with h5py.File(combined_hdf5_path, 'w') as combined_hdf5:
    for hdf5_dset in hdf5_datasets: # iterate through each dataset in hdf5 paths
        # find total number of samples
        total_size = sum(h5py.File(path,'r')[hdf5_dset].shape[0] for path in hdf5_paths)
        print(f"The {hdf5_dset} data has {total_size} samples")
        
        # find sample shape
        sample_shape = h5py.File(hdf5_paths[0],'r')[hdf5_dset].shape[1:]
        print(f"Sample Shape for {hdf5_dset}: {sample_shape}")

        # create new dataset in the combined HDF5 file
        combined_dataset = combined_hdf5.create_dataset(
            hdf5_dset, 
            shape=(total_size,) + sample_shape, 
            dtype=h5py.File(hdf5_paths[0], 'r')[hdf5_dset].dtype)

        # copy data from file into the combined dataset
        start_idx = 0
        for path in hdf5_paths:
            with h5py.File(path,'r') as hdf5_file:
                data = hdf5_file[hdf5_dset][:]
                combined_dataset[start_idx:start_idx + data.shape[0]] = data
                start_idx +=data.shape[0]

print("HDF5 Datasets Combined")



The tracings data has 20001 samples
Sample Shape for tracings: (4096, 12)
The exam_id data has 20001 samples
Sample Shape for exam_id: ()
HDF5 Datasets Combined


In [5]:
# make corresponding CSV file and final HDF5 file

hdf_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/combined.hdf5"
csv_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/exams15.csv"
modified_csv_save_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/csv_merged.csv"

# read the csv into a pandas dataframe
csv_df = pd.read_csv(csv_path)  

with h5py.File(hdf_path, 'r') as f:
    # define values to sort
    dat_exam_id_initial = f['exam_id']                 # creates array of examID's from hdf5
    dat_tracings = f['tracings']                # create array of tracings 
    dat_exam_id = pd.DataFrame(dat_exam_id_initial)   # convert dat_exam_id to a pandas dataframe
    dat_exam_id.columns = ['exam_id']          # assign column name

    # print properties of initial dataframes
    print(f"The length of exam_id is: {len(dat_exam_id)}\n")
    print(f"The size of tracings is: {dat_tracings.shape}\n\n")
    print(f"The length of the raw CSV is: {len(csv_df)}\n\n")

    # use outer merge since there may be rows that have no mathces (will remove)
    # search for rows in hdf_5 that are not in sorted_csv and remove them
    # perform initial outer merge
    init_csv = dat_exam_id.merge(csv_df, how='outer', indicator=True) 

    # find indicies of rows that are "left_only" (so they can be removed from hdf5)
    # Filter rows where _merge is 'left_only'
    left_only_rows = init_csv[init_csv['_merge'] == 'left_only']
    # Get the index values of the filtered rows
    left_only_row_numbers = left_only_rows.index.tolist()
    print(f'Row numbers with left_only:\n {left_only_row_numbers}\n')
    # Get the index values of rows that are not filtered out
    index_nums = [i for i in range(len(dat_tracings)) if i not in left_only_row_numbers]

    # drop rows that do not match from csv (left or right)
    csv_merged = init_csv[init_csv['_merge'] == 'both'].drop(columns=['_merge'])

    # remove and rearrange columns
    csv_merged = csv_merged.drop(['exam_id', 'age','is_male','nn_predicted_age','patient_id','death','timey','normal_ecg','trace_file'], axis=1)
    csv_merged = csv_merged[['1dAVb','RBBB','LBBB','SB','AF','ST']]
    csv_merged = csv_merged.astype(np.float32)
    print(f'columns of training ground truth is {csv_merged.columns}\n')
    print(f'shape of training ground truth is {csv_merged.shape}\n')
    print(f'first 100 rows of training ground truth is\n {csv_merged[:100]}')

    # save pandas dataframe to csv
    csv_merged.to_csv(modified_csv_save_path)



The length of exam_id is: 20001

The size of tracings is: (20001, 4096, 12)


The length of the raw CSV is: 345779


Row numbers with left_only:
 [0]

columns of training ground truth is Index(['1dAVb', 'RBBB', 'LBBB', 'SB', 'AF', 'ST'], dtype='object')

shape of training ground truth is (20000, 6)

first 100 rows of training ground truth is
       1dAVb  RBBB  LBBB   SB   AF   ST
21      0.0   0.0   0.0  0.0  0.0  0.0
22      0.0   0.0   0.0  0.0  0.0  0.0
30      0.0   0.0   0.0  0.0  0.0  0.0
46      0.0   0.0   0.0  0.0  0.0  0.0
58      1.0   0.0   0.0  0.0  0.0  0.0
...     ...   ...   ...  ...  ...  ...
1520    0.0   0.0   0.0  0.0  0.0  0.0
1522    0.0   0.0   0.0  0.0  0.0  0.0
1533    0.0   0.0   0.0  0.0  0.0  0.0
1538    0.0   0.0   0.0  0.0  0.0  1.0
1584    0.0   0.0   0.0  0.0  0.0  0.0

[100 rows x 6 columns]


In [6]:
# remove rows of hdf5 tensors that have exam_id's that don't match anything in the CSV
print('Keeping select rows from hdf5 tensors')

with h5py.File(hdf_path, 'r+') as f:
    # define values to sort
    vals_to_sort = f['exam_id'][index_nums]                 # creates array of examID's from hdf5
    dat_tracings = f['tracings'][index_nums]                # create array of tracings 

    # print shape to ensure the right amount of elements have been removed
    print(vals_to_sort.shape)    
    print(dat_tracings.shape)

    # delete and create datasets (due to size mismatch)
    # Delete the old datasets if they exist
    del f['exam_id']
    del f['tracings']

    # Create new datasets with the filtered data
    f.create_dataset('exam_id', data=vals_to_sort)
    f.create_dataset('tracings', data=dat_tracings)


Keeping select rows from hdf5 tensors
(20000,)
(20000, 4096, 12)


Run Model on Test Set and Evaluate Results

W0000 00:00:1738554553.355134  362527 gpu_device.cc:2344] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  self._warn_if_super_not_called()


[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 188ms/step
(827, 6)
[[1.4243196e-06 1.0710077e-07 2.6337048e-07 4.5377445e-07 9.4853863e-07
  6.4135262e-09]
 [2.8897332e-02 2.0066653e-03 3.1778637e-01 2.8277384e-05 4.8343457e-02
  3.2050017e-04]
 [3.1124635e-04 2.9402892e-05 4.1752328e-06 1.9712761e-05 9.3489867e-03
  2.4932469e-05]
 [2.3968976e-09 1.7344908e-09 6.9393408e-10 8.1738605e-10 5.6821343e-09
  2.7672636e-10]
 [5.3062342e-04 3.5334501e-06 3.3941723e-07 1.4301384e-06 2.2422880e-04
  4.7077424e-06]]
Output predictions saved


Index:0, True Label: 0, Predicted Label: 0
Index:1, True Label: 3, Predicted Label: 3
Index:2, True Label: 0, Predicted Label: 0
Index:3, True Label: 0, Predicted Label: 0
Index:4, True Label: 0, Predicted Label: 0
Index:5, True Label: 0, Predicted Label: 0
Index:6, True Label: 0, Predicted Label: 0
Index:7, True Label: 0, Predicted Label: 0
Index:8, True Label: 0, Predicted Label: 0
Index:9, True Label: 0, Predicted Label: 0
Index:10, True Label: 0, Predicted Label: 0
Index:11, True Label: 0, Predicted Label: 0
Index:12, True Label: 1, Predicted Label: 1
Index:13, True Label: 0, Predicted Label: 0
Index:14, True Label: 0, Predicted Label: 0
Index:15, True Label: 1, Predicted Label: 1
Index:16, True Label: 0, Predicted Label: 0
Index:17, True Label: 0, Predicted Label: 0
Index:18, True Label: 1, Predicted Label: 1
Index:19, True Label: 0, Predicted Label: 0
Index:20, True Label: 0, Predicted Label: 0
Index:21, True Label: 0, Predicted Label: 0
Index:22, True Label: 0, Predicted Label: 

Apply CoMTE_V2

In [9]:
"""
Part 1: A Classifier that works with COMLEX

The classifier must have 2 capabilities:
1. Predict a class ie: class 0 in classes {0, 1}
2. Predict the probability for each class
-ie: [0.1, 0.9]

and

Be able to execute capability 1 and 2 on a PANDAS dataframe,
returning an array of corresponding predictions.
"""

class BasicClassifier:
    classifier = pre_model  # tensorflow CNN
    import os
    
    @staticmethod
    def contrived_classification(pandas_dfs):
        classifier = pre_model  # tensorflow CNN

        # convert 2D pandas df to 3D dataframe (N,4096,12)
        array_3d = pandas_dfs.to_numpy().reshape(int(pandas_dfs.shape[0]/4096), 4096, 12)

        # create instance of ECGSequence to store the (N,4096,12) dataset
        temp_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/temporary.hdf5"
        temp_dataset_name = "tracings"
        if os.path.exists(temp_path):
            os.remove(temp_path)
        # create hdf with appropriate data
        hdf_file = h5py.File(temp_path, 'w')
        hdf_file.create_dataset(temp_dataset_name,data = array_3d)
        # init instnace of ECG Sequence holding modified with hdf path
        modified_instance = datasets.ECGSequence(temp_path, temp_dataset_name)

        # get classification and probability
        probability = classifier.predict(modified_instance, verbose = 1)    
        
    
        # close hdf5's
        modified_instance._closehdf()
        hdf_file.close()
        os.remove(temp_path)

        # analyze model output with thresholding
        # define given thresholds
        threshold = np.array([0.124, 0.07, 0.05, 0.278, 0.390, 0.174])
        
        # generate class 0 probability
        exceedances = 1 - (np.maximum((probability - threshold) , 0) / (1 - threshold))
        normal_prob = np.mean(exceedances, axis = 1, keepdims = True) # normal prob should be (N,1)
        
        # Add normal_prob as a new column
        probability_n = np.column_stack((normal_prob, probability))     

        # new threshold
        new_threshold = np.array([1, 0.124, 0.07, 0.05, 0.278, 0.390, 0.174])
        
        mask = probability_n >= new_threshold
        sample_classes = []
        
        for row, mask in zip(probability_n, mask):
            passing_indices = np.where(mask)[0]
            if len(passing_indices) > 1:  # If more than one indices pass
                # find margin between threshold and probability
                diff_array = probability_n - new_threshold
                passing_index = np.argmax(diff_array)
                # append the index that has the highest margin
                sample_classes.append(passing_index)
            
            elif len(passing_indices) == 0:  # no passes
                sample_classes.append(0) 
            else:
                sample_classes.append(passing_indices[0])  # Select the first (or adjust logic)
                
        return sample_classes


    @staticmethod
    def contrived_classification_proba(pandas_dfs):
        classifier = pre_model  # tensorflow CNN
        
        # convert 2D pandas df to 3D dataframe (N,4096,12)
        array_3d = pandas_dfs.to_numpy().reshape(int(pandas_dfs.shape[0]/4096), 4096, 12)

        # create instance of ECGSequence to store the (N,4096,12) dataset
        temp_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/temporary.hdf5"
        temp_dataset_name = "tracings"
        if os.path.exists(temp_path):
            os.remove(temp_path)
        # create hdf with appropriate data
        hdf_file = h5py.File(temp_path, 'w')
        hdf_file.create_dataset(temp_dataset_name,data = array_3d)
        # init instnace of ECG Sequence holding modified with hdf path
        modified_instance = datasets.ECGSequence(temp_path, temp_dataset_name)

        # get classification and probability
        probability = classifier.predict(modified_instance, verbose = 0)  
        
    
        # close hdf5's
        modified_instance._closehdf()
        hdf_file.close()
        os.remove(temp_path)

        
        # analyze model output with thresholding
         # define given thresholds
        threshold = np.array([0.124, 0.07, 0.05, 0.278, 0.390, 0.174])
        
        # generate class 0 probability
        exceedances = 1 - (np.maximum((probability - threshold) , 0) / (1 - threshold))
        normal_prob = np.mean(exceedances)

        # modify result 
        probability = np.insert(probability,0,normal_prob)   

        # probability should be in a 2D array format
        if probability.ndim == 1:  # Check if it's 1D
            probability = probability.reshape(1, -1)
        
        return probability

In [10]:
"""
Part 2: Training data and labels

[The explanation will use counterfactuals drawn from this input data]

The training data can be should be an iterable of samples
(ie: python array, numpy array, pandas dataframe),
where each sample needs to be the same size array as the others.

The labels should be a corresponding iterable to the samples.

COMLEX will only use samples for which the labels are the same
as the prediction from the trained classifier.

Note:
We don't support variable-length training data at this time,
use a different projection of the data if you have such data.
"""

class BasicData:
    # define basic variables
    classes_available = [0,1,2,3,4,5,6]
    num_columns = 4096

    # define key paths and variables for training data
    path_to_hdf5_test = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE/ecg_tracings.hdf5"
    num_features = 12
    dataset_name_hdf_tracings = "tracings" 
    training_set_hdf_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/combined.hdf5"
    y_train_csv_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE15/csv_merged.csv"
    # read the csv into a np dataframe 
    np_train_labels = np.genfromtxt(y_train_csv_path, delimiter=",")
    
    # for the ECG implementation, the data wrapper must convert a 3D HDF5 file into a pandas multiindex array
    # create instances of ECGSequence for train data 
    train_seq, valid_seq = datasets.ECGSequence.get_train_and_val(training_set_hdf_path, dataset_name_hdf_tracings, y_train_csv_path,val_split=0.02)
    # return array-like samples for the data wrapper (returns 20000x4096x12 np array)
    timeseries = train_seq._gettimeseries_()
    num_features = 12
    # iterable of corresponding labels for the samples for the data wrapper (returns 20000x6 np array) <--- take out first column that represents ExamID
    labels = train_seq._gettruelabel_()[:,1:] 

The number of samples in the dataset is 20000
The index in which the validation set starts and train set ends is 19600
<datasets.ECGSequence object at 0x147267f51660>
<datasets.ECGSequence object at 0x147267f53d60>


In [11]:
"""
Part 3: Wrapping it up.

The training data, training labels, and trained classifier need to be wrapped up
into a form that can pass through COMLEX.

While wrapping up the training data and labels is relatively straightforward,
wrapping up the classifier is more difficult
"""

import sys
sys.path.append('/projectnb/peaclab-mon/JLi/projectx/CoMTE_V2_JLi/comlex_core')  # Path to the comlex_core directory

# import project (wrapper) modules
from src import explainers
from src.explainable_model_ECG import ClfModel as ClfModel
from src.explainable_data_ECG import ClfData as ClfData

class BasicComlexInput:

    # 1. wrap training points
    df_train_points = ClfData.wrap_df_x(BasicData.timeseries, BasicData.num_features)
    
    # 2. wrap training labels
    df_train_labels = ClfData.wrap_df_y(BasicData.labels)
    
    # 3. wrap up the classifier
    # note: column_attr, or the corresponding name of the columns in the sample,
    #  is unique to dataframes, and auto-generated by wrap_df_x
    wrapped_classifier = ClfModel(BasicClassifier.classifier,
                                predict_attr=BasicClassifier.contrived_classification,
                                predict_proba_attr=BasicClassifier.contrived_classification_proba,
                                column_attr=df_train_points.columns.values.tolist(),
                                classes_attr=BasicData.classes_available,
                                window_size_attr=BasicData.num_columns) 

In [None]:
## look into testing set to select test point


# load testing dataset 
path_to_hdf5_test = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE/ecg_tracings.hdf5"
dataset_name_test = "tracings"  

# Import data. SEQ is an instance of class ECGSequence
seq = datasets.ECGSequence(path_to_hdf5_test, dataset_name_test)  # using default batch size

# load pretrained model (still need to compile later) 
model_path = "/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/PretrainedModels/model/model.hdf5"
pre_model = load_model(model_path)  


# compile and apply model to testing dataset
pre_model.compile(loss='binary_crossentropy', optimizer=Adam())
model_predictions = pre_model.predict(seq,verbose=1)   # y_score is a numpy array with dimensions 827x6. It holds the predictions generated by the model

# extra
print(model_predictions.shape)
print(model_predictions[:5])

# Generate dataframe
np.save("/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/dnn_output.npy", model_predictions)
print("Output predictions saved")








##########

# diagnosis order = ['1dAVb', 'RBBB', 'LBBB', 'SB', 'AF', 'ST']
# label 0 = 'Normal'
# label 1 = '1dAVb'
# label 2 = 'RBBB'
# ...

# apply threshold
threshold = np.array([0.124, 0.07, 0.05, 0.278, 0.390, 0.174])

# apply threshold to convert array of SELF-GENERATED probabilities to array of selections 
mask = model_predictions > threshold # record instances in which y_score_best > threshold
y_pred_2D = np.zeros_like(model_predictions)         # fill array with same size as y_score_best with zeros
y_pred_2D[mask] = 1                                  # set certain values (defined by mask) to 1

# true values for test set
# load ground truth for test set
y_true_2D = pd.read_csv('/projectnb/peaclab-mon/JLi/projectx/AutoECGDiagnosisData/CODE/annotations/gold_standard.csv').values


# process arrays of predictions so lables are numbered (dimension 827x6 --> 827x1)
y_pred = []
for i in range(y_pred_2D.shape[0]):
    one_present = 0
    for j in range(y_pred_2D.shape[1]):   # for each row, iterate through columns
        if y_pred_2D[i, j] == 1:
            y_pred.append(j + 1)
            one_present = 1
            break
    if one_present == 0:   # after each row, check if a 1 has been assigned
        y_pred.append(0)
        
y_true = []
for i in range(y_true_2D.shape[0]):
    one_present = 0
    for j in range(y_true_2D.shape[1]):   # for each row, iterate through columns
        if y_true_2D[i, j] == 1:
            y_true.append(j + 1)
            one_present = 1
            break
    if one_present == 0:   # after each row, check if a 1 has been assigned
        y_true.append(0)


# select indices/conditions for CoMTE
true_select = 3 #UPDATE HERE FOR OTHER CLASSES
pred_select = 1 #UPDATE HERE FOR OTHER CLASSES

# find relevant indices
indices_test = []
for idx, (true, pred) in enumerate(zip(y_true, y_pred)):
    print(f"Index:{idx}, True Label: {true}, Predicted Label: {pred}") # print elements
    if true ==  true_select and pred == pred_select:
        indices_test.append(idx)   
        
print('\n\n\n')
print(f"The {indices_test} indices match the case defined above:\n(true_select = {true_select}, pred_select = {pred_select})")











In [12]:
# Part 4: run through COMLEX

"""
Part 4: Running it through COMLEX

Requires:
1. wrapped classifier
2. wrapped training data
3. wrapped training labels

To run COMLEX:
1. wrap the test point
2. instantiate a comlex runner on the wrapped components
-OptimizedSearch sets up a KDTree for based on the data,
 in order to speed up the search time for the counterfactual
 explanation.
-OptimizedSearch will fallback to BruteForceSearch if it fails
 to find a counterfactual explanation with a predicted
 probability greater than 0.95.
3. use the comlex runner to explain wrapped datapoint
"""


# get testing point
test_point = seq._getsample_(253)
# wrap test point 
test_df = ClfData.wrap_df_test_point(test_point)

# 2. set up an optimized search comlex runner
comlex = explainers.OptimizedSearch(BasicComlexInput.wrapped_classifier,
                                    BasicComlexInput.df_train_points,
                                    BasicComlexInput.df_train_labels,
                                    silent=True, threads=4, num_distractors=3)



In [16]:
# 3. explain the test point
# make sure target_class != test_df_class, or else comlex.explain does nothing
# test_df_class = contrived_classification(test_df) # = 0
target_class = 1
explanation = comlex.explain(test_df,to_maximize=target_class,
                             return_dist=True,single=True,
                             savefig=True,train_iter=10,
                             timeseries=False,filename="sample_result.png")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
using greedy search
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
trying distractor 1 of 3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 48ms/step
current probas is [[9.0364844e-01 1.7087395e-01 2.5014516e-03 5.4837018e-01 1.7168720e-03
  2.2591704e-01 8.8277413e-04]]
[np.int64(1)]
1
finding best col
col DI
unequal
col DII
unequal
col DIII
unequal
col AVR
unequal
col AVL
unequal
col AVF
unequal
col V1
unequal
col V2
unequal
col V3
unequal
col V4
unequal
col V5
unequal
col V6
unequal
4
('DI', 1)
0.077106886
('DII', 1)
0.038080294
('DIII', 1)
0.03133081
('AVR', 1)
0.08052611
('AVL', 1)
0.09074222
('AVF', 1)
0.09611185
('V1', 1)
0.040726554
('V2', 1)
0.07599588
('V3', 1)
0.06288811
('V4', 1)
0.06918588
('V5', 1)
0.065757275
('V6', 1)
0.089020945
checking
best col is None
trying distractor 2 of 3
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step
current pr

In [15]:
# analyze output
print(f"explanation is {explanation}")

replacements_np = explanation[0]
replacements = {str(item) for item in replacements_np}
distractor_new = explanation[1]
counterfactual_explanation = test_df.copy()
#counterfactual_explanation = [point for point in test_df] # make copy of original test data before doing replacements

for replacement_i in replacements:
    counterfactual_explanation[replacement_i] = distractor_new[replacement_i].values[0]

print(f"The classification of {test_point}\n"
      f"can be changed to {target_class}\n" +
      f"by changing the sample at points {explanation[0]},\n" +
      f"with points from the distractor:\n{explanation[1]}\n\n" +
      f"The modified sample that would lead to a different classification is:\n{counterfactual_explanation}")

#print(BasicComlexInput.df_train_points.columns.values.tolist())
#print(BasicData.classes_available)
#print(BasicData.num_columns)
#print(BasicComlexInput.df_train_points)
#print("")
#print(BasicComlexInput.df_train_labels)




explanation is (set(),                   DI  DII  DIII  AVR  AVL  AVF   V1   V2   V3   V4   V5   V6
index timestamp                                                             
4661  0          0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      1          0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      2          0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      3          0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      4          0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
...              ...  ...   ...  ...  ...  ...  ...  ...  ...  ...  ...  ...
      4091       0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      4092       0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      4093       0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      4094       0.0  0.0   0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0
      4095       0.0  0.0   0.0  0.0  0.0  0.0  0.0  