#### Metrics for three interpretability techniques: LIME, ANCHOR, CIU

Metrics tested are identity, separability, fidelity, and speed.

In [12]:
import os
import time
import tqdm
import numpy as np
import pandas as pd
import warnings
#warnings.filterwarnings('ignore')

from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import sklearn

from scipy.sparse import csr_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer

from anchor import utils
from anchor import anchor_tabular

import metrics_rules

In [19]:
datasets_folder = "datasets"

folder_names = []
attribute_names_list = []
categorical_indicator_list = []
X_list = []
y_list = []

for folder_name in os.listdir(datasets_folder):
    folder_path = os.path.join(datasets_folder, folder_name)
    
    if os.path.isdir(folder_path):
        attribute_names_path = os.path.join(folder_path, "attribute_names.csv")
        categorical_indicator_path = os.path.join(folder_path, "categorical_indicator.csv")
        X_path = os.path.join(folder_path, "X.csv")
        y_path = os.path.join(folder_path, "y.csv")
        
        attribute_names_df = pd.read_csv(attribute_names_path)
        categorical_indicator_df = pd.read_csv(categorical_indicator_path)
        X_df = pd.read_csv(X_path)
        y_df = pd.read_csv(y_path)

        unique_classes = y_df.iloc[:, 0].unique()
        sampled_indices = []
        for cls in unique_classes:
            cls_indices = y_df[y_df.iloc[:, 0] == cls].index
            sampled_indices.append(np.random.choice(cls_indices, 1)[0])

        sampled_indices = np.array(sampled_indices)

        needed_samples = 100 - len(sampled_indices)
        seed_value = 42 
        np.random.seed(seed_value)

        if needed_samples > 0:
            additional_indices = np.random.choice(y_df.index, needed_samples, replace=False)
            sampled_indices = np.concatenate([sampled_indices, additional_indices])
        
        
        X_list.append(X_df.loc[sampled_indices])
        y_list.append(y_df.loc[sampled_indices])

        folder_names.append(folder_name)
        attribute_names_list.append(attribute_names_df)
        categorical_indicator_list.append(categorical_indicator_df)


In [20]:
def convert_to_numeric_and_impute(X_list, y_list):
    imputer = SimpleImputer(strategy='mean')
    label_encoder = LabelEncoder()

    def process_X_dataframe(df):
        for column in df.columns:
            if isinstance(df[column].iloc[0], csr_matrix):
                df[column] = df[column].apply(lambda x: x.toarray()[0,0] if x.shape[1] == 1 else x.toarray())

            df[column] = pd.to_numeric(df[column], errors='coerce')

            if df[column].dtype == 'object':
                df[column] = df[column].fillna('Missing')
                df[column] = label_encoder.fit_transform(df[column])
            else:
                if df[column].notna().any():
                    df[column] = imputer.fit_transform(df[[column]]).ravel()
                else:
                    df[column] = df[column].fillna(0)
        return df

    def process_y_dataframe(df):
        if df.dtypes[0] == 'object' or not np.issubdtype(df.dtypes[0], np.number):
            df_encoded = df.apply(lambda x: label_encoder.fit_transform(x))
            df_encoded = df_encoded.rename(columns={df_encoded.columns[0]: 'class'})
            return df_encoded
        
        else:
            return df
            print('aaaah')


        
    X_list = [process_X_dataframe(df) for df in X_list]
    y_list = [process_y_dataframe(df) for df in y_list]

    return X_list, y_list

X_list, y_list = convert_to_numeric_and_impute(X_list, y_list)

One dataset here, gen five later.

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X_list[0], y_list[0], test_size=0.2, random_state=555)

rf = sklearn.ensemble.RandomForestClassifier(n_estimators=50, n_jobs=5)
rf.fit(X_train, y_train)

accuracy = accuracy_score(y_test, rf.predict(X_test))
print(f"Dataset {0} - Accuracy: {accuracy}")

Dataset 0 - Accuracy: 0.65


In [17]:
def calculate_within_class_variance(X, labels, centroids):
    unique_labels = np.unique(labels)
    variances = {label: ((X[labels == label] - centroids[label])**2).mean() for label in unique_labels}
    total_variance = np.mean(list(variances.values()))
    return total_variance

#Calculate the distance (e.g., Euclidean distance) between the centroids of each pair of classes. 
#This measures how far apart the classes are from each other.
def calculate_between_class_separation(centroids):
    unique_labels = list(centroids.keys())
    separations = []
    for i in range(len(unique_labels)):
        for j in range(i+1, len(unique_labels)):
            separation = np.linalg.norm(centroids[unique_labels[i]] - centroids[unique_labels[j]])
            separations.append(separation)
    avg_separation = np.mean(separations)
    return avg_separation

def calculate_separability(X, labels):
    centroids = calculate_centroids(X, labels)
    within_class_var = calculate_within_class_variance(X, labels, centroids)
    between_class_sep = calculate_between_class_separation(centroids)
    
    if within_class_var == 0:  
        return np.inf
    
    separability_score = between_class_sep / within_class_var
    return separability_score 

In [16]:
class_names = np.unique(y_train).tolist()
attribute_names = X_train.columns.tolist()

explainer = anchor_tabular.AnchorTabularExplainer(
    class_names,
    attribute_names,
    X_train.values)

def exp_fn_blk(xtest):
    exp1 = []
    for i in tqdm.tqdm(range(len(xtest))):
        start_clock = time.time()
        exp = explainer.explain_instance(X_test.values[i], rf.predict, threshold=0.95)
        end_clock = time.time()
        calc_time = end_clock - start_clock
        exp_list = [0]*len(X_train.columns)
        for j in exp.features():
            exp_list[j] = 1
        exp1.append(exp_list)
    return np.array(exp1), calc_time

100%|███████████████████████████████████████████| 20/20 [01:02<00:00,  3.14s/it]
100%|███████████████████████████████████████████| 20/20 [01:07<00:00,  3.37s/it]

4.606726408004761
(85.0, 3, 20)
(2, 20, 400, 0.5)





In [35]:
# Precision
prec = 0
for i in tqdm.tqdm(range(len(X_test))):
    exp = explainer.explain_instance(X_test.values[i], rf.predict, threshold=0.95)
    prec += exp.precision()
print(prec/len(X_test))

  5%|██▏                                         | 1/20 [00:06<01:54,  6.05s/it]


KeyboardInterrupt: 

##### ANCHOR

In [None]:
# Store scores
identity_anchor_scores = []
separability_anchor_scores = []
speed_anchor_seconds = []
precision_scores = []

df_interp = pd.DataFrame(columns=["Dataset", "Fidelity", "Identity", "Separability", "Speed"])

for i in range(len(X_list)):
    print(f"Dataset {i}")
    X, y = X_list[i], y_list[i].squeeze()

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=555)
    rf = RandomForestClassifier()
    rf.fit(X_train, y_train)

    accuracy = accuracy_score(y_test, rf.predict(X_test))

    explainer = anchor_tabular.AnchorTabularExplainer(
                np.unique(y_train).tolist(),
                X_train.columns.tolist(),
                X_train.values)
    
    

    def exp_fn_blk(xtest):
        exp1 = []
        prec = 0
        for i in tqdm.tqdm(range(len(xtest))):
            start_clock = time.time()
            exp = explainer.explain_instance(X_test.values[i], rf.predict, threshold=0.95)
            end_clock = time.time()
            prec += exp.precision()
            calc_time = end_clock - start_clock
            exp_list = [0]*len(X_train.columns)
            for j in exp.features():
                exp_list[j] = 1
            exp1.append(exp_list)
        return np.array(exp1), calc_time, prec

    
    start_time = time.time() 
    exp1 = exp_fn_blk(X_test[:100])
    exp2 = exp_fn_blk(X_test[:100])
    end_time = time.time()
    speed = end_time - start_time 
    
    prec = 0
    for i in tqdm.tqdm(range(len(X_test))):
        exp = explainer.explain_instance(X_test.values[i], rf.predict, threshold=0.95)
        prec += exp.precision()
    
    
    df_interp = df_interp.append({
        "Dataset": i,
        "Fidelity": prec, 
        "Identity": metrics_rules.calc_identity_rules(exp1[0], exp2[0]),
        "Separability": metrics_rules.calc_separability_rules(exp1[0]),
        "Speed": speed 
    }, ignore_index=True)

Dataset 0
Dataset 307 - Accuracy: 0.65


100%|███████████████████████████████████████████| 20/20 [01:01<00:00,  3.08s/it]
100%|███████████████████████████████████████████| 20/20 [00:54<00:00,  2.73s/it]
100%|███████████████████████████████████████████| 20/20 [01:06<00:00,  3.31s/it]


Dataset 1
Dataset 1067 - Accuracy: 0.75


100%|███████████████████████████████████████████| 20/20 [00:52<00:00,  2.61s/it]
100%|███████████████████████████████████████████| 20/20 [00:47<00:00,  2.36s/it]
100%|███████████████████████████████████████████| 20/20 [00:48<00:00,  2.42s/it]


Dataset 2
Dataset 50 - Accuracy: 0.65


100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 51.65it/s]
100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 53.44it/s]
100%|███████████████████████████████████████████| 20/20 [00:00<00:00, 52.44it/s]


Dataset 3
Dataset 32 - Accuracy: 0.75


100%|███████████████████████████████████████████| 20/20 [01:17<00:00,  3.88s/it]
100%|███████████████████████████████████████████| 20/20 [01:32<00:00,  4.60s/it]
100%|███████████████████████████████████████████| 20/20 [01:30<00:00,  4.53s/it]


Dataset 4
Dataset 1466 - Accuracy: 0.95


100%|███████████████████████████████████████████| 20/20 [01:35<00:00,  4.79s/it]
100%|███████████████████████████████████████████| 20/20 [01:36<00:00,  4.84s/it]
 60%|█████████████████████████▊                 | 12/20 [00:59<00:40,  5.10s/it]

In [245]:
df_interp.columns = ['anchor_' + col for col in df_interp.columns]

In [251]:
df_interp.to_csv('records_anchor.csv')