In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import train_test_split

from lore_sa import sklearn_classifier_bbox

In [2]:
# Load dataset
df = pd.read_csv('german_credit.csv')

# Define X and y
X = df.drop(columns='default')
y = df['default']

# Preprocessor
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), [1, 4, 7, 10, 12, 15, 17]),
        ('cat', OrdinalEncoder(), [0, 2, 3, 5, 6, 8, 9, 11, 13, 14, 16, 18, 19])
    ]
)

# Build model pipeline
model = make_pipeline(preprocessor, RandomForestClassifier(n_estimators=100, random_state=42))

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

# Fit model
model.fit(X_train, y_train)

bbox = sklearn_classifier_bbox.sklearnBBox(model)

In [3]:
from lore_sa.dataset import TabularDataset
from lore_sa.neighgen import GeneticGenerator
from lore_sa.encoder_decoder import ColumnTransformerEnc
from lore_sa.lore import Lore
from lore_sa.surrogate import DecisionTreeSurrogate
from sklearn.preprocessing import FunctionTransformer  # For identity encoding

# Load dataset
dataset = TabularDataset.from_csv('german_credit.csv', class_name="default")
dataset.df.dropna(inplace=True)
dataset.update_descriptor()

enc = ColumnTransformerEnc(dataset.descriptor)
enc.target_encoder = FunctionTransformer(func=lambda x: x, inverse_func=lambda x: x)

generator = GeneticGenerator(bbox, dataset, enc)
surrogate = DecisionTreeSurrogate()

# Initialize Lore
tabularLore = Lore(bbox, dataset, enc, generator, surrogate)

In [4]:
instance_id = 7  # You can pick any row index
# Drop the target column 'default' before passing to explain
instance = dataset.df.drop(columns='default').iloc[instance_id]

In [5]:
explanation = tabularLore.explain(instance)
print(explanation)

{'rule': {'premises': [{'attr': 'purpose', 'val': 'car (used)', 'op': '='}, {'attr': 'credit_amount', 'val': np.float64(11448.0), 'op': '<='}, {'attr': 'other_debtors', 'val': 'co-applicant', 'op': '!='}, {'attr': 'property', 'val': 'unknown / no property', 'op': '!='}, {'attr': 'housing', 'val': 'for free', 'op': '!='}], 'consequence': {'attr': 'default', 'val': np.int64(0), 'op': '='}}, 'counterfactuals': [{'premises': [{'attr': 'purpose', 'val': 'car (used)', 'op': '!='}, {'attr': 'savings', 'val': '... < 100 DM', 'op': '='}, {'attr': 'telephone', 'val': 'yes, registered under the customers name ', 'op': '='}, {'attr': 'personal_status_sex', 'val': 'male : single', 'op': '='}, {'attr': 'purpose', 'val': '(vacation - does not exist?)', 'op': '!='}, {'attr': 'present_emp_since', 'val': '1 <', 'op': '!='}, {'attr': 'account_check_status', 'val': '>', 'op': '!='}], 'consequence': {'attr': 'default', 'val': np.int64(1), 'op': '='}}], 'fidelity': 1.0, 'deltas': [[]], 'counterfactual_sampl

In [6]:
def get_features_from_rule(explanation):
    return {premise['attr'] for premise in explanation['rule']['premises']}

# Example usage
features_used = get_features_from_rule(explanation)
print("Features used in the explanation (main rule):", features_used)

Features used in the explanation (main rule): {'purpose', 'property', 'housing', 'credit_amount', 'other_debtors'}


In [7]:
prediction = model.predict([instance])
print(prediction)

[0]


In [12]:

# Step 2: Get predicted class
predicted_class = model.predict([instance])
print(predicted_class)

# Step 1: Get predicted probabilities
proba = model.predict_proba([instance])[0]  # Gives probabilities for each class
print(proba)

[0]
[0.84 0.16]


In [13]:
# Step 3: Get index of predicted class
class_index = list(model.classes_).index(predicted_class)
print(class_index)

# Step 4: Get confidence score
original_confidence_score = proba[class_index]
print(original_confidence_score)

0
0.84


In [14]:
import numpy as np

# Features to perturb
features_to_perturb = features_used

# Original instance
instance1 = dataset.df.iloc[instance_id].copy()
original_instance = instance1.drop(labels='default')

# Predict original class
original_pred_class = model.predict([original_instance])[0]
print(f"Original predicted class: {original_pred_class}")

def get_random_value_excluding(column, exclude_value):
    col_data = dataset.df[column].dropna()

    # Numerical feature
    if pd.api.types.is_numeric_dtype(col_data):
        # Calculate IQR
        Q1 = col_data.quantile(0.25)
        Q3 = col_data.quantile(0.75)
        IQR = Q3 - Q1

        # Define IQR range
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        # Filter values in IQR range (excluding the original value)
        filtered_values = col_data[(col_data >= lower_bound) & 
                                   (col_data <= upper_bound) & 
                                   (col_data != exclude_value)]

        if filtered_values.empty:
            return exclude_value  # fallback

        return np.random.choice(filtered_values)
    
    # Categorical feature
    else:
        unique_values = col_data.astype(str).unique()
        filtered_values = [val for val in unique_values if val != str(exclude_value)]
        
        if not filtered_values:
            return exclude_value  # fallback

        return np.random.choice(filtered_values)

# Lists for tracking
confidence_scores = []
class_changes = 0
n_iterations = 100

for i in range(n_iterations):
    # Perturb the instance
    perturbed_instance = original_instance.copy()
    for feature in features_to_perturb:
        perturbed_instance[feature] = get_random_value_excluding(feature, original_instance[feature])
    
    perturbed_df = pd.DataFrame([perturbed_instance])

    # Predict class and confidence
    predicted_proba = model.predict_proba(perturbed_df)[0]
    predicted_class = model.predict(perturbed_df)[0]
    confidence = predicted_proba[predicted_class]

    confidence_scores.append(confidence)

    # Check and report class change
    if predicted_class != original_pred_class:
        class_changes += 1
        print(f"[{i+1}] Class changed to: {predicted_class} with confidence: {confidence:.4f}")

# Summary
print(f"\nOut of {n_iterations} perturbations:")
print(f"- Class changed {class_changes} times")
print(f"- Mean confidence: {np.mean(confidence_scores):.4f}")
print(f"Min: {np.min(confidence_scores):.4f}, Max: {np.max(confidence_scores):.4f}")

# Difference from original
differences = [original_confidence_score - c for c in confidence_scores]
print(f"\nAverage drop in confidence vs. original: {np.mean(differences):.4f}")

Original predicted class: 0
[3] Class changed to: 1 with confidence: 0.5500
[4] Class changed to: 1 with confidence: 0.5800
[6] Class changed to: 1 with confidence: 0.5400
[7] Class changed to: 1 with confidence: 0.5600
[8] Class changed to: 1 with confidence: 0.5500
[9] Class changed to: 1 with confidence: 0.5400
[11] Class changed to: 1 with confidence: 0.5400
[12] Class changed to: 1 with confidence: 0.5600
[15] Class changed to: 1 with confidence: 0.5800
[16] Class changed to: 1 with confidence: 0.5600
[20] Class changed to: 1 with confidence: 0.5800
[23] Class changed to: 1 with confidence: 0.5600
[24] Class changed to: 1 with confidence: 0.6200
[26] Class changed to: 1 with confidence: 0.5700
[29] Class changed to: 1 with confidence: 0.6300
[30] Class changed to: 1 with confidence: 0.5100
[31] Class changed to: 1 with confidence: 0.6600
[32] Class changed to: 1 with confidence: 0.5300
[33] Class changed to: 1 with confidence: 0.5400
[34] Class changed to: 1 with confidence: 0.580

In [15]:
def get_features_from_rule(explanation):
    return {premise['attr'] for premise in explanation['rule']['premises']}

# Get features used in the explanation
features_used = get_features_from_rule(explanation)

# Get all feature names from the dataset (excluding target column 'default')
all_features = dataset.df.drop(columns='default').columns

# Get features not used in the explanation
unused_features = [feature for feature in all_features if feature not in features_used]

print("Features NOT used in the explanation:", unused_features)


Features NOT used in the explanation: ['account_check_status', 'duration_in_month', 'credit_history', 'savings', 'present_emp_since', 'installment_as_income_perc', 'personal_status_sex', 'present_res_since', 'age', 'other_installment_plans', 'credits_this_bank', 'job', 'people_under_maintenance', 'telephone', 'foreign_worker']


In [16]:
import numpy as np

# Features to perturb
features_to_perturb = unused_features

# Original instance
instance1 = dataset.df.iloc[instance_id].copy()
original_instance = instance1.drop(labels='default')

# Predict original class
original_pred_class = model.predict([original_instance])[0]
print(f"Original predicted class: {original_pred_class}")

def get_random_value_excluding(column, exclude_value):
    unique_values = dataset.df[column].dropna().unique()
    filtered_values = [val for val in unique_values if val != exclude_value]
    return np.random.choice(filtered_values)

# Lists for tracking
confidence_scores = []
class_changes = 0
n_iterations = 100

for i in range(n_iterations):
    # Perturb the instance
    perturbed_instance = original_instance.copy()
    for feature in features_to_perturb:
        perturbed_instance[feature] = get_random_value_excluding(feature, original_instance[feature])
    
    perturbed_df = pd.DataFrame([perturbed_instance])

    # Predict class and confidence
    predicted_proba = model.predict_proba(perturbed_df)[0]
    predicted_class = model.predict(perturbed_df)[0]
    confidence = predicted_proba[predicted_class]

    confidence_scores.append(confidence)

    # Check and report class change
    if predicted_class != original_pred_class:
        class_changes += 1
        print(f"[{i+1}] Class changed to: {predicted_class} with confidence: {confidence:.4f}")

# Summary
print(f"\nOut of {n_iterations} perturbations:")
print(f"- Class changed {class_changes} times")
print(f"- Mean confidence: {np.mean(confidence_scores):.4f}")
print(f"Min: {np.min(confidence_scores):.4f}, Max: {np.max(confidence_scores):.4f}")

# Difference from original
differences = [original_confidence_score - c for c in confidence_scores]
print(f"\nAverage drop in confidence vs. original: {np.mean(differences):.4f}")

Original predicted class: 0
[18] Class changed to: 1 with confidence: 0.5100
[30] Class changed to: 1 with confidence: 0.5200
[31] Class changed to: 1 with confidence: 0.5200
[45] Class changed to: 1 with confidence: 0.5700
[59] Class changed to: 1 with confidence: 0.5100
[98] Class changed to: 1 with confidence: 0.5600

Out of 100 perturbations:
- Class changed 6 times
- Mean confidence: 0.6152
Min: 0.5000, Max: 0.8100

Average drop in confidence vs. original: 0.2248


In [17]:
import numpy as np

# Features to perturb
features_to_perturb = unused_features

# Original instance
instance1 = dataset.df.iloc[instance_id].copy()
original_instance = instance1.drop(labels='default')

# Predict original class
original_pred_class = model.predict([original_instance])[0]
print(f"Original predicted class: {original_pred_class}")

def get_random_value_excluding(column, exclude_value):
    col_data = dataset.df[column].dropna()

    # Numerical feature
    if pd.api.types.is_numeric_dtype(col_data):
        # Calculate IQR
        Q1 = col_data.quantile(0.25)
        Q3 = col_data.quantile(0.75)
        IQR = Q3 - Q1

        # Define IQR range
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        # Filter values in IQR range (excluding the original value)
        filtered_values = col_data[(col_data >= lower_bound) & 
                                   (col_data <= upper_bound) & 
                                   (col_data != exclude_value)]

        if filtered_values.empty:
            return exclude_value  # fallback

        return np.random.choice(filtered_values)
    
    # Categorical feature
    else:
        unique_values = col_data.astype(str).unique()
        filtered_values = [val for val in unique_values if val != str(exclude_value)]
        
        if not filtered_values:
            return exclude_value  # fallback

        return np.random.choice(filtered_values)

# Lists for tracking
confidence_scores = []
class_changes = 0
n_iterations = 100

for i in range(n_iterations):
    # Perturb the instance
    perturbed_instance = original_instance.copy()
    for feature in features_to_perturb:
        perturbed_instance[feature] = get_random_value_excluding(feature, original_instance[feature])
    
    perturbed_df = pd.DataFrame([perturbed_instance])

    # Predict class and confidence
    predicted_proba = model.predict_proba(perturbed_df)[0]
    predicted_class = model.predict(perturbed_df)[0]
    confidence = predicted_proba[predicted_class]

    confidence_scores.append(confidence)

    # Check and report class change
    if predicted_class != original_pred_class:
        class_changes += 1
        print(f"[{i+1}] Class changed to: {predicted_class} with confidence: {confidence:.4f}")

# Summary
print(f"\nOut of {n_iterations} perturbations:")
print(f"- Class changed {class_changes} times")
print(f"- Mean confidence: {np.mean(confidence_scores):.4f}")
print(f"Min: {np.min(confidence_scores):.4f}, Max: {np.max(confidence_scores):.4f}")

# Difference from original
differences = [original_confidence_score - c for c in confidence_scores]
print(f"\nAverage drop in confidence vs. original: {np.mean(differences):.4f}")

Original predicted class: 0
[57] Class changed to: 1 with confidence: 0.5600
[79] Class changed to: 1 with confidence: 0.5100
[89] Class changed to: 1 with confidence: 0.5700
[99] Class changed to: 1 with confidence: 0.5100

Out of 100 perturbations:
- Class changed 4 times
- Mean confidence: 0.6503
Min: 0.5000, Max: 0.8600

Average drop in confidence vs. original: 0.1897
