## Colab: install deps

In [1]:
if 'google.colab' in str(get_ipython()):
  !pip install git+https://github.com/mattclifford1/CLIME

# Pipeline for CLIME: Cost-sensitive LIME 

In [2]:
import matplotlib.pyplot as plt
import numpy as np
from clime import data, model, explainer, plot_utils

22-Dec-15 16:26:17 fatf.utils.array.tools INFO     Using numpy's numpy.lib.recfunctions.structured_to_unstructured as fatf.utils.array.tools.structured_to_unstructured and fatf.utils.array.tools.structured_to_unstructured_row.
  plt.style.use('seaborn')


# Data 
Set up the per-class sample counts and the resulting class proportions

In [14]:
# Per-class sample counts that define the target imbalance.
c1, c2 = 25, 75
# Total number of samples requested from the data generator.
n_samples = sum((c1, c2))
# Size of the largest class; it is the denominator for the proportions below.
max_class = c1 if c1 > c2 else c2
# Each class count expressed as a fraction of the largest class (largest -> 1.0).
class_proportions = [count / max_class for count in (c1, c2)]

Generate data

In [15]:
# Generate the base dataset with `n_samples` samples - moons by default;
# swap to the commented-out line below to use the Gaussian generator instead.
balanced_train_data = data.get_moons(samples=n_samples)
# balanced_train_data = data.get_gaussian(samples=n_samples)

# Get an unbalanced version of the dataset (uniform random removal of a class),
# with the amount kept per class given by `class_proportions`.
# NOTE(review): exact semantics of `data.unbalance` are not visible here - confirm.
unbalanced_train_data = data.unbalance(balanced_train_data, class_proportions)

100

 rebalancing classes... 

--------------------------------------------------
Class 0 | Balanced = 50 , Unbalanced = 16
--------------------------------------------------
Class 1 | Balanced = 50 , Unbalanced = 50


Plot the datasets

In [5]:
# Collect the dataset variants under their display labels and plot them together.
# The 'rebalanced data' entry is produced on the fly from the unbalanced dataset.
# NOTE(review): the saved output of this cell ends in
# "ValueError: Sample larger than population or is negative" - presumably raised
# inside data.balance(); confirm and resolve. The cells after this one have no
# execution count, and the class counts printed here differ from those printed
# by the data-generation cell, so this output may come from an older run.
datasets = {
    'balanced data': balanced_train_data, 
    'unbalanced data': unbalanced_train_data,
    'rebalanced data': data.balance(unbalanced_train_data, verbose=True),
    }
plot_utils.plot_data_dict(datasets)

Class 1 | 62
Class 0 | 15


ValueError: Sample larger than population or is negative

# Model

In [None]:
# Construct one SVM per training set, in order: `clf` from the balanced data,
# `clf_unbal` from the unbalanced data.
clf, clf_unbal = (
    model.SVM(training_set)
    for training_set in (balanced_train_data, unbalanced_train_data)
)

Plot the models' decision boundaries

In [None]:
# Pair each classifier with the dataset it was constructed from, keyed by the
# label to show on the plot, then hand the collection to the plotting helper.
models = {
    'generated': dict(model=clf, data=balanced_train_data),
    'unbalanced': dict(model=clf_unbal, data=unbalanced_train_data),
}
plot_utils.plot_clfs(models)

## Balanced training comparison

In [None]:
# Four classifier variants, every one of them paired with the unbalanced
# dataset: the existing `clf_unbal`, plus three freshly constructed models
# (class-weighted SVM, boundary-adjusted SVM, probability-adjusted SVM).
# This rebinds `models`, replacing the collection from the previous section.
models = {
    label: {'model': classifier, 'data': unbalanced_train_data}
    for label, classifier in (
        ('normal', clf_unbal),
        ('weighted training', model.SVM(unbalanced_train_data, class_weight='balanced')),
        ('boundary adjust', model.SVM_balance_boundary(unbalanced_train_data, boundary_weight=1)),
        ('probability adjust', model.SVM_balance_proba(unbalanced_train_data)),
    )
}

Plot the adjusted classifiers

In [None]:
# Plot every entry currently held in `models` (the comparison set defined in the
# preceding cell) using the same helper as the earlier decision-boundary plot.
plot_utils.plot_clfs(models)

# Explainer

In [None]:
# Build one LIME explainer per (dataset, classifier) pair and explain the first
# row of that dataset's 'X' array.
lime = explainer.LIME(balanced_train_data, clf)
lime_explanation = lime(balanced_train_data['X'][0, :])

lime_unbal = explainer.LIME(unbalanced_train_data, clf_unbal)
# Kept under its own `_unbal` name (matching `clf_unbal` / `lime_unbal`) so it
# does not overwrite the balanced explanation stored in `lime_explanation`.
lime_explanation_unbal = lime_unbal(unbalanced_train_data['X'][0, :])

## Balanced Data Explanation

In [None]:
# Plot the explanation from `lime`, the explainer constructed from
# `balanced_train_data` and `clf`.
lime.plot_explanation()

## Unbalanced Data Explanation

In [None]:
# Plot the explanation from `lime_unbal`, the explainer constructed from
# `unbalanced_train_data` and `clf_unbal`.
lime_unbal.plot_explanation()