In [None]:
import pandas as pd
import numpy as np
import pickle
from sklearn.preprocessing import StandardScaler, LabelEncoder
import minisom
from scipy.cluster.hierarchy import dendrogram, set_link_color_palette
import matplotlib.pyplot as plt
from diro2c.data_generation.neighborhood_generation import modified_gpdatagenerator
from diro2c.data_generation.distance_functions import simple_match_distance, normalized_euclidean_distance, mixed_distance
from diro2c.data_generation.helper import *
from diro2c.enums.diff_classifier_method_type import diff_classifier_method_type
from diro2c.data_generation.neighborhood_generation.gpdatagenerator import calculate_feature_values
plt.style.use('ggplot')
from data.getdata import loaddata, prepare_df
from sklearn.tree import DecisionTreeClassifier
from matplotlib import colors
from sklearn import metrics
import matplotlib.pylab as pl
import matplotlib.gridspec as gridspec
from sklearn.cluster import AgglomerativeClustering
from matplotlib.legend import Legend

def getclusterid(x, som, clusterarr):
    """Look up the cluster of the SOM node that wins for ``x``.

    Parameters
    ----------
    x : normalized instance, handed on to ``getwinnerid``.
    som : SOM object, handed on to ``getwinnerid``.
    clusterarr : DataFrame with a ``node`` and a ``cluster`` column.

    Returns
    -------
    The ``cluster`` entries of all rows whose ``node`` equals the winning
    node id. This is a pandas Series selection, not a scalar.
    """
    winning_node = getwinnerid(x, som)
    node_matches = clusterarr.node == winning_node
    return clusterarr.loc[node_matches, 'cluster']

def getwinnerid(x, som):
    """Return the id of the best matching unit for ``x``.

    ``x`` is a normalized instance. ``som.winner(x)`` is expected to return
    an indexable pair of coordinates; the element at position 1 is used as
    the node id.
    """
    coordinates = som.winner(x)
    return coordinates[1]

def distance_function(x0, x1, discrete, continuous, class_name):
    """Distance between ``x0`` and ``x1`` as computed by ``mixed_distance``.

    All arguments are forwarded unchanged. ``simple_match_distance`` is
    passed as ``ddist`` and ``normalized_euclidean_distance`` as ``cdist``.
    """
    metric_kwargs = {
        'ddist': simple_match_distance,
        'cdist': normalized_euclidean_distance,
    }
    return mixed_distance(x0, x1, discrete, continuous, class_name, **metric_kwargs)

from vars import plot_colors, plot_contours, make_meshgrid, color_dict

In [None]:
# Text file to which the evaluation cell appends one result line per run.
performancefile = 'results/FromLocalToGlobalrunning.txt'

In [None]:
# Name of the data set; also used as prefix of the pickled black-box models
# and as suffix of all result files written by this notebook.
data = 'running1'
train, cols = loaddata(data)

# NOTE(review): pickle.load can execute arbitrary code - only load model
# files from a trusted source.
# The context managers close the file handles again (the former
# pickle.load(open(...)) left them open).
with open('blackboxes/' + data + 'A.sav', 'rb') as modelfile:
    modelA = pickle.load(modelfile)
with open('blackboxes/' + data + 'B.sav', 'rb') as modelfile:
    modelB = pickle.load(modelfile)

# Predictions of both models; the "difference" label is "<yA>|<yB>".
train['yA'] = modelA.predict(train[['x1', 'x2']].values)
train['yB'] = modelB.predict(train[['x1', 'x2']].values)
train['difference'] = train.apply(lambda row: '%g' % row['yA'] + '|' + '%g' % row['yB'], axis=1)
train.drop(columns=['yA', 'yB'], inplace=True)

In [None]:
# Fit the scaler on all training instances, then keep only the instances on
# which the two models disagree (difference label not '0|0', '1|1', '2|2').
nodiff_labels = ['0|0', '1|1', '2|2']
has_difference = ~train.difference.isin(nodiff_labels)

trainsom = train[cols].copy()
d = StandardScaler()
scaler = d.fit(trainsom.values)
trainsomnorm = scaler.transform(trainsom.values)[has_difference]
trainsom = trainsom[has_difference]

In [None]:
# Number of SOM nodes: 5 * sqrt(number of instances), rounded down.
n_instances = len(trainsom)
n_nodes = int(np.floor(5 * np.sqrt(n_instances)))

# One-dimensional map (1 x n_nodes) over the feature columns.
n_features = trainsom.shape[1]
som = minisom.MiniSom(1, n_nodes, n_features, sigma=4, learning_rate=0.6, random_seed=1)
som.train(trainsomnorm, 100000, verbose=True)

In [None]:
# Node weights of the single SOM row, mapped back to the original feature scale.
pos = scaler.inverse_transform(som.get_weights()[0])

In [None]:
#plot of trained SOM
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1)
plt.subplots_adjust(hspace=0.5)

# Instances with a difference are drawn more opaque than those without.
agreeing = train.difference.isin(['0|0', '1|1', '2|2'])
for rows, opacity in ((~agreeing, 0.5), (agreeing, 0.1)):
    ax.scatter(train.loc[rows, 'x1'],
               train.loc[rows, 'x2'],
               alpha=opacity,
               color='black',
               s=20)

# SOM nodes, connected in map order.
plt.plot(pos[:, 0], pos[:, 1], linestyle='-', color='black')

ax.set_facecolor('#FFFFFF')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.grid(True, color='#F3F3F3')

outfile = 'docout/sections/localtoglobal/results/SOMtrained_' + data + '.jpg'
plt.savefig(outfile, dpi=150, bbox_inches='tight', transparent=True, pad_inches=0)

In [None]:
# Illustration only: shows how a SOM unfolds for an increasing number of
# training iterations (3 x 3 grid of panels).
niterations = [0, 100, 500, 1000, 2000, 5000, 10000, 100000, 1000000]

gs = gridspec.GridSpec(3, 3)
fig = plt.figure(figsize=(12, 9))

#plot of iterations training:
for index, value in enumerate(niterations):
    row, col = divmod(index, 3)

    # Dedicated names (som_iter / pos_iter): this demonstration must not
    # overwrite the trained `som` and its node positions `pos`, which the
    # following cells (clustering, explainers, plots) rely on.
    som_iter = minisom.MiniSom(1, n_nodes, trainsom.shape[1], sigma=5, learning_rate=0.1, random_seed = 0)
    if value > 0:
        som_iter.train(trainsomnorm, value, verbose = False)
    pos_iter = scaler.inverse_transform(som_iter.get_weights()[0])

    quantization = np.round(som_iter.quantization_error(trainsomnorm), 2)
    topographic = np.round(som_iter.topographic_error(trainsomnorm), 2)

    ax = pl.subplot(gs[row, col])
    ax.scatter(train.x1, train.x2,
               alpha=0.4,
               color='black', s=3)
    plt.plot(pos_iter[:, 0], pos_iter[:, 1], linestyle='-', color='black')

    ax.set_facecolor('#FFFFFF')
    ax.set_xlabel('$x_1$')
    ax.set_ylabel('$x_2$')
    ax.grid(True, color='#F3F3F3')

    ax.set_title('iteration: ' + str(value) + '\nquantization error: ' + str(quantization) +
                 '\ntopographic error: ' + str(topographic), fontsize = 10, loc = 'left')

plt.subplots_adjust(wspace=0.25, hspace=0.7)

In [None]:
# Chain connectivity: node i is linked to node i+1 (ones on the first
# super-diagonal, zeros elsewhere).
connectivity_matrix = np.eye(n_nodes, k=1)

In [None]:
def plot_dendrogram(model, **kwargs):
    """Build a linkage matrix from a fitted clustering model and draw it.

    Adapted from the scikit-learn documentation:
    https://scikit-learn.org/stable/auto_examples/cluster/plot_agglomerative_dendrogram.html

    ``model`` must expose ``children_``, ``distances_`` and ``labels_``.
    All keyword arguments are forwarded to ``dendrogram``, whose return
    value is returned unchanged.
    """
    merges = model.children_
    n_leaves = len(model.labels_)

    # Number of original samples below each merge. Children with an index
    # below n_leaves are leaves; higher indices refer to earlier merges.
    sizes = np.zeros(merges.shape[0])
    for step, pair in enumerate(merges):
        sizes[step] = sum(
            1 if child < n_leaves else sizes[child - n_leaves]
            for child in pair
        )

    # Columns: the two children, the merge distance, the sample count.
    linkage = np.column_stack([merges, model.distances_, sizes]).astype(float)
    return dendrogram(linkage, **kwargs)

In [None]:
# Full single-linkage merge tree over the SOM node weights
# (distance_threshold=0 with n_clusters=None), restricted by the
# chain connectivity between nodes.
weights = som.get_weights()[0]
model = AgglomerativeClustering(
    n_clusters=None,
    distance_threshold=0,
    linkage='single',
    connectivity=connectivity_matrix,
).fit(weights)

In [None]:
# Distance at which the dendrogram is cut (earlier candidate: 0.225).
threshold = 0.25
set_link_color_palette(plot_colors)

fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1)
plt.subplots_adjust(hspace=0.5)

# Links above the threshold are drawn in black ('k').
den = plot_dendrogram(model, no_labels=True, color_threshold=threshold, above_threshold_color='k')
ax.axhline(y=threshold, c='black', linestyle='dotted')
ax.set_facecolor('#FFFFFF')
#plt.savefig('docout/sections/localtoglobal/results/approach4_Dendrogram_SOMNodes_' + data + '.jpg',dpi=150, bbox_inches='tight',transparent=True,pad_inches=0)

In [None]:
# Cut the single-linkage tree at `threshold` (the variable is called `ward`,
# but the linkage used is "single").
ward = AgglomerativeClustering(
    n_clusters=None,
    distance_threshold=threshold,
    linkage="single",
    connectivity=connectivity_matrix,
).fit(weights)
label = ward.labels_

# Renumber the clusters in order of first appearance along the node chain.
clusterarr = pd.DataFrame({'node': range(n_nodes), 'cluster': pd.factorize(label)[0]})
ncluster = len(np.unique(label))
print(ncluster)

In [None]:
#determine cluster ID for each instance of the training set:
# clusterwinnerspos: cluster id per SOM node weight vector
# clusterwinners:    cluster id per normalized training instance
clusterwinnerspos, clusterwinners = (
    np.apply_along_axis(getclusterid, 1, rows, som, clusterarr)
    for rows in (som.get_weights()[0], trainsomnorm)
)
# winning node id per normalized training instance
nodeswinners = np.apply_along_axis(getwinnerid, 1, trainsomnorm, som)

In [None]:
# Derive one plot colour per cluster from the colours of the dendrogram leaves.

# cluster id of every dendrogram leaf, in dendrogram leaf order
clusterdendrogram = [clusterwinnerspos[x, 0] for x in den['leaves']]
# pair every leaf's cluster id with the colour the dendrogram gave that leaf
clustercolor = [[x, y] for x, y in zip(clusterdendrogram, den['leaves_color_list'])]
# keep each distinct (cluster, colour) pair once
clustercolor = np.unique(clustercolor, axis=0)
# order the rows by numeric cluster id
clustercolor = clustercolor[clustercolor[:, 0].astype(int).argsort()]
# 'k' is the above_threshold_color used when the dendrogram was drawn; every
# other colour is already taken by a cluster
alreadyused = clustercolor[~(clustercolor[:, 1] == 'k'), 1]
available = [x for x in plot_colors if x not in alreadyused]
# give the 'k' rows colours from the palette that are not in use yet
# NOTE(review): assumes the number of 'k' rows equals ncluster - len(alreadyused)
clustercolor[clustercolor[:, 1] == 'k', 1] = available[:(ncluster - len(alreadyused))]
# NOTE(review): later cells compare `cluster` against str(...) and convert it
# with int(...), i.e. the ids appear to be stored as strings here
clustercolor = pd.DataFrame(clustercolor, columns=['cluster', 'color'])

In [None]:
# Clusters to which no training instance was assigned, and their nodes.
clusterswithdata = set(np.unique(clusterwinners).tolist())
clusterswithoutdata = [cid for cid in range(ncluster) if cid not in clusterswithdata]
isemptycluster = clusterarr.cluster.isin(clusterswithoutdata)
nodesofclusterwithoutdata = clusterarr.loc[isemptycluster, 'node'].tolist()

In [None]:
#for each cluster, for each node determine nearest node in cluster with data:
# Every node that belongs to a cluster without data is moved to the cluster of
# a neighbouring node (searched at increasing offsets along the node chain).
emptynodes = set(nodesofclusterwithoutdata)
for node in nodesofclusterwithoutdata:
    weightnode = weights[node]
    # Distance from this node's weight vector to every node of the SOM.
    # It does not depend on the search offset, so it is computed once.
    nodedistances = som._activation_distance(weightnode, som._weights)[0]
    nextnode = node
    i = 1
    while nextnode in emptynodes:
        # Only consider neighbours that exist: a negative index would
        # silently wrap around to the other end of the chain and an index
        # past the end would raise an IndexError.
        candidates = [c for c in (node - i, node + i) if 0 <= c < len(nodedistances)]
        if not candidates:
            raise ValueError('no node of a cluster with data found for node ' + str(node))
        # Closer of the two neighbours at offset i (node - i wins ties).
        nextnode = min(candidates, key=lambda c: nodedistances[c])
        i = i+1
    oldcluster = clusterarr.loc[clusterarr.node == node, 'cluster'].item()
    newcluster = clusterarr.loc[clusterarr.node == nextnode, 'cluster'].item()
    clusterarr.loc[clusterarr.node == node, 'cluster'] = newcluster
    # The emptied cluster takes over the colour of the cluster it merged into.
    clustercolor.loc[clustercolor.cluster == str(oldcluster), 'color'] = clustercolor.loc[
        clustercolor.cluster == str(newcluster), 'color'].item()

In [None]:
#determine cluster ID for each instance of the training set:
# Same computation as before the reassignment, repeated so the results
# reflect the current content of clusterarr.
nodeweights = som.get_weights()[0]
clusterwinnerspos = np.apply_along_axis(getclusterid, 1, nodeweights, som, clusterarr)
clusterwinners = np.apply_along_axis(getclusterid, 1, trainsomnorm, som, clusterarr)
# winning node id per normalized training instance
nodeswinners = np.apply_along_axis(getwinnerid, 1, trainsomnorm, som)

In [None]:
# Training instances and SOM nodes, coloured and marked by cluster.
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1)
plt.subplots_adjust(hspace=0.5)

# [marker symbol, colour] pairs; only the marker symbol is used below.
markers = [['.', 'black'], ['<', 'black'], ['^', 'black'],['s', 'black'], ['X', 'white'],['>', 'black'], ['D', 'black'],['_', 'black'], ['v', 'black'], ['1', 'black'], ['*', 'white'], ['|', 'black'], ['3', 'black']]

plt.plot(pos[:, 0], pos[:, 1], linestyle='-', color='black', alpha = 0.5)
for index, (i,col) in clustercolor.iterrows():
    i = int(i)
    # Reuse the markers cyclically so that more clusters than marker symbols
    # do not raise an IndexError.
    marker = markers[i % len(markers)][0]
    ind = (clusterwinners == i).flatten()
    ax.scatter(trainsom[ind].x1, trainsom[ind].x2, c=col, label = col, s=30, marker = marker)
    ind = (clusterwinnerspos == i).flatten()
    ax.scatter(pos[ind][:,0], pos[ind][:,1], c=col, label = col, s=90, marker = marker)

# instances without a difference as faint background
ax.scatter(train.loc[train.difference.isin(['0|0', '1|1', '2|2']), 'x1'],
           train.loc[train.difference.isin(['0|0', '1|1', '2|2']), 'x2'],
           alpha=0.1,
           color='black',
           s=10)

ax.set_facecolor('#FFFFFF')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.grid(True, color = '#F3F3F3')

plt.savefig('docout/sections/localtoglobal/results/approach4_SOMclustered_' + data + '.jpg',dpi=150, bbox_inches='tight',transparent=True,pad_inches=0)

In [None]:
# Build one explainer (decision tree) per cluster: pick a few instances of the
# cluster, generate a neighborhood around each of them, keep the generated
# points that fall into the cluster and fit a tree on the "yA|yB" labels of
# those points.
#build a tree (explainer) for each node:
#prep for diroc
train['difference'] = train['difference'].astype(str)
dataset = prepare_df(train, 'train', 'difference', discrete=['difference'], continuous=cols)
features = dataset['columns'].copy()
features.remove('difference')
X = np.array(train[features])
feature_values = calculate_feature_values(
    X, dataset['columns'], 'difference', dataset['discrete'], dataset['continuous'], len(train)
)
# discrete features without the class column
discrete_no_class = list(dataset['discrete'])
discrete_no_class.remove('difference')

# per-cluster results: generated neighborhood and fitted tree
neighborhoods = dict()
explainers = dict.fromkeys(list(range(ncluster)))
# NOTE(review): prunedexplainers is not filled anywhere in the visible code
prunedexplainers = dict.fromkeys(list(range(ncluster)))

# cluster id of every normalized training instance (1-D)
clusterassignment = np.apply_along_axis(getclusterid, 1, trainsomnorm, som, clusterarr)
clusterassignment = clusterassignment.flatten()

# training instances on which the two models disagree
traindifferences = train.loc[~train.difference.isin(['0|0', '1|1', '2|2'])]

# index labels (taken from `instance.index`) of the instances chosen below
indexinstances = []

# only nodes that win at least one training instance are candidates
nodeswithoutdata = [x for x in range(n_nodes) if x not in list(np.unique(nodeswinners))]
subclusterarr = clusterarr.loc[~clusterarr.node.isin(nodeswithoutdata)]

for clusterid in np.unique(clusterassignment):
    print('processing cluster ' + str(clusterid))
    # choice of nodes depends on how many nodes with data the cluster has:
    # more than 4 -> lowest, highest and median node id
    # more than 2 -> lowest and highest node id
    # otherwise   -> one randomly sampled node
    if len(subclusterarr.loc[subclusterarr.cluster==clusterid])>4:
        start = subclusterarr.loc[subclusterarr.cluster == clusterid,'node'].min()
        end = subclusterarr.loc[subclusterarr.cluster == clusterid,'node'].max()
        middle = int(subclusterarr.loc[subclusterarr.cluster == clusterid,'node'].median())
        nodes = [start, end, middle]
    elif len(subclusterarr.loc[subclusterarr.cluster==clusterid])>2:
        start = subclusterarr.loc[subclusterarr.cluster == clusterid,'node'].min()
        end = subclusterarr.loc[subclusterarr.cluster == clusterid,'node'].max()
        nodes = [start, end]
    else:
        nodes = list(subclusterarr.loc[subclusterarr.cluster == clusterid,'node'].sample(n=1, random_state = clusterid))

    # accumulated neighborhood of this cluster (two columns)
    Z3 = np.empty((0, 2))

    for x in nodes:
        # training instances whose winning node is x
        indx = (nodeswinners == x)
        # nodes that win no instance (possible for the median node) are skipped
        if indx.sum() >0:
            instance = traindifferences.loc[indx].sample(n=1, random_state=clusterid+3)
            instanceindex = instance.index[0]
            indexinstances.append(instanceindex)
            # drop the last value of the row - presumably the 'difference'
            # column; TODO confirm the column order of `train`
            instance = instance.values.reshape(-1, )[:-1]
            Z = modified_gpdatagenerator.generate_modified_data(instance, feature_values, modelA, modelB,
                                                                diff_classifier_method_type.multiclass_diff_classifier,
                                                                discrete_no_class, dataset['continuous'], 'difference',
                                                                dataset['idx_features'],
                                                                distance_function, neigtype={'ss': 0.5, 'sd': 0.5},
                                                                population_size=1000, halloffame_ratio=None,
                                                                alpha1=0.5, alpha2=0.5, eta1=1, eta2=0.0,
                                                                tournsize=10, cxpb=0.2, mutpb=0.2, ngen=100,
                                                                return_logbook=False, max_steps=20, is_unique=True)
            Z3 = np.concatenate([Z3, Z])

            #restrict neighborhood to current cluster
            # (applied to the accumulated Z3 after every processed node)
            Z3df = pd.DataFrame(Z3, columns = cols)
            Z3df = scaler.transform(Z3df.values)
            neighborhoodwinners = np.apply_along_axis(getclusterid, 1, Z3df, som, clusterarr)
            ind = (neighborhoodwinners == clusterid).flatten()
            Z3 = Z3[ind]
    neighborhoods[clusterid] = Z3
    # label the neighborhood with "<prediction A>|<prediction B>"
    predA = modelA.predict(Z3).astype(str)
    predB = modelB.predict(Z3).astype(str)
    difference = pd.Series(np.char.add(np.char.add(predA, '|'), predB))
    clf = DecisionTreeClassifier(random_state=0)
    clf.fit(Z3, difference)
    explainers[clusterid] = clf
    print('finished processing cluster ' + str(clusterid))

In [None]:
# Persist the fitted explainers and the generated neighborhoods.
for fileprefix, payload in (('Approach2Explainer_', explainers),
                            ('Approach2Neighborhood_', neighborhoods)):
    with open(fileprefix + data + '.pickle', 'wb') as handle:
        pickle.dump(payload, handle, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# Reload the explainers and neighborhoods written by the previous cell.
explainerfile = 'Approach2Explainer_' + data + '.pickle'
neighborhoodfile = 'Approach2Neighborhood_' + data + '.pickle'
with open(explainerfile, 'rb') as handle:
    explainers = pickle.load(handle)
with open(neighborhoodfile, 'rb') as handle:
    neighborhoods = pickle.load(handle)

In [None]:
# indexinstances holds index *labels* (taken from `instance.index`), so the
# rows have to be selected with .loc; .iloc would only be correct if the
# labels happen to equal the row positions.
choseninstances = train.loc[indexinstances]

# Fine mesh over the feature space; each grid point is assigned to a cluster.
X0, X1 = train.x1, train.x2
xx, yy = make_meshgrid(X0, X1, h = 0.005)

grid = np.c_[xx.ravel(), yy.ravel()]
grid = scaler.transform(grid)

z = np.apply_along_axis(getclusterid, 1, grid, som, clusterarr)
z = z.reshape(xx.shape)
# shift the ids by 0.5 so that they lie between the integer contour levels
# used for the filled contour plot of the regions
z = z+0.5

In [None]:
#plot of chosen instances
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(1, 1, 1)
plt.subplots_adjust(hspace=0.5)

# regions of the clusters as filled contours, one colour per cluster
collist = list(clustercolor.color)
MyCmap=colors.ListedColormap(collist)
cf = ax.contourf(xx,yy,z, alpha = 0.8, cmap=MyCmap, levels = list(range(ncluster+1)))

# The scaler was fitted on a plain array, so it is given a plain array here
# as well. The cluster ids are flattened to 1-D so they can be used as a row
# mask (same pattern as for clusterwinnerspos below).
choseninstancesnorm = scaler.transform(choseninstances[cols].values)
choseninstancesclusters = np.apply_along_axis(
    getclusterid, 1, choseninstancesnorm, som, clusterarr).flatten()

for index, (i,col) in clustercolor.iterrows():
    idx = choseninstancesclusters == int(i)
    ax.scatter(choseninstances.loc[idx].x1, choseninstances.loc[idx].x2,
               color=col,s=150, marker = 'X',edgecolor = 'black')
    ind = (clusterwinnerspos == int(i)).flatten()
    ax.scatter(pos[ind][:,0], pos[ind][:,1], c=col, label = col, s=50, edgecolor = 'black')
plt.plot(pos[:, 0], pos[:, 1], linestyle='-', color='black', linewidth= 1)

ax.set_facecolor('#FFFFFF')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.grid(True, color='#F3F3F3')

plt.savefig('docout/sections/localtoglobal/results/approach4_SOMregionsexplainer_' + data + '.jpg',dpi=150, bbox_inches='tight',transparent=True,pad_inches=0)

In [None]:
#load test data:
test, cols = loaddata(data+'test')
testfeatures = test[cols].values
test['yA'] = modelA.predict(testfeatures)
test['yB'] = modelB.predict(testfeatures)
# difference label "<yA>|<yB>", built the same way as for the training data
test['difference'] = test.apply(
    lambda row: '|'.join(['%g' % row['yA'], '%g' % row['yB']]), axis=1)
test = test.drop(columns=['yA', 'yB'])
#transform test data:
testnorm = scaler.transform(testfeatures)

In [None]:
#predictions for each node
# Cluster id of every test instance, flattened to 1-D so it can be used
# directly as a row mask and so `pred` is a 1-D label vector.
winners = np.apply_along_axis(getclusterid, 1, testnorm, som, clusterarr).flatten()
# Object dtype: a fixed-width string array derived from the integer ids
# could truncate longer predicted labels.
pred = np.empty(len(winners), dtype=object)
for currwinner in np.unique(winners):
    idx = winners == currwinner
    # every test instance is predicted by the explainer of its own cluster
    pred[idx] = explainers[currwinner].predict(test.loc[idx, cols].values)

In [None]:
#Evaluation:
# Appends one space-separated line to the performance file: data set name,
# approach name, mean tree depth, mean number of leaves, accuracy, macro
# precision and macro recall on the test set.
dataname = '"Sine"' if data == 'running1' else '"Spiral"'

# clusters without a fitted explainer are stored as None and are skipped
fittedexplainers = [x for x in explainers.values() if x is not None]
depths = [x.get_depth() for x in fittedexplainers]
leaves = [x.get_n_leaves() for x in fittedexplainers]

fields = [
    dataname,
    '"Approach 4: Structured sampling"',
    str(np.round(np.mean(depths), 3)),
    str(np.round(np.mean(leaves), 3)),
    str(metrics.accuracy_score(test.difference, pred)),
    str(metrics.precision_score(test.difference, pred, average='macro')),
    str(metrics.recall_score(test.difference, pred, average='macro')),
]
line = ' '.join(fields)
with open(performancefile, 'a') as myfile:
    myfile.write(line + '\n')

In [None]:
# Stack all neighborhoods into one frame, indexed by
# (cluster id, row number within that cluster's neighborhood).
frames = [pd.DataFrame(v) for v in neighborhoods.values()]
neighborhoodsdf = pd.concat(frames, axis = 0)

shapes = [v.shape[0] for v in neighborhoods.values()]
tuples = []
for k, shape in zip(neighborhoods.keys(), shapes):
    tuples.extend((k, rownumber) for rownumber in range(shape))

neighborhoodsdf.index = pd.MultiIndex.from_tuples(tuples)
neighborhoodsdf.columns = ['x1', 'x2']

In [None]:
# Generated neighborhoods and the chosen instances, drawn on top of the
# decision boundaries of both models.
fig, ax = plt.subplots(figsize=(10, 8))

plt.xlabel('$x_1$')
plt.ylabel('$x_2$')

X0, X1 = train.x1, train.x2
xx, yy = make_meshgrid(X0, X1, h=0.005)

# The former z1/z2 grid predictions were never used and have been removed;
# plot_contours receives the models and the mesh directly.
cntr1 = plot_contours(ax, modelA, xx, yy, levels=1, colors='black', linewidths=2, linestyles='dotted')
cntr2 = plot_contours(ax, modelB, xx, yy, levels=1, colors='black', linewidths=1)

# empty proxy lines that only serve as legend handles
h = [plt.plot([], [], ls=i, color='black')[0] for i in ['dotted', 'solid']]
ax.legend(handles=h, labels=['Decision Boundary $M_A$', 'Decision Boundary $M_B$'],
          loc='lower left', title='', frameon=False, bbox_to_anchor=(0, -0.1), ncol=2)

#add generated neighborhoods:
ax.scatter(neighborhoodsdf.x1, neighborhoodsdf.x2, c='black', alpha=0.3, s=10)

#add selected instances
# indexinstances holds index labels, hence .loc (not .iloc)
choseninstances = train.loc[indexinstances]
ax.scatter(choseninstances.x1, choseninstances.x2, c='#D90429', s=150, marker='X')

ax.set_facecolor('#FFFFFF')
plt.tight_layout()
plt.grid(True, color='#F3F3F3')

ax.set_xlim(train.x1.min(), train.x1.max())
ax.set_ylim(train.x2.min(), train.x2.max())

plt.savefig('docout/sections/localtoglobal/results/approach4_Generatedneighborhoods_' + data + '.jpg',dpi=150, bbox_inches='tight',transparent=True,pad_inches=0)

In [None]:
def predict(x, explainers):
    """Predict one row with the explainer selected by its last entry.

    ``x`` is a 1-D array whose last element is the key into ``explainers``;
    all preceding elements are the feature values. The features are passed
    as a single-row 2-D array to the selected explainer's ``predict`` and
    its result is returned unchanged.
    """
    features, winner = x[:-1], x[-1]
    return explainers[winner].predict(features.reshape(1, -1))

In [None]:
# Coarse mesh over the feature space; every grid point is predicted by the
# explainer of the cluster it falls into.
X0, X1 = train.x1, train.x2
xx, yy = make_meshgrid(X0, X1, h=0.05)
grid = np.column_stack((xx.ravel(), yy.ravel()))
gridtransformed = scaler.transform(grid)

winners = np.apply_along_axis(getclusterid, 1, gridtransformed, som, clusterarr)
winners = winners.reshape((winners.shape[0], 1))
# append the cluster id as last column; `predict` reads it from there
res = np.hstack((grid, winners))
predgrid = np.apply_along_axis(predict, 1, res, explainers)

In [None]:
# Encode the predicted labels as integers and bring the colours into the
# same (sorted) order the encoded labels follow.
keys = list(color_dict[data].keys())

d = LabelEncoder()
d.fit(np.array(keys))
z = d.transform(predgrid).reshape(xx.shape)

# position in color_dict of every label, listed in sorted label order
ordering = [keys.index(labelname) for labelname in sorted(keys)]

# first colour of every color_dict entry, reordered accordingly
values = [entry[0] for entry in color_dict[data].values()]
orderedmap = [values[position] for position in ordering]

In [None]:
# Decision surface of the combined explainers with the decision boundaries
# of both models drawn on the same axes.
MyCmap = colors.ListedColormap(orderedmap)
fig, ax = plt.subplots(figsize=(10, 8))

cntr1 = plot_contours(ax, modelA, xx, yy, levels=1, colors='black', linewidths=2, linestyles='dotted')
cntr2 = plot_contours(ax, modelB, xx, yy, levels=1, colors='black', linewidths=1)

cp = ax.contourf(xx, yy, z + 0.1, alpha=0.7, cmap=MyCmap)

# one empty proxy line per label, used only as legend handle
h = []
for labelname, labelcolors in color_dict[data].items():
    proxy = plt.plot([], [], color=labelcolors[0], linewidth=10, label=labelname)[0]
    h.append(proxy)
ax.legend(handles=h, loc='lower left', title='Prediction Explainer', frameon=False,
          bbox_to_anchor=(0, -0.15), ncol=9)

ax.set_facecolor('#FFFFFF')
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.grid(True, color='#F3F3F3')

ax.set_ylim(train.x2.min(), train.x2.max())
ax.set_xlim(train.x1.min(), train.x1.max())

plt.tight_layout()
outfile = 'docout/sections/localtoglobal/results/approach4_decisionsurface_' + data + '.jpg'
plt.savefig(outfile, dpi=150, bbox_inches='tight', transparent=True, pad_inches=0)

In [None]:
from matplotlib.colors import to_rgb
from sklearn.tree import plot_tree
import matplotlib
import re
import pyperclip


def replace_text(obj):
    """Shorten the label of a matplotlib ``Annotation`` in place.

    For objects whose type is exactly ``matplotlib.text.Annotation`` the part
    of the text from "samples" up to the following "class" (with no "$" in
    between) is replaced by "class". Any other object is left untouched.
    The object itself is returned in both cases.
    """
    if type(obj) != matplotlib.text.Annotation:
        return obj
    shortened = re.sub("samples[^$]*class", "class", obj.get_text())
    obj.set_text(shortened)
    return obj

In [None]:
# Plot the decision tree of one region; `region` selects the explainer and
# is also part of the output file name.
region = 0
explainer = explainers[region]
fig, ax = plt.subplots(figsize=(25, 9))
class_names = explainer.classes_
colorss = [color_dict[data][x][0] for x in class_names]
N = len(class_names)
artists = plot_tree(explainer, fontsize=8, ax=ax,
                    impurity=False, node_ids=True,
                    feature_names=cols, class_names=class_names)
# replace_text edits the annotations in place. The former assignment into the
# dict returned by ax.properties() had no effect of its own and is dropped.
for child in ax.get_children():
    replace_text(child)
for artist, impurity, value in zip(artists, explainer.tree_.impurity, explainer.tree_.value):
    # let the max value decide the color; whiten the color depending on impurity (gini)
    r, g, b = to_rgb(colorss[np.argmax(value)])
    f = impurity * N / (N - 1) if N > 1 else 0
    artist.get_bbox_patch().set_facecolor((f + (1 - f) * r, f + (1 - f) * g, f + (1 - f) * b))
    artist.get_bbox_patch().set_edgecolor('black')
# the file name follows `region` instead of a hard-coded "region0"
plt.savefig('docout/sections/localtoglobal/results/Approach4_region' + str(region) + '_' + data + '_explainer.jpg',
            dpi=300, bbox_inches='tight',
            transparent=True, pad_inches=0)

In [None]:
from vars import get_rules

In [None]:
# Extract the rules of the explainer selected by `region` (the tree plotted
# above) and copy them to the clipboard. Tree and class names are taken from
# the same explainer so that they cannot get out of sync.
rules = get_rules(explainers[region], ['x_1', 'x_2'], explainers[region].classes_)
rules = '\n'.join(rules)
pyperclip.copy(rules)