In [None]:
import pandas as pd
import numpy as np
import matplotlib.pylab as pl
import matplotlib.gridspec as gridspec
from data.getdata import loaddata
from data.split3fold import split3fold
import pickle
import matplotlib.pyplot as plt
from matplotlib import colors
from matplotlib.legend import Legend
# Use matplotlib's bundled 'ggplot' style sheet as the base look for the figures
# below; the plotting cells then override the axes face colour and grid colour.
plt.style.use('ggplot')

from vars import color_dict, make_meshgrid, plot_contours

### Running Example 1

In [None]:
# Load the first running example and attach the predictions of both pickled
# black-box models to it.
data = 'running1'
train, cols = loaddata(data)

# Context managers close the model files as soon as they have been read instead
# of leaving the open handles to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code -- only load model files
# that come from a trusted source.
with open('blackboxes/' + data + 'A.sav', 'rb') as model_file:
    modelA = pickle.load(model_file)
with open('blackboxes/' + data + 'B.sav', 'rb') as model_file:
    modelB = pickle.load(model_file)

train['yA'] = modelA.predict(train[['x1', 'x2']].values)
train['yB'] = modelB.predict(train[['x1', 'x2']].values)

# Label every row with its prediction pair "<yA>|<yB>". Building the strings
# from the two columns avoids a slow row-by-row DataFrame.apply and yields the
# same '%g'-formatted labels.
train['difference'] = ['%g|%g' % (y_a, y_b) for y_a, y_b in zip(train['yA'], train['yB'])]

In [None]:
# Scatter plot of running example 1: one series per "yA|yB" prediction pair,
# overlaid with the two curves labelled as the decision boundaries of M_A and M_B.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
for i, (color, marker) in color_dict['running1'].items():
    subset = train[train.difference == i]
    ax.scatter(subset.x1, subset.x2,
               c=color, s=10, label=i, marker=marker[0])
lgnd = ax.legend(title='$M_A$ | $M_B$', frameon=False)

# Enlarge the legend markers so they stay readable next to the small (s=10)
# scatter points. Legend.legendHandles was renamed to legend_handles in
# matplotlib 3.7 and removed in 3.9, so try the new name first and fall back
# to the old one on older releases.
for handle in getattr(lgnd, 'legend_handles', None) or getattr(lgnd, 'legendHandles', []):
    handle.set_sizes([50.0])
ax.set_facecolor('#FFFFFF')

x = np.arange(-10, 10, 0.1)
bba = 4 * np.sin(x)
# np.sinc(t) is sin(pi*t)/(pi*t) and is defined as 1 at t == 0, so this equals
# sin(x)/x but stays finite even if the grid contains an exact zero.
bbb = np.sinc(x / np.pi)

lines = []
lines += ax.plot(x, bba, ':', color='black')
lines += ax.plot(x, bbb, '-', color='black')
# A second, manually built legend for the two curves; add_artist keeps it
# alongside the scatter legend created above.
leg = Legend(ax, lines, ['Decision boundary $M_A$', 'Decision boundary $M_B$'],
             loc='lower right', frameon=False)
ax.add_artist(leg)
fig.tight_layout()
ax.grid(True, color='#F3F3F3')
fig.savefig("./docout/sections/localtoglobal/experimentdesign/runningexample1.jpg", dpi=250)

In [None]:
# 2x2 grid for running example 1: predictions of M_A (top left), predictions of
# M_B (top right) and the combined "yA|yB" differences (bottom row, full width).
gs = gridspec.GridSpec(2, 2)

x = np.arange(-10, 10, 0.1)
bba = 4 * np.sin(x)
# np.sinc(t) is sin(pi*t)/(pi*t) and is defined as 1 at t == 0, so this equals
# sin(x)/x but stays finite even if the grid contains an exact zero.
bbb = np.sinc(x / np.pi)
fig = pl.figure(figsize=(10, 8))

# --- top left: predictions of model A ---------------------------------------
plot_colors = ['#FFB703', '#FB8500']
ax1 = pl.subplot(gs[0, 0])
for i, color, marker in zip([0, 1], plot_colors, ['o', 'x']):
    subset = train[train.yA == i]
    ax1.scatter(subset.x1, subset.x2, c=color, s=10, label=i, marker=marker, alpha=0.7)
ax1.plot(x, bba, ':', color='black')
lgnd = ax1.legend(title='$M_A$',
                  loc="upper right", frameon=False)
# Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and
# removed in 3.9; try the new name first, then the old one.
for handle in getattr(lgnd, 'legend_handles', None) or getattr(lgnd, 'legendHandles', []):
    handle.set_sizes([50.0])

# --- top right: predictions of model B --------------------------------------
plot_colors = ['#9E0059', '#FF0054']
ax2 = pl.subplot(gs[0, 1])  # row 0, col 1
for i, color, marker in zip([0, 1], plot_colors, ['o', 'x']):
    subset = train[train.yB == i]
    ax2.scatter(subset.x1, subset.x2, c=color, s=10, label=i, alpha=0.5, marker=marker)
ax2.plot(x, bbb, '-', color='black')
lgnd = ax2.legend(title='$M_B$',
                  loc="upper right", frameon=False)
for handle in getattr(lgnd, 'legend_handles', None) or getattr(lgnd, 'legendHandles', []):
    handle.set_sizes([50.0])

# --- bottom row: combined prediction pairs ----------------------------------
ax3 = pl.subplot(gs[1, :])  # row 1, span all columns
for i, (color, marker) in color_dict['running1'].items():
    subset = train[train.difference == i]
    ax3.scatter(subset.x1, subset.x2, c=color, s=10, label=i, marker=marker[0])

lgnd = ax3.legend(title='$M_A$ | $M_B$',
                  loc="upper right", frameon=False)
for handle in getattr(lgnd, 'legend_handles', None) or getattr(lgnd, 'legendHandles', []):
    handle.set_sizes([50.0])
ax1.set_facecolor('#FFFFFF')
ax2.set_facecolor('#FFFFFF')
ax3.set_facecolor('#FFFFFF')

lines = []
lines += ax3.plot(x, bba, ':', color='black')
lines += ax3.plot(x, bbb, '-', color='black')

# Shared legend for the two curves. It is anchored in ax3 coordinates
# (bbox_to_anchor y=2.3 lies above ax3) and added to the figure rather than to
# a single axes.
leg = Legend(ax3, lines, ['Decision boundary $M_A$', 'Decision boundary $M_B$'],
             loc="lower left", frameon=False, bbox_to_anchor=(0, 2.3), ncol=2)
fig.add_artist(leg)
ax1.grid(True, color='#F3F3F3')
ax2.grid(True, color='#F3F3F3')
ax3.grid(True, color='#F3F3F3')

fig.savefig("./docout/sections/localtoglobal/experimentdesign/decisiondifferencesrunning1.jpg", dpi=200)

### Running Example 2

In [None]:
# Load the second running example and attach the predictions of both pickled
# black-box models to it.
data = 'running2'
train, cols = loaddata(data)

# Context managers close the model files as soon as they have been read instead
# of leaving the open handles to the garbage collector.
# NOTE(review): pickle.load can execute arbitrary code -- only load model files
# that come from a trusted source.
with open('blackboxes/' + data + 'A.sav', 'rb') as model_file:
    modelA = pickle.load(model_file)
with open('blackboxes/' + data + 'B.sav', 'rb') as model_file:
    modelB = pickle.load(model_file)

train['yA'] = modelA.predict(train[['x1', 'x2']].values)
train['yB'] = modelB.predict(train[['x1', 'x2']].values)

# Label every row with its prediction pair "<yA>|<yB>". Building the strings
# from the two columns avoids a slow row-by-row DataFrame.apply and yields the
# same '%g'-formatted labels.
train['difference'] = ['%g|%g' % (y_a, y_b) for y_a, y_b in zip(train['yA'], train['yB'])]

In [None]:
# Scatter plot of running example 2 with the contours that plot_contours draws
# for both models on a mesh spanning the data.
X0, X1 = train.x1, train.x2
xx, yy = make_meshgrid(X0, X1, h=0.005)
fig, ax = plt.subplots(figsize=(10, 8))
# M_A is drawn dotted (and thicker), M_B as a thin solid line.
cntr1 = plot_contours(ax, modelA, xx, yy, levels=1, colors='black', linewidths=2, linestyles='dotted')
cntr2 = plot_contours(ax, modelB, xx, yy, levels=1, colors='black', linewidths=1)

for i, (color, marker) in color_dict[data].items():
    subset = train[train.difference == i]
    ax.scatter(subset.x1, subset.x2, c=color, s=20, label=i, marker=marker[0])
leg = ax.legend(loc='upper right', title='$M_A$ | $M_B$', frameon=False)
# Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and
# removed in 3.9; try the new name first, then the old one.
for handle in getattr(leg, 'legend_handles', None) or getattr(leg, 'legendHandles', []):
    handle.set_sizes([50.0])
# Keep the scatter legend on the axes; the ax.legend call below would
# otherwise replace it.
ax.add_artist(leg)

# Proxy lines for the boundary legend. The order must match the labels:
# M_A is the dotted contour and M_B the solid one (see plot_contours calls
# above); the previous 'solid', 'dotted' order swapped the two entries.
h = [ax.plot([], [], ls=style, color='black')[0] for style in ['dotted', 'solid']]
ax.legend(handles=h, labels=['Decision Boundary $M_A$', 'Decision Boundary $M_B$'],
          loc='lower right', title='', frameon=False)

ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.grid(True, color='#F3F3F3')
ax.set_facecolor('#FFFFFF')
fig.tight_layout()
fig.savefig("./docout/sections/localtoglobal/experimentdesign/runningexample2.jpg", dpi=250)

In [None]:
# 2x2 grid for running example 2: predictions of M_A (top left), predictions of
# M_B (top right) and the combined "yA|yB" differences (bottom row, full width).
gs = gridspec.GridSpec(2, 2)
fig = pl.figure(figsize=(10, 8))

# X0 / X1 are the x1 / x2 columns assigned in the previous plotting cell.
xx, yy = make_meshgrid(X0, X1, h=0.005)

# --- top left: predictions of model A ---------------------------------------
ax1 = pl.subplot(gs[0, 0])  # row 0, col 0
for i, color, marker in zip([0, 1, 2], ['#FFB703', '#FB8500', "#ce2029"], ['.', 'x', 'v']):
    subset = train[train.yA == i]
    ax1.scatter(subset.x1, subset.x2, c=color, s=10, label=i, marker=marker)
cntr1 = plot_contours(ax1, modelA, xx, yy, levels=1, colors='black', linewidths=2, linestyles='dotted')
lgnd = ax1.legend(title='$M_A$', bbox_to_anchor=(1.0, 1), loc="upper left", frameon=False)
# Legend.legendHandles was renamed to legend_handles in matplotlib 3.7 and
# removed in 3.9; try the new name first, then the old one.
for handle in getattr(lgnd, 'legend_handles', None) or getattr(lgnd, 'legendHandles', []):
    handle.set_sizes([50.0])

# --- top right: predictions of model B --------------------------------------
ax2 = pl.subplot(gs[0, 1])  # row 0, col 1
for i, color, marker in zip([0, 1, 2], ['#9E0059', '#FF0054', "#2f4b7c"], ['.', 'x', 'v']):
    subset = train[train.yB == i]
    ax2.scatter(subset.x1, subset.x2, c=color, s=10, label=i, marker=marker)
cntr2 = plot_contours(ax2, modelB, xx, yy, levels=1, colors='black', linewidths=1)
lgnd = ax2.legend(title='$M_B$', bbox_to_anchor=(1.0, 1), loc="upper left", frameon=False)
for handle in getattr(lgnd, 'legend_handles', None) or getattr(lgnd, 'legendHandles', []):
    handle.set_sizes([50.0])

# --- bottom row: combined prediction pairs ----------------------------------
ax3 = pl.subplot(gs[1, :])  # row 1, span all columns
for i, (color, marker) in color_dict[data].items():
    subset = train[train.difference == i]
    ax3.scatter(subset.x1, subset.x2, c=color, s=10, label=i, marker=marker[0])
# Explicitly target ax3 instead of relying on it being pyplot's current axes.
leg = ax3.legend(title='$M_A$ | $M_B$', frameon=False, bbox_to_anchor=(1.0, 1), loc="upper left")
for handle in getattr(leg, 'legend_handles', None) or getattr(leg, 'legendHandles', []):
    handle.set_sizes([50.0])
# Keep the scatter legend; the second ax3.legend call below would otherwise
# replace it.
ax3.add_artist(leg)

ax1.set_facecolor('#FFFFFF')
ax2.set_facecolor('#FFFFFF')
ax3.set_facecolor('#FFFFFF')

cntr1 = plot_contours(ax3, modelA, xx, yy, levels=1, colors='black', linewidths=2, linestyles='dotted')
cntr2 = plot_contours(ax3, modelB, xx, yy, levels=1, colors='black', linewidths=1)

# Proxy lines for the boundary legend: dotted for M_A, solid for M_B, matching
# the contour styles used above.
h = [ax3.plot([], [], ls=style, color='black')[0] for style in ['dotted', 'solid']]
ax3.legend(handles=h, labels=['Decision Boundary $M_A$', 'Decision Boundary $M_B$'],
           loc='lower left', title='', frameon=False, bbox_to_anchor=(0, 1), ncol=2)
ax1.grid(True, color='#F3F3F3')
ax2.grid(True, color='#F3F3F3')
ax3.grid(True, color='#F3F3F3')

fig.tight_layout()
fig.savefig("./docout/sections/localtoglobal/experimentdesign/decisiondifferencesrunning2.jpg", dpi=300)

### Bank Marketing

In [None]:
from data.split3fold import split3fold

In [None]:
# Dataset key: passed to loaddata and used to build the black-box model file
# names ('blackboxes/<data>A.sav' / '...B.sav') in the cells below.
data = 'bankmarketing'

In [None]:
# For this dataset loaddata yields six values: one data set per model
# (dataA / dataB) and `cols`, which the cells below use to select the model
# input columns. `discrete`, `continuous` and `le` are not used in the visible
# cells -- presumably column-type lists and a label encoder; verify against
# data.getdata.
dataA, dataB, cols, discrete, continuous, le = loaddata(data)
# Split each data set into three parts. Only the second and third parts
# (trainX / testX) are used below.
# NOTE(review): 0.4 and 0.2 are presumably split fractions and random_state=1
# presumably makes the split reproducible -- confirm in data.split3fold.
blackboxtrainA, trainA, testA = split3fold(dataA, 0.4, 0.2, random_state=1)
blackboxtrainB, trainB, testB = split3fold(dataB, 0.4, 0.2, random_state=1)

In [None]:
# Load the two pickled black-box models for the current dataset. The context
# managers close each file as soon as it has been read instead of leaking the
# open handle.
# NOTE(review): pickle.load can execute arbitrary code -- only load model files
# that come from a trusted source.
with open('blackboxes/' + data + 'A.sav', 'rb') as model_file:
    modelA = pickle.load(model_file)
with open('blackboxes/' + data + 'B.sav', 'rb') as model_file:
    modelB = pickle.load(model_file)

In [None]:
# Create the difference-detection dataset: the train (and test) splits of A and
# B are stacked, every row is labelled with the prediction pair
# "<predA>|<predB>" of the two black boxes, the original target column 'y' is
# removed and the index is renumbered from 0.
#
# The labels are built directly from the prediction arrays, which avoids a slow
# row-by-row DataFrame.apply and the temporary predA/predB columns; drop and
# reset_index return new frames instead of mutating in place.
train = pd.concat([trainA, trainB])
train['difference'] = [
    str(int(pred_a)) + '|' + str(int(pred_b))
    for pred_a, pred_b in zip(modelA.predict(train[cols].values),
                              modelB.predict(train[cols].values))
]
train = train.drop(columns=['y']).reset_index(drop=True)

test = pd.concat([testA, testB])
test['difference'] = [
    str(int(pred_a)) + '|' + str(int(pred_b))
    for pred_a, pred_b in zip(modelA.predict(test[cols].values),
                              modelB.predict(test[cols].values))
]
test = test.drop(columns=['y']).reset_index(drop=True)

### Compas

In [None]:
# Dataset key: passed to loaddata and used to build the black-box model file
# names ('blackboxes/<data>A.sav' / '...B.sav') in the cells below.
data = 'compas'

In [None]:
# For this dataset loaddata yields six values: one data set per model
# (dataA / dataB) and `cols`, which the cells below use to select the model
# input columns. `discrete`, `continuous` and `le` are not used in the visible
# cells -- presumably column-type lists and a label encoder; verify against
# data.getdata.
dataA, dataB, cols, discrete, continuous, le = loaddata(data)
# Split each data set into three parts. Only the second and third parts
# (trainX / testX) are used below.
# NOTE(review): 0.4 and 0.2 are presumably split fractions and random_state=1
# presumably makes the split reproducible -- confirm in data.split3fold.
blackboxtrainA, trainA, testA = split3fold(dataA, 0.4, 0.2, random_state=1)
blackboxtrainB, trainB, testB = split3fold(dataB, 0.4, 0.2, random_state=1)

In [None]:
# Load the two pickled black-box models for the current dataset. The context
# managers close each file as soon as it has been read instead of leaking the
# open handle.
# NOTE(review): pickle.load can execute arbitrary code -- only load model files
# that come from a trusted source.
with open('blackboxes/' + data + 'A.sav', 'rb') as model_file:
    modelA = pickle.load(model_file)
with open('blackboxes/' + data + 'B.sav', 'rb') as model_file:
    modelB = pickle.load(model_file)

In [None]:
# Create the difference-detection dataset: the train (and test) splits of A and
# B are stacked, every row is labelled with the prediction pair
# "<predA>|<predB>" of the two black boxes, the original target column 'y' is
# removed and the index is renumbered from 0.
#
# The labels are built directly from the prediction arrays, which avoids a slow
# row-by-row DataFrame.apply and the temporary predA/predB columns; drop and
# reset_index return new frames instead of mutating in place.
train = pd.concat([trainA, trainB])
train['difference'] = [
    str(int(pred_a)) + '|' + str(int(pred_b))
    for pred_a, pred_b in zip(modelA.predict(train[cols].values),
                              modelB.predict(train[cols].values))
]
train = train.drop(columns=['y']).reset_index(drop=True)

test = pd.concat([testA, testB])
test['difference'] = [
    str(int(pred_a)) + '|' + str(int(pred_b))
    for pred_a, pred_b in zip(modelA.predict(test[cols].values),
                              modelB.predict(test[cols].values))
]
test = test.drop(columns=['y']).reset_index(drop=True)