In [1]:

#!pip install ipywidgets
#!pip install interpret
#!pip install dice-ml

In [2]:
# imports
from __future__ import print_function
import pandas as pd
import numpy as np
import copy
import ipywidgets as widgets
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from interpret import show
from interpret.blackbox import LimeTabular
from ipywidgets import interact, interactive, fixed, interact_manual, Layout
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from IPython.display import display, clear_output
# DiCE imports
import dice_ml
from dice_ml.utils import helpers  # helper functions
import random



In [3]:
import seaborn as sns
from sklearn.metrics import roc_curve, confusion_matrix, auc
def evalBinaryClassifier(model, x, y, labels=['Positives','Negatives']):
    '''
    Visualize the performance of  a Logistic Regression Binary Classifier.
    
    Displays a labelled Confusion Matrix, distributions of the predicted
    probabilities for both classes, the ROC curve, and F1 score of a fitted
    Binary Logistic Classifier. Author: gregcondit.com/articles/logr-charts
    
    Parameters
    ----------
    model : fitted scikit-learn model with predict_proba & predict methods
        and classes_ attribute. Typically LogisticRegression or 
        LogisticRegressionCV
    
    x : {array-like, sparse matrix}, shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples
        in the data to be tested, and n_features is the number of features
    
    y : array-like, shape (n_samples,)
        Target vector relative to x.
    
    labels: list, optional
        list of text labels for the two classes, with the positive label first
        
    Displays
    ----------
    3 Subplots
    
    Returns
    ----------
    F1: float
    '''
    #model predicts probabilities of positive class
    p = model.predict_proba(x)
    if len(model.classes_)!=2:
        raise ValueError('A binary class problem is required')
    if model.classes_[1] == 1:
        pos_p = p[:,1]
    elif model.classes_[0] == 1:
        pos_p = p[:,0]
    
    #FIGURE
    plt.figure(figsize=[15,4])
    
    #1 -- Confusion matrix
    cm = confusion_matrix(y,model.predict(x))
    plt.subplot(131)
    ax = sns.heatmap(cm, annot=True, cmap='Blues', cbar=False, 
                annot_kws={"size": 14}, fmt='g')
    cmlabels = ['True Negatives', 'False Positives',
              'False Negatives', 'True Positives']
    for i,t in enumerate(ax.texts):
        t.set_text(t.get_text() + "\n" + cmlabels[i])
    plt.title('Confusion Matrix', size=15)
    plt.xlabel('Predicted Values', size=13)
    plt.ylabel('True Values', size=13)
      
    #2 -- Distributions of Predicted Probabilities of both classes
    df = pd.DataFrame({'probPos':pos_p, 'target': y})
    plt.subplot(132)
    plt.hist(df[df.target==1].probPos, density=True, bins=25,
             alpha=.5, color='green',  label=labels[0])
    plt.hist(df[df.target==0].probPos, density=True, bins=25,
             alpha=.5, color='red', label=labels[1])
    plt.axvline(.5, color='blue', linestyle='--', label='Boundary')
    plt.xlim([0,1])
    plt.title('Distributions of Predictions', size=15)
    plt.xlabel('Positive Probability (predicted)', size=13)
    plt.ylabel('Samples (normalized scale)', size=13)
    plt.legend(loc="upper right")
    
    #3 -- ROC curve with annotated decision point
    fp_rates, tp_rates, _ = roc_curve(y,p[:,1])
    roc_auc = auc(fp_rates, tp_rates)
    plt.subplot(133)
    plt.plot(fp_rates, tp_rates, color='green',
             lw=1, label='ROC curve (area = %0.2f)' % roc_auc)
    plt.plot([0, 1], [0, 1], lw=1, linestyle='--', color='grey')
    #plot current decision point:
    tn, fp, fn, tp = [i for i in cm.ravel()]
    plt.plot(fp/(fp+tn), tp/(tp+fn), 'bo', markersize=8, label='Decision Point')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate', size=13)
    plt.ylabel('True Positive Rate', size=13)
    plt.title('ROC Curve', size=15)
    plt.legend(loc="lower right")
    plt.subplots_adjust(wspace=.3)
    plt.show()
    #Print and Return the F1 score
    tn, fp, fn, tp = [i for i in cm.ravel()]
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    F1 = 2*(precision * recall) / (precision + recall)
    printout = (
        f'Precision: {round(precision,2)} | '
        f'Recall: {round(recall,2)} | '
        f'F1 Score: {round(F1,2)} | '
    )
    print(printout)
    return F1

In [4]:
# load and read data
f = open('oxy_data.csv')
df = pd.read_csv(f, header=0, index_col=False, engine='c', encoding='UTF-8')

# delete Nan values from the feature dataframe
df_column_list = df.columns.tolist()  # extract all column names into a list
for column_name in df_column_list:
    df = df.drop(df.index[df[column_name] == 'NA()'])

# delete Nan values from the feature dataframe
df_column_list = df.columns.tolist()  # extract all column names into a list
for column_name in df_column_list:
    df = df.drop(df.index[df[column_name] == '2P'])

#drop all rows with null values

df.dropna(inplace = True)

#rearranging coloumns to bring outcome to front
df_column_list = df.columns.tolist() 
temp = df_column_list[0]
df_column_list[0] = df_column_list[1];
df_column_list[1] = temp
df_column_list
df=df.reindex(columns=df_column_list)
df.head()

#SPLIT SETS
X, y = df[df.columns[1:34]], df[df.columns[0]] 
X.head()

# set appropriate datatypes
X, y = X.astype(float), y.astype(int)


# train test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)



In [5]:
import pickle
# fit a linear logistic regression model
model = LogisticRegression(max_iter = 300).fit(X_train, y_train)

# print accuracies on train and test sets
#print('Mean train accuracy for logistic regression: {:0.4f}'.format(model.score(X_train, y_train)))
#print('Mean test accuracy for logistic regression: {:0.4f}'.format(model.score(X_test, y_test)))




In [6]:
H = X_test.head(1).reset_index(drop = "True")
Z = H.to_dict('records')[0] #singular value to dictionary for updates


In [7]:


# fit a RandomForestClassifier

model2 = RandomForestClassifier(n_estimators=60).fit(X_train, y_train)


# print accuracies on train and test sets
#print('Mean train accuracy for RandomForestClassifier: {:0.4f}'.format(model2.score(X_train, y_train)))
#print('Mean test accuracy for RandomForestClassifier: {:0.4f}'.format(model2.score(X_test, y_test)))



In [8]:
from sklearn.naive_bayes import GaussianNB

#fit a GaussianNB

model3 = GaussianNB().fit(X_train, y_train)


# print accuracies on train and test sets
#print('Mean train accuracy for GaussianNB: {:0.4f}'.format(model3.score(X_train, y_train)))
#print('Mean test accuracy for GaussianNB: {:0.4f}'.format(model3.score(X_test, y_test)))

In [9]:
from sklearn.ensemble import VotingClassifier
#fit a VotingClassifier

model4 = VotingClassifier(
    estimators=[("lr", model), ("rf", model2), ("gnb", model3)],
    voting="soft",
    weights=[1, 1, 5],
).fit(X_train, y_train)

# print accuracies on train and test sets
#print('Mean train accuracy for VotingClassifier: {:0.4f}'.format(model4.score(X_train, y_train)))
#print('Mean test accuracy for VotingClassifier: {:0.4f}'.format(model4.score(X_test, y_test)))

In [10]:
#F1 = evalBinaryClassifier(model,X_test,y_test)

In [11]:
#F2 = evalBinaryClassifier(model2,X_test,y_test)

In [12]:
#F3 = evalBinaryClassifier(model3,X_test,y_test)

In [13]:
#find max and min values

df_column_list = X.columns.tolist() 
df_column_list[1]
l = (len(df_column_list))

In [14]:
def pred_val (A):

  #predict output for all classifiers
  prediction = [c.predict(A) for c in (model, model2, model3, model4)]
  #print("Sample input: ") 
  #display(A.head(1))
  #get prediction for only first sample
  result = [pr[0] for pr in prediction]


  # predict class probabilities for all classifiers
  probas = [c.predict_proba(A) for c in (model, model2, model3, model4)]

  # get class probabilities for the first sample in the dataset
  class0_1 = [pr[0, 0] for pr in probas]
  class1_1 = [pr[0, 1] for pr in probas]

  #printing values
  #prediction:

  #print("The predicted class by LogisticRegression is: " + str(result[0]) + " with positive class probability being " +str(max(class0_1[0],class1_1[0])))
  #print("The predicted class by GaussianNB is: " + str(result[1]) + " with positive class probability being " +str(max(class0_1[1],class1_1[1])))
  #print("The predicted class by RandomForestClassifier is: " + str(result[2]) + " with positive class probability being " +str(max(class0_1[2],class1_1[2])))
  #print("The predicted class by VotingClassifier is: " + str(result[3]) + " with positive class probability being " +str(max(class0_1[3],class1_1[3])))
  return [class0_1[0],class1_1[0],class0_1[1],class1_1[1],class0_1[2],class1_1[2],class0_1[3],class1_1[3]]


In [15]:
def pred (A):

  #predict output for all classifiers
  prediction = [c.predict(X_test) for c in (model, model2, model3, model4)]
  print("Sample input: ") 
  display(X_test.head(1))
  #get prediction for only first sample
  result = [pr[0] for pr in prediction]


  # predict class probabilities for all classifiers
  probas = [c.predict_proba(X_test) for c in (model, model2, model3, model4)]

  # get class probabilities for the first sample in the dataset
  class0_1 = [pr[0, 0] for pr in probas]
  class1_1 = [pr[0, 1] for pr in probas]

  #printing values
  #prediction:

  print("The predicted class by LogisticRegression is: " + str(result[0]) + " with positive class probability being " +str(max(class0_1[0],class1_1[0])))
  print("The predicted class by GaussianNB is: " + str(result[1]) + " with positive class probability being " +str(max(class0_1[1],class1_1[1])))
  print("The predicted class by RandomForestClassifier is: " + str(result[2]) + " with positive class probability being " +str(max(class0_1[2],class1_1[2])))
  print("The predicted class by VotingClassifier is: " + str(result[3]) + " with positive class probability being " +str(max(class0_1[3],class1_1[3])))

  # plotting

  N = 4  # number of groups
  ind = np.arange(N)  # group positions
  width = 0.35  # bar width

  fig, ax = plt.subplots()

  # bars for classifier 1-3

  p1 = ax.bar(ind, np.hstack(([class0_1[:-1], [0]])), width, color="green", edgecolor="k")
  p2 = ax.bar(
      ind + width,
      np.hstack(([class1_1[:-1], [0]])),
      width,
      color="lightgreen",
      edgecolor="k",
  )

  # bars for VotingClassifier
  p3 = ax.bar(ind, [0, 0, 0, class0_1[-1]], width, color="blue", edgecolor="k")
  p4 = ax.bar(
      ind + width, [0, 0, 0, class1_1[-1]], width, color="steelblue", edgecolor="k"
  )

  # plot annotations
  plt.axvline(2.8, color="k", linestyle="dashed")
  ax.set_xticks(ind + width)
  ax.set_xticklabels(
      [
          "LogisticRegression\nweight 1",
          "GaussianNB\nweight 1",
          "RandomForestClassifier\nweight 5",
          "VotingClassifier\n(average probabilities)",
      ],
      rotation=40,
      ha="right",
  )
  plt.ylim([0, 1])
  plt.title("Class probabilities for sample 1 by different classifiers")
  plt.legend([p1[0], p2[0]], ["Well Outcome: 0", "Well Outcome: 1"], loc="upper left")
  plt.tight_layout()
  plt.show()

In [16]:
def inp1 ():
  drop = widgets.Dropdown(
      options=df_column_list,
      value=df_column_list[0],
      description='Feature',
      disabled=False,
  )
  ##Input values Field1
  field = widgets.BoundedFloatText(
      value=0,
      min=0,
      max=5,
      #step=0.00001,
      description='Value:',
      disabled=False
  )
  slide = widgets.FloatSlider(
      value=0,
      min=0,
      max=5,
      step=0.000001,
      description='Value:',
      disabled=False,
      continuous_update=False,
      orientation='horizontal',
      readout=True,
      readout_format='.6f',
      #layout=widgets.Layout(width='100%')
  )
  widgets.jslink((field, 'value'), (slide, 'value'))
  def on_choose_a(d):
    if drop.value == df_column_list[1] or drop.value == df_column_list[2] or drop.value == df_column_list[3] or  drop.value == df_column_list[4] or drop.value == df_column_list[5] or drop.value == df_column_list[6]:
      slide.min = 0;
      slide.max = 1;
      slide.step = 0.000001;
    elif drop.value == df_column_list[0]:
      slide.min = 1;
      slide.max = 4;
      slide.step = 1;
    elif drop.value == df_column_list[2]:
      slide.min = -1;
      slide.max = 1;
      slide.step = 0.000001;
    elif drop.value == df_column_list[17]:
      slide.min = 0;
      slide.max = 4;
      slide.step = 1;
    else:
      slide.min = 0;
      slide.max = 5;
      slide.step = 1;
    return slide.value
  widgets.dlink((drop, "value"), (slide, "value"), on_choose_a)
  return drop, field, slide;

In [17]:

d1, f1, s1  = inp1 ()
d2, f2, s2  = inp1 ()
inputa = widgets.VBox([d1, f1, s1]);
inputb = widgets.VBox([d2, f2, s2]);

In [18]:

column_names1 = ["a", "Outcome 0: Probability (LR)", "Outcome 1: Probability (LR)", "Outcome 0: Probability (GNB)", "Outcome 1: Probability (GNB)", "Outcome 0: Probability (RF)", "Outcome 1: Probability (RF)", "Outcome 0: Probability (VC)", "Outcome 1: Probability (VC)" ];
column_names2 = ["a", "b", "Outcome 0: Probability (LR)", "Outcome 1: Probability (LR)", "Outcome 0: Probability (GNB)", "Outcome 1: Probability (GNB)", "Outcome 0: Probability (RF)", "Outcome 1: Probability (RF)", "Outcome 0: Probability (VC)", "Outcome 1: Probability (VC)" ];
temp_for_display1 = pd.DataFrame(columns = column_names1)
temp_for_display2 = pd.DataFrame(columns = column_names2)

button0 = widgets.Button(description="NEW INSTANCE", layout=Layout(width='100%', height='30px'))
button0.style.button_color = 'gray'
button0.style.text_color = 'black'

out = widgets.Output()
out1 = widgets.Output()
out2 = widgets.Output()

def on_button_clicked(b):
  with out:
    clear_output()
  with out1:
    clear_output()
  with out2:
    clear_output()
  global temp_for_display1
  global temp_for_display2
  global column_names1
  global column_names2
  temp_for_display1 = pd.DataFrame(columns = column_names1)
  temp_for_display2 = pd.DataFrame(columns = column_names2)


button0.on_click(on_button_clicked)


In [19]:
button30 = widgets.Button(description="?", button_style='warning', layout=Layout( width = "30px"), tooltip='Click to start a new istance \n of feature selection.' )
button30.style.button_color = '#b3b3ff'

button3 = widgets.Button(description="?", button_style='warning', layout=Layout(  width = "30px"), tooltip='Press to change between \n 1 or 2 changeable features.' )
button3.style.button_color = '#b3b3ff'

button31 = widgets.Button(description="?", button_style='warning', layout=Layout( width = "30px"), tooltip='Add feature value to input feature set and display' )
button31.style.button_color = '#b3b3ff'

button32 = widgets.Button(description="?", button_style='warning', layout=Layout(  width = "30px"), tooltip='GENERATE PLOTS' )
button32.style.button_color = '#b3b3ff'

button33 = widgets.Button(description="?", button_style='warning', layout=Layout(  width = "30px"), tooltip='Generate 20 Counterfactuals with \n  the selected feature from the \n  dropdown menu' )
button33.style.button_color = '#b3b3ff'


In [22]:


## Feature Selection
select_text = widgets.HTML(value="<b>Select No. of Variables to modify <b>")
select_opt = widgets.RadioButtons(
    options=[1, 2],
    layout={'width': 'max-content'}, # If the items' names are long
    disabled=False 
)


#display(select)

button = widgets.Button(description="Refresh!")
#output = widgets.Output()

button_refresh = widgets.HBox([button, button3])
select = widgets.VBox([select_text, select_opt, button_refresh])
#display(button)

input = widgets.HBox(children = [inputa])
def on_button_clicked(b):
    with out:
        clear_output()
    with out1:
        clear_output()
    with out2:
        clear_output()
    global temp_for_display1
    global temp_for_display2
    global column_names1
    global column_names2
    temp_for_display1 = pd.DataFrame(columns = column_names1)
    temp_for_display2 = pd.DataFrame(columns = column_names2)
    if select_opt.value ==1:
        input.children = [inputa]
    else:
        input.children = [inputa, inputb]

show1 = widgets.HBox([select, input])
#display(show1)

button.on_click(on_button_clicked)



In [23]:
#from IPython.display import display
button1 = widgets.Button(description="ADD SET")
#button1.style.button_color = 'lightblue'
#button1.style.text_color = ''




#display(button1)

def on_button_clicked1(b):
  if select_opt.value == 1:
    
    temp_for_display1.rename(columns={"a": d1.value }, inplace=True)
  else:
    temp_for_display1.rename(columns={"a": d1.value }, inplace=True)
    temp_for_display2.rename(columns={"a": d1.value, "b": d2.value }, inplace=True)



  temp = copy.deepcopy(Z)
  if select_opt.value == 1:
    temp[d1.value] = f1.value;
  else:
    temp[d1.value] = f1.value;
    temp[d2.value] = f2.value;

  input= pd.DataFrame(temp, index=[0,])
  #input


  #GET PREDICTION AND PROBABILITIES:

  x = pred_val (input)

  #print(list_inp)


  if select_opt.value == 1:

    list_inp = [f1.value]
    list_inp.extend(x)
    temp_for_display1.loc[len(temp_for_display1)] = list_inp
    
  else:
    list_inp = [f1.value, f2.value]
    list_inp.extend(x)
    temp_for_display2.loc[len(temp_for_display2)] = list_inp
  if select_opt.value == 1:
    with out:
      clear_output()
      display(temp_for_display1)
  else:
    with out:
      clear_output()
      display(temp_for_display2)
  #print( "set of values added!")

button1.on_click(on_button_clicked1)

In [24]:
button2 = widgets.Button(description="GENERATE", button_style='success', layout=Layout( height='100px') )



def on_button_clicked2(b):
  global temp_for_display1
  global temp_for_display2
  if temp_for_display1.columns[0] == 'a':
    with out1:
      print("Please add values first")
      return
    with out2:
      print("Please add values first")
      return
  if select_opt.value == 1:
    temp_for_display1 = temp_for_display1.sort_values(d1.value)
    #display(temp_for_display1)
  else:
    temp_for_display2 = temp_for_display2.sort_values(d1.value)
    #display(temp_for_display2)

  if select_opt.value == 1:

    fig1 = make_subplots(rows=2, cols=2, x_title = d1.value, y_title = "Prediction Probability",subplot_titles=("Logistic Regression", "Gaussian NB", "Random Forest Classifier", "Voting Classifier"))

    fig1.add_trace(go.Scatter( x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 0: Probability (LR)'].values, name = 'fefefef'), row=1, col=1)

    fig1.add_trace(go.Scatter( x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 0: Probability (GNB)'].values, name = 'fefefef2'),
              row=1, col=2)

    fig1.add_trace(go.Scatter(x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 0: Probability (RF)'].values, name = 'fefefef3'),
              row=2, col=1)

    fig1.add_trace(go.Scatter(x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 0: Probability (VC)'].values, name = 'fefefef4'),
              row=2, col=2)
    
    fig1.update_layout(showlegend=False)

    fig2 = make_subplots(rows=2, cols=2,  x_title = d1.value, y_title = "Prediction Probability", subplot_titles=("Logistic Regression", "Gaussian NB", "Random Forest Classifier", "Voting Classifier"))

    fig2.add_trace(go.Scatter( x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 1: Probability (LR)'].values, name = 'fefefef'),
              row=1, col=1)

    fig2.add_trace(go.Scatter( x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 1: Probability (GNB)'].values, name = 'fefefef2'),
              row=1, col=2)

    fig2.add_trace(go.Scatter(x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 1: Probability (RF)'].values, name = 'fefefef3'),
              row=2, col=1)

    fig2.add_trace(go.Scatter(x = temp_for_display1[d1.value].values , y = temp_for_display1['Outcome 1: Probability (VC)'].values, name = 'fefefef4'),
              row=2, col=2)
    
    fig2.update_layout(showlegend=False)


  if select_opt.value == 2:
    fig3 = make_subplots(rows=2, cols=2, specs=[[{"type": "scatter3d"}, {"type": "scatter3d"}],[{"type": "scatter3d"}, {"type": "scatter3d"}]], subplot_titles=("Logistic Regression", "Gaussian NB", "Random Forest Classifier", "Voting Classifier"))

    fig3.add_trace(go.Scatter3d( x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values,  z = temp_for_display2['Outcome 0: Probability (LR)'].values, name = 'fefefef'),
              row=1, col=1)

    fig3.add_trace(go.Scatter3d( x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 0: Probability (GNB)'].values, name = 'fefefef2'),
              row=1, col=2)

    fig3.add_trace(go.Scatter3d(x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 0: Probability (RF)'].values, name = 'fefefef3'),
              row=2, col=1)

    fig3.add_trace(go.Scatter3d(x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 0: Probability (VC)'].values, name = 'fefefef4'),
              row=2, col=2)

    fig3.update_layout(height=900, showlegend=False)


    fig4 = make_subplots(rows=2, cols=2, specs=[[{"type": "scatter3d"}, {"type": "scatter3d"}],[{"type": "scatter3d"}, {"type": "scatter3d"}]], subplot_titles=("Logistic Regression", "Gaussian NB", "Random Forest Classifier", "Voting Classifier"))

    fig4.add_trace(go.Scatter3d( x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 1: Probability (LR)'].values, name = 'fefefef'),
              row=1, col=1)

    fig4.add_trace(go.Scatter3d( x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 1: Probability (GNB)'].values, name = 'fefefef2'),
              row=1, col=2)

    fig4.add_trace(go.Scatter3d(x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 1: Probability (RF)'].values, name = 'fefefef3'),
              row=2, col=1)

    fig4.add_trace(go.Scatter3d(x = temp_for_display2[d1.value].values , y = temp_for_display2[d2.value].values, z = temp_for_display2['Outcome 1: Probability (VC)'].values, name = 'fefefef4'),
              row=2, col=2)
  
    fig4.update_layout(height=900, showlegend=False)
  with out:
    clear_output()
  with out1:
    clear_output()
    if select_opt.value == 1:
      fig1.show()
    else:
      fig3.show()
  with out2:
    clear_output()
    if select_opt.value == 1:
      fig2.show()
    else:
      fig4.show()

button2.on_click(on_button_clicked2)

tab = widgets.Tab(children = [out1, out2])
tab.set_title(0, 'Well Outcome: 0')
tab.set_title(1, 'Well Outcome: 1')
#

In [25]:
#GRENERATING COUNTERFACTUALS
def counterfact ():
  global Z;
  global temp_for_display1;
  global temp_for_display2;
  if d1.value == df_column_list[1] or d1.value == df_column_list[2] or d1.value == df_column_list[3] or  d1.value == df_column_list[4] or d1.value == df_column_list[5] or d1.value == df_column_list[6]:
    value1 = round(random.uniform(0,1), 6);
  elif d1.value == df_column_list[0]:
    value1 = random.randint(1, 4);
  elif d1.value == df_column_list[2]:
    value1 = round(random.uniform(-1,1), 6);
  elif d1.value == df_column_list[17]:
    value1 = random.randint(0, 4);
  else:
    value1 = random.randint(0, 5);
  if select_opt.value == 2:
    if d2.value == df_column_list[1] or d2.value == df_column_list[2] or d2.value == df_column_list[3] or  d2.value == df_column_list[4] or d2.value == df_column_list[5] or d2.value == df_column_list[6]:
      value2 = round(random.uniform(0,1), 6);
    elif d2.value == df_column_list[0]:
      value2 = random.randint(1, 4);
    elif d2.value == df_column_list[2]:
      value2 = round(random.uniform(-1,1), 6);
    elif d2.value == df_column_list[17]:
      value2 = random.randint(0, 4);
    else:
      value2 = random.randint(0, 5);

  if select_opt.value == 1:
    temp_for_display1.rename(columns={"a": d1.value }, inplace=True)
  else:
    temp_for_display1.rename(columns={"a": d1.value }, inplace=True)
    temp_for_display2.rename(columns={"a": d1.value, "b": d2.value }, inplace=True)

  temp = copy.deepcopy(Z)
  if select_opt.value == 1:
    temp[d1.value] = value1;
  else:
    temp[d1.value] = value1;
    temp[d2.value] = value2;
  
  input= pd.DataFrame(temp, index=[0,])

  #GET PREDICTION AND PROBABILITIES:
  x = pred_val (input)

  if select_opt.value == 1:
    list_inp = [value1]
    list_inp.extend(x)
    temp_for_display1.loc[len(temp_for_display1)] = list_inp
  else:
    list_inp = [value1, value2]
    list_inp.extend(x)
    temp_for_display2.loc[len(temp_for_display2)] = list_inp


In [29]:
button_c = widgets.Button(description=" 20 \n Counterfactuals", layout=Layout( height='50px'))
button_c.style.button_color = '#f4ad5d'
def on_button_clickedc(b):
  global Z;
  global temp_for_display1;
  global temp_for_display2;
  for x in range(0, 20):
    counterfact ()
  display(temp_for_display1)
button_c.on_click(on_button_clickedc)




In [30]:
value1 = ('Mean train accuracy for logistic regression: {:0.4f}'.format(model.score(X_train, y_train)))
value2 = ('Mean test accuracy for logistic regression: {:0.4f}'.format(model.score(X_test, y_test)))

value3 = ('Mean train accuracy for GaussianNB: {:0.4f}'.format(model3.score(X_train, y_train)))
value4 = ('Mean test accuracy for GaussianNB: {:0.4f}'.format(model3.score(X_test, y_test)))

value5 = ('Mean train accuracy for RandomForestClassifier: {:0.4f}'.format(model2.score(X_train, y_train)))
value6 = ('Mean test accuracy for RandomForestClassifier: {:0.4f}'.format(model2.score(X_test, y_test)))

value7 = ('Mean train accuracy for VotingClassifier: {:0.4f}'.format(model4.score(X_train, y_train)))
value8 = ('Mean test accuracy for VotingClassifier: {:0.4f}'.format(model4.score(X_test, y_test)))


model1_text1 = widgets.HTML(value= value1)


In [31]:
button_new = widgets.HBox([button0, button30])
button_add = widgets.HBox([button1, button31])
val_text = widgets.HTML(value="<b>OR GENERATE: <b>")
button_coun = widgets.HBox([button_c, button33])
button_val = widgets.VBox([button_add, val_text, button_coun])
button_gen = widgets.HBox([button2, button32])
show0 = widgets.HBox([button_val, button_gen])
show2 = widgets.HBox([show1, show0])
show3 = widgets.VBox([button_new, show2,out, tab])
display(show3)

VBox(children=(HBox(children=(Button(description='NEW INSTANCE', layout=Layout(height='30px', width='100%'), s…

Unnamed: 0,Data Quality Seismic,Outcome 0: Probability (LR),Outcome 1: Probability (LR),Outcome 0: Probability (GNB),Outcome 1: Probability (GNB),Outcome 0: Probability (RF),Outcome 1: Probability (RF),Outcome 0: Probability (VC),Outcome 1: Probability (VC)
0,0.587574,0.776221,0.223779,0.466667,0.533333,0.896387,0.103613,0.822594,0.177406
1,0.632814,0.779327,0.220673,0.45,0.55,0.897577,0.102423,0.823888,0.176112
2,0.673066,0.782064,0.217936,0.483333,0.516667,0.896966,0.103034,0.826223,0.173777
3,0.6304,0.779162,0.220838,0.45,0.55,0.897564,0.102436,0.823855,0.176145
4,0.407993,0.763587,0.236413,0.483333,0.516667,0.869254,0.130746,0.80617,0.19383
5,0.120711,0.742372,0.257628,0.483333,0.516667,0.68313,0.31687,0.670194,0.329806
6,0.373003,0.761069,0.238931,0.483333,0.516667,0.858588,0.141412,0.798192,0.201808
7,0.941221,0.799673,0.200327,0.516667,0.483333,0.843584,0.156416,0.776323,0.223677
8,0.841367,0.793243,0.206757,0.516667,0.483333,0.87564,0.12436,0.798301,0.201699
9,0.537229,0.772728,0.227272,0.483333,0.516667,0.892649,0.107351,0.824186,0.175814
