In [None]:
import sys
# Install dependencies by running pip through the interpreter that backs this
# kernel (`sys.executable`), so the packages land in the kernel's environment.
# NOTE(review): none of the packages are version-pinned, so a re-run may
# resolve to different versions than the ones shown in the recorded output.
!{sys.executable} -m pip install --upgrade setuptools
!{sys.executable} -m pip install witwidget
# Install and enable the witwidget notebook extension under the kernel's
# sys.prefix (`--sys-prefix`), symlinking instead of copying (`--symlink`).
!jupyter nbextension install --py --symlink --sys-prefix witwidget
!jupyter nbextension enable --py --sys-prefix witwidget
!{sys.executable} -m pip install pandas

Collecting setuptools
  Downloading setuptools-46.1.3-py3-none-any.whl (582 kB)
[K     |################################| 582 kB 1.2 MB/s eta 0:00:01
[?25hInstalling collected packages: setuptools
  Attempting uninstall: setuptools
    Found existing installation: setuptools 39.0.1
    Uninstalling setuptools-39.0.1:
      Successfully uninstalled setuptools-39.0.1
Successfully installed setuptools-46.1.3
Collecting witwidget
  Downloading witwidget-1.6.0-py3-none-any.whl (2.3 MB)
[K     |################################| 2.3 MB 1.5 MB/s eta 0:00:01
Collecting google-api-python-client>=1.7.8
  Downloading google_api_python_client-1.8.0-py3-none-any.whl (57 kB)
[K     |################################| 57 kB 1.6 MB/s eta 0:00:01
[?25hCollecting absl-py>=0.4
  Downloading absl-py-0.9.0.tar.gz (104 kB)
[K     |################################| 104 kB 1.4 MB/s eta 0:00:01
Collecting six>=1.12.0
  Downloading six-1.14.0-py2.py3-none-any.whl (10 kB)
Collecting oauth2client>=4.1.3
  Dow

In [None]:
# force reload six
# NOTE(review): presumably needed because the install cell pulls in a newer
# six while an older copy may already be imported in this kernel — confirm.
import six
import importlib
print(six.__version__)  # version of the module object loaded before the reload
importlib.reload(six)  # re-import the module in place
print(six.__version__)  # version reported after the reload

In [None]:
#@title Define helper functions {display-mode: "form"}

import pandas as pd
import numpy as np
import tensorflow as tf
import functools

def create_feature_spec(df, columns=None):
    """Create a tf.Example feature spec from the dataframe and columns specified.

    Args:
        df: pandas DataFrame whose column dtypes decide the feature types.
        columns: Optional iterable of column names to include. Defaults to
            every column of `df`.

    Returns:
        A dict mapping each column name to a scalar `tf.io.FixedLenFeature`:
        `tf.int64` for integer columns, `tf.float32` for floating-point
        columns and `tf.string` for everything else.
    """
    # `is None`, not `== None`: an array-like such as `df.columns` compares
    # elementwise with `==` and cannot be used as an `if` condition.
    if columns is None:
        columns = df.columns.values.tolist()

    feature_spec = {}
    for name in columns:
        dtype = df[name].dtype
        # Classify by dtype family instead of identity with int64/float64, so
        # int32/float32 (and similar) columns are not mislabelled as strings.
        if pd.api.types.is_integer_dtype(dtype):
            tf_dtype = tf.int64
        elif pd.api.types.is_float_dtype(dtype):
            tf_dtype = tf.float32
        else:
            tf_dtype = tf.string
        feature_spec[name] = tf.io.FixedLenFeature(shape=(), dtype=tf_dtype)
    return feature_spec

def create_feature_columns(columns, feature_spec, source_df=None):
    """Create simple numeric and categorical feature columns from a feature spec.

    NOTE: Models might perform better with some feature engineering such as
    bucketed numeric columns and hash-bucket/embedding columns for categorical
    features.

    Args:
        columns: Names of the entries of `feature_spec` to build columns for.
        feature_spec: Dict mapping column name to an object with a `dtype`
            attribute (e.g. the output of `create_feature_spec`).
        source_df: Optional DataFrame supplying the vocabulary of categorical
            columns. When omitted, the notebook-level `df` variable is used,
            which is what this function previously always read.

    Returns:
        A list with one feature column per requested name: a numeric column
        for `tf.int64`/`tf.float32` features, otherwise an indicator column
        over the distinct non-missing values found in the dataframe.
    """
    numeric_dtypes = (tf.int64, tf.float32)
    feature_columns = []
    for col in columns:
        if feature_spec[col].dtype in numeric_dtypes:
            feature_columns.append(tf.feature_column.numeric_column(col))
            continue
        # Resolve the dataframe lazily so purely numeric inputs never need one.
        vocab_df = df if source_df is None else source_df
        # Missing values are not vocabulary entries; drop them so the list
        # holds values of a single type.
        vocabulary = list(vocab_df[col].dropna().unique())
        feature_columns.append(tf.feature_column.indicator_column(
            tf.feature_column.categorical_column_with_vocabulary_list(col, vocabulary)))
    return feature_columns

def tfexamples_input_fn(examples, feature_spec, label, mode=tf.estimator.ModeKeys.EVAL,
                       num_epochs=None, 
                       batch_size=64):
    """Input function feeding a model from a sequence of tf.Example protos.

    Args:
        examples: Indexable, sized collection of protos; each one is serialized
            with `SerializeToString()`.
        feature_spec: Feature spec handed to `parse_tf_example`.
        label: Name of the feature that is split off as the target.
        mode: Estimator mode key; the data is shuffled only for TRAIN.
        num_epochs: Forwarded unchanged to `Dataset.repeat`.
        batch_size: Number of serialized examples per batch.

    Returns:
        A `tf.data.Dataset` of `(features, target)` batches.
    """
    def serialized_examples():
        # Index-based access, matching how the collection has always been read.
        yield from (examples[idx].SerializeToString() for idx in range(len(examples)))

    def split_features_and_label(serialized_batch):
        return parse_tf_example(serialized_batch, label, feature_spec)

    dataset = tf.data.Dataset.from_generator(
        serialized_examples, tf.dtypes.string, tf.TensorShape([]))
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset = dataset.shuffle(buffer_size=2 * batch_size + 1)
    # Batch first, then parse: parsing operates on a whole batch of protos.
    return dataset.batch(batch_size).map(split_features_and_label).repeat(num_epochs)

def parse_tf_example(example_proto, label, feature_spec):
    """Parse serialized tf.Example protos into features for the input function.

    Args:
        example_proto: Serialized example(s) passed to `tf.io.parse_example`.
        label: Key of the parsed feature to return separately as the target.
        feature_spec: Feature spec describing how to parse the protos.

    Returns:
        A `(features, target)` tuple, where `features` holds every parsed
        entry except `label` and `target` is the entry stored under `label`.
    """
    parsed = tf.io.parse_example(serialized=example_proto, features=feature_spec)
    target = parsed[label]
    features = {name: tensor for name, tensor in parsed.items() if name != label}
    return features, target

def df_to_examples(df, columns=None):
    """Convert a dataframe into a list of tf.Example protos, one per row.

    Args:
        df: pandas DataFrame to convert.
        columns: Optional iterable of column names to include. Defaults to
            every column of `df`.

    Returns:
        A list of `tf.train.Example` protos in row order. Integer columns are
        written to `int64_list`, floating-point columns to `float_list` and all
        other columns to `bytes_list` (UTF-8 encoded). Missing values in
        integer and non-numeric columns are skipped, so the feature is absent
        from that example; floating-point values are written as-is.
    """
    # `is None`, not `== None`: an array-like such as `df.columns` compares
    # elementwise with `==` and cannot be used as an `if` condition.
    if columns is None:
        columns = df.columns.values.tolist()
    else:
        columns = list(columns)

    # Classify every column once and pull its values out column-wise. Reading
    # rows through `iterrows()` would upcast a mixed int/float row to float64
    # and corrupt integers above 2**53.
    column_plan = []
    for col in columns:
        dtype = df[col].dtype
        if pd.api.types.is_integer_dtype(dtype):
            kind = 'int'
        elif pd.api.types.is_float_dtype(dtype):
            kind = 'float'
        else:
            kind = 'bytes'
        column_plan.append((col, kind, df[col].tolist()))

    examples = []
    for row_idx in range(len(df)):
        example = tf.train.Example()
        for col, kind, values in column_plan:
            value = values[row_idx]
            if kind == 'float':
                example.features.feature[col].float_list.value.append(value)
                continue
            # Skip missing scalars (None/NaN/NA) rather than failing on them.
            if pd.api.types.is_scalar(value) and pd.isna(value):
                continue
            if kind == 'int':
                example.features.feature[col].int64_list.value.append(int(value))
            else:
                # Bytes pass through; anything else is stringified before encoding.
                encoded = value if isinstance(value, bytes) else str(value).encode('utf-8')
                example.features.feature[col].bytes_list.value.append(encoded)
        examples.append(example)
    return examples

def make_label_column_numeric(df, label_column, test):
    """Convert a dataframe column into 0's and 1's based on the provided test.

    Used to force label columns to be numeric for binary classification using
    a TF estimator. The dataframe is modified in place.

    Args:
        df: pandas DataFrame holding the label column.
        label_column: Name of the column to overwrite.
        test: Callable that receives the column and returns a boolean mask;
            rows where the mask is truthy become 1, all others 0.

    Returns:
        None.
    """
    is_positive = test(df[label_column])
    # Force int64: the default integer width of `np.where(..., 1, 0)` depends
    # on the platform/NumPy version, and the tf.Example helpers key off the
    # column dtype.
    df[label_column] = np.where(is_positive, 1, 0).astype(np.int64)

In [None]:
#@title Read training dataset from CSV {display-mode: "form"}

import pandas as pd


# Build a small synthetic dataset in memory (no CSV is read): the integers
# -5..5 together with a boolean label telling whether each one is non-negative.
# The list is deliberately not called `iter`, which would shadow the builtin
# for every later cell.
numbers = list(range(-5, 6))
df = pd.DataFrame({'number': numbers, 'is_positive': [n >= 0 for n in numbers]})

df

In [None]:
#@title Specify input columns and column to predict {display-mode: "form"}
import numpy as np

# Column the model is asked to predict.
label_column = 'is_positive'

# Turn the label column into 0/1 for use in our model. The test is the
# identity function, so rows whose label is truthy land in the '1' (positive)
# class and every other row in the '0' (negative) class.
make_label_column_numeric(df, label_column, lambda values: values)

# Columns from the dataset that are used as model input.
input_features = ['number']

# All input features followed by the label column.
features_and_labels = [*input_features, label_column]

In [None]:
#@title Convert dataset to tf.Example protos {display-mode: "form"}

# Convert every row of `df` into a tf.Example proto. No column list is passed,
# so all columns (including the label) are written into each example.
examples = df_to_examples(df)

In [None]:
#@title Invoke What-If Tool for test data and the trained model {display-mode: "form"}

# NOTE(review): num_datapoints is not referenced by any visible cell —
# confirm whether it is still needed.
num_datapoints = 10  #@param {type: "number"}
# Passed as the `height` argument when the widget is created.
tool_height_in_px = 1000  #@param {type: "number"}

from witwidget.notebook.visualization import WitConfigBuilder
from witwidget.notebook.visualization import WitWidget

def predict(l):
    """Custom What-If Tool predict function: is the `number` feature non-negative?

    Args:
        l: List of tf.Example protos, each carrying an int64 `number` feature.

    Returns:
        One `[score_class_0, score_class_1]` pair per example, in input order:
        `[0, 1]` when the number is >= 0 and `[1, 0]` otherwise. A
        classification predict function reports one score per class, so the
        two entries must be complementary rather than the same value twice.
    """
    def positive_score(example):
        number = example.features.feature['number'].int64_list.value[0]
        return 1 if number >= 0 else 0

    predictions = []
    for example in l:
        score = positive_score(example)
        predictions.append([1 - score, score])
    return predictions

# Set up the tool with the examples and the custom `predict` function; no
# trained model is involved, predictions come from `predict` alone.
config_builder = WitConfigBuilder(examples).set_custom_predict_fn(predict)
# Left as the last expression of the cell so the widget is displayed.
WitWidget(config_builder, height=tool_height_in_px)

In [None]:
# Print the documentation of `set_custom_predict_fn`, i.e. the contract a
# custom predict function such as `predict` is expected to follow.
help(WitConfigBuilder.set_custom_predict_fn)