In [0]:
import math
import io
import os
import time
import datetime
import csv
import re

import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

import operator
import pandas as pd

from io import open
from collections import namedtuple
from sklearn import tree
from tqdm import tqdm

from tensorflow.examples.tutorials.mnist import input_data
from tensorflow.contrib import learn



## Training a Sentiment network (3 conv layers (for bigrams, trigrams and quadgrams) + 1 fully connected)



In [0]:
#Yoon Kim's Convolutional Neural Networks for Sentence Classification paper, implemented in Tensorflow by Denny Britz, mostly copied from his Github repo.
# This uses some different settings than the paper, like learning embeddings instead of using word2vec (or using both), using another dataset, not SST (Stanford Sentiment Treebank), not having L2 regularization.
# TODO - Add options for those settings as well, making it more faithful to the paper.

#Fix for flags
tf.app.flags.DEFINE_string('f', '', 'kernel')

#Dataset info
tf.flags.DEFINE_float("dev_sample_percentage", .1, "Percentage of the training data to use for validation")
tf.flags.DEFINE_string("positive_data_file", "./data/rt-polaritydata/rt-polarity.pos", "Data source for the positive data.")
tf.flags.DEFINE_string("negative_data_file", "./data/rt-polaritydata/rt-polarity.neg", "Data source for the negative data.")

# Model Hyperparameters
tf.flags.DEFINE_integer("embedding_dim", 128, "Dimensionality of character embedding (default: 128)")
tf.flags.DEFINE_string("filter_sizes", "3,4,5", "Comma-separated filter sizes (default: '3,4,5')")
tf.flags.DEFINE_integer("num_filters", 128, "Number of filters per filter size (default: 128)")
tf.flags.DEFINE_float("dropout_keep_prob", 0.5, "Dropout keep probability (default: 0.5)")
tf.flags.DEFINE_float("l2_reg_lambda", 0.0, "L2 regularization lambda (default: 0.0)")

# Training parameters
tf.flags.DEFINE_integer("batch_size", 64, "Batch Size (default: 64)")
tf.flags.DEFINE_integer("num_epochs", 100, "Number of training epochs (default: 200)")
tf.flags.DEFINE_integer("evaluate_every", 100, "Evaluate model on dev set after this many steps (default: 100)")
tf.flags.DEFINE_integer("checkpoint_every", 100, "Save model after this many steps (default: 100)")
tf.flags.DEFINE_integer("num_checkpoints", 5, "Number of checkpoints to store (default: 5)")
# Misc Parameters
tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")

FLAGS = tf.flags.FLAGS

In [30]:
#DOWNLOADING DATASET

!mkdir ./data/
!mkdir ./data/rt-polaritydata/
!wget https://github.com/hayesconverse/sym_convnn/raw/master/Textual_Invariants/data/rt-polaritydata/rt-polarity.pos -O ./data/rt-polaritydata/rt-polarity.pos
!wget https://github.com/hayesconverse/sym_convnn/raw/master/Textual_Invariants/data/rt-polaritydata/rt-polarity.neg -O ./data/rt-polaritydata/rt-polarity.neg

mkdir: cannot create directory ‘./data/’: File exists
mkdir: cannot create directory ‘./data/rt-polaritydata/’: File exists
--2018-10-25 19:50:42--  https://github.com/hayesconverse/sym_convnn/raw/master/Textual_Invariants/data/rt-polaritydata/rt-polarity.pos
Resolving github.com (github.com)... 192.30.253.113, 192.30.253.112
Connecting to github.com (github.com)|192.30.253.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/hayesconverse/sym_convnn/master/Textual_Invariants/data/rt-polaritydata/rt-polarity.pos [following]
--2018-10-25 19:50:42--  https://raw.githubusercontent.com/hayesconverse/sym_convnn/master/Textual_Invariants/data/rt-polaritydata/rt-polarity.pos
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response...

In [39]:
#READ AND PROCESS INPUTS

def clean_str(string):
  """
  Tokenization/string cleaning for all datasets except for SST.
  Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
  """
  string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
  string = re.sub(r"\'s", " \'s", string)
  string = re.sub(r"\'ve", " \'ve", string)
  string = re.sub(r"n\'t", " n\'t", string)
  string = re.sub(r"\'re", " \'re", string)
  string = re.sub(r"\'d", " \'d", string)
  string = re.sub(r"\'ll", " \'ll", string)
  string = re.sub(r",", " , ", string)
  string = re.sub(r"!", " ! ", string)
  string = re.sub(r"\(", " \( ", string)
  string = re.sub(r"\)", " \) ", string)
  string = re.sub(r"\?", " \? ", string)
  string = re.sub(r"\s{2,}", " ", string)
  return string.strip().lower()


def load_data_and_labels(positive_data_file, negative_data_file):
  """
  Loads MR polarity data from files, splits the data into words and generates labels.
  Returns split sentences and labels.
  """
  # Load data from files
  positive_examples = list(open(positive_data_file, "r", encoding='utf-8').readlines())
  positive_examples = [s.strip() for s in positive_examples]
  negative_examples = list(open(negative_data_file, "r", encoding='utf-8').readlines())
  negative_examples = [s.strip() for s in negative_examples]
  # Split by words
  x_text = positive_examples + negative_examples
  x_text = [clean_str(sent) for sent in x_text]
  # Generate labels
  positive_labels = [[0, 1] for _ in positive_examples]
  negative_labels = [[1, 0] for _ in negative_examples]
  y = np.concatenate([positive_labels, negative_labels], 0)
  return [x_text, y]


def batch_iter(data, batch_size, num_epochs, shuffle=True):
  """
  Generates a batch iterator for a dataset.
  """
  data = np.array(data)
  data_size = len(data)
  num_batches_per_epoch = int((len(data)-1)/batch_size) + 1
  for epoch in range(num_epochs):
    # Shuffle the data at each epoch
    if shuffle:
      shuffle_indices = np.random.permutation(np.arange(data_size))
      shuffled_data = data[shuffle_indices]
    else:
      shuffled_data = data
    for batch_num in range(num_batches_per_epoch):
      start_index = batch_num * batch_size
      end_index = min((batch_num + 1) * batch_size, data_size)
      yield shuffled_data[start_index:end_index]

def preprocess():
  # Data Preparation
  # ==================================================

  # Load data
  print("Loading data...")
  x_text, y = load_data_and_labels(FLAGS.positive_data_file, FLAGS.negative_data_file)
  #print x_text[0]
  # Build vocabulary
  max_document_length = max([len(x.split(" ")) for x in x_text])
  vocab_processor = learn.preprocessing.VocabularyProcessor(max_document_length)
  x = np.array(list(vocab_processor.fit_transform(x_text)))

  # Randomly shuffle data
  np.random.seed(10)
  shuffle_indices = np.random.permutation(np.arange(len(y)))
  x_shuffled = x[shuffle_indices]
  y_shuffled = y[shuffle_indices]
  x_text_shuffled = [x_text[i] for i in list(shuffle_indices)]
  #print shuffle_indices

  # Split train/test set
  # TODO: This is very crude, should use cross-validation
  dev_sample_index = -1 * int(FLAGS.dev_sample_percentage * float(len(y)))
  x_train, x_dev = x_shuffled[:dev_sample_index], x_shuffled[dev_sample_index:]
  y_train, y_dev = y_shuffled[:dev_sample_index], y_shuffled[dev_sample_index:]
  x_text_train, x_text_dev = x_text_shuffled[:dev_sample_index], x_text_shuffled[dev_sample_index:]

  del x, y, x_shuffled, y_shuffled

  print("Vocabulary Size: {:d}".format(len(vocab_processor.vocabulary_)))
  print("Train/Dev split: {:d}/{:d}".format(len(y_train), len(y_dev)))
  return x_train, y_train, vocab_processor, x_dev, y_dev, x_text_train, x_text_dev

x_train, y_train, vocab_processor, x_test, y_test, x_text_train, x_text_test = preprocess()

Loading data...
Vocabulary Size: 18758
Train/Dev split: 9596/1066
[   65   827 11955  2990  6742   250   532  2152  3564  3434   532 11956
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0     0     0     0     0
     0     0     0     0     0     0     0     0]
like these russo guys lookin' for their mamet instead found their sturges


In [0]:
def weight_variable(shape, name):
  initial = tf.truncated_normal(shape, stddev=0.1)
  return tf.Variable(initial, name=name)

def bias_variable(shape, name):
  initial = tf.constant(0.1, shape=shape)
  return tf.Variable(initial, name=name)

def conv2d(x, W):
  return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
  return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

#TODO Names are from the original implementation, may need to update them
def create_model(sequence_length, num_classes, vocab_size,
  embedding_size, filter_sizes, num_filters, l2_reg_lambda=0.0):
  
  # Placeholders for input, output and dropout
  input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
  input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
  dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

  # Keeping track of l2 regularization loss (optional)
  l2_loss = tf.constant(0.0)

  # Embedding layer
  # tf.name_scope("embedding")
  with tf.device('/cpu:0'):
    W = tf.Variable(
      tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
      name="W_embed")
    embedded_chars = tf.nn.embedding_lookup(W, input_x)
    embedded_chars_expanded = tf.expand_dims(embedded_chars, -1)

  # Create a convolution + maxpool layer for each filter size
  pooled_outputs = []
  relu_layers = []
  for i, filter_size in enumerate(filter_sizes):
    #with tf.name_scope("conv-maxpool-%s" % filter_size):
    # Convolution Layer
    filter_shape = [filter_size, embedding_size, 1, num_filters]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W_{}".format(filter_size))
    b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b_{}".format(filter_size))
    conv = tf.nn.conv2d(
      embedded_chars_expanded,
      W,
      strides=[1, 1, 1, 1],
      padding="VALID",
      name="conv_{}".format(filter_size))
    # Apply nonlinearity
    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu_{}".format(filter_size))
    relu_layers.append(h)
    # Maxpooling over the outputs
    pooled = tf.nn.max_pool(
      h,
      ksize=[1, sequence_length - filter_size + 1, 1, 1],
      strides=[1, 1, 1, 1],
      padding='VALID',
      name="pool")
    pooled_outputs.append(pooled)

  # Combine all the pooled features
  num_filters_total = num_filters * len(filter_sizes)
  h_pool = tf.concat(pooled_outputs, 3)
  h_pool_flat = tf.reshape(h_pool, [-1, num_filters_total])

  # Add dropout
  with tf.name_scope("dropout"):
    h_drop = tf.nn.dropout(h_pool_flat, dropout_keep_prob)

  # Final (unnormalized) scores and predictions
  #with tf.name_scope("output"):
  W = tf.get_variable(
    "W_fc",
    shape=[num_filters_total, num_classes],
    initializer=tf.contrib.layers.xavier_initializer())
  b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b_fc")
  l2_loss += tf.nn.l2_loss(W)
  l2_loss += tf.nn.l2_loss(b)
  scores = tf.nn.xw_plus_b(h_drop, W, b, name="scores")
  predictions = tf.argmax(scores, 1, name="predictions")

  # Calculate mean cross-entropy loss
  with tf.name_scope("loss"):
    losses = tf.nn.softmax_cross_entropy_with_logits(logits=scores, labels=input_y)
    loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

  # Accuracy
  with tf.name_scope("accuracy"):
    correct_predictions = tf.equal(predictions, tf.argmax(input_y, 1))
  accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
  
  cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(labels=input_y, logits=scores))
  
  return cross_entropy, accuracy, input_x, dropout_keep_prob, scores, input_y, relu_layers
  
  ###ANKUR'S CODE:
  #x = tf.placeholder(tf.float32, shape=[None, 784], name='input')
  #y_ = tf.placeholder(tf.float32, shape=[None, 10])

  #W_conv1 = weight_variable([5, 5, 1, 32], 'w_conv1')
  #b_conv1 = bias_variable([32], 'b_conv1')

  #x_image = tf.reshape(x, [-1,28,28,1])
  #h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
  #h_pool1 = max_pool_2x2(h_conv1)
  #h_pool1 = tf.identity(h_pool1, name="conv1")

  #W_conv2 = weight_variable([5, 5, 32, 64], 'w_conv2')
  #b_conv2 = bias_variable([64], 'b_conv2')

  #h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
  #h_pool2 = max_pool_2x2(h_conv2)
  #h_pool2 = tf.identity(h_pool2, name="conv2")

  #W_fc1 = weight_variable([7 * 7 * 64, 1024], 'w_fc1')
  #b_fc1 = bias_variable([1024], 'b_fc1')

  #h_pool2_flat = tf.reshape(h_pool2, [-1, 7*7*64])
  #h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1, name='fc1')

  #keep_prob = tf.placeholder(tf.float32, name='keep_prob')
  #h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

  #W_fc2 = weight_variable([1024, 10], 'w_fc2')
  #b_fc2 = bias_variable([10], 'b_fc2')

  #y_conv = tf.add(tf.matmul(h_fc1_drop, W_fc2), b_fc2, name='prediction')

  #cross_entropy = tf.reduce_mean(
  #    tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv))

  #correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
  #accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
  

### Train a new model

In [6]:
#TODO Need to make the dataset compliant to tf.data format so that I can use next_batch or things like that.

#tf.reset_default_graph()
#sess = tf.InteractiveSession()
#cross_entropy, accuracy, x, keep_prob, y_conv, y_ = create_model(sequence_length=x_train.shape[1],
#        num_classes=y_train.shape[1],
#        vocab_size=len(vocab_processor.vocabulary_),
#        embedding_size=FLAGS.embedding_dim,
#        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
#        num_filters=FLAGS.num_filters,
#        l2_reg_lambda=FLAGS.l2_reg_lambda)
#train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)
#saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
#sess.run(tf.global_variables_initializer())
#for i in range(0, 1200):
#  batch = train_dataset.batch(FLAGS.batch_size)
#  train_step.run(feed_dict={x: batch[0], y_: np.eye(10)[batch[1]], keep_prob: 0.5})
#  if i%100 == 0:
#    test_accuracy = accuracy.eval(feed_dict={
#        x:test_dataset.images, y_: np.eye(10)[test_dataset.labels], keep_prob: 1.0})
#    print("step %d, test accuracy %g"%(i, test_accuracy))    
#ckpt_path_name = saver.save(sess, './checkpoints/mnist_invariant.ckpt', global_step=i)
#print "Checkpoint saved at: %s" % ckpt_path_name





tf.reset_default_graph()
sess = tf.InteractiveSession()
cross_entropy, accuracy, x, keep_prob, y_conv, y_, relu_layers = create_model(sequence_length=x_train.shape[1],
        num_classes=y_train.shape[1],
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=FLAGS.embedding_dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda)
train_step = tf.train.AdamOptimizer(1e-3).minimize(cross_entropy)
saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
sess.run(tf.global_variables_initializer())

# Generate batches
batches = batch_iter(
  list(zip(x_train, y_train)), FLAGS.batch_size, FLAGS.num_epochs)
for i,batch in enumerate(batches):
  x_batch, y_batch = zip(*batch)
  train_step.run(feed_dict={x: x_batch, y_: y_batch, keep_prob: FLAGS.dropout_keep_prob})
  if i%1000 == 0 and i > 0:
    test_accuracy = accuracy.eval(feed_dict={
        x:x_test, y_: y_test, keep_prob: 1.0})
    print("step %d, test accuracy %g"%(i, test_accuracy))    
ckpt_path_name = saver.save(sess, './checkpoints/text_invariant.ckpt', global_step=i)
print "Checkpoint saved at: %s" % ckpt_path_name




Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

step 1000, test accuracy 0.702627
step 2000, test accuracy 0.732645
step 3000, test accuracy 0.723265
step 4000, test accuracy 0.721388
step 5000, test accuracy 0.727955
step 6000, test accuracy 0.74015
step 7000, test accuracy 0.725141
Checkpoint saved at: ./checkpoints/text_invariant.ckpt-7499


In [0]:
from google.colab import files
files.download(ckpt_path_name + '.index')
files.download(ckpt_path_name + '.meta')
files.download(ckpt_path_name + '.data-00000-of-00001')

### Restore a pretrained model

In [0]:
!mkdir -p ./checkpoints
!wget https://github.com/hayesconverse/sym_convnn/raw/master/Textual_Invariants/text_checkpoint/text_invariant.ckpt.index -O ./checkpoints/text_invariants.ckpt.index
!wget https://github.com/hayesconverse/sym_convnn/raw/master/Textual_Invariants/text_checkpoint/text_invariant.ckpt.meta -O ./checkpoints/text_invariants.ckpt.meta
!wget https://github.com/hayesconverse/sym_convnn/raw/master/Textual_Invariants/text_checkpoint/text_invariant.ckpt.data-00000-of-00001 -O ./checkpoints/text_invariants.ckpt.data-00000-of-00001

In [0]:
tf.reset_default_graph()
sess = tf.InteractiveSession()
cross_entropy, accuracy, x, keep_prob, y_conv, y_ = create_model(sequence_length=x_train.shape[1],
        num_classes=y_train.shape[1],
        vocab_size=len(vocab_processor.vocabulary_),
        embedding_size=FLAGS.embedding_dim,
        filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
        num_filters=FLAGS.num_filters,
        l2_reg_lambda=FLAGS.l2_reg_lambda)
saver = tf.train.Saver(write_version=tf.train.SaverDef.V2)
saver.restore(sess, './checkpoints/text_invariants.ckpt')

test_accuracy = accuracy.eval(feed_dict={
    x:x_test, y_: y_test, keep_prob: 1.0})
print("Test accuracy %g"%(test_accuracy))    

train_accuracy = accuracy.eval(feed_dict={
    x:x_train, y_: y_train, keep_prob: 1.0})
print("Train accuracy %g"%(train_accuracy))  



### Parameters of the network

In [7]:

filter_sizes=list(map(int, FLAGS.filter_sizes.split(",")))

#w_embed = sess.run(sess.graph.get_tensor_by_name('W_embed:0'))
#print "Embedding, weight shape", w_embed.shape

w_convs = [None] * len(filter_sizes)
b_convs = [None] * len(filter_sizes)
relu_convs = [None] * len(filter_sizes)

for i, filter_size in enumerate(filter_sizes):
  w_convs[i] = sess.run(sess.graph.get_tensor_by_name('W_{}:0'.format(filter_size)))
  b_convs[i] = sess.run(sess.graph.get_tensor_by_name('W_{}:0'.format(filter_size)))
  relu_convs[i] = sess.graph.get_tensor_by_name('relu_{}:0'.format(filter_size))
  print "Conv with filter size {}, weight and bias shape".format(filter_size), w_convs[i].shape, b_convs[i].shape  
  
w_fc = sess.run(sess.graph.get_tensor_by_name('W_fc:0'))
b_fc = sess.run(sess.graph.get_tensor_by_name('b_fc:0'))
print "FC, weight and bias shape", w_fc.shape, b_fc.shape


Conv with filter size 3, weight and bias shape (3, 128, 1, 128) (3, 128, 1, 128)
Conv with filter size 4, weight and bias shape (4, 128, 1, 128) (4, 128, 1, 128)
Conv with filter size 5, weight and bias shape (5, 128, 1, 128) (5, 128, 1, 128)
FC, weight and bias shape (384, 2) (2,)


## Library for IG Attribution and Conductance

In [0]:
t_label = tf.placeholder(tf.int32)
t_neuron_id = tf.placeholder(tf.int32)
t_grad = tf.gradients(y_conv[:, t_label], x)
t_fc1 = sess.graph.get_tensor_by_name('fc1:0')
t_conv1 = sess.graph.get_tensor_by_name('conv1:0')
t_conv1 = sess.graph.get_tensor_by_name('conv2:0')
t_grad_neuron = tf.gradients(y_conv[:, t_label], t_fc1)[0]
t_grad_conductance = tf.gradients(t_fc1[:,t_neuron_id], x, grad_ys=t_grad_neuron[:, t_neuron_id])

In [0]:
import copy

def get_prediction(inps, tensor=y_conv, batch_size=100):
  def get_prediction_batch(batch):
    feed = {x: np.array(batch), keep_prob:1.0}
    return sess.run(tensor, feed_dict=feed)
  n = len(inps)
  if n%batch_size == 0:
    batches = [inps[i*batch_size:(i+1)*batch_size] for i in range(int(n/batch_size))]
  else:
    batches = [inps[i*batch_size:(i+1)*batch_size] for i in range(int(n/batch_size) +1)]
  #print len(batches)
  batch_predictions = [get_prediction_batch(b) for b in tqdm(batches)]
  #print len(batch_predictions)
  #print batch_predictions[0]
  #print batch_predictions[1]
  return np.concatenate(tuple(batch_predictions), axis=0)

def attribute(inp, label, baseline=None, steps=50, use_top_label=False):
  def top_label(inp):
    return np.argmax(get_prediction([inp])[0])
  if baseline is None:
    baseline = 0*inp
  scaled_inputs = [baseline + (float(i)/steps)*(inp-baseline) for i in range(0, steps)]
  feed = {keep_prob:1.0}
  if use_top_label:
    feed[x] = [inp]
    logits = sess.run(y_conv, feed_dict=feed)[0]
    label = np.argmax(logits)
  feed[x] = scaled_inputs
  feed[t_label] = label
  grads, scores = sess.run([t_grad, y_conv], feed_dict=feed)  # shapes: <steps+1>, <steps+1, inp.shape>
  integrated_gradients = (inp-baseline)*np.average(grads[0], axis=0)  # shape: <inp.shape>
  print "FINAL SCORE", scores[-1][label]
  print "BASELINE SCORE", scores[0][label]
  print "SUM", np.sum(integrated_gradients), "DIFF", scores[-1][label] - scores[0][label]
  return integrated_gradients

def conductance(inp, label, neuron_id=None, baseline=None, steps=50):
  # neuron_id is the id of the neuron in layer t_fc1 through which conductance
  # must be computed. If None, vanilla IG is computed.
  if baseline is None:
    baseline = 0*inp
  scaled_inputs = [baseline + (float(i)/steps)*(inp-baseline) for i in range(0, steps)]
  feed = {keep_prob:1.0}
  feed[x] = scaled_inputs
  feed[t_label] = label
  if neuron_id != None:
    feed[t_neuron_id] = neuron_id
    grads, scores = sess.run([t_grad_conductance, y_conv], feed_dict=feed)  # shapes: <steps+1>, <steps+1, inp.shape>
    integrated_gradients = (inp-baseline)*np.average(grads[0], axis=0)  # shape: <inp.shape>
    return integrated_gradients
  grads, scores = sess.run([t_grad, y_conv], feed_dict=feed)  # shapes: <steps+1>, <steps+1, inp.shape>    
  integrated_gradients = (inp-baseline)*np.average(grads[0], axis=0)  # shape: <inp.shape>
  print "FINAL SCORE", scores[-1][label]
  print "BASELINE SCORE", scores[0][label]
  print "SUM", np.sum(integrated_gradients), "DIFF", scores[-1][label] - scores[0][label]
  return integrated_gradients

## Library for Visualizing Images and Attributions

In [0]:
import PIL.Image
from IPython.display import clear_output, Image, display, HTML
import numpy as np
from cStringIO import StringIO

In [0]:
FONT_PATH='/usr/share/fonts/truetype/dejavu/DejaVuSansCondensed.ttf'
IMAGE_SIZE = 28

def mnist_to_rgb(mnist_img):
  """
  Transformsn an MNIST image (shape: <784>) to a grayscale
  RGB image (shape: <28,28,3>)
  """
  pixel_array = mnist_img.reshape(IMAGE_SIZE, IMAGE_SIZE)  # shape: 28,28
  rgb_image = np.transpose([pixel_array,pixel_array,pixel_array], axes=[1,2,0])
  return rgb_image

def pil_img(a):
  '''Returns a PIL image created from the provided RGB array.
  '''
  a = np.uint8(a)
  return PIL.Image.fromarray(a)

def mnist_to_pil_img(inp):
  rgb_inp = 255*mnist_to_rgb(inp)
  vis_inp = pil_img(rgb_inp)
  return vis_inp  

def pil_fig(fig):
  # Returns a PIL image obtained from the provided PLT figure.
  buf = io.BytesIO()
  fig.savefig(buf, format='png')
  plt.close(fig)
  buf.seek(0)
  img = PIL.Image.open(buf)
  return img

def show_img(img, fmt='jpeg'):
  '''Displays the provided PIL image
  '''
  f = StringIO()
  img.save(f, fmt)
  display(Image(data=f.getvalue()))
 
def show_mnist_img(mnist_img):
  show_img(pil_img(255*mnist_to_rgb(mnist_img)))
  
def gray_scale(img):
  '''Converts the provided RGB image to gray scale.
  '''
  img = np.average(img, axis=2)
  return np.transpose([img, img, img], axes=[1,2,0])

def normalize(attrs, ptile=99):
  '''Normalize the provided attributions so that they fall between
     -1.0 and 1.0.
  '''
  h = np.percentile(attrs, ptile)
  l = np.percentile(attrs, 100-ptile)
  return np.clip(attrs/max(abs(h), abs(l)), -1.0, 1.0)    

def pil_text(strs, shape, start_h=10, start_w=10, font_size=18, color=(0, 0, 0)):
  # Returns a PIL image with the provided text.
  img = pil_img(255*np.ones(shape))
  draw = PIL.ImageDraw.Draw(img)
  font = PIL.ImageFont.truetype(FONT_PATH, font_size)
  h = start_h
  for s in strs: 
    draw.text((start_w,h), s, fill=color, font=font)
    h = h + 30
  return img

def combine(imgs, horizontal=True):
  # Combines the provided PIL Images horizontally or veritically
  if horizontal:
    w = np.sum([img.size[0]+10 for img in imgs])
    h = np.max([img.size[1] for img in imgs])
  else:
    w = np.max([img.size[0] for img in imgs])
    h = np.sum([img.size[1]+10 for img in imgs])
  final_img = PIL.Image.new('RGB', (w, h), color='white')
  pos = 0
  for img in imgs:
    if horizontal:
      final_img.paste(im=img, box=(pos,0))
      pos = pos+img.size[0]+10
    else:
      final_img.paste(im=img, box=(0,pos))
      pos = pos+img.size[1]+10
  return final_img

def visualize_attrs(img, attrs, ptile=99):
  '''Visaualizes the provided attributions by first aggregating them
    along the color channel to obtain per-pixel attributions and then
    scaling the intensities of the pixels in the original image in
    proportion to absolute value of these attributions.

    The provided image and attributions must of shape (224, 224, 3).
  '''
  if np.sum(attrs) == 0.0:
    # print "Attributions are all ZERO"
    return pil_img(0*img)
  attrs = gray_scale(attrs)
  attrs = abs(attrs)
  attrs = np.clip(attrs/np.percentile(attrs, ptile), 0,1)
  vis = img*attrs
  return pil_img(vis)
  
  
R=np.array([255,0,0])
G=np.array([0,255,0])
B=np.array([0,0,255])
def visualize_attrs2(img, attrs, pos_ch=G, neg_ch=R, ptile=99):
  '''Visaualizes the provided attributions by first aggregating them
     along the color channel and then overlaying the positive attributions
     along pos_ch, and negative attributions along neg_ch.

     The provided image and attributions must of shape (224, 224, 3).
  '''
  if np.sum(attrs) == 0.0:
    # print "Attributions are all ZERO"
    return pil_img(0*img)
  attrs = gray_scale(attrs)
  attrs = normalize(attrs, ptile)   
  pos_attrs = attrs * (attrs >= 0.0)
  neg_attrs = -1.0 * attrs * (attrs < 0.0)
  attrs_mask = pos_attrs*pos_ch + neg_attrs*neg_ch
  vis = 0.3*gray_scale(img) + 0.7*attrs_mask
  return pil_img(vis)

## Extracting Invariant Candidates

In [0]:
def fingerprint_suffix(inps):
  #relu_convs[0] is the output of first convolutional filter after relu, we can also use second or third filter for suffixes.
  return (get_prediction(inps, tensor=relu_convs[0], batch_size=1) > 0.0).astype('int')

def fingerprint_prefix(inps):
  return (get_prediction(inps, tensor=tf.reshape(w_convs[0], [-1, 128*128*3]), batch_size=1)>0.0).astype('int')

In [0]:
# train_suffixes, train_predictions are in the same order
# as mnist.train.images. Henceforth when we use the index i we will
# be referring to mnist.train.images[i].
#print len(x_train)
#print len(y_train)
#print len(x_test)
#print len(y_test)


train_suffixes = fingerprint_suffix(x_train)
print "Suffixes computed for all training data"
train_predictions = np.argmax(get_prediction(x_train, tensor=y_conv, batch_size=1), axis=1)
print "Predictions computed for all training data"
train_suffixes = train_suffixes.reshape(len(x_train),54*128)

In [60]:
# test_suffixes, test_predictions are in the same order
# as mnist.train.images. Henceforth when we use the index i we will
# be referring to mnist.train.images[i].
test_suffixes = fingerprint_suffix(x_test)
print "Suffixes computed for all test data"
test_predictions = np.argmax(get_prediction(x_test,tensor=y_conv, batch_size=10), axis=1)
print "Predictions computed for all test data"
test_suffixes = test_suffixes.reshape(len(x_test),54*128)

100%|██████████| 1066/1066 [00:01<00:00, 866.64it/s]
 36%|███▋      | 39/107 [00:00<00:00, 385.29it/s]

Suffixes computed for all test data


100%|██████████| 107/107 [00:00<00:00, 337.04it/s]

Predictions computed for all test data





In [0]:
def describe_input(i, training=True):
  #print "Input:", x_train[i]
  print "Input in words:", x_text_train[i]
  print "Groundtruth:", np.argmax(y_train,axis=1)[i]
  print "Prediction:", train_predictions[i]
  print "Fine-grained prediction", 10*np.argmax(y_train, axis=1)[i] + train_predictions[i]
  #show_mnist_img(mnist.train.images[i])

### Build the Decision Tree

In [0]:
# Basic decision tree
print len(x_train)
print len(train_suffixes)#???
print len(train_predictions)
print len(train_suffixes[0])

#print len(x_test)
#print len(test_suffixes)#???
#print len(test_predictions)

#basic_estimator = tree.DecisionTreeClassifier()
#basic_estimator = basic_estimator.fit(train_suffixes, train_predictions)
#get_all_invariants(basic_estimator)

basic_estimator = tree.DecisionTreeClassifier()
basic_estimator = basic_estimator.fit(train_suffixes, train_predictions)
get_all_invariants(basic_estimator)




In [73]:
# Fine-grained predictions decision tree
fine_grained_predictions = 10*np.argmax(y_train, axis=1) + train_predictions
print 'Misclassified in training data' if 10 in fine_grained_predictions or 1 in fine_grained_predictions else 'No misclassified in training data'
fine_grained_estimator = tree.DecisionTreeClassifier()
fine_grained_estimator = fine_grained_estimator.fit(train_suffixes, fine_grained_predictions)
get_all_invariants(fine_grained_estimator)
print fine_grained_predictions

No misclassified in training data


100%|██████████| 893/893 [00:00<00:00, 216591.31it/s]

Obtained all paths
[ 0  0  0 ... 11 11  0]





In [0]:
# Decision tree per label
def get_relative_predictions(label):
  print "Create relative predictions for label:%d" % label
  res = np.zeros(train_predictions.shape)
  for i in range(len(train_predictions)):
    pred = train_predictions[i]
    gt = y_train[i]
    if gt == label and pred == gt:
      res[i] = 0
    elif gt == label and pred != gt:
      res[i] = 1
    else:
      res[i] = 2
  print "Num correct: %d" % np.sum(res == 0)
  print "Num misclassified: %d" % np.sum(res == 1)
  print "Num others: %d" % np.sum(res == 2)
  return res

def get_relative_estimator(label):
  predictions = get_relative_predictions(label)
  print "Creating decision tree for label:%d" % label
  estimator = tree.DecisionTreeClassifier()
  estimator.fit(train_suffixes, predictions)
  return estimator

In [0]:
# SLOW; run only if you want to build relative estimators.
relative_estimators = [None for _ in range(10)]
for i in range(10):
  relative_estimators[i] = get_relative_estimator(i)

### Examine clusters/invariants

In [0]:
def get_decision_path(estimator, inp):
  # Extract the decision path taken by an input as an ordered list of indices
  # of the neurons that were evaluated.
  # See: http://scikit-learn.org/stable/auto_examples/tree/plot_unveil_tree_structure.html
  n_nodes = estimator.tree_.node_count
  feature = estimator.tree_.feature

  # First let's retrieve the decision path of each sample. The decision_path
  # method allows to retrieve the node indicator functions. A non zero element of
  # indicator matrix at the position (i, j) indicates that the sample i goes
  # through the node j.
  X_test = [inp]
  node_indicator = estimator.decision_path(X_test)
  # Similarly, we can also have the leaves ids reached by each sample.
  leaf_id = estimator.apply(X_test)
  # Now, it's possible to get the tests that were used to predict a sample or
  # a group of samples. First, let's make it for the sample.
  node_index = node_indicator.indices[node_indicator.indptr[0]:
                                      node_indicator.indptr[1]]
  neuron_ids = []
  for node_id in node_index:
    if leaf_id[0] == node_id:
        continue
    neuron_ids.append(feature[node_id])
  return neuron_ids

def get_suffix_cluster(neuron_ids, neuron_sig):
  # Get the cluster of inputs that such that all inputs in the cluster
  # have provided on/off signature for the provided neurons.
  #
  # The returned cluster is an array of indices (into mnist.train.images).
  return np.where((train_suffixes[:, neuron_ids] == neuron_sig).all(axis=1))[0]

def is_consistent_cluster(cluster, predictions):
  # Check if all inputs within the cluster have the same prediction.
  # 'cluster' is an array of input ids.
  pred = predictions[cluster[0]]
  for i in cluster:
    if predictions[i] != pred:
      return False
  return True

def is_misclassified(i):
  return train_predictions[i] != np.argmax(y_train,axis=1)[i]

def visualize_conductances(img, label, neuron_ids, only_on=False):
  # Visualize the conductances for the provided image.
  # Args:
  # - img: the provided mnist image
  # - label: prediction label w.r.t. conductance must be computed
  # - neuron_ids: list of neurons indices from the suffix tensor for which
  #    conductances must be computed.
  # - only_on: If True then conductance is computed only for those neurons
  #    that are on for the given image. 
  vis = [mnist_to_pil_img(img)]
  suffix = fingerprint_suffix([img])
  for i, id in enumerate(neuron_ids):
    if only_on and suffix[i] != 1:
      continue  
    igc = conductance(img, label, neuron_id=id)
    # igc = conductances[id]
    vis.append(visualize_attrs2(255*mnist_to_rgb(img), mnist_to_rgb(igc)))
  return combine(vis)

def get_invariant(estimator, ref_id):
  # Returns an invariant found w.r.t. the provided reference input
  # Args
  #  - ref_id: Index (into mnist.train.images) of the reference input
  # Returns:
  #  - cluster: Indices of training inputs that satisfy the invariant
  #  - neuron_id: A list of neurons such that all inputs that agree with
  #    the reference input on the on/off status of these neurons have the
  #    same prediction as the reference input.
  ref_img = x_train[ref_id]
  ref_suffix = train_suffixes[ref_id]
  neuron_ids = get_decision_path(estimator, ref_suffix)
  neuron_sig = ref_suffix[neuron_ids]
  cluster = get_suffix_cluster(neuron_ids, neuron_sig)
  return cluster, neuron_ids, neuron_sig

def get_all_invariants(estimator):
  # Returns a dictionary mapping each decision tree prediction class
  # to a list of invariants. Each invariant is specified as a triple:
  # - neuron ids
  # - neuron signature (for the neuron ids)
  # - number of training samples that hit it
  # The neuron ids and neuron signature can be supplied to get_suffix_cluster
  # to obtain the cluster of training instances that hit the invariant.
  def is_leaf(node):
    return estimator.tree_.children_left[node] == estimator.tree_.children_right[node]

  def left_child(node):
    return estimator.tree_.children_left[node]

  def right_child(node):
    return estimator.tree_.children_right[node]
  
  def get_all_paths_rec(node):
    # Returns a list of triples corresponding to paths
    # in the decision tree. Each triple consists of
    # - neurons encountered along the path
    # - signature along the path
    # - prediction class at the leaf
    # - number of training samples that hit the path
    # The prediction class and number of training samples
    # are set to -1 when the leaf is "impure".
    feature = estimator.tree_.feature
    if is_leaf(node):
      values = estimator.tree_.value[node][0]
      if len(np.where(values != 0)[0]) == 1:
        cl = estimator.classes_[np.where(values != 0)[0][0]]
        nsamples = estimator.tree_.n_node_samples[node]
      else:
        # impure node
        cl = -1
        nsamples = -1
      return [[[], [], cl, nsamples]]
    # If it is not a leaf both left and right childs must exist
    paths = [[[feature[node]] + p[0], [0] + p[1], p[2], p[3]] for p in get_all_paths_rec(left_child(node))]
    paths += [[[feature[node]] + p[0], [1] + p[1], p[2], p[3]] for p in get_all_paths_rec(right_child(node))]
    return paths
  paths =  get_all_paths_rec(0)
  print "Obtained all paths"
  invariants = {}
  for p in tqdm(paths):
    neuron_ids, neuron_sig, cl, nsamples = p
    if cl not in invariants:
      invariants[cl] = []
    # cluster = get_suffix_cluster(neuron_ids, neuron_sig)
    invariants[cl].append([neuron_ids, neuron_sig, nsamples])
  for cl in invariants.keys():
    invariants[cl] = sorted(invariants[cl], key=operator.itemgetter(2), reverse=True)
  return invariants

def describe_cluster(cluster, neuron_ids):
  neuron_sig = train_suffixes[cluster[0]][neuron_ids]
  print "Num neurons in invariant", len(neuron_ids)
  print "Neuron id and signature", zip(neuron_ids, neuron_sig)
  print "Cluster size: ", len(cluster)
  print "Num misclassified", len([i for i in cluster if is_misclassified(i)])

def describe_all_invariants(all_invariants):
  df = []
  for cl, invs in all_invariants.iteritems():
    # Note the number of invariants, and size of the largest invariant cluster
    df.append([cl, sum([inv[2] for inv in invs]), len(invs), len([inv for inv in invs if inv[2]>=10]), invs[0][2]])
  df = pd.DataFrame(df, columns=['Prediction Class', 'Num Instances', 'Num Invariants', 'Num Invariants with cluster size >= 10', 'Size of largest invariant cluster'])
  return df

In [50]:
get_all_invariants(basic_estimator)
# Examine cluster/invariants containing a given reference input
# ref_id is the index of the reference input
ref_id =  1
print "### Reference Sentence ###"
#TODO Convert reference sentence back to words
describe_input(ref_id)
print "### Cluster ###"
cluster, neuron_ids, neuron_sig = get_invariant(fine_grained_estimator, ref_id)
describe_cluster(cluster, neuron_ids)

# Visualize  10 inputs in the cluster
for i in cluster[:10]:
  describe_input(i)
  

100%|██████████| 897/897 [00:00<00:00, 353432.66it/s]

Obtained all paths
### Reference Sentence ###
Input in words: at 90 minutes this movie is short , but it feels much longer
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
### Cluster ###
Num neurons in invariant 18
Neuron id and signature [(54, 0), (389, 0), (457, 1), (1462, 0), (901, 0), (4632, 0), (49, 0), (1709, 0), (527, 0), (2639, 0), (306, 0), (1620, 0), (4285, 0), (1476, 0), (3532, 0), (219, 0), (178, 1), (17, 0)]
Cluster size:  27
Num misclassified 0
Input in words: at 90 minutes this movie is short , but it feels much longer
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: the premise for this kegger comedy probably sounded brilliant four six packs and a pitcher of margaritas in , but the film must have been written in the thrall of a vicious hangover
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: a long winded , predictable scenario
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: all in all , road to 




In [51]:
# Get all fine_grained_estimator invariants
fge_all_invariants = get_all_invariants(fine_grained_estimator)
# Print invariant stats
df = describe_all_invariants(fge_all_invariants)
print "Total num invariants:", df['Num Invariants'].sum()
print "Total num invariants with cluster size >= 10:", df['Num Invariants with cluster size >= 10'].sum()
print df.to_string(index=False)



100%|██████████| 901/901 [00:00<00:00, 218002.19it/s]

Obtained all paths
Total num invariants: 901
Total num invariants with cluster size >= 10: 190
Prediction Class  Num Instances  Num Invariants  Num Invariants with cluster size >= 10  Size of largest invariant cluster
               0           4780             449                                      95                                352
              11           4816             452                                      95                                399





### Analyzing clusters of misclassified inputs

In [53]:
# Examine the cluster for a misclasification (Groundtruth: 4, Prediction: 49)
invs = fge_all_invariants[00]
neuron_ids, neuron_sig, _ = invs[0]
cluster = get_suffix_cluster(neuron_ids, neuron_sig)
describe_cluster(cluster, neuron_ids)

# Visualize  10 inputs in the cluster
for i in cluster[:10]:
  describe_input(i)
  # show_img(visualize_conductances(mnist.train.images[i], train_predictions[i], neuron_ids, only_on=False))

Num neurons in invariant 17
Neuron id and signature [(54, 0), (389, 0), (457, 1), (1462, 0), (901, 0), (4632, 0), (49, 0), (1709, 0), (527, 0), (2639, 0), (306, 0), (1620, 0), (4285, 0), (1476, 0), (3532, 0), (219, 0), (178, 0)]
Cluster size:  352
Num misclassified 0
Input in words: a rote exercise in both animation and storytelling
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: two hours of junk
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: in execution , this clever idea is far less funny than the original , killers from space
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: i have to admit that i am baffled by jason x
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: this thing is just garbage
Groundtruth: 0
Prediction: 0
Fine-grained prediction 0
Input in words: the idea is more interesting than the screenplay , which lags badly in the middle and lurches between not very funny comedy , uncon

### Test Accuracy Improvements

In [0]:
# We use the fine_grained_estimator to check if an input belongs
# to a pure cluster (i.e., prediction id of the form 10*label + label).
# If so, we declare the network's prediction as a "condident prediction".
# We measure the accuracy of confident_predictions.
fine_grained_estimator_test_predictions = fine_grained_estimator.predict(test_suffixes)
fine_grained_estimator_leaf_nodes = fine_grained_estimator.apply(test_suffixes)

In [0]:
def test_accuracy_for_label(label):
  def get_confidence():
    is_confident = (fine_grained_estimator_test_predictions == 10*label + label)
    sufficient_samples = fine_grained_estimator.tree_.n_node_samples[fine_grained_estimator_leaf_nodes] >= 10
    is_confident *= sufficient_samples
    return is_confident
  # Following are boolean array. For e.g., with_label[i] is True if
  # image i has the given label
  with_label = (np.argmax(y_test, axis=1) == label)
  print with_label
  is_correct = (test_predictions == np.argmax(y_test, axis=1))
  with_label_and_correct = with_label*is_correct
  is_confident = get_confidence()
  with_label_and_correct_and_confident = with_label_and_correct*is_confident
  with_label_and_confident = with_label*is_confident

  total = np.sum(with_label)
  num_conf = np.sum(with_label_and_confident) 
  num_correct = np.sum(with_label_and_correct)
  num_correct_conf = np.sum(with_label_and_correct_and_confident)
  return total, num_conf, num_correct, num_correct_conf

In [68]:
df = []
grand_total = 0
grand_correct = 0
grand_conf = 0
grand_correct_conf = 0
for i in range(10):
  total, num_conf, num_correct, num_correct_conf = test_accuracy_for_label(i)
  grand_total += total
  grand_conf += num_conf
  grand_correct += num_correct
  grand_correct_conf += num_correct_conf
  acc = 1.0*num_correct/total
  conf_acc = 1.0*num_correct_conf/num_conf
  df.append([i, total, num_conf, acc, conf_acc])
df = pd.DataFrame(df, columns=['Label', 'Instances', 'ConfidentInstances',  'Acc', 'ConfidentAcc',])
display(df)
print "Total Instances", grand_total
print "Num Confident Instances", grand_conf
print "Orig Accuracy", 1.0*grand_correct/grand_total
print "Confident Accuracy", 1.0*grand_correct_conf/grand_conf

[ True False  True ... False  True False]
[False  True False ...  True False  True]
[False False False ... False False False]


ZeroDivisionError: ignored

### Visualizing the Decision Tree

In [69]:
!apt-get install graphviz
!pip install graphviz
import graphviz

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  fontconfig libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3
  libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7
  libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4
  libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0
  libxcb-shm0 libxmu6 libxpm4 libxt6
Suggested packages:
  gsfonts graphviz-doc libgd-tools
The following NEW packages will be installed:
  fontconfig graphviz libann0 libcairo2 libcdt5 libcgraph6 libdatrie1 libgd3
  libgts-0.7-5 libgts-bin libgvc6 libgvpr2 libjbig0 liblab-gamut1 libltdl7
  libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpathplan4
  libpixman-1-0 libthai-data libthai0 libtiff5 libwebp6 libxaw7 libxcb-render0
  libxcb-shm0 libxmu6 libxpm4 libxt6
0 upgraded, 30 newly installed, 0 to remove and 2 not upgraded.
Need to get 4,154 kB of

In [0]:
dot_data = tree.export_graphviz(basic_estimator, out_file=None) 
graph = graphviz.Source(dot_data)  
graph

In [0]:
dot_data = tree.export_graphviz(fine_grained_estimator, out_file=None) 
graph = graphviz.Source(dot_data)  
graph