# Experimenting with Deep Scite

Recall the model, for reference.

![](deep-scite-model-1.png)

In [1]:
import html
import os

import numpy as np
import ruamel.yaml
import tensorflow as tf
from IPython.core.display import display, HTML

from deepscite import model
from deepscite import utils
from deepscite import train

In [2]:
# Root that the data and checkpoint paths in this notebook are joined onto.
base_dir = "../"
# Data directory; the vocabulary is loaded from here and it is also
# handed to the model code through `conf.data_dir`.
data_dir = os.path.join(base_dir, "data/noon/")

Let's define the parameters we want to use during training/inference.

In [3]:
# `conf` is TensorFlow's global flags object. It is shared global state that
# is used in various places in the model, so every assignment below is
# visible to the deepscite code as well as to this notebook.
conf = tf.app.flags.FLAGS

# Sizes handed to the model (see the model constructor and `graph(...)` call
# in the inference section).
conf.embedded_word_size = 250
conf.word_vector_size = 500
conf.conv_size = 3
conf.conv_stride = 1
conf.conv_features = 1

# Training settings. These are not referenced again in this notebook;
# presumably they are consumed by `deepscite.train` -- TODO confirm.
conf.iterations = 100
conf.learning_rate = 1e-3
conf.weights_reg_scale = 1e-6
conf.activity_reg_scale = 1e-6
conf.embedding_reg_scale = 1e-6

# Filesystem locations.
conf.data_dir = data_dir
conf.log_path = "/tmp/tf-checkpoints/deepscite-noon"
conf.save_path = os.path.join(base_dir, "./checkpoints/noon")

# Directory that the inference step restores the latest checkpoint from.
# NOTE(review): the training log shows checkpoints being written under
# `conf.log_path`, not under this directory -- confirm which one inference
# is meant to read.
checkpoint_path = os.path.join(base_dir, "checkpoints/noon/")

## Training Step

In [4]:
# Train with a minibatch size of 500 on a fresh default graph.
conf.minibatch_size = 500
tf.reset_default_graph()

# The original call passed IPython's `_` (the last displayed cell output), so
# the argument depended on whatever cell happened to run before this one.
# Pass an explicit empty placeholder instead.
# NOTE(review): assumes `train.main` ignores its argument, as the previous
# use of `_` implies -- confirm against deepscite/train.py.
train.main([])

Initialising new model...
Instructions for updating:
Use `tf.global_variables_initializer` instead.
Iteration #0, Loss: 0.6931469440460205, α: 0.5.
Checkpointed: /tmp/tf-checkpoints/deepscite-noon/def/checkpoint-0.
Iteration #0, Validation-set accuracy: 0.4659999907016754.
Iteration #10, Loss: 2.2052810192108154, α: 0.500728964805603.
Checkpointed: /tmp/tf-checkpoints/deepscite-noon/def/checkpoint-10.
Iteration #10, Validation-set accuracy: 0.7639999985694885.
Iteration #20, Loss: 1.027888536453247, α: 0.5028527975082397.
Checkpointed: /tmp/tf-checkpoints/deepscite-noon/def/checkpoint-20.
Iteration #20, Validation-set accuracy: 0.8180000185966492.
Iteration #30, Loss: 0.7648000121116638, α: 0.505841851234436.
Checkpointed: /tmp/tf-checkpoints/deepscite-noon/def/checkpoint-30.
Iteration #30, Validation-set accuracy: 0.8240000009536743.
Iteration #40, Loss: 0.6196095943450928, α: 0.5092655420303345.
Checkpointed: /tmp/tf-checkpoints/deepscite-noon/def/checkpoint-40.
Iteration #40, Valida

Let's feed a single paper (title, abstract) into DeepScite and see what it thinks.

## Inference step

We need to convert the text into the format needed by the model. Each word is mapped to the index of its vector in the word embedding matrix (i.e. its index in the `vocab.txt` file).

![](deep-scite-model-with-vectors.png)

In [5]:
# Vocabulary as an ordered list, plus the reverse lookup word -> position.
# If a word occurs more than once in the list, its last position wins.
vocab_list = utils.load_vocabulary(data_dir)
vocab_dict = {word: index for index, word in enumerate(vocab_list)}

In [6]:
def get_wordids_for(s):
    """Convert text into a space-separated string of vocabulary indices.

    The text is split with `utils.to_words`; words that are not keys of
    `vocab_dict` are skipped.

    Args:
        s: the text to convert.

    Returns:
        The indices of the known words, in order, joined by single spaces.

    Raises:
        Exception: if none of the words are in the vocabulary.
    """
    word_ids = []
    for word in utils.to_words(s):
        if word in vocab_dict:
            word_ids.append(str(vocab_dict[word]))

    if not word_ids:
        raise Exception("Found no words at all!")

    return " ".join(word_ids)

def words_to_html(words, activations, threshold=5):
    """Render words as HTML `<span>` elements coloured by their activation.

    A word is coloured blue when its rounded activation is above
    `threshold`, red when it is below `-threshold`, and left unstyled
    otherwise. Each span carries a tooltip with the rounded activation.

    Args:
        words: sequence of words to render.
        activations: indexable sequence of numbers; `activations[k]` is used
            for `words[k]`, so it needs at least `len(words)` entries.
        threshold: magnitude an activation must exceed to be highlighted.

    Returns:
        A single string: one `<span>` per word, joined by spaces.
    """
    spans = []

    for k, word in enumerate(words):
        activation = round(float(activations[k]), 2)

        # Red is checked first so that it wins if both conditions hold
        # (possible only with a negative threshold), matching the original
        # behaviour where the red assignment came last.
        if activation < -threshold:
            style = "color: red !important;"
        elif activation > threshold:
            style = "color: blue !important;"
        else:
            style = ""

        # The tooltip shows the rounded activation twice; this is kept so the
        # rendered markup is unchanged for ordinary words.
        # The word itself is escaped so that characters such as `<` or `&`
        # in the input text cannot break or inject markup.
        spans.append("<span style='{}' title='({},{})'>{}</span>".format(
            style,
            activation,
            activation,
            html.escape(str(word))))

    return " ".join(spans)

## Load the model and emit a prediction

In [10]:
def infer(title, abstract):
    """Run the trained model on a single paper.

    Builds a fresh graph, restores the latest checkpoint found in
    `checkpoint_path`, and evaluates the model on one (title, abstract) pair.

    Note: this sets the global `conf.minibatch_size` to 1 as a side effect.

    Args:
        title: the paper's title, as text.
        abstract: the paper's abstract, as text.

    Returns:
        A 4-tuple `(set1_activations, set2_activations, final_prob, alpha)`:
        the first entry of the squeezed convolution activity for word set 1
        (the title) and for word set 2 (the abstract), the first entry of the
        model's `final_probs`, and the model's `alpha` value.

    Raises:
        Exception: if no checkpoint is found in `checkpoint_path`, or (from
            `get_wordids_for`) if the title or abstract contains no words
            from the vocabulary.
    """
    inputs = [ {"id": "",
                "wordset_1_ids": get_wordids_for(title),
                "wordset_2_ids": get_wordids_for(abstract) } ]

    # TensorFlow uses a lot of global state. As a result, if we wish to
    # re-run this cell many times, we need to reset the default graph to
    # ensure nothing is kept over. This is done *before* the model object is
    # created so that anything the constructor might register also lands in
    # the fresh graph.
    tf.reset_default_graph()

    m = model.JointEmbeddingModelForBinaryClassification(conf.embedded_word_size)

    # We're only inputting one piece of data - a single paper.
    conf.minibatch_size = 1

    with tf.Session() as sess:

        model_params = m.graph(
            conf.minibatch_size,
            len(vocab_list),
            conf.word_vector_size,
            conf.conv_size,
            conf.conv_stride,
            conf.conv_features
        )

        # Load the trained weights
        saver = tf.train.Saver()
        checkpoint = tf.train.latest_checkpoint(checkpoint_path)

        if not checkpoint:
            raise Exception("Couldn't find checkpoint at: {}".format(checkpoint_path))

        saver.restore(sess, checkpoint)

        # The third and last values returned by `get_datapoints` are not
        # needed for inference.
        X1, X2, _unused, M1, M2, S1, S2, subset = train.get_datapoints(inputs)
        data = {model_params.wordset_1: X1,
                model_params.wordset_2: X2,
                model_params.wordset_1_masks: M1,
                model_params.wordset_2_masks: M2,
                model_params.wordset_1_lengths: S1,
                model_params.wordset_2_lengths: S2}

        # Calculate the recommendations
        set1_activations, set2_activations, final_probs, alpha = sess.run([
            tf.squeeze(model_params.conv_wordset_1_activity, [2,3]),
            tf.squeeze(model_params.conv_wordset_2_activity, [2,3]),
            model_params.final_probs,
            model_params.alpha],
            feed_dict=data)

    # The minibatch holds exactly one paper, so take its (only) entry.
    return set1_activations[0], set2_activations[0], final_probs[0], alpha

## With what probability would Noon *scite* this paper?

Enter candidate titles and abstracts below. You can find inspiration over at [SciRate](https://scirate.com).

In [37]:
# Example paper rated "very good": the run below reports a scite
# probability of 99.80% for it. Replace both strings to try another paper.
title = r"""
Universal Quantum Hamiltonians
"""

abstract = r"""
Quantum many-body systems exhibit a bewilderingly
diverse range of behaviours. Here, we prove that
all the physics of every other quantum many-body
system is replicated in certain simple, "universal"
quantum spin-lattice models. We first characterise 
precisely and in full generality what it means for 
one quantum many-body system to replicate the entire
physics of another. We then fully classify two-qubit
interactions, determining which are universal in this
very strong sense and showing that certain simple 
spin-lattice models are already universal. Examples
include the Heisenberg and XY models on a 2D square 
lattice (with non-uniform coupling strengths). This
shows that locality, symmetry, and spatial dimension 
need not constrain the physics of quantum many-body systems.
Our results put the practical field of analogue Hamiltonian
simulation on a rigorous footing and show that far simpler
systems than previously thought may be viable simulators.
We also take a first step towards justifying why
error correction may not be required for this application
of quantum information technology.
"""

In [38]:
# Score the paper defined above and keep every returned value for the cells
# that follow.
inference_result = infer(title, abstract)
set1_activations, set2_activations, final_probs, alpha = inference_result

# Report the probability as a percentage.
scite_message = "Scite Probability: {0:2.2f}%".format(final_probs * 100)
print(scite_message)

Scite Probability: 99.80%


## Why?

In [39]:
# Highlight threshold: half the mean magnitude of the non-zero title
# activations.
# NOTE(review): the threshold is derived from the title activations only but
# is applied to the abstract as well -- confirm that this is intended.
nonzero = [abs(x) for x in set1_activations if abs(x) > 0]

# With no non-zero activations, `np.mean([])` would give NaN plus a
# RuntimeWarning. An infinite threshold gives the same outcome (nothing is
# highlighted) without the warning.
threshold = np.mean(nonzero) / 2 if nonzero else float("inf")

title_words    = utils.to_words(title)
abstract_words = utils.to_words(abstract)

display(HTML(words_to_html(title_words,    set1_activations, threshold)))
display(HTML(words_to_html(abstract_words, set2_activations, threshold)))

## Weighting parameter

$$
    p = \alpha \cdot \text{titles} + (1-\alpha) \cdot \text{abstracts}
$$

In [40]:
# The model's `alpha` value as returned by `infer` above (the weighting
# parameter in the formula); shown as this cell's output.
alpha

0.52650487