In [None]:

import tensorflow as tf
import numpy as np
import sys
import feedparser
import time
from datetime import datetime, timedelta
import requests
import base64
from ttp import ttp

sys.path.append('/content/grover')
from lm.modeling import GroverConfig, sample
from sample.encoder import get_encoder, _tokenize_article_pieces, extract_generated_target
import random

def generate_article_attribute(sess, encoder, tokens, probs, article, target='article',
                               top_p=0.95, verbose=False):

    """
    Given attributes about an article (title, author, etc), use that context to generate
    a replacement for one of those attributes using the Grover model.
    This function is based on the Grover examples distributed with the Grover code.

    Args:
        sess: Session-like object; ``sess.run(fetches, feed_dict=...)`` is called once.
        encoder: Grover encoder. Must expose ``special_tokens_onehot`` and, as instance
            attributes, the ``begin_<target>`` / ``end_<target>`` token ids.
        tokens: Graph output fetched as the generated token ids.
        probs: Graph output fetched alongside ``tokens`` (fetched but not used here).
        article: Article description handed to ``_tokenize_article_pieces``.
            Side effect: ``article['top_ps']`` is set to ``[top_p]``.
        target: Name of the attribute to generate (e.g. ``'title'`` or ``'article'``).
            Every other key in domain/date/authors/title/article is used as context.
        top_p: Nucleus-sampling cut-off fed to the model. Defaults to 0.95.
        verbose: When True, print the tokenized article pieces (debugging aid).
            Off by default because the token lists are very long.

    Returns:
        The text extracted for ``target`` from the last generated sequence.

    Raises:
        KeyError: if the encoder has no ``begin_<target>`` / ``end_<target>`` attribute.

    Note:
        This function reads the module-level placeholders ``initial_context``,
        ``eos_token``, ``ignore_ids`` and ``p_for_topp``; they must exist (in the same
        graph as ``tokens`` / ``probs``) before this function is called.
    """

    # Tokenize the raw article text
    article_pieces = _tokenize_article_pieces(encoder, article)
    if verbose:
        print(article_pieces)

    # Grab the article elements the model cares about - domain, date, title, etc.
    # Pieces the tokenizer did not produce are simply skipped.
    context_formatted = []
    for key in ['domain', 'date', 'authors', 'title', 'article']:
        if key != target:
            context_formatted.extend(article_pieces.pop(key, []))

    # Look up the begin/end marker token ids for the requested attribute once.
    encoder_attrs = vars(encoder)
    begin_token = encoder_attrs['begin_{}'.format(target)]
    end_token = encoder_attrs['end_{}'.format(target)]

    # Start formatting the tokens in the way the model expects them, starting with
    # which article attribute we want to generate.
    context_formatted.append(begin_token)

    # Tell the model which special tokens aren't part of the text. The entry for the
    # target's end token is cleared so it is not treated as one of the ignored ids.
    ignore_ids_np = np.array(encoder.special_tokens_onehot)
    ignore_ids_np[end_token] = 0

    # We are only going to generate one article attribute with a single top-p
    # cut-off. This simple example isn't processing in batches.
    # Kept for backward compatibility: the cut-off used is recorded on the article.
    article['top_ps'] = [top_p]

    # Run the input through the TensorFlow model and grab the generated output
    tokens_out, probs_out = sess.run(
        [tokens, probs],
        feed_dict={
            # Pass real values for the inputs that the
            # model needs to be able to run.
            initial_context: [context_formatted],
            eos_token: end_token,
            ignore_ids: ignore_ids_np,
            p_for_topp: np.array([top_p]),
        }
    )

    # The model is done! Grab the results it generated and format the results into normal text.
    gens = [
        extract_generated_target(output_tokens=t_i, encoder=encoder, target=target)['extraction']
        for t_i in tokens_out
    ]

    # Return the generated text.
    return gens[-1]

In [None]:
# Seed description of the article used as generation context.
# 'title' and 'text' are overwritten with model output by the generation cell.
article = dict(
    summary='a velvet wrapped blade',
    title="the mailbox was full",
    text='',
    authors=[],
    publish_date='04-04-2012',
    # Timestamp of when this cell ran, not of the fictional publish date.
    iso_date=datetime.now().isoformat(),
    domain='nosleep',
    image_url=None,
    tags=None,
)

In [None]:
%cd /content/

# Load the pre-trained "huge" Grover model with 1.5 billion params
model_config_fn = '/content/grover/lm/configs/base.json'
model_ckpt = 'gs://misrak_capstone/LayerUpdate/model.ckpt-850000'#'/content/model.ckpt-850000'
encoder = get_encoder()
news_config = GroverConfig.from_json_file(model_config_fn)

# Set up TensorFlow session to make predictions
tf_config = tf.ConfigProto(allow_soft_placement=True)

with tf.Session(config=tf_config, graph=tf.Graph()) as sess:
    # Create the placehodler TensorFlow input variables needed to feed data to Grover model
    # to make new predictions.
    initial_context = tf.placeholder(tf.int32, [1, None])
    p_for_topp = tf.placeholder(tf.float32, [1])
    eos_token = tf.placeholder(tf.int32, [])
    ignore_ids = tf.placeholder(tf.bool, [news_config.vocab_size])

    

    # Load the model config to get it set up to match the pre-trained model weights
    tokens, probs = sample(
        news_config=news_config,
        initial_context=initial_context,
        eos_token=eos_token,
        ignore_ids=ignore_ids,
        p_for_topp=p_for_topp,
        do_topk=False
    )

    # Restore the pre-trained Grover model weights
    saver = tf.train.Saver()
    saver.restore(sess, model_ckpt)
    

    print(f"Building article from headline, summary and text'")
    article['title'] = generate_article_attribute(sess, encoder, tokens, probs, article, target="title")
    article['text'] = generate_article_attribute(sess, encoder, tokens, probs, article, target="article")
    #article['summary'] = generate_article_attribute(sess, encoder, tokens, probs, article, target="summary")

/content
Tensor("Placeholder:0", shape=(1, ?), dtype=int32)
Tensor("Placeholder_1:0", shape=(1,), dtype=float32)
Tensor("Placeholder_2:0", shape=(), dtype=int32)
Tensor("Placeholder_3:0", shape=(50270,), dtype=bool)
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from gs://misrak_capstone/LayerUpdate/model.ckpt-850000
Building article from headline, summary and text'
{'article': [50265, 50266], 'domain': [50257, 39370, 8893, 50258], 'title': [50263, 1170, 37283, 374, 1337, 50264], 'date': [50259, 16785, 8703, 12, 2322, 50260]}
{'article': [50265, 50266], 'domain': [50257, 39370, 8893, 50258], 'title': [50263, 28255, 5784, 31, 4519, 17690, 14952, 31, 1401, 735, 1725, 531, 319, 263, 6773, 14, 785, 2751, 32725, 2546, 510, 14, 199, 8, 6192, 26, 310, 7903, 12, 384, 2298, 17690, 9, 417, 2009, 12538, 287, 263, 310, 829, 417, 7624, 12538, 573, 465, 5399, 12764, 12, 384, 2298, 17690, 562, 589, 512, 8459, 656, 285, 1561,

In [None]:
# Show the title stored on the article; the generation cell replaced the hand-written one.
article['title']

'Hell Hell? Blue Sox Superman? No two means one is the cycle. – By hobbit K.\n(Photo: TCS, The Red Sox) by video courtesy of the T), by Video courtesy offThe Canadian highway, The Red Sox would like their luck just to tell you any threat to the north, in the next eight weeks. Town – any? Hyun, road? Epic traffic, storm? I-kangazon, snow? Why not we tell the community something I-T-S, I-T-I?? It’s the logic of my column.\nChris Greitnger headed to Times Tier Tier Town, the Port Galle Power Project. T’s up in a rubber-striped Blue-Knebthsome whistle activated, of course, to announce exactly how was going to be played, and that would mean a nearly two hour long presentation of the latest information, emphasizing not only the problems we talked about just then.\n• T’s manager – I Trust c and said that the river was going to become an amazing beach of its own. This given to it. The action, the public to perceive the river would be more exciting than the pen ever had, and that was C (sic) th

In [None]:

# Show the generated article body stored under 'text'.
article['text']

'What it means as it is one'

In [None]:
# Show the summary. Its generation call is commented out in the generation cell,
# so this is still the hand-written value from the article definition.
article['summary']