# Model Architecture

In [4]:
from keras.layers import (Input,
Embedding, BatchNormalization, GRU, Dense,
merge, TimeDistributed)
from keras.models import Model
from keras import optimizers

In [5]:
LATENT_DIM = 512

BODY_LENGTH = 1000
TITLE_LENGTH = 15

NUM_ENCODER_TOKENS = 8000
NUM_DECODER_TOKENS = 5000

bidirectional = False
attention = False

#### Encoder ####
encoder_inputs = Input(shape=(BODY_LENGTH,), name='Encoder-Input')

x = Embedding(NUM_ENCODER_TOKENS, LATENT_DIM, mask_zero=False, name='Body-Word-Embedding')(encoder_inputs)
x = BatchNormalization(name='Encoder-BatchNorm')(x)

_, state_h = GRU(LATENT_DIM, return_state=True, name='Encoder-Last-GRU')(x)
    
encoder_model = Model(encoder_inputs, state_h, name='Encoder-Model')

encoder_out = encoder_model(encoder_inputs)

#### Decoder ####
decoder_inputs = Input(shape=(None,), name='Decoder-Input')

x = Embedding(NUM_DECODER_TOKENS, LATENT_DIM, mask_zero=False, name='Title-Word-Embedding')(decoder_inputs)
x = BatchNormalization(name='Decoder-BatchNorm-1')(x)

decoder_gru = GRU(LATENT_DIM, return_state=True, return_sequences=True, name='Decoder-GRU')

decoder_gru_out, _ = decoder_gru(x, initial_state=encoder_out)

x = TimeDistributed(BatchNormalization(), name='Decoder-BatchNorm-2')(decoder_gru_out)

decoder_out = Dense(NUM_DECODER_TOKENS, activation='softmax', name='Final-Output-Dense')(x)

#### Seq2Seq Model ####
seq2seq_Model = Model([encoder_inputs, decoder_inputs], decoder_out, name='Seq2Seq-Model')

seq2seq_Model.compile(optimizer=optimizers.Nadam(lr=1e-3), loss='sparse_categorical_crossentropy')

seq2seq_Model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Decoder-Input (InputLayer)      (None, None)         0                                            
__________________________________________________________________________________________________
Title-Word-Embedding (Embedding (None, None, 512)    2560000     Decoder-Input[0][0]              
__________________________________________________________________________________________________
Encoder-Input (InputLayer)      (None, 1000)         0                                            
__________________________________________________________________________________________________
Decoder-BatchNorm-1 (BatchNorma (None, None, 512)    2048        Title-Word-Embedding[0][0]       
__________________________________________________________________________________________________
Encoder-Mo

In [6]:
from keras.utils import plot_model
plot_model(seq2seq_Model, to_file='model.png')

# Train Model

In [7]:
import pandas as pd

df = pd.read_csv('articles1.csv').sample(n=10)
SAMPLE_BODIES = df.content.tolist()
SAMPLE_TITLES = df.title.tolist()

In [8]:
SAMPLE_TITLES

['WATCH: Milo Yiannopoulos Challenges Mark Zuckerberg To A Debate',
 'Review: ‘Nicotine,’ the Stuff of Burning Desire - The New York Times',
 'Arkansas sheriff’s deputy killed suspect in custody',
 'Evictions by Armed Men Rattle a Mexican Tourist Paradise - The New York Times',
 'Supreme Court Upholds Affirmative Action Program at University of Texas - The New York Times',
 'Spirit cancels flight, passenger brawl breaks out ',
 'Retesting of Doping Samples Could Bar Dozens From Rio Olympics - The New York Times',
 'US drone strike kills French ISIS operative',
 '3.1 million acres burned in Alaska in 2015',
 'Florida Man Strangled Wife, Then Impersonated Her on Facebook, Police Say - The New York Times']

In [9]:
from IPython.display import Audio, display
from Helpers import load_tokenizer
from keras.callbacks import Callback

tk_body = load_tokenizer('tk_body.dpkl')
tk_title = load_tokenizer('tk_title.dpkl')

Size of vocabulary for tk_body.dpkl: 8000
Size of vocabulary for tk_title.dpkl: 5000


In [10]:
from Helpers import load_encoder_inputs, load_decoder_inputs

encoder_input_data, _ = load_encoder_inputs('train_body_vecs.npy')
decoder_input_data, decoder_target_data = load_decoder_inputs('train_title_vecs.npy')

Shape of encoder input: (40000, 1000)
Shape of decoder input: (40000, 14)
Shape of decoder target: (40000, 14)


In [17]:
from predict import Predictor

class LossHistory(Callback):
    def on_epoch_end(self, epoch, logs):
        display(Audio(filename='notification.mp3', autoplay=True))
        print('\n')
        print('epoch: {}'.format(epoch))
        print('val_loss: {}'.format(logs.get('val_loss')))
        
        predictor = Predictor(seq2seq_Model, tk_body, tk_title, BODY_LENGTH, 12)
        
        for body in SAMPLE_BODIES:
            title = predictor.create_title(body)
            print('Generated title: {}'.format(title))

In [18]:
# Testing out class LossHistory
predictor = Predictor(seq2seq_Model, tk_body, tk_title, BODY_LENGTH, 12)
print(predictor.create_title(SAMPLE_BODIES[0]))
print(predictor.create_title(SAMPLE_BODIES[1]))
print(predictor.create_title(SAMPLE_BODIES[2]))

milo yiannopoulos leads with facebook to begin despite twitter breitbart
review ’ the of burning the new york times
arkansas deputy killed suspect in custody


In [19]:
import numpy as np
from keras.callbacks import ModelCheckpoint

batch_size = 256
epochs = 10
history = LossHistory()
checkpointer = ModelCheckpoint(filepath='/tmp/articles1.{epoch:02d}-{val_loss:.2f}.h5',
                               verbose=1)

seq2seq_Model.fit([encoder_input_data, decoder_input_data], np.expand_dims(decoder_target_data, -1),
                   batch_size=batch_size,
                   epochs=epochs,
                   validation_split=0.10, 
                   callbacks=[history, checkpointer])

Train on 36000 samples, validate on 4000 samples
Epoch 1/10



epoch: 8
val_loss: 4.931356330871582
Generated title: why left twitter and the left doesn’t like social media breitbart
Generated title: review ’ the of burning the new york times
Generated title: arkansas deputy killed suspect in custody
Generated title: what you need to know about the day of the new york
Generated title: supreme court upholds action program at university of texas the new
Generated title: stunning national anthem being dragged into plane
Generated title: russia’s track is field team banned from rio games the new york
Generated title: us us drone strike kills french president
Generated title: 3 1 million burned in alaska in 2015
Generated title: florida man wife then her on facebook in killing of 2 face
Epoch 00001: saving model to /tmp/articles1.01-4.93.h5


  str(node.arguments) + '. They will not be included '


Epoch 2/10



epoch: 9
val_loss: 5.147194458007813
Generated title: and social media conservative review its milo as forced to fight
Generated title: review ’ the of burning the new york times
Generated title: arkansas deputy killed suspect in custody
Generated title: ‘i you mean it’s like for the new york times
Generated title: supreme court upholds action program at university of texas the new
Generated title: what to learn from being sued over for retirement
Generated title: most athletes study world is the new york times
Generated title: us drone strike kills french isis
Generated title: 3 1 million burned in alaska in 2015
Generated title: florida man wife then her on facebook police say the new york
Epoch 00002: saving model to /tmp/articles1.02-5.15.h5
Epoch 3/10



epoch: 10
val_loss: 5.278330558776855
Generated title: how social media conservative experts free speech online breitbart
Generated title: review ’ the of burning the new york times
Generated title: arkansas deputy killed suspect in custody in
Generated title: ‘i you think that have or mean it’s the new york times
Generated title: supreme court upholds action program at university of texas the new york
Generated title: stunning of being dragged into plane
Generated title: rio olympics already has a long nightmare breitbart
Generated title: us drone strike kills french isis
Generated title: 3 1 million burned in alaska in 2015
Generated title: florida man wife then her on facebook police say the new york
Epoch 00003: saving model to /tmp/articles1.03-5.28.h5
Epoch 4/10



epoch: 11
val_loss: 5.3918232955932615
Generated title: milo yiannopoulos and twitter is on the stock despite campaign
Generated title: review ’ the of burning the new york times
Generated title: arkansas deputy killed suspect
Generated title: what you need to know about the of what’s the new york
Generated title: supreme court upholds action program at university of texas the new york
Generated title: what to boston marathon bombing victims
Generated title: rio olympics already in europe is a too the new york times
Generated title: us drone strike kills french isis
Generated title: 1 1 3 million burned in alaska in 2015
Generated title: florida man wife then her on police shootings in orlando shooting the
Epoch 00004: saving model to /tmp/articles1.04-5.39.h5
Epoch 5/10

KeyboardInterrupt: 

# See Results

In [43]:
import os
from keras.models import load_model
import pandas as pd

df = pd.read_csv('articles1.csv').sample(n=10)
SAMPLE_BODIES = df.content.tolist()
SAMPLE_TITLES = df.title.tolist()


records = []

models_dir = os.path.join(os.getcwd(), 'model_1', 'h5')
for modelFN in os.listdir(models_dir):
    print('\n%s:\n' % modelFN)
    d = {modelFN: []}
    p = Predictor(load_model(os.path.join(models_dir, modelFN)), tk_body, tk_title, BODY_LENGTH, TITLE_LENGTH)
    for body in SAMPLE_BODIES:
        t = p.create_title(body)
        print(t)
        d[modelFN].append(t)
    records.append(d)


articles1.07-4.66.h5:

muslim video girls who don’t the lgbt rights ’ breitbart
penn how we out
watch israeli envoy campaign to being first jewish groups in u s
marine times urges veterans to support more gun control breitbart
exclusive sarah palin to take on twitter breitbart
44 4 to 40 deaths linked gun violence breitbart
9 11 victims law saudis u s alliance against terrorism the new york
video mexican cartel forced to wear a drug cartels
lebron james the drama over whether the cavaliers would match a fan 38
a 8 billion drug users that called the drug overdose

articles1.06-4.29.h5:

muslim video red carpet the memorial to honor justice scalia breitbart
penn how we out letterman
watch israeli arab support trump at netanyahu boycott israel is
marine times urges veterans to support gun control breitbart
breitbart news sunday on the of breitbart
four more gun control after shooting ar 15 demands following veto breitbart
by saudi arabia to 9 11 document leaves many questions the new yor

In [44]:
for t in SAMPLE_TITLES:
    print(t)

Muslim Video: Girls Who Don’t ‘Wear the Hijab’ Are ‘Closer to Satan’
Penn Jillette: How we freaked out Letterman
Jewish Group Praises Trump Administration For ’Strong Stand’ Against Iran
Marine Corps Times Urges Veterans To Support More Gun Control - Breitbart
Breitbart News Announces Addition of Rebecca Mansour
Number of Gun Manufacturers Up ’Nearly 250 Percent’ Under Obama
Angered by 9/11 Victims Law, Saudis Rethink U.S. Alliance - The New York Times
GRAPHIC: Mexican Authorities Use Trash Bags to Interrogate Suspected Female Cartel Member
LeBron James hilariously spoiled the drama over whether the Cavaliers would match a fan favorite’s $38 million contract offer
There’s something odd about the way insulin prices change
