In [1]:
import numpy as np
import pandas as pd

df = pd.read_csv("processed_dataset.csv")

text = list(df['text'])
headlines = list(df['headline'])
temp = []
temp.extend(text)
temp.extend(headlines)

In [2]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np

SENLEN = 400
HEADLINES_SENLEN = 20
MAXWORDS = 20000

HEADLINES_MAXWORDS = 3000

text_tokenizer = Tokenizer(num_words = MAXWORDS)
text_tokenizer.fit_on_texts(text)

headline_tokenizer = Tokenizer(num_words = HEADLINES_MAXWORDS)
headline_tokenizer.fit_on_texts(headlines)

def preprocess_sequences(text, seq_type):
    
    if(seq_type == "text"):
        return pad_sequences(text_tokenizer.texts_to_sequences(text), maxlen = SENLEN, padding='pre')        
    elif(seq_type == "headline"):
        return pad_sequences(headline_tokenizer.texts_to_sequences(text), maxlen = HEADLINES_SENLEN, padding='pre')


In [3]:
text_sequences = preprocess_sequences(text, "text")
headline_sequences = preprocess_sequences(headlines, "headline")

In [4]:
embedding_dict = {}

with open("../glove/archive/glove.6B.100d.txt", "r") as f:
    for line in f:
        values = line.split()
        word = values[0]
        vectors = np.asarray(values[1:],"float32")
        embedding_dict[word] = vectors
f.close()

embedding_dim = 100
num_words = MAXWORDS + 1
embedding_matrix = np.zeros((num_words, embedding_dim))

for word, i in text_tokenizer.word_index.items():
    if i < num_words:
        embedding_vector = embedding_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector
            
num_words_headlines = MAXWORDS + 1
embedding_matrix_headlines = np.zeros((num_words_headlines, embedding_dim))

for word, i in headline_tokenizer.word_index.items():
    if i < num_words_headlines:
        embedding_vector = embedding_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix_headlines[i] = embedding_vector

In [5]:
import tensorflow as tf

latent_dim = 100

input_layer = tf.keras.layers.Input(shape = (None,), name="input_layer")
embedding_layer = tf.keras.layers.Embedding(num_words, embedding_dim, weights = [embedding_matrix], input_length = SENLEN,trainable = False, name = "Embedding_layer_enc")(input_layer)

#Encoder

#encoder lstm 1
encoder_lstm1 = tf.keras.layers.LSTM(latent_dim,return_sequences=True,return_state=True,dropout=0.2,recurrent_dropout=0.2)
encoder_output1, state_h1, state_c1 = encoder_lstm1(embedding_layer)

#encoder lstm 2
encoder_lstm2 = tf.keras.layers.LSTM(latent_dim,return_sequences=True,return_state=True,dropout=0.2,recurrent_dropout=0.2)
encoder_output2, state_h, state_c = encoder_lstm2(encoder_output1)

decoder_inputs = tf.keras.layers.Input(shape=(None,))

#embedding layer
dec_emb_layer = tf.keras.layers.Embedding(num_words_headlines,
                                          embedding_dim, 
                                          weights = [embedding_matrix_headlines], 
                                          input_length = HEADLINES_SENLEN,
                                          trainable = False, 
                                          name = "Embedding_layer_dec"
                                         )

dec_emb = dec_emb_layer(decoder_inputs)

decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True,dropout=0.2,recurrent_dropout=0.2)
decoder_outputs,decoder_fwd_state, decoder_back_state = decoder_lstm(dec_emb,initial_state=[state_h, state_c])

#dense layer
decoder_dense =  tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(HEADLINES_SENLEN, activation='relu'))
decoder_outputs = decoder_dense(decoder_outputs)

model = tf.keras.models.Model([input_layer, decoder_inputs], decoder_outputs)
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_layer (InputLayer)       [(None, None)]       0           []                               
                                                                                                  
 Embedding_layer_enc (Embedding  (None, None, 100)   2000100     ['input_layer[0][0]']            
 )                                                                                                
                                                                                                  
 input_1 (InputLayer)           [(None, None)]       0           []                               
                                                                                                  
 lstm (LSTM)                    [(None, None, 100),  80400       ['Embedding_layer_enc[0][0]']

In [6]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
es = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', verbose=1,patience=2)

In [7]:
history = model.fit([text_sequences, headline_sequences], headline_sequences,epochs = 50, callbacks=[es], batch_size=32,verbose = 1)

Epoch 1/50


InvalidArgumentError: Graph execution error:

Detected at node 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits' defined at (most recent call last):
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel_launcher.py", line 16, in <module>
      app.launch_new_instance()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/traitlets/config/application.py", line 664, in launch_instance
      app.start()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 612, in start
      self.io_loop.start()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 149, in start
      self.asyncio_loop.run_forever()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/ioloop.py", line 690, in <lambda>
      lambda f: self._run_callback(functools.partial(callback, future))
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/ioloop.py", line 743, in _run_callback
      ret = callback()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 787, in inner
      self.run()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 748, in run
      yielded = self.gen.send(value)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 381, in dispatch_queue
      yield self.process_one()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 225, in wrapper
      runner = Runner(result, future, yielded)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 714, in __init__
      self.run()
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 748, in run
      yielded = self.gen.send(value)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 365, in process_one
      yield gen.maybe_future(dispatch(*args))
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 209, in wrapper
      yielded = next(result)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 268, in dispatch_shell
      yield gen.maybe_future(handler(stream, idents, msg))
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 209, in wrapper
      yielded = next(result)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 543, in execute_request
      self.do_execute(
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/tornado/gen.py", line 209, in wrapper
      yielded = next(result)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 306, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 536, in run_cell
      return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2866, in run_cell
      result = self._run_cell(
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2895, in _run_cell
      return runner(coro)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 68, in _pseudo_sync_runner
      coro.send(None)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3071, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3263, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3343, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "<ipython-input-7-8796c7512865>", line 1, in <module>
      history = model.fit([text_sequences, headline_sequences], headline_sequences,epochs = 50, callbacks=[es], batch_size=32,verbose = 1)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 860, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/training.py", line 918, in compute_loss
      return self.compiled_loss(
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/engine/compile_utils.py", line 201, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 141, in __call__
      losses = call_fn(y_true, y_pred)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 245, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/losses.py", line 1862, in sparse_categorical_crossentropy
      return backend.sparse_categorical_crossentropy(
    File "/Users/sampathroutu/opt/anaconda3/lib/python3.8/site-packages/keras/backend.py", line 5202, in sparse_categorical_crossentropy
      res = tf.nn.sparse_softmax_cross_entropy_with_logits(
Node: 'sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits'
Received a label value of 2911 which is outside the valid range of [0, 20).  Label values: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 977 797 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 388 1366 137 232 57 29 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 904 835 2 927 309 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 354 5 241 597 2458 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1842 749 1843 4 134 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1275 1 2365 282 586 1293 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 332 1314 819 373 1299 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 176 891 505 1655 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1659 675 1660 530 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1806 1807 1082 7 10 1808 0 0 0 0 0 0 0 0 0 0 0 0 0 0 219 11 17 24 18 16 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 68 1 235 521 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 262 2 38 170 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 256 345 303 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 94 369 1594 176 2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 581 585 2175 1216 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 46 8 341 1901 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 520 278 1 50 429 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 490 3 184 198 1645 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 390 1370 19 1371 1372 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 1513 1320 968 115 1514 0 0 0 0 0 0 0 0 0 0 0 0 0 0 910 113 2911 33 2 919 0 0 0 0 0 0 0 0 0 0 0 0 0 0 639 140 391 24 242 77 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 517 342 972 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 367 237 2372 3 2373 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2814 2815 930 1344 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 9 263 1516 1 673 0 0 0 0 0 0 0 0 0 0 0 0 0 2475 40 15 4 2476 102 494 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 2500 63 2501 2502 0 0 0 0 0 0 0 0 0 0 0 0 0 1 177 4 14 18 16 1350 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 144 166 279 74 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 230 794 1397 2770
	 [[{{node sparse_categorical_crossentropy/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits}}]] [Op:__inference_train_function_10547]

(2225, 400)