In [1]:
import numpy as np
import pandas as pd

# Read the preprocessed dataset and pull the two columns used by the later
# cells out of the frame as plain Python lists.
df = pd.read_csv("processed_dataset.csv")
text, headlines = (list(df[column]) for column in ("text", "headline"))

# Combined list: every `text` entry followed by every `headline` entry.
temp = [*text, *headlines]

In [2]:
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np

# Sequence-length and vocabulary settings shared by the cells below.
MAXWORDS = 20000        # `num_words` cap handed to both tokenizers
SENLEN = 400            # padded length used for `text` sequences
HEADLINES_SENLEN = 20   # padded length used for `headline` sequences

# Two independent tokenizers: each builds its own word index from its own
# corpus, so an integer id means a different word in each of them.
text_tokenizer = Tokenizer(num_words=MAXWORDS)
headline_tokenizer = Tokenizer(num_words=MAXWORDS)

text_tokenizer.fit_on_texts(text)
headline_tokenizer.fit_on_texts(headlines)

def preprocess_sequences(text, seq_type):
    """Convert raw strings to padded integer sequences.

    Parameters
    ----------
    text
        Collection of strings; passed unchanged to the selected tokenizer's
        ``texts_to_sequences``.
    seq_type : str
        ``"text"`` encodes with ``text_tokenizer`` and pads to ``SENLEN``;
        ``"headline"`` encodes with ``headline_tokenizer`` and pads to
        ``HEADLINES_SENLEN``.

    Returns
    -------
    The result of ``pad_sequences(..., padding='pre')`` for the selected
    tokenizer and length.

    Raises
    ------
    ValueError
        If ``seq_type`` is neither ``"text"`` nor ``"headline"``. Previously
        this case fell through and silently returned ``None``.
    """
    if seq_type == "text":
        tokenizer, maxlen = text_tokenizer, SENLEN
    elif seq_type == "headline":
        tokenizer, maxlen = headline_tokenizer, HEADLINES_SENLEN
    else:
        raise ValueError(
            f"Unknown seq_type {seq_type!r}; expected 'text' or 'headline'"
        )

    return pad_sequences(tokenizer.texts_to_sequences(text), maxlen=maxlen, padding='pre')


In [3]:
# Encode both corpora with their matching tokenizer. Each call pads its
# result to the length configured for that sequence type.
headline_sequences = preprocess_sequences(headlines, seq_type="headline")
text_sequences = preprocess_sequences(text, seq_type="text")

In [4]:
# Map each GloVe token to its pretrained vector.
embedding_dict = {}

# The encoding is given explicitly: without it open() decodes with the
# platform's locale encoding, which fails on non-UTF-8 locales.
# The `with` block closes the file, so no separate close() call is needed.
with open("../glove/archive/glove.6B.100d.txt", "r", encoding="utf-8") as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        vectors = np.asarray(values[1:], "float32")
        embedding_dict[word] = vectors

embedding_dim = 100
# Not referenced by any visible cell; kept in case a cell outside this view
# uses it. Note the matrix below has MAXWORDS rows, not num_words.
num_words = MAXWORDS + 1

# Row i holds the GloVe vector of the word that `text_tokenizer` maps to id i.
# Rows for words missing from GloVe (and row 0, which no word uses) stay zero.
embedding_matrix = np.zeros((MAXWORDS, embedding_dim))

for word, i in text_tokenizer.word_index.items():
    # word_index contains every word seen while fitting; only ids below
    # MAXWORDS have a row in the matrix.
    if i < MAXWORDS:
        embedding_vector = embedding_dict.get(word)
        if embedding_vector is not None:
            embedding_matrix[i] = embedding_vector

In [7]:
import tensorflow as tf

latent_dim = 300  # number of units in every LSTM layer below

# ---------------------------------------------------------------- Encoder
input_layer = tf.keras.layers.Input(shape = (SENLEN,), dtype = np.int32, name="input_layer")
# Frozen GloVe embeddings indexed by `text_tokenizer` ids.
embedding_layer = tf.keras.layers.Embedding(
    MAXWORDS,
    embedding_dim,
    weights = [embedding_matrix],
    input_length = SENLEN,
    trainable = False,
    name = "Embedding_layer",
)(input_layer)

# encoder lstm 1
encoder_lstm1 = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.2, recurrent_dropout=0.2)
encoder_output1, state_h1, state_c1 = encoder_lstm1(embedding_layer)

# encoder lstm 2 -- its final states seed the decoder
encoder_lstm2 = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.2, recurrent_dropout=0.2)
encoder_output2, state_h, state_c = encoder_lstm2(encoder_output1)

# ---------------------------------------------------------------- Decoder
decoder_inputs = tf.keras.layers.Input(shape=(None,))

# `headline_tokenizer` is fitted separately from `text_tokenizer`, so the same
# integer id refers to different words in the two vocabularies. The decoder
# therefore needs its own matrix; reusing `embedding_matrix` would attach
# frozen vectors to the wrong words.
# NOTE(review): assumes `decoder_inputs` is fed `headline_tokenizer` ids
# (input_length = HEADLINES_SENLEN suggests so) -- confirm against the
# training cell.
headline_embedding_matrix = np.zeros((MAXWORDS, embedding_dim))
for word, i in headline_tokenizer.word_index.items():
    if i < MAXWORDS:
        headline_vector = embedding_dict.get(word)
        if headline_vector is not None:
            headline_embedding_matrix[i] = headline_vector

# embedding layer
# The name must differ from the encoder's "Embedding_layer": Keras rejects a
# Model that contains two layers with the same name.
dec_emb_layer = tf.keras.layers.Embedding(
    MAXWORDS,
    embedding_dim,
    weights = [headline_embedding_matrix],
    input_length = HEADLINES_SENLEN,
    trainable = False,
    name = "decoder_embedding_layer",
)
dec_emb = dec_emb_layer(decoder_inputs)

decoder_lstm = tf.keras.layers.LSTM(latent_dim, return_sequences=True, return_state=True, dropout=0.2, recurrent_dropout=0.2)
# The two extra return values are the LSTM's final hidden and cell states;
# the variable names are kept unchanged in case other cells refer to them.
decoder_outputs, decoder_fwd_state, decoder_back_state = decoder_lstm(dec_emb, initial_state=[state_h, state_c])

# dense layer
# One softmax distribution over the MAXWORDS headline ids per timestep.
# The previous Dense(HEADLINES_SENLEN, relu) emitted 20 unnormalised units,
# which cannot represent a choice among vocabulary words.
decoder_dense = tf.keras.layers.TimeDistributed(tf.keras.layers.Dense(MAXWORDS, activation='softmax'))
decoder_outputs = decoder_dense(decoder_outputs)

model = tf.keras.models.Model([input_layer, decoder_inputs], decoder_outputs)
model.summary()


NameError: name 'encoder_inputs' is not defined