In [None]:
# Imports (including the extra layers needed for the attention mechanism)
import pickle
import re

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, dot, concatenate, Activation
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer

# Read the raw CSV and discard rows that lack either text column
df = pd.read_csv("/content/news_summary_Small.csv", encoding='latin-1').dropna(
    subset=["article", "headline"]
)

def clean_text(text):
    """Normalise a string for tokenisation.

    The text is lower-cased, every character that is neither a-z nor
    whitespace is deleted, whitespace runs are collapsed to one space and
    the result is trimmed at both ends.
    """
    letters_only = re.sub(r'[^a-z\s]', '', text.lower())
    return re.sub(r'\s+', ' ', letters_only).strip()

# Normalise both text columns
for column in ("article", "headline"):
    df[column] = df[column].apply(clean_text)

# Keep only rows with at least 5 article words and at least 3 headline words
for column, min_words in (("article", 5), ("headline", 3)):
    has_enough_words = df[column].apply(lambda text, n=min_words: len(text.split()) >= n)
    df = df[has_enough_words]

# Wrap every headline in start/end markers for the decoder
df["headline"] = df["headline"].apply(lambda headline: "<sos> " + headline + " <eos>")

# Prepare data
articles = df["article"].tolist()
headlines = df["headline"].tolist()

# Tokenization
article_tokenizer = Tokenizer(oov_token="<OOV>")
# Headlines were already reduced to a-z and spaces by clean_text, so the only
# other characters are the '<' and '>' of the '<sos>'/'<eos>' markers added
# afterwards. Tokenizer's default `filters` strips those brackets, which would
# store the markers as plain 'sos'/'eos' and make a later
# word_index lookup of '<sos>' miss. Disable filtering to keep them intact.
headline_tokenizer = Tokenizer(oov_token="<OOV>", filters='')
article_tokenizer.fit_on_texts(articles)
headline_tokenizer.fit_on_texts(headlines)

article_seq = article_tokenizer.texts_to_sequences(articles)
headline_seq = headline_tokenizer.texts_to_sequences(headlines)

# +1 because the vocabulary size is used as the Embedding input dimension and
# id 0 is the padding value produced by pad_sequences below.
article_vocab = len(article_tokenizer.word_index) + 1
headline_vocab = len(headline_tokenizer.word_index) + 1

max_article_len = 100
max_headline_len = 15

encoder_input = pad_sequences(article_seq, maxlen=max_article_len, padding='post')
# pad_sequences truncates from the front by default, which would cut the
# '<sos>' marker off any headline longer than max_headline_len. Truncate at
# the end instead so every decoder input starts with the start marker.
decoder_input = pad_sequences(
    headline_seq, maxlen=max_headline_len, padding='post', truncating='post'
)

# Teacher forcing: the target at step t is the decoder input at step t+1;
# the last target position stays 0.
decoder_target = np.zeros_like(decoder_input)
decoder_target[:, :-1] = decoder_input[:, 1:]

enc_train, enc_val, dec_in_train, dec_in_val, dec_tgt_train, dec_tgt_val = train_test_split(
    encoder_input, decoder_input, decoder_target, test_size=0.1, random_state=42
)

# Hyper-parameters shared by encoder and decoder
embedding_dim, lstm_units = 128, 256

# --- Encoder: embed article tokens, keep per-step outputs and the final states ---
enc_inputs = Input(shape=(max_article_len,))
encoder_embedding = Embedding(article_vocab, embedding_dim, mask_zero=True)
enc_emb = encoder_embedding(enc_inputs)
encoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
enc_lstm_outputs, state_h, state_c = encoder_lstm(enc_emb)
enc_states = [state_h, state_c]

# --- Decoder: embed headline tokens, start the LSTM from the encoder states ---
dec_inputs = Input(shape=(max_headline_len,))
decoder_embedding = Embedding(headline_vocab, embedding_dim, mask_zero=True)
dec_emb = decoder_embedding(dec_inputs)
decoder_lstm = LSTM(lstm_units, return_sequences=True, return_state=True)
dec_lstm_outputs, _, _ = decoder_lstm(dec_emb, initial_state=enc_states)

# --- Dot-product attention of every decoder step over all encoder steps ---
# scores / weights: (batch, dec_len, enc_len); context: (batch, dec_len, lstm_units)
attention_scores = dot([dec_lstm_outputs, enc_lstm_outputs], axes=[2, 2])
attention_weights = Activation('softmax')(attention_scores)
context_vector = dot([attention_weights, enc_lstm_outputs], axes=[2, 1])

# --- Vocabulary projection over [context ; decoder output]: (batch, dec_len, 2*lstm_units) ---
combined = concatenate([context_vector, dec_lstm_outputs])
vocab_projection = Dense(headline_vocab, activation='softmax')
output = vocab_projection(combined)

# --- Assemble and compile the training model ---
model = Model([enc_inputs, dec_inputs], output)
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train
# `epochs` is only an upper bound: training stops once validation loss has not
# improved for `patience` consecutive epochs, and the weights from the best
# epoch are restored. Without this the run continues for all 250 epochs even
# after validation loss has stopped improving.
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)

model.fit(
    [enc_train, dec_in_train],
    # sparse_categorical_crossentropy is fed targets with a trailing axis of size 1
    np.expand_dims(dec_tgt_train, -1),
    epochs=250,
    batch_size=64,
    validation_data=([enc_val, dec_in_val], np.expand_dims(dec_tgt_val, -1)),
    callbacks=[early_stopping],
)

# Write the trained model and both fitted tokenizers to disk
model.save("encoder_decoder_attention.keras")
for pickle_path, fitted_tokenizer in (
    ("article_tokenizer.pkl", article_tokenizer),
    ("headline_tokenizer.pkl", headline_tokenizer),
):
    with open(pickle_path, "wb") as f:
        pickle.dump(fitted_tokenizer, f)


Epoch 1/250
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 100ms/step - accuracy: 0.2640 - loss: 7.6946 - val_accuracy: 0.3293 - val_loss: 5.1204
Epoch 2/250
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 47ms/step - accuracy: 0.3437 - loss: 4.8307 - val_accuracy: 0.3680 - val_loss: 5.0533
Epoch 3/250
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.3718 - loss: 4.6176 - val_accuracy: 0.3873 - val_loss: 5.0762
Epoch 4/250
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 41ms/step - accuracy: 0.3951 - loss: 4.5094 - val_accuracy: 0.4127 - val_loss: 5.0684
Epoch 5/250
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step - accuracy: 0.4125 - loss: 4.4014 - val_accuracy: 0.4160 - val_loss: 5.0495
Epoch 6/250
[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 40ms/step - accuracy: 0.4209 - loss: 4.3085 - val_accuracy: 0.4187 - val_loss: 5.0876
Epoch 7/250
[1m15/15[0m 

In [None]:
import numpy as np
import pickle
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, dot, concatenate, Activation
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Restore the trained network
model = load_model("encoder_decoder_attention.keras")

# Restore the fitted tokenizers.
# NOTE: pickle.load can execute code embedded in the file; only load pickles you produced.
with open("article_tokenizer.pkl", "rb") as f:
    article_tokenizer = pickle.load(f)
with open("headline_tokenizer.pkl", "rb") as f:
    headline_tokenizer = pickle.load(f)

# Read the hyper-parameters back from the loaded model.
# Layers are addressed by position: 2 and 3 are read as embeddings, 4 as an LSTM.
max_article_len, max_headline_len = (model.input[i].shape[1] for i in (0, 1))
lstm_units = model.layers[4].units
embedding_dim = model.layers[2].output_dim

article_vocab = model.layers[2].input_dim
headline_vocab = model.layers[3].input_dim

# Stand-alone encoder for inference, reusing the trained layers at positions 2 and 4
enc_emb_layer, enc_lstm = (model.get_layer(index=i) for i in (2, 4))

encoder_inputs = Input(shape=(max_article_len,))
enc_emb = enc_emb_layer(encoder_inputs)
encoder_lstm_results = enc_lstm(enc_emb)
enc_outputs, state_h_enc, state_c_enc = encoder_lstm_results

# Outputs: the per-step encoder outputs plus the two final LSTM states
encoder_model = Model(
    inputs=encoder_inputs,
    outputs=[enc_outputs, state_h_enc, state_c_enc],
)

# Rebuild decoder model with attention.
# This is a single-step decoder: it takes one token plus the previous LSTM
# states and the full encoder output, and returns the next-token distribution
# together with the updated states.
decoder_inputs = Input(shape=(1,))
decoder_state_input_h = Input(shape=(lstm_units,))
decoder_state_input_c = Input(shape=(lstm_units,))
encoder_output_input = Input(shape=(max_article_len, lstm_units))

dec_emb_layer = model.get_layer(index=3)
dec_lstm = model.get_layer(index=5)
# The vocabulary projection is the model's output layer and its only Dense
# layer, so it sits at the END of model.layers. Index 7 is not that layer: the
# trained graph has the dot/softmax/dot/concatenate attention layers between
# the LSTMs and the Dense layer, so index 7 lands on one of those and the
# decoder would never produce a distribution over the headline vocabulary.
# Look the layer up by type instead of by a hard-coded position.
dec_dense = next(layer for layer in reversed(model.layers) if isinstance(layer, Dense))

dec_emb = dec_emb_layer(decoder_inputs)
dec_outputs, state_h, state_c = dec_lstm(dec_emb, initial_state=[decoder_state_input_h, decoder_state_input_c])

# Attention: dot-product scores against every encoder step, softmax over the
# encoder axis, then a weighted sum of the encoder outputs.
attention_scores = dot([dec_outputs, encoder_output_input], axes=[2, 2])
attention_weights = Activation('softmax')(attention_scores)
context_vector = dot([attention_weights, encoder_output_input], axes=[2, 1])
decoder_combined_context = concatenate([context_vector, dec_outputs])

# Final output
decoder_outputs = dec_dense(decoder_combined_context)

decoder_model = Model(
    [decoder_inputs, encoder_output_input, decoder_state_input_h, decoder_state_input_c],
    [decoder_outputs, state_h, state_c]
)

# Invert the headline vocabulary (id -> word); id 0 has no entry in
# word_index, so it is mapped to the empty string explicitly.
reverse_headline_word_index = dict(
    zip(headline_tokenizer.word_index.values(), headline_tokenizer.word_index.keys())
)
reverse_headline_word_index[0] = ''

# Decode sequence with attention
def decode_sequence_attention(input_seq):
    """Greedily decode a headline for one encoded article.

    Args:
        input_seq: Padded article token ids, passed unchanged to
            ``encoder_model.predict``. A single row is expected because the
            decoder is fed a ``(1, 1)`` target array.

    Returns:
        str: The generated words joined by single spaces, without the
        start/end markers. At most ``max_headline_len`` words are produced.
    """
    vocab = headline_tokenizer.word_index
    # A Tokenizer fitted with the default `filters` stores the markers without
    # their angle brackets ('sos'/'eos'), so a lookup of '<sos>' alone would
    # silently fall back to id 1 (the OOV token) and '<eos>' would never be
    # recognised. Accept either spelling; id 1 remains the last-resort default.
    start_id = vocab.get('<sos>', vocab.get('sos', 1))
    end_word = '<eos>' if '<eos>' in vocab else 'eos'

    # verbose=0 keeps the per-call progress bars out of the notebook output.
    enc_outs, h, c = encoder_model.predict(input_seq, verbose=0)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = start_id

    decoded_sentence = []
    for _ in range(max_headline_len):
        output_tokens, h, c = decoder_model.predict([target_seq, enc_outs, h, c], verbose=0)
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = reverse_headline_word_index.get(sampled_token_index, '')

        # Stop on the end marker or on an id that maps to no word.
        if sampled_word == end_word or sampled_word == '':
            break

        decoded_sentence.append(sampled_word)
        # Feed the chosen token back in as the next decoder input.
        target_seq[0, 0] = sampled_token_index

    return ' '.join(decoded_sentence)

# Summarize function
def summarize_article_attention(article_text):
    """Generate a headline for raw article text.

    The text is normalised exactly as the training data was (lower-case,
    only a-z and whitespace kept, whitespace collapsed) before tokenisation.
    Lower-casing alone leaves digits and punctuation handling to the
    tokenizer, which splits words differently from the training pipeline
    (e.g. hyphenated names) and produces avoidable out-of-vocabulary tokens.

    Args:
        article_text (str): Raw article text.

    Returns:
        str: The headline produced by ``decode_sequence_attention``.
    """
    import re  # local import: this cell does not import `re` itself

    cleaned = re.sub(r'[^a-z\s]', '', article_text.lower())
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    seq = article_tokenizer.texts_to_sequences([cleaned])
    seq = pad_sequences(seq, maxlen=max_article_len, padding='post')
    return decode_sequence_attention(seq)

# Interactive demo: read an article from the prompt, echo it, then show the generated headline
text = input("Enter the article text: ")
print("Input Article:", text)
predicted_headline = summarize_article_attention(text)
print("Predicted Headline:", predicted_headline)


Enter the article text: Lashkar-e-Taiba's Kashmir commander Abu Dujana, who was killed by security forces, said "Kabhi hum aage, kabhi aap, aaj aapne pakad liya, mubarak ho aapko (Today you caught me. Congratulations)" after being caught
Input Article: Lashkar-e-Taiba's Kashmir commander Abu Dujana, who was killed by security forces, said "Kabhi hum aage, kabhi aap, aaj aapne pakad liya, mubarak ho aapko (Today you caught me. Congratulations)" after being caught
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 141ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━

In [None]:
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice
from rouge_score import rouge_scorer

def evaluate_metrics(references, hypotheses):
    """
    Evaluates text generation using common metrics like BLEU, METEOR, CIDEr, ROUGE, and SPICE.

    Args:
        references (list): A list of reference texts (ground truth), one per sample.
        hypotheses (list): A list of generated texts (predictions), aligned
            with ``references`` by position.

    Returns:
        dict: A dictionary containing the scores for each metric
        (Bleu_1..Bleu_4, METEOR, CIDEr, SPICE, ROUGE_1, ROUGE_2, ROUGE_L).

    Raises:
        ValueError: If the inputs are empty or differ in length.
    """
    num_samples = len(references)
    if num_samples != len(hypotheses):
        raise ValueError(
            f"references and hypotheses must have the same length "
            f"({num_samples} != {len(hypotheses)})"
        )
    if num_samples == 0:
        raise ValueError("references and hypotheses must not be empty")

    # The pycocoevalcap scorers expect {sample_id: [sentence, ...]} mappings
    # with identical key sets, not flat lists of strings.
    refs_by_id = {i: [ref] for i, ref in enumerate(references)}
    hyps_by_id = {i: [hyp] for i, hyp in enumerate(hypotheses)}

    scorers = [
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    ]

    results = {}
    for scorer, method in scorers:
        score, _ = scorer.compute_score(refs_by_id, hyps_by_id)
        if isinstance(score, list):
            # BLEU returns one score per n-gram order.
            for m, s in zip(method, score):
                results[m] = s
        else:
            results[method] = score

    # Evaluate ROUGE: accumulate per-sample F-measures, then average.
    rouge = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    rouge_results = {"ROUGE_1": 0, "ROUGE_2": 0, "ROUGE_L": 0}
    for ref, hyp in zip(references, hypotheses):
        scores = rouge.score(ref, hyp)
        rouge_results["ROUGE_1"] += scores["rouge1"].fmeasure
        rouge_results["ROUGE_2"] += scores["rouge2"].fmeasure
        rouge_results["ROUGE_L"] += scores["rougeL"].fmeasure

    # Average ROUGE scores (num_samples > 0 is guaranteed by the check above)
    for key in rouge_results:
        rouge_results[key] /= num_samples

    # Add ROUGE to final results
    results.update(rouge_results)

    print("Evaluation Results:")
    for metric, score in results.items():
        print(f"{metric}: {score}")

    return results


In [None]:
import numpy as np
import pandas as pd
import re
import pickle
import tensorflow as tf
from tensorflow.keras.models import load_model, Model
from tensorflow.keras.layers import Input, LSTM, Embedding, Dense, Concatenate
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras import layers

# === Optional: for evaluation === #
from pycocoevalcap.bleu.bleu import Bleu
from pycocoevalcap.meteor.meteor import Meteor
from pycocoevalcap.cider.cider import Cider
from pycocoevalcap.spice.spice import Spice

# === Load Model and Tokenizers === #
# compile=False: the model is restored without its training configuration.
model = load_model("encoder_decoder_attention.keras", compile=False)
# NOTE: pickle.load can execute code embedded in the file; only load pickles you produced.
with open("article_tokenizer.pkl", "rb") as f:
    article_tokenizer = pickle.load(f)
with open("headline_tokenizer.pkl", "rb") as f:
    headline_tokenizer = pickle.load(f)

# === Parameters === #
# Sequence lengths come from the two model inputs; the LSTM width from the layer at position 4.
encoder_input_tensor, decoder_input_tensor = model.input[0], model.input[1]
max_article_len = encoder_input_tensor.shape[1]
max_headline_len = decoder_input_tensor.shape[1]
lstm_units = model.layers[4].units

# === Rebuild Encoder === #
# Reuse the model's own first input and route it through the layers at positions 2 and 4.
encoder_inputs = model.input[0]
encoder_embedding, encoder_lstm = model.layers[2], model.layers[4]
enc_emb = encoder_embedding(encoder_inputs)
encoder_outputs, state_h_enc, state_c_enc = encoder_lstm(enc_emb)
encoder_model = Model(
    inputs=encoder_inputs,
    outputs=[encoder_outputs, state_h_enc, state_c_enc],
)

# === Custom Attention Layer === #
class AttentionLayer(layers.Layer):
    """Additive attention of a single query over a sequence of values.

    Owns three Dense projections: ``W1`` and ``W2`` with ``units`` outputs
    each and ``V`` with a single output.
    """

    def __init__(self, units):
        super().__init__()
        self.W1 = Dense(units)
        self.W2 = Dense(units)
        self.V = Dense(1)

    def call(self, query, values):
        """Return ``(context_vector, attention_weights)``.

        The query gets an extra axis at position 1 so it can be added to the
        projected values; the score is ``V(tanh(W1(query) + W2(values)))``,
        normalised with a softmax over axis 1. The context vector is the
        weight-scaled ``values`` summed over axis 1.
        """
        expanded_query = tf.expand_dims(query, 1)
        projected = self.W1(expanded_query) + self.W2(values)
        score = self.V(tf.nn.tanh(projected))
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)
        return context_vector, attention_weights

# === Rebuild Decoder === #
# Single-step decoder: one token + previous LSTM states + full encoder output
# in, next-token distribution + updated states out.
decoder_state_input_h = Input(shape=(lstm_units,))
decoder_state_input_c = Input(shape=(lstm_units,))
decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]

decoder_inputs = Input(shape=(1,))
dec_emb_layer = model.layers[3]
dec_lstm = model.layers[5]
# The vocabulary projection is the model's output layer and its only Dense
# layer, so it is the LAST entry of model.layers. Position 7 falls on one of
# the attention layers that sit between the LSTMs and the Dense layer, so it
# must not be used here. Select the layer by type rather than by position.
dec_dense = next(layer for layer in reversed(model.layers) if isinstance(layer, Dense))

dec_emb = dec_emb_layer(decoder_inputs)
dec_lstm_out, state_h_dec, state_c_dec = dec_lstm(dec_emb, initial_state=decoder_states_inputs)

# The saved model was trained with parameter-free dot-product attention
# (dot -> softmax -> dot). Inference has to apply the same computation: a
# newly constructed attention layer with its own Dense weights would be
# randomly initialised and never trained, yielding meaningless context vectors.
enc_out_input = Input(shape=(max_article_len, lstm_units))
attention_scores = layers.Dot(axes=[2, 2])([dec_lstm_out, enc_out_input])
attention_weights = layers.Activation('softmax')(attention_scores)
context_vector = layers.Dot(axes=[2, 1])([attention_weights, enc_out_input])

# Same [context ; decoder output] ordering as in the training graph.
decoder_concat_input = Concatenate(axis=-1)([context_vector, dec_lstm_out])
decoder_outputs = dec_dense(decoder_concat_input)

decoder_model = Model(
    [decoder_inputs, enc_out_input] + decoder_states_inputs,
    [decoder_outputs, state_h_dec, state_c_dec]
)

# === Reverse Tokenizer === #
# id -> word lookup built by swapping the keys and values of word_index
reverse_headline_word_index = dict(
    zip(headline_tokenizer.word_index.values(), headline_tokenizer.word_index.keys())
)

# === Decode Sequence === #
def decode_sequence(input_seq):
    """Greedily decode a headline for one encoded article.

    Args:
        input_seq: Padded article token ids, passed unchanged to
            ``encoder_model.predict``. A single row is expected because the
            decoder is fed a ``(1, 1)`` target array.

    Returns:
        str: The generated words joined by single spaces, without the
        start/end markers. At most ``max_headline_len`` words are produced.
    """
    vocab = headline_tokenizer.word_index
    # A Tokenizer fitted with the default `filters` stores the markers without
    # their angle brackets ('sos'/'eos'), so a lookup of '<sos>' alone would
    # silently fall back to id 1 (the OOV token) and '<eos>' would never be
    # recognised. Accept either spelling; id 1 remains the last-resort default.
    start_id = vocab.get('<sos>', vocab.get('sos', 1))
    end_word = '<eos>' if '<eos>' in vocab else 'eos'

    # verbose=0 keeps the per-call progress bars out of the notebook output.
    enc_outs, state_h, state_c = encoder_model.predict(input_seq, verbose=0)

    target_seq = np.zeros((1, 1))
    target_seq[0, 0] = start_id

    decoded_words = []
    for _ in range(max_headline_len):
        # The returned states replace the previous ones for the next step.
        output_tokens, state_h, state_c = decoder_model.predict(
            [target_seq, enc_outs, state_h, state_c], verbose=0
        )
        sampled_token_index = int(np.argmax(output_tokens[0, -1, :]))
        sampled_word = reverse_headline_word_index.get(sampled_token_index, '')

        # Stop on the end marker or on an id that maps to no word.
        if sampled_word == end_word or sampled_word == '':
            break

        decoded_words.append(sampled_word)
        # Feed the chosen token back in as the next decoder input.
        target_seq[0, 0] = sampled_token_index

    return ' '.join(decoded_words)

# === Clean Input Text === #
def clean_text(text):
    """Lower-case `text`, keep only a-z and whitespace, and squeeze whitespace.

    Returns the cleaned string with single spaces between words and no
    leading or trailing whitespace.
    """
    collapsed = re.sub(r'\s+', ' ', re.sub(r'[^a-z\s]', '', text.lower()))
    return collapsed.strip()

# === Summarize Article === #
def summarize_article(article_text):
    """Clean, tokenise and pad one article, then decode a headline for it.

    Args:
        article_text (str): Raw article text.

    Returns:
        The value returned by ``decode_sequence`` for the padded token ids.
    """
    token_ids = article_tokenizer.texts_to_sequences([clean_text(article_text)])
    padded_ids = pad_sequences(token_ids, maxlen=max_article_len, padding='post')
    return decode_sequence(padded_ids)

# === Evaluation Metrics === #
def evaluate_metrics(references, hypotheses):
    """Score predictions with BLEU-1..4, METEOR, CIDEr and SPICE.

    Both lists are converted to ``{position: [text]}`` dictionaries before
    being handed to each scorer. Every score is rounded to 4 decimals,
    printed as ``name: value`` and returned.

    Args:
        references (list): Ground-truth texts.
        hypotheses (list): Generated texts, aligned with ``references``.

    Returns:
        dict: Metric name -> rounded score.
    """
    refs_dict = {idx: [text] for idx, text in enumerate(references)}
    hyps_dict = {idx: [text] for idx, text in enumerate(hypotheses)}

    # All scorers are constructed up front, before any of them is run.
    scorer_table = (
        (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]),
        (Meteor(), "METEOR"),
        (Cider(), "CIDEr"),
        (Spice(), "SPICE"),
    )

    results = {}
    for scorer, method in scorer_table:
        score, _ = scorer.compute_score(refs_dict, hyps_dict)
        if not isinstance(score, list):
            results[method] = round(score, 4)
            continue
        # A list result carries one value per name in `method`.
        for value, name in zip(score, method):
            results[name] = round(value, 4)

    for metric in results:
        print(f"{metric}: {results[metric]}")

    return results

# === Load Dataset === #
# Same preparation steps as for training: drop incomplete rows, clean both
# columns, drop very short texts, then add the start/end markers.
df = pd.read_csv("/content/news_summary_Small.csv", encoding='latin-1')
df = df.dropna(subset=["article", "headline"])
for column in ("article", "headline"):
    df[column] = df[column].apply(clean_text)
for column, min_words in (("article", 5), ("headline", 3)):
    keep_row = df[column].apply(lambda text, n=min_words: len(text.split()) >= n)
    df = df[keep_row]
df["headline"] = df["headline"].apply(lambda headline: "<sos> " + headline + " <eos>")

# === Predict and Evaluate === #
references = []
hypotheses = []
num_samples = 50

# head() caps the selection at the frame length, so a dataset with fewer than
# num_samples rows is evaluated in full instead of raising IndexError.
# NOTE(review): df is the whole CSV, so these rows may overlap with the data
# the model was trained on — TODO evaluate on a held-out split.
eval_rows = df.head(num_samples)
for article, tagged_headline in zip(eval_rows["article"], eval_rows["headline"]):
    # Strip the decoder markers so the reference is plain text.
    true_headline = tagged_headline.replace("<sos> ", "").replace(" <eos>", "")
    pred_headline = summarize_article(article)

    references.append(true_headline)
    hypotheses.append(pred_headline)

# === Print Metrics === #
evaluate_metrics(references, hypotheses)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

{'Bleu_1': 0.008,
 'Bleu_2': 0.0,
 'Bleu_3': 0.0,
 'Bleu_4': 0.0,
 'METEOR': 0.0117,
 'CIDEr': np.float64(0.0098),
 'SPICE': np.float64(0.0)}