Code for generating the ROUGE-1 and ROUGE-L F-measure scores of the models' predictions on 1000 samples from the validation set.
The models are named as specified in the final paper for the project.

In [None]:
from inference.lstm_encoder_decoder_inference import convert_sequence_to_title as convert_sequence_to_title_model_1, convert_sequence_to_article as convert_sequence_to_article_model_1, decode_sequence as decode_sequence_model_1

from inference.bi_lstm_encoder_lstm_decoder_inference import convert_sequence_to_title as convert_sequence_to_title_model_2, convert_sequence_to_article as convert_sequence_to_article_model_2, decode_sequence as decode_sequence_model_2

from inference.glove_bi_lstm_encoder_lstm_decoder_inference import convert_sequence_to_title as convert_sequence_to_title_model_3, convert_sequence_to_article as convert_sequence_to_article_model_3, decode_sequence as decode_sequence_model_3

from inference.reverse_input_bi_lstm_encoder_lstm_decoder_inference import convert_sequence_to_title as convert_sequence_to_title_model_4, convert_sequence_to_article as convert_sequence_to_article_model_4, decode_sequence as decode_sequence_model_4

import numpy as np


In [None]:
from rouge_score import rouge_scorer
# One scorer shared by every evaluation cell: it reports ROUGE-1 and ROUGE-L
# and is created with the stemmer switched on.
scorer = rouge_scorer.RougeScorer(
    ['rouge1', 'rougeL'],
    use_stemmer=True,
)

In [None]:
# Length that each source sequence is reshaped to before decoding (models 1 and 2).
max_len_full_article = 516
# Sub-folder of the data directory from which the arrays for models 1 and 2 are loaded.
model_name = "bi_lstm_encoder_lstm_decoder"

In [None]:
# Folder holding the saved arrays; each model reads from its own sub-folder.
path_to_data = "train_val_nd_array"

# Both placeholders of every path template are filled from this pair.
_path_parts = (path_to_data, model_name)

# train set and validate sets are the same for all models
x_validate = np.load("{}/{}/x_validate.npy".format(*_path_parts))
y_validate = np.load("{}/{}/y_validate.npy".format(*_path_parts))
x_train = np.load("{}/{}/x_train.npy".format(*_path_parts))
y_train = np.load("{}/{}/y_train.npy".format(*_path_parts))

In [None]:
# Fix the global NumPy seed so the drawn indices are reproducible.
np.random.seed(119)

# Draw 1000 row indices from the validate set. `replace` is left at its
# default, so an index may be drawn more than once.
num_validate_rows = x_validate.shape[0]
rand_idx = np.random.choice(num_validate_rows, size=1000)

# Select the matching article / title rows with the same indices.
x_val_random, y_val_random = x_validate[rand_idx], y_validate[rand_idx]

## Evaluate model 1: 

In [None]:
# Running sums of the ROUGE-1 and ROUGE-L F-measures of model 1 over the
# sampled validation pairs; they are turned into averages when printed.
f_measure_rouge_1_model_1 = 0
f_measure_rouge_L_model_1 = 0

# Iterate articles and reference titles in lockstep rather than by index.
for article_seq, title_seq in zip(x_val_random, y_val_random):
    original_title = convert_sequence_to_title_model_1(title_seq)
    # The decoder is called with a single-row batch: (1, max_len_full_article).
    predicted_title = decode_sequence_model_1(
        article_seq.reshape(1, max_len_full_article)
    )
    # Argument order is (reference, prediction).
    rouge_score = scorer.score(original_title, predicted_title)
    f_measure_rouge_1_model_1 += rouge_score['rouge1'].fmeasure
    f_measure_rouge_L_model_1 += rouge_score['rougeL'].fmeasure

In [None]:
# Average F-measures for model 1. Divide by the number of evaluated samples
# instead of a hard-coded 1000 so the mean stays correct if the sample size
# is ever changed.
print(f_measure_rouge_1_model_1 / len(x_val_random))
print(f_measure_rouge_L_model_1 / len(x_val_random))

## Evaluate model 2

In [None]:
# Running sums of the ROUGE-1 and ROUGE-L scores of model 2 over the sampled
# validation pairs. Besides the F-measure, precision and recall are summed as
# well (no cell visible in this file prints those two).
f_measure_rouge_1_model_2 = 0
f_measure_rouge_L_model_2 = 0

precision_rouge_1_model_2 = 0
precision_rouge_L_model_2 = 0

recall_rouge_1_model_2 = 0
recall_rouge_L_model_2 = 0

# Iterate articles and reference titles in lockstep rather than by index.
for article_seq, title_seq in zip(x_val_random, y_val_random):
    original_title = convert_sequence_to_title_model_2(title_seq)
    # The decoder is called with a single-row batch: (1, max_len_full_article).
    predicted_title = decode_sequence_model_2(
        article_seq.reshape(1, max_len_full_article)
    )
    # Argument order is (reference, prediction).
    rouge_score = scorer.score(original_title, predicted_title)

    f_measure_rouge_1_model_2 += rouge_score['rouge1'].fmeasure
    f_measure_rouge_L_model_2 += rouge_score['rougeL'].fmeasure

    precision_rouge_1_model_2 += rouge_score['rouge1'].precision
    precision_rouge_L_model_2 += rouge_score['rougeL'].precision

    recall_rouge_1_model_2 += rouge_score['rouge1'].recall
    recall_rouge_L_model_2 += rouge_score['rougeL'].recall

In [None]:
# Average F-measures for model 2. Divide by the number of evaluated samples
# instead of a hard-coded 1000 so the mean stays correct if the sample size
# is ever changed.
print(f_measure_rouge_1_model_2 / len(x_val_random))
print(f_measure_rouge_L_model_2 / len(x_val_random))

## Evaluate Model 3

Models 3 and 4 use a fixed source-sequence length of 512, instead of 516 as in the two models above
(this was a mistake on my part that is now too late to fix).

In [None]:
# Load the arrays stored under model 3's sub-folder and draw the evaluation
# sample that the model 3 and model 4 cells below iterate over.
path_to_data = "train_val_nd_array"
model_3_name = "glove_bi_lstm_encoder_lstm_decoder"
# train set and validate sets are the same for all models
x_train = np.load("{}/{}/x_train.npy".format(path_to_data, model_3_name))
y_train = np.load("{}/{}/y_train.npy".format(path_to_data, model_3_name))
x_validate = np.load("{}/{}/x_validate.npy".format(path_to_data, model_3_name))
y_validate = np.load("{}/{}/y_validate.npy".format(path_to_data, model_3_name))

# Re-seed with the value used for the first sampling cell (119). Without this,
# the draw below continues the global RNG stream and selects a different set
# of rows than the one models 1 and 2 were scored on, which makes the scores
# of the four models not directly comparable.
# NOTE(review): the indices coincide only if this validate array has the same
# number of rows as the one loaded for models 1 and 2 -- confirm.
np.random.seed(119)
rand_idx = np.random.choice(x_validate.shape[0], 1000)
x_val_random = x_validate[rand_idx]
y_val_random = y_validate[rand_idx]

In [None]:
# Running sums of the ROUGE-1 and ROUGE-L F-measures of model 3 over the
# sampled validation pairs; they are turned into averages when printed.
f_measure_rouge_1_model_3 = 0
f_measure_rouge_L_model_3 = 0

# Iterate articles and reference titles in lockstep rather than by index.
for article_seq, title_seq in zip(x_val_random, y_val_random):
    original_title = convert_sequence_to_title_model_3(title_seq)
    # Single-row batch; the source length for this model is 512, not 516.
    predicted_title = decode_sequence_model_3(article_seq.reshape(1, 512))
    # Argument order is (reference, prediction).
    rouge_score = scorer.score(original_title, predicted_title)
    f_measure_rouge_1_model_3 += rouge_score['rouge1'].fmeasure
    f_measure_rouge_L_model_3 += rouge_score['rougeL'].fmeasure
  

In [None]:
# Average F-measures for model 3. Divide by the number of evaluated samples
# instead of a hard-coded 1000 so the mean stays correct if the sample size
# is ever changed.
print(f_measure_rouge_1_model_3 / len(x_val_random))
print(f_measure_rouge_L_model_3 / len(x_val_random))

In [None]:
# Evaluate model 4: running sums of its ROUGE-1 and ROUGE-L F-measures.
# x_val_random / y_val_random are not re-drawn here, so this loop runs over
# the same sample that was used for model 3.
f_measure_rouge_1_model_4 = 0
f_measure_rouge_L_model_4 = 0

# Iterate articles and reference titles in lockstep rather than by index.
for article_seq, title_seq in zip(x_val_random, y_val_random):
    original_title = convert_sequence_to_title_model_4(title_seq)
    # Single-row batch; the source length for this model is 512, not 516.
    predicted_title = decode_sequence_model_4(article_seq.reshape(1, 512))
    # Argument order is (reference, prediction).
    rouge_score = scorer.score(original_title, predicted_title)
    f_measure_rouge_1_model_4 += rouge_score['rouge1'].fmeasure
    f_measure_rouge_L_model_4 += rouge_score['rougeL'].fmeasure

In [None]:
# Average F-measures for model 4. Divide by the number of evaluated samples
# instead of a hard-coded 1000 so the mean stays correct if the sample size
# is ever changed.
print(f_measure_rouge_1_model_4 / len(x_val_random))
print(f_measure_rouge_L_model_4 / len(x_val_random))