Let's first install and import all the necessary dependencies.

In [1]:
!pip install rouge_score
!pip install textstat
import os
import re
import pandas as pd
from transformers import BartForConditionalGeneration, BartTokenizer
import torch
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
import textstat

Collecting rouge_score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge_score
  Building wheel for rouge_score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge_score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=49e3f970671089b3398eb9d3d7c51483cac35efd06c2a2abe284e468e67758aa
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge_score
Installing collected packages: rouge_score
Successfully installed rouge_score-0.1.2
Collecting textstat
  Downloading textstat-0.7.4-py3-none-any.whl.metadata (14 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.16.0-py3-none-any.whl.metadata (3.2 kB)
Downloading textstat-0.7.4-py3-none-any.whl (105 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.1/105.1 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-

Let's write a function to load and read the contents of the .txt files

In [2]:
def load_data(data_folder):
    """Read every ``.txt`` file located directly inside ``data_folder``.

    Files are visited in sorted filename order so that the returned list is
    identical on every run and platform; ``os.listdir`` on its own returns
    entries in arbitrary order, which would make downstream per-document
    results non-reproducible.

    Args:
        data_folder: Path of the directory to scan. Sub-directories are not
            searched.

    Returns:
        A list of strings, one per ``.txt`` file, each holding the file's
        full contents decoded as UTF-8, ordered by filename.

    Raises:
        OSError: If ``data_folder`` cannot be listed or a file cannot be read
            (propagated from ``os.listdir`` / ``open``).
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    texts = []

    for filename in sorted(os.listdir(data_folder)):
        if not filename.endswith(".txt"):
            continue

        path = os.path.join(data_folder, filename)
        # A directory may also be named "something.txt"; opening it would fail.
        if not os.path.isfile(path):
            continue

        with open(path, 'r', encoding='utf-8') as file:
            texts.append(file.read())

    return texts

Let's load the text files.

In [3]:
# Folder containing the .txt documents to summarize. This is a relative path,
# so it is resolved against the notebook's current working directory.
data_folder = 'DataSet'

# Read every .txt file in that folder; `input_texts` holds one string per
# document and is reused by the summarization and evaluation cells below.
input_texts = load_data(data_folder)

Now let's load the BART model and its tokenizer, and summarize each text.

In [4]:
# Hugging Face Hub identifier of the pre-trained BART checkpoint used for
# summarization. The tokenizer and the model are both loaded from this one
# name so that they stay in sync.
bart_model_name = "facebook/bart-large-cnn"

# Tokenizer that converts raw text to token IDs (and back) for this checkpoint.
# Its files are fetched from the Hub when they are not available locally.
bart_tokenizer = BartTokenizer.from_pretrained(bart_model_name)

# Sequence-to-sequence BART model with a generation head; `summarize_text`
# below calls its `generate` method to produce summaries.
bart_model = BartForConditionalGeneration.from_pretrained(bart_model_name)

# Define a function to summarize text using the BART model
def summarize_text(text, max_length=130, min_length=30, num_beams=4, max_input_length=1024):
    """Summarize a single document with the module-level BART model.

    Args:
        text: The document to summarize, as one string.
        max_length: Upper bound on the generated summary length, in tokens.
        min_length: Lower bound on the generated summary length, in tokens.
        num_beams: Number of beams used by beam search.
        max_input_length: Inputs longer than this many tokens are truncated
            before being passed to the model, so text beyond the cut-off
            does not influence the summary.

    Returns:
        The generated summary as a plain string with special tokens removed.
    """
    # Tokenize as a batch of one and place the tensors on whatever device the
    # model currently lives on, so the function keeps working if the model is
    # moved to a GPU.
    inputs = bart_tokenizer(
        [text],
        max_length=max_input_length,
        truncation=True,
        return_tensors="pt",
    ).to(bart_model.device)

    # Pass the attention mask explicitly instead of letting `generate` infer
    # it from the input IDs.
    summary_ids = bart_model.generate(
        inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        num_beams=num_beams,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        early_stopping=True
    )

    # Only one sequence was submitted, so the first row is the summary.
    summary = bart_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return summary

# Run the summarizer over each loaded document, in order, and collect the results
summarized_texts = list(map(summarize_text, input_texts))

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Let's write functions to evaluate the model's summaries (BLEU, ROUGE, and readability).

In [5]:
def evaluate_metrics(original_texts, summarized_texts):
    """Compute average BLEU and ROUGE scores of summaries against their originals.

    Each summary is scored against the text at the same position in
    ``original_texts``, which is used as the single reference.

    Args:
        original_texts: Iterable of reference strings.
        summarized_texts: Iterable of candidate summary strings, aligned
            one-to-one with ``original_texts``.

    Returns:
        A tuple ``(avg_bleu, avg_rouge)`` where ``avg_bleu`` is the mean
        sentence-level BLEU score and ``avg_rouge`` is a dict mapping
        ``'rouge1'`` and ``'rougeL'`` to their mean F-measure.

    Raises:
        ValueError: If the two inputs differ in length (``zip`` would
            otherwise silently drop the unmatched tail) or are empty (the
            averages would be undefined).
    """
    # Materialize so lengths can be validated for any iterable.
    original_texts = list(original_texts)
    summarized_texts = list(summarized_texts)

    if len(original_texts) != len(summarized_texts):
        raise ValueError(
            f"original_texts and summarized_texts must have the same length "
            f"(got {len(original_texts)} and {len(summarized_texts)})."
        )
    if not original_texts:
        raise ValueError("Cannot evaluate metrics on an empty list of texts.")

    # ROUGE-1 and ROUGE-L, with stemming applied before matching.
    rouge = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

    bleu_scores = []
    rouge_scores = []

    for orig, summ in zip(original_texts, summarized_texts):
        # BLEU works on whitespace-split tokens; the original text is the only reference.
        bleu_scores.append(sentence_bleu([orig.split()], summ.split()))

        # rouge.score takes (target, prediction).
        rouge_scores.append(rouge.score(orig, summ))

    count = len(bleu_scores)
    avg_bleu = sum(bleu_scores) / count

    # Average the F-measure of every ROUGE variant across all pairs.
    avg_rouge = {
        key: sum(score[key].fmeasure for score in rouge_scores) / count
        for key in rouge_scores[0]
    }

    return avg_bleu, avg_rouge

def evaluate_readability(summarized_texts):
    """Compute the average Flesch Reading Ease score of the given texts.

    Args:
        summarized_texts: Iterable of strings to score.

    Returns:
        The mean of ``textstat.flesch_reading_ease`` over all texts.

    Raises:
        ValueError: If ``summarized_texts`` is empty (the average would be
            undefined).
    """
    # Materialize so emptiness can be checked for any iterable.
    summarized_texts = list(summarized_texts)
    if not summarized_texts:
        raise ValueError("Cannot evaluate readability on an empty list of texts.")

    readability_scores = [textstat.flesch_reading_ease(text) for text in summarized_texts]

    return sum(readability_scores) / len(readability_scores)

Now let's evaluate the model and calculate the metrics.

In [6]:
# Score each generated summary against its own source document. The source
# texts are the references here, so BLEU/ROUGE measure overlap with the
# original documents.
avg_bleu, avg_rouge = evaluate_metrics(input_texts, summarized_texts)

# Mean sentence-level BLEU across all documents
print(f"Average BLEU Score: {avg_bleu}")

# Mean ROUGE F-measures, as a dict keyed by ROUGE variant
print(f"Average ROUGE Score: {avg_rouge}")

# Readability is computed from the summaries alone; the originals are not used
avg_readability = evaluate_readability(summarized_texts)

# Mean readability score across all summaries
print(f"Average Readability Score: {avg_readability}")

Average BLEU Score: 0.008720311454902591
Average ROUGE Score: {'rouge1': 0.09170449768308458, 'rougeL': 0.08785116593068928}
Average Readability Score: 45.68344827586206
