# **TEXT SUMMARIZATION :**
# **Amazon Reviews Summarization using BART model**  
Created on : 13th Oct 2025

---



---



# **Load BART Model**

**Releasing the Hugging Face and GPU caches**

In [None]:
import gc
import torch

gc.collect()
torch.cuda.empty_cache()

In [None]:
!rm -rf ~/.cache/huggingface
!rm -rf ~/.cache/torch

**Loading the BART model and tokenizer (bart-large) from the transformers library**

In [None]:
from transformers import BartForConditionalGeneration, BartTokenizer

In [None]:
# Load the pretrained facebook/bart-large checkpoint and place the model on the CPU
model=BartForConditionalGeneration.from_pretrained("facebook/bart-large",device_map=None).to("cpu")

In [None]:
print(type(model))

In [None]:
tokenizer=BartTokenizer.from_pretrained("facebook/bart-large")

# **Load Dataset**

**Loading Amazon Reviews Dataset, using datasets library**

In [None]:
from datasets import load_dataset,Dataset

In [None]:
dataset = load_dataset("csv", data_files="file1.csv")["train"]

In [None]:
def remove_nulls(example):
    """Return True when both the "Text" and "Summary" fields of a row are set.

    Used as a filter predicate: rows for which this returns False are dropped.
    """
    if example["Text"] is None:
        return False
    return example["Summary"] is not None

# Keep only the rows for which remove_nulls() returns True
dataset = dataset.filter(remove_nulls)

# Round-trip through pandas: drop rows that repeat the same (Text, Summary)
# pair first, then any rows still holding NaN in either column
df = (
    dataset.to_pandas()
    .drop_duplicates(subset=["Text", "Summary"])
    .dropna(subset=["Text", "Summary"])
)

dataset = Dataset.from_pandas(df)

In [None]:
# Shuffle deterministically, then keep at most 20,000 rows for this experiment.
# Capping at len(dataset) avoids an out-of-range selection on smaller CSV files.
dataset = dataset.shuffle(seed=42)
dataset = dataset.select(range(min(20000, len(dataset))))

In [None]:
print(dataset)

In [None]:
# Keep only the two columns the model needs. Deriving the drop list from the
# columns actually present avoids an error when the CSV lacks one of the
# metadata columns (for example 'Unnamed: 0' or the pandas index column).
data=dataset.remove_columns([col for col in dataset.column_names if col not in ("Text", "Summary")])

In [None]:
print(data)

In [None]:
# 70% train, then the remaining 30% is halved into eval and test (15% each).
# A fixed seed makes the three splits reproducible across runs.
split_data=data.train_test_split(test_size=0.3, seed=42)
eval_split=split_data['test'].train_test_split(test_size=0.5, seed=42)

train_df=split_data['train']
test_df=eval_split['test']
eval_df=eval_split['train']

In [None]:
print(train_df)

In [None]:
print(test_df)

In [None]:
print(eval_df)

In [None]:
import matplotlib.pyplot as plt

# Number of samples per split, keyed by the label shown on the x-axis
split_counts = {"Train": len(train_df), "Eval": len(eval_df), "Test": len(test_df)}
labels = list(split_counts)
sizes = list(split_counts.values())

# One bar per split
plt.bar(labels, sizes)
plt.ylabel("Number of Samples")
plt.title("Dataset Split Distribution")
plt.show()

# **Tokenizing Model Inputs**

**Tokenizing  train_df,  eval_df  to load into model**

In [None]:
def preprocess_function(examples):
    """Tokenize a batch of reviews and their reference summaries.

    Args:
        examples: Batch mapping with "Text" and "Summary" columns
            (each a list of values).

    Returns:
        The tokenizer output for the texts (padded/truncated to 512 tokens)
        with an added "labels" entry holding the summary token ids
        (padded/truncated to 128 tokens). Padding positions in the labels
        are replaced by -100.
    """
    # Convert 'Text' and 'Summary' to strings to handle potential non-string values
    texts = [str(text) for text in examples["Text"]]
    summaries = [str(summary) for summary in examples["Summary"]]

    model_inputs = tokenizer(
        texts,
        truncation=True,
        padding="max_length",
        max_length=512
    )
    labels = tokenizer(
        summaries,
        truncation=True,
        padding="max_length",
        max_length=128
    )

    # Padding must not contribute to the training loss: -100 is the label
    # value the loss function ignores, so swap every pad token id for it.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [token_id if token_id != pad_id else -100 for token_id in label_ids]
        for label_ids in labels["input_ids"]
    ]
    return model_inputs

tokenized_datasets = train_df.map(preprocess_function, batched=True)

In [None]:
tokenized_eval=eval_df.map(preprocess_function, batched=True)

In [None]:
# Expose the same three columns as PyTorch tensors on both tokenized splits
for tokenized_split in (tokenized_datasets, tokenized_eval):
    tokenized_split.set_format(
        type="torch",
        columns=["input_ids", "attention_mask", "labels"],
    )

In [None]:
print(type(tokenized_datasets))
print(type(tokenized_eval))

# **Training the model**

In [None]:
import os
os.environ["WANDB_DISABLED"] = "true"

In [None]:
from transformers import Trainer, TrainingArguments

In [None]:
# Fine-tuning setup. Validation runs at the end of every epoch so that
# eval_loss is recorded in trainer.state.log_history during training
# (without an evaluation strategy no validation loss is logged at all).
trainer = Trainer(
    model=model,
    args=TrainingArguments(
      output_dir="./results",
      per_device_train_batch_size=8,
      gradient_accumulation_steps=2,  # 8 x 2 = 16 samples per optimizer step and device
      fp16=torch.cuda.is_available(),  # mixed precision only when a CUDA device exists
      dataloader_num_workers=2,
      eval_strategy="epoch",
      save_steps=1000,
      save_total_limit=2,
      report_to="none",
      num_train_epochs=3,
      logging_steps=200,
    ),
    train_dataset=tokenized_datasets,
    eval_dataset=tokenized_eval
)

In [None]:
import torch
torch.cuda.is_available()

In [None]:
trainer.train()

*If training needs to be restarted:  
first remove the **results** folder (**!rm -rf ./results**),  
then clear the GPU and Hugging Face caches.*

In [None]:
#!rm -rf ./results

**Evaluating Trained Model on eval_df and test_df**

In [None]:
metrics=trainer.evaluate()
print(metrics)

In [None]:
# Score the held-out test split. A distinct metric prefix ("test_loss", ...)
# keeps these numbers from being recorded as one more "eval_loss" entry in
# trainer.state.log_history, which the loss plot reads as validation loss.
tokenized_test = test_df.map(preprocess_function, batched=True)
metrics=trainer.evaluate(tokenized_test, metric_key_prefix="test")
print(metrics)

In [None]:
import matplotlib.pyplot as plt

# Pull the logged losses out of the Trainer's history. Every entry the Trainer
# logs carries the global "step" it was recorded at; that is the shared x-axis.
train_losses = trainer.state.log_history
train_logs = [x for x in train_losses if 'loss' in x]
eval_logs = [x for x in train_losses if 'eval_loss' in x]
train_loss = [x['loss'] for x in train_logs]
eval_loss = [x['eval_loss'] for x in eval_logs]

# Training and validation losses are logged at different frequencies, so each
# series is plotted against its own step values rather than its list position.
plt.plot([x['step'] for x in train_logs], train_loss, label='Training Loss')
plt.plot([x['step'] for x in eval_logs], eval_loss, marker='o', label='Validation Loss')
plt.legend()
plt.title('Training vs Validation Loss Curve')
plt.xlabel('Training Step')
plt.ylabel('Loss')
plt.show()


In [None]:
len(train_loss), len(eval_loss)

**Saving the trained Model in Drive**

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
save_path = "/content/drive/MyDrive/V10_final_model"
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

In [None]:
import json
# Persist the Trainer's full log history as JSON inside the saved-model folder
metrics_path = "/content/drive/MyDrive/V10_final_model/training_metrics.json"
with open(metrics_path, "w") as metrics_file:
    json.dump(trainer.state.log_history, metrics_file)

In [None]:
from google.colab import drive
drive.flush_and_unmount()

# **Model Testing using Evaluation Metrics**

**Load Model from Drive**

In [None]:
import gc
import torch

gc.collect()
torch.cuda.empty_cache()

In [None]:
!rm -rf ~/.cache/huggingface
!rm -rf ~/.cache/torch

In [None]:
from transformers import BartForConditionalGeneration, BartTokenizer

In [None]:
from google.colab import drive
drive.mount('/content/drive')

In [None]:
model=BartForConditionalGeneration.from_pretrained("/content/drive/MyDrive/V10_final_model")

In [None]:
tokenizer=BartTokenizer.from_pretrained("/content/drive/MyDrive/V10_final_model")

**Load Testing Dataset**

In [None]:
from datasets import load_dataset,Dataset

In [None]:
dataset = load_dataset("csv", data_files="file1.csv")["train"]

In [None]:
def remove_nulls(example):
    """Filter predicate: True only if neither "Text" nor "Summary" is None."""
    return not (example["Text"] is None or example["Summary"] is None)

# Drop rows whose Text or Summary is None
dataset = dataset.filter(remove_nulls)

# Via pandas: remove repeated (Text, Summary) pairs, then rows with NaN
# remaining in either of the two columns
df = (
    dataset.to_pandas()
    .drop_duplicates(subset=["Text", "Summary"])
    .dropna(subset=["Text", "Summary"])
)

dataset = Dataset.from_pandas(df)

In [None]:
# The first 20,000 rows of this same seed-42 shuffle are the pool that was
# split into train/eval/test for fine-tuning, so evaluating on rows 0-199 would
# score the model largely on reviews it was trained on. Take the 200 test
# reviews from beyond that pool instead.
TRAIN_POOL_SIZE = 20000
NUM_TEST_SAMPLES = 200

dataset = dataset.shuffle(seed=42)
test_end = min(TRAIN_POOL_SIZE + NUM_TEST_SAMPLES, len(dataset))
if test_end <= TRAIN_POOL_SIZE:
    raise ValueError(
        f"Dataset has only {len(dataset)} rows; no rows are left outside the "
        f"{TRAIN_POOL_SIZE}-row training pool to evaluate on."
    )
dataset = dataset.select(range(TRAIN_POOL_SIZE, test_end))

**Tokenizing Inputs for Testing**

In [None]:
# Collect the review texts; an entry that is a list is replaced by its first element
texts = [t[0] if isinstance(t, list) else t for t in dataset["Text"]]

In [None]:
# Tokenize all test reviews in one call, padded to the longest review.
# max_length=512 matches the truncation length used for the training inputs,
# so the model is evaluated on inputs no longer than those it was tuned on.
inputs = tokenizer(
    texts,
    truncation=True,
    max_length=512,
    padding=True,
    return_tensors="pt"
)

**Generating Summaries for Testing in Batches**

In [None]:
from tqdm import tqdm
import torch

preds = []
batch_size = 8

# Use the GPU when one is present; otherwise fall back to the CPU instead of
# failing on a hard-coded "cuda" device
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()

input_ids = inputs["input_ids"]
attention_mask = inputs["attention_mask"]

# Generate in slices of batch_size rows so only one batch sits on the device at a time
for i in tqdm(range(0, len(input_ids), batch_size), desc="Generating summaries"):
    batch_input_ids = input_ids[i:i+batch_size].to(device)
    batch_attention_mask = attention_mask[i:i+batch_size].to(device)

    # No gradients are needed for inference
    with torch.no_grad():
        outputs = model.generate(
            input_ids=batch_input_ids,
            attention_mask=batch_attention_mask,
            max_length=128,
            num_beams=4,
            early_stopping=True
        )

    # Decode the generated token ids back to text, dropping special tokens
    preds.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))

torch.cuda.empty_cache()

In [None]:
print(len(preds))

**Calculating BERTScore and ROUGE scores**

In [None]:
pip install evaluate

In [None]:
pip install rouge_score

In [None]:
pip install bert_score

In [None]:
from evaluate import load

import numpy as np

# Ground-truth summaries taken from the dataset's "Summary" column
refs = dataset["Summary"]

# Load both metrics, then score the generated summaries against the references
rouge = load("rouge")
bertscore = load("bertscore")
bertscore_results = bertscore.compute(predictions=preds, references=refs, lang="en")
rouge_results = rouge.compute(predictions=preds, references=refs)
print("ROUGE:", rouge_results)

# BERTScore yields one value per sample for each key; report the mean of each
for heading, key in (
    ("Precision: ", "precision"),
    ("Recall:    ", "recall"),
    ("F1:        ", "f1"),
):
    print(f"Average BERTScore {heading}{np.mean(bertscore_results[key]):.4f}")

In [None]:
# This evaluation section re-imports its other dependencies so it can run in a
# fresh session; matplotlib has to be imported here too or `plt` is undefined.
import matplotlib.pyplot as plt

# Headline scores: the three ROUGE variants plus the mean per-sample BERTScore F1
metrics = {
    "ROUGE-1": rouge_results["rouge1"],
    "ROUGE-2": rouge_results["rouge2"],
    "ROUGE-L": rouge_results["rougeL"],
    "BERTScore-F1": sum(bertscore_results["f1"]) / len(bertscore_results["f1"])
}

plt.bar(list(metrics.keys()), list(metrics.values()))
plt.title("Evaluation Metrics")
plt.ylabel("Score")
plt.show()

In [None]:
# Consistency of Model

In [None]:
# Imported here so this cell also works when the evaluation section is run in
# a fresh session, where matplotlib has not been imported yet.
import matplotlib.pyplot as plt

# Spread of the per-sample BERTScore F1 values, in 20 bins
plt.hist(bertscore_results["f1"], bins=20)
plt.title("Distribution of BERTScore F1 across samples")
plt.xlabel("BERT F1 Score")
plt.ylabel("Count")
plt.show()