<a href="https://colab.research.google.com/github/tamaskecskemeti/nlp_thesis/blob/main/Large_Language_Models_based_Automatic_Text_Summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install huggingface_hub
!pip install datasets
!pip install evaluate
!pip install rouge_score
!pip install bert_score
!pip install meteor_score

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.1.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (1

In [2]:
from pathlib import Path
import torch
import itertools
import random
from huggingface_hub import login
from datasets import Dataset, load_dataset
import evaluate
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model

In [3]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"

In [4]:
hf_token = "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"
login(token=hf_token,add_to_git_credential=True)

In [5]:
random.seed(42)

In [6]:
df = pd.read_csv("news_and_summaries.csv", sep=',')

In [7]:
df.head()

Unnamed: 0,text,summary
0,Labour plans maternity pay rise\n\nMaternity p...,She said her party would boost maternity pay i...
1,Watchdog probes e-mail deletions\n\nThe inform...,All e-mails are subject to the freedom of info...
2,Hewitt decries 'career sexism'\n\nPlans to ext...,Ms Hewitt also announced a new drive to help w...
3,Labour chooses Manchester\n\nThe Labour Party ...,The Labour Party will hold its 2006 autumn con...
4,Brown ally rejects Budget spree\n\nChancellor ...,"But Mr Balls, a prospective Labour MP, said he..."


In [8]:
dataset = Dataset.from_pandas(df)

In [9]:
# Perform the 80-20 train-test split
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
test_dataset = train_test_split['test']

In [10]:
model_name = "facebook/bart-large-cnn"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.58k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

In [11]:
# Function to generate summary
def generate_summary(text):
    inputs = tokenizer("summarize: " + text, return_tensors="pt", max_length=512, truncation=True).to(device)
    summary_ids = model.generate(inputs['input_ids'], max_length=150, min_length=10, length_penalty=2.0, num_beams=4, early_stopping=True).to(device)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Apply the summarization function on the test set
test_summaries = [generate_summary(text) for text in test_dataset['text']]

In [12]:
# Load the necessary metrics
rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")
meteor = evaluate.load("meteor")

Downloading builder script:   0%|          | 0.00/6.27k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/5.94k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/1.55k [00:00<?, ?B/s]

Downloading extra modules:   0%|          | 0.00/3.34k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.02k [00:00<?, ?B/s]

[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [13]:
# Compute ROUGE
rouge_score = rouge.compute(predictions=test_summaries, references=test_dataset['summary'])
print("ROUGE Score:", rouge_score)

ROUGE Score: {'rouge1': 0.2124526107738947, 'rouge2': 0.14132093280255006, 'rougeL': 0.16198730004877715, 'rougeLsum': 0.16243902000859414}


In [14]:
# Compute BLEU
bleu_predictions = [summary for summary in test_summaries]
bleu_references = [[ref] for ref in test_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

BLEU Score: {'bleu': 0.0015607417680619175, 'precisions': [0.7094755661501788, 0.4731051344743276, 0.3996235884567127, 0.3592139175257732], 'brevity_penalty': 0.0033312421986585916, 'length_ratio': 0.14915555555555557, 'translation_length': 3356, 'reference_length': 22500}


In [15]:
# Compute METEOR
meteor_score = meteor.compute(predictions=test_summaries, references=test_dataset['summary'])
print("METEOR Score:", meteor_score)

METEOR Score: {'meteor': 0.11578474633228335}


In [16]:
test_dataset[0]["text"]

'Goldsmith denies war advice claim\n\nThe attorney general has denied his statement to Parliament about the legality of the Iraq war was drafted by Downing Street officials.\n\nLord Goldsmith said Lord Falconer and Baroness Morgan played no part in drafting the answer. He added the answer represented his view that the war was legal, but was not a summary of his advice to the PM. The government has resisted calls to publish the full advice, saying such papers are always kept confidential.\n\nIn a statement, Lord Goldsmith said: "I was fully involved throughout the drafting process and personally finalised, and of course approved, the answer." He said the answer had been prepared in his office with the involvement of Solicitor General Harriet Harman, two of his own officials, three Foreign Office officials, a QC, Christopher Greenwood and the then Lord Chancellor, Lord Irvine of Lairg.\n\n"No other minister or official was involved in any way." He suggested the claim that Lord Falconer a

In [17]:
test_dataset[0]["summary"]

'Former minister Clare Short, who resigned from the government over the Iraq war, said the ministerial answer was the same statement that was earlier shown to the cabinet as it discussed military action.Former foreign secretary Robin Cook said Lord Goldsmith\'s admission that his parliamentary answer was not a summary of his legal opinion suggested Parliament may have been misled.In a statement, Lord Goldsmith said: "I was fully involved throughout the drafting process and personally finalised, and of course approved, the answer."He added the answer represented his view that the war was legal, but was not a summary of his advice to the PM.Lord Goldsmith said Lord Falconer and Baroness Morgan played no part in drafting the answer."The attorney general may never have presented his answer as a summary, but others certainly did," he said."If his original advice of 7 March accepted that military action might be illegal, how was it that he resolved any such doubts by the time the Parliamenta

In [18]:
def preprocess_data(examples):
    inputs = ["summarize: " + doc for doc in examples['text']]
    model_inputs = tokenizer(inputs, max_length=512, truncation=True, padding="max_length")

    # Tokenize summaries
    labels = tokenizer(examples['summary'], max_length=128, truncation=True, padding="max_length")
    model_inputs["labels"] = labels["input_ids"]

    # Ensure padding tokens are ignored in the loss calculation
    model_inputs["labels"] = [
        [(label if label != tokenizer.pad_token_id else -100) for label in labels]
        for labels in model_inputs["labels"]
    ]
    return model_inputs

# Tokenize dataset
tokenized_dataset = dataset.map(preprocess_data, batched=True)
tokenized_dataset

Map:   0%|          | 0/417 [00:00<?, ? examples/s]

Dataset({
    features: ['text', 'summary', 'input_ids', 'attention_mask', 'labels'],
    num_rows: 417
})

In [19]:
train_test_split = tokenized_dataset.train_test_split(test_size=0.2, seed=42)

In [20]:
train_dataset = train_test_split['train']
validation_dataset = train_test_split['test']

In [21]:
from transformers import Trainer
from transformers import TrainingArguments
from peft import LoraConfig, get_peft_model

learning_rates = [1e-05, 2e-05, 3e-05]
batch_sizes = [4, 8]
combinations = [(lr, bs) for lr in learning_rates for bs in batch_sizes]

model_name = "facebook/bart-large-cnn"

for lr, bs in combinations:
  tokenizer = AutoTokenizer.from_pretrained(model_name)
  tokenizer.save_pretrained(f'./lora_finetune_results_{lr}_{bs}')
  lora_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1)
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
  model = get_peft_model(model, lora_config)
  model.save_pretrained(f'./lora_finetune_results_{lr}_{bs}')
  training_args = TrainingArguments(
      output_dir=f'./lora_finetune_results_{lr}_{bs}',
      evaluation_strategy="epoch",
      learning_rate=lr,
      per_device_train_batch_size=bs,
      per_device_eval_batch_size=4,
      num_train_epochs=4,
      weight_decay=0.01,
      save_strategy="epoch",
      remove_unused_columns=False
  )

  trainer = Trainer(
      model=model.to(device),
      args=training_args,
      train_dataset=train_dataset,
      eval_dataset=validation_dataset
  )

  # Start training
  trainer.train()
  trainer.save_model()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log




Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log




Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log




Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log




Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log




Epoch,Training Loss,Validation Loss
1,No log,No log
2,No log,No log
3,No log,No log
4,No log,No log


In [22]:
# Compute ROUGE
tokenizer_trained = AutoTokenizer.from_pretrained("lora_finetune_results_1e-05_4")
model_trained = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_1e-05_4").to(device)

# Generate summaries for validation set
val_summaries = []
model_trained.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_trained.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_trained.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

rouge_score = rouge.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("ROUGE Score:", rouge_score)

# Compute BLEU
bleu_predictions = [summary for summary in val_summaries]
bleu_references = [[ref] for ref in validation_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

# Compute METEOR
meteor_score = meteor.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("METEOR Score:", meteor_score)

ROUGE Score: {'rouge1': 0.27452358899355556, 'rouge2': 0.19927492044124842, 'rougeL': 0.21072897433136673, 'rougeLsum': 0.21165630313419959}
BLEU Score: {'bleu': 0.009433927899815293, 'precisions': [0.7515865820489573, 0.5614602587800369, 0.5103675777568332, 0.48701923076923076], 'brevity_penalty': 0.016577183542115667, 'length_ratio': 0.1960888888888889, 'translation_length': 4412, 'reference_length': 22500}
METEOR Score: {'meteor': 0.15804138008975885}


In [28]:
# Compute ROUGE
tokenizer_trained = AutoTokenizer.from_pretrained("lora_finetune_results_1e-05_8")
model_trained = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_1e-05_8").to(device)

# Generate summaries for validation set
val_summaries = []
model_trained.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_trained.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_trained.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

rouge_score = rouge.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("ROUGE Score:", rouge_score)

# Compute BLEU
bleu_predictions = [summary for summary in val_summaries]
bleu_references = [[ref] for ref in validation_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

# Compute METEOR
meteor_score = meteor.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("METEOR Score:", meteor_score)

ROUGE Score: {'rouge1': 0.22881060108050705, 'rouge2': 0.15367080757461515, 'rougeL': 0.17981481049893389, 'rougeLsum': 0.179964003917074}
BLEU Score: {'bleu': 0.0022103517021075615, 'precisions': [0.7361031518624642, 0.5093951849677041, 0.446417820590006, 0.41352686843730696], 'brevity_penalty': 0.004309249999647986, 'length_ratio': 0.15511111111111112, 'translation_length': 3490, 'reference_length': 22500}
METEOR Score: {'meteor': 0.12532450894899735}


In [29]:
# Compute ROUGE
tokenizer_trained = AutoTokenizer.from_pretrained("lora_finetune_results_2e-05_4")
model_trained = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_2e-05_4").to(device)

# Generate summaries for validation set
val_summaries = []
model_trained.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_trained.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_trained.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

rouge_score = rouge.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("ROUGE Score:", rouge_score)

# Compute BLEU
bleu_predictions = [summary for summary in val_summaries]
bleu_references = [[ref] for ref in validation_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

# Compute METEOR
meteor_score = meteor.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("METEOR Score:", meteor_score)

ROUGE Score: {'rouge1': 0.4100865686829611, 'rouge2': 0.29678010829994894, 'rougeL': 0.2860729044639791, 'rougeLsum': 0.2879394960270336}
BLEU Score: {'bleu': 0.09898773283342222, 'precisions': [0.7206536104769915, 0.5356232552494234, 0.4877988963825874, 0.4640069384215091], 'brevity_penalty': 0.18207216551558525, 'length_ratio': 0.36991111111111113, 'translation_length': 8323, 'reference_length': 22500}
METEOR Score: {'meteor': 0.26142665337372406}


In [30]:
# Compute ROUGE
tokenizer_trained = AutoTokenizer.from_pretrained("lora_finetune_results_2e-05_8")
model_trained = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_2e-05_8").to(device)

# Generate summaries for validation set
val_summaries = []
model_trained.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_trained.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_trained.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

rouge_score = rouge.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("ROUGE Score:", rouge_score)

# Compute BLEU
bleu_predictions = [summary for summary in val_summaries]
bleu_references = [[ref] for ref in validation_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

# Compute METEOR
meteor_score = meteor.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("METEOR Score:", meteor_score)

ROUGE Score: {'rouge1': 0.27735539925483504, 'rouge2': 0.20128132169338173, 'rougeL': 0.21366251912532633, 'rougeLsum': 0.21471601750983715}
BLEU Score: {'bleu': 0.010003514107291613, 'precisions': [0.7592176258992805, 0.5705774518790101, 0.5207943925233645, 0.49833174451858914], 'brevity_penalty': 0.01727571989738882, 'length_ratio': 0.1976888888888889, 'translation_length': 4448, 'reference_length': 22500}
METEOR Score: {'meteor': 0.15991628366119132}


In [31]:
# Compute ROUGE
tokenizer_trained = AutoTokenizer.from_pretrained("lora_finetune_results_3e-05_4")
model_trained = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_3e-05_4").to(device)

# Generate summaries for validation set
val_summaries = []
model_trained.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_trained.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_trained.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

rouge_score = rouge.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("ROUGE Score:", rouge_score)

# Compute BLEU
bleu_predictions = [summary for summary in val_summaries]
bleu_references = [[ref] for ref in validation_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

# Compute METEOR
meteor_score = meteor.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("METEOR Score:", meteor_score)

ROUGE Score: {'rouge1': 0.4725369351531599, 'rouge2': 0.3590267718484499, 'rougeL': 0.3199887381131168, 'rougeLsum': 0.32131772486428184}
BLEU Score: {'bleu': 0.14540560135121036, 'precisions': [0.74389466978127, 0.5794943218341547, 0.5314594594594595, 0.5050185468034039], 'brevity_penalty': 0.24931396840843567, 'length_ratio': 0.41857777777777777, 'translation_length': 9418, 'reference_length': 22500}
METEOR Score: {'meteor': 0.3058730517807752}


In [32]:
# Compute ROUGE
tokenizer_trained = AutoTokenizer.from_pretrained("lora_finetune_results_3e-05_8")
model_trained = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_3e-05_8").to(device)

# Generate summaries for validation set
val_summaries = []
model_trained.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_trained.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_trained.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

rouge_score = rouge.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("ROUGE Score:", rouge_score)

# Compute BLEU
bleu_predictions = [summary for summary in val_summaries]
bleu_references = [[ref] for ref in validation_dataset['summary']]

bleu_score = bleu.compute(predictions=bleu_predictions, references=bleu_references)
print("BLEU Score:", bleu_score)

# Compute METEOR
meteor_score = meteor.compute(predictions=val_summaries, references=validation_dataset['summary'])
print("METEOR Score:", meteor_score)

ROUGE Score: {'rouge1': 0.33834919813081077, 'rouge2': 0.23823992508275788, 'rougeL': 0.2447968052880823, 'rougeLsum': 0.24549029805694128}
BLEU Score: {'bleu': 0.039110976623433505, 'precisions': [0.7271260287235759, 0.5306723376410928, 0.48598440869132525, 0.4637510513036165], 'brevity_penalty': 0.07202160817165752, 'length_ratio': 0.27542222222222223, 'translation_length': 6197, 'reference_length': 22500}
METEOR Score: {'meteor': 0.20183396003615628}


In [33]:
# Compute ROUGE
tokenizer_final = AutoTokenizer.from_pretrained("lora_finetune_results_3e-05_4")
model_final = AutoModelForSeq2SeqLM.from_pretrained("lora_finetune_results_3e-05_4").to(device)

# Generate summaries for validation set
val_summaries = []
model_final.eval()  # Set model to evaluation mode

for sample in validation_dataset:
    # Tokenize and move to the same device
    inputs = tokenizer(
        "summarize: " + sample['text'],
        return_tensors="pt",
        max_length=512,
        truncation=True
    ).to(device)

    with torch.no_grad():  # No gradient calculation during inference
        summary_ids = model_final.generate(
            inputs["input_ids"],
            max_length=128,
            min_length=10,
            length_penalty=2.0,
            num_beams=4,
            early_stopping=True
        )
    summary = tokenizer_final.decode(summary_ids[0], skip_special_tokens=True)
    val_summaries.append(summary)

In [None]:
# Function to generate summary
def generate_summary(text):
    inputs = tokenizer("summarize: " + text, return_tensors="pt", max_length=512, truncation=True).to(device)
    summary_ids = model.generate(inputs['input_ids'], max_length=128, min_length=10, length_penalty=2.0, num_beams=4, early_stopping=True).to(device)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Apply the summarization function on the test set
test_summaries = [generate_summary(text) for text in test_dataset['text']]

In [34]:
test_summaries[0]

'Lord Goldsmith said Lord Falconer and Baroness Morgan played no part in drafting the answer. Government has resisted calls to publish the full advice, saying such papers are always kept confidential.'

In [35]:
val_summaries[0]

'Lord Goldsmith said Lord Falconer and Baroness Morgan played no part in drafting the answer. He added the answer represented his view that the war was legal, but was not a summary of his advice to the PM.The attorney general has denied his statement to Parliament about the legality of the Iraq war was drafted by Downing Street officials.The government has resisted calls to publish the full advice, saying such papers are always kept confidential.On the question of whether such papers have always been kept confidential, Tory MP Michael Mates, who is a member of the Commons intelligence and security committee and was part of the Butler inquiry'

In [39]:
df['summary'][183]

'Mr Blair said the statement was a "fair summary" of Lord Goldsmith\'s opinion.On Thursday, Lord Goldsmith said his statement had not been "written by or at Number 10".Former minister Clare Short, who resigned from the government over the Iraq war, said it was the same statement that was earlier shown to the cabinet as it discussed military action.But a short statement about Lord Goldsmith\'s position was presented in a written parliamentary answer on 17 March 2003 - just before a crucial Commons vote on the military action.This is argument was rejected by Mr Blair, who said: "Firstly, we haven\'t broken the precedent, and secondly Peter Goldsmith has made his statement and I have got absolutely nothing to add to it.""That\'s what he (Lord Goldsmith) said and that\'s what I say.Lord Goldsmith has denied being "leaned on" and says the words written were his.In a book published this week, Philippe Sands QC, a member of Cherie Blair\'s Matrix Chambers, says Lord Goldsmith warned Tony Blai