In [None]:


import numpy as np # linear algebra
import pandas as pd # data processing, CSV file


import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [2]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=244e88e858e361facebcf26d14386fc0a80bee547dd1d5058db322c4df1b1a3a
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [3]:
!pip install torchviz graphviz


Collecting torchviz
  Downloading torchviz-0.0.3-py3-none-any.whl.metadata (2.1 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->torchviz)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->torchviz)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->torchviz)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-curand-cu12==10.3.5.147 (from torch->torchviz)
  Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch->torchviz)
  Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch->torchviz)
  Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manyl

In [4]:
import torch
import datasets
from torch.utils.data import Dataset, DataLoader
from transformers import BartTokenizer, BartForConditionalGeneration
from transformers import get_cosine_schedule_with_warmup
from torch.cuda.amp import autocast, GradScaler
from tqdm import tqdm
import pandas as pd
from datasets import load_dataset
from rouge_score import rouge_scorer
import os
from torch.optim import AdamW

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load dataset
dataset = load_dataset("cnn_dailymail", "3.0.0", split="train[:1%]+validation[:1%]")  # Small subset for demo
train_data = dataset.select(range(0, int(0.8 * len(dataset))))
val_data = dataset.select(range(int(0.8 * len(dataset)), len(dataset)))

# Tokenizer
tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")

# Custom Dataset
class CNNDailyMailDataset(Dataset):
    def __init__(self, dataset, tokenizer, max_input_len=512, max_target_len=128):
        self.dataset = dataset
        self.tokenizer = tokenizer
        self.max_input_len = max_input_len
        self.max_target_len = max_target_len

    def __len__(self):
        return len(self.dataset)

    def __getitem__(self, idx):
        article = self.dataset[idx]["article"]
        summary = self.dataset[idx]["highlights"]

        source = self.tokenizer(
            article,
            max_length=self.max_input_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        target = self.tokenizer(
            summary,
            max_length=self.max_target_len,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )

        return {
            "input_ids": source["input_ids"].squeeze(0),
            "attention_mask": source["attention_mask"].squeeze(0),
            "labels": target["input_ids"].squeeze(0)
        }

# Dataloaders
train_dataset = CNNDailyMailDataset(train_data, tokenizer)
val_dataset = CNNDailyMailDataset(val_data, tokenizer)
train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=4)

# Model
model = BartForConditionalGeneration.from_pretrained("facebook/bart-base")
model = model.to(device)

# Training configuration
EPOCHS = 20
STEPS_PER_EPOCH = min(500, len(train_loader))
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 3e-5
WARMUP_STEPS = int(0.1 * EPOCHS * STEPS_PER_EPOCH)
TOTAL_TRAINING_STEPS = EPOCHS * STEPS_PER_EPOCH

optimizer = AdamW(model.parameters(), lr=LEARNING_RATE, weight_decay=0.01)
scheduler = get_cosine_schedule_with_warmup(
    optimizer,
    num_warmup_steps=WARMUP_STEPS,
    num_training_steps=TOTAL_TRAINING_STEPS
)
scaler = GradScaler()

# ROUGE evaluation
def evaluate(model, val_loader, tokenizer):
    model.eval()
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    rouge1, rouge2, rougel = 0, 0, 0
    count = 0

    with torch.no_grad():
        for batch in tqdm(val_loader, desc="Evaluating", leave=False):
            batch = {k: v.to(device) for k, v in batch.items()}
            generated_ids = model.generate(
                input_ids=batch["input_ids"],
                attention_mask=batch["attention_mask"],
                max_length=128,
                num_beams=4,
                early_stopping=True
            )
            preds = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
            targets = tokenizer.batch_decode(batch["labels"], skip_special_tokens=True)

            for pred, target in zip(preds, targets):
                scores = scorer.score(target, pred)
                rouge1 += scores["rouge1"].fmeasure
                rouge2 += scores["rouge2"].fmeasure
                rougel += scores["rougeL"].fmeasure
                count += 1

    return {
        "rouge1": rouge1 / count,
        "rouge2": rouge2 / count,
        "rougeL": rougel / count
    }

# Early stopping
best_loss = float("inf")
patience = 3
patience_counter = 0

# Training loop
for epoch in range(EPOCHS):
    model.train()
    total_loss = 0.0
    optimizer.zero_grad()
    loop = tqdm(train_loader, total=STEPS_PER_EPOCH, desc=f"Epoch {epoch+1}/{EPOCHS}", leave=False)

    for step, batch in enumerate(loop):
        if step >= STEPS_PER_EPOCH:
            break
        batch = {k: v.to(device) for k, v in batch.items()}

        with autocast():
            outputs = model(**batch)
            loss = outputs.loss
            loss = loss / GRADIENT_ACCUMULATION_STEPS

        scaler.scale(loss).backward()

        if (step + 1) % GRADIENT_ACCUMULATION_STEPS == 0:
            scaler.step(optimizer)
            scaler.update()
            scheduler.step()
            optimizer.zero_grad()

        total_loss += loss.item() * GRADIENT_ACCUMULATION_STEPS
        loop.set_postfix(loss=loss.item() * GRADIENT_ACCUMULATION_STEPS)

    avg_train_loss = total_loss / STEPS_PER_EPOCH
    print(f"\n✅ Epoch {epoch+1} completed | Avg Loss: {avg_train_loss:.4f}")

    # Validation
    val_scores = evaluate(model, val_loader, tokenizer)
    print(f"📊 Validation ROUGE Scores - R1: {val_scores['rouge1']:.4f}, R2: {val_scores['rouge2']:.4f}, RL: {val_scores['rougeL']:.4f}")

    # Early stopping
    if avg_train_loss < best_loss:
        best_loss = avg_train_loss
        model.save_pretrained("best_bart_model")
        tokenizer.save_pretrained("best_bart_model")
        patience_counter = 0
        print("💾 Model improved and saved to 'best_bart_model'")
    else:
        patience_counter += 1
        print(f"⏸️ No improvement. Patience: {patience_counter}/{patience}")
        if patience_counter >= patience:
            print("⏹️ Early stopping triggered.")
            break

# Save final model
model.save_pretrained("bart_summarization_model")
tokenizer.save_pretrained("bart_summarization_model")
print("✅ Final model saved to 'bart_summarization_model'")


2025-04-22 06:27:07.883840: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745303228.081297      31 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745303228.138623      31 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


README.md:   0%|          | 0.00/15.6k [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

  scaler = GradScaler()
  with autocast():
                                                                        


✅ Epoch 1 completed | Avg Loss: 9.3960




📊 Validation ROUGE Scores - R1: 0.2972, R2: 0.1136, RL: 0.1924
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                        


✅ Epoch 2 completed | Avg Loss: 4.9511


                                                             

📊 Validation ROUGE Scores - R1: 0.3063, R2: 0.1176, RL: 0.2100
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                        


✅ Epoch 3 completed | Avg Loss: 2.4034


                                                             

📊 Validation ROUGE Scores - R1: 0.3369, R2: 0.1324, RL: 0.2328
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                         


✅ Epoch 4 completed | Avg Loss: 1.3868


                                                             

📊 Validation ROUGE Scores - R1: 0.3448, R2: 0.1377, RL: 0.2404
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                         


✅ Epoch 5 completed | Avg Loss: 1.1523


                                                             

📊 Validation ROUGE Scores - R1: 0.3423, R2: 0.1334, RL: 0.2350
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                         


✅ Epoch 6 completed | Avg Loss: 1.0598


                                                             

📊 Validation ROUGE Scores - R1: 0.3424, R2: 0.1359, RL: 0.2385
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                         


✅ Epoch 7 completed | Avg Loss: 0.9878


                                                             

📊 Validation ROUGE Scores - R1: 0.3447, R2: 0.1358, RL: 0.2383
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                         


✅ Epoch 8 completed | Avg Loss: 0.9397


                                                             

📊 Validation ROUGE Scores - R1: 0.3426, R2: 0.1328, RL: 0.2381
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                         


✅ Epoch 9 completed | Avg Loss: 0.8830


                                                             

📊 Validation ROUGE Scores - R1: 0.3445, R2: 0.1336, RL: 0.2375
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 10 completed | Avg Loss: 0.8386


                                                             

📊 Validation ROUGE Scores - R1: 0.3481, R2: 0.1367, RL: 0.2419
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 11 completed | Avg Loss: 0.7878


                                                             

📊 Validation ROUGE Scores - R1: 0.3537, R2: 0.1396, RL: 0.2425
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 12 completed | Avg Loss: 0.7484


                                                             

📊 Validation ROUGE Scores - R1: 0.3467, R2: 0.1355, RL: 0.2420
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 13 completed | Avg Loss: 0.7003


                                                             

📊 Validation ROUGE Scores - R1: 0.3531, R2: 0.1427, RL: 0.2478
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 14 completed | Avg Loss: 0.6655


                                                             

📊 Validation ROUGE Scores - R1: 0.3526, R2: 0.1395, RL: 0.2468
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 15 completed | Avg Loss: 0.6260


                                                             

📊 Validation ROUGE Scores - R1: 0.3456, R2: 0.1373, RL: 0.2440
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 16 completed | Avg Loss: 0.5898


                                                             

📊 Validation ROUGE Scores - R1: 0.3552, R2: 0.1437, RL: 0.2488
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 17 completed | Avg Loss: 0.5580


                                                             

📊 Validation ROUGE Scores - R1: 0.3467, R2: 0.1359, RL: 0.2422
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 18 completed | Avg Loss: 0.5225


                                                             

📊 Validation ROUGE Scores - R1: 0.3475, R2: 0.1361, RL: 0.2419
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 19 completed | Avg Loss: 0.4936


                                                             

📊 Validation ROUGE Scores - R1: 0.3499, R2: 0.1385, RL: 0.2437
💾 Model improved and saved to 'best_bart_model'


  with autocast():
                                                                          


✅ Epoch 20 completed | Avg Loss: 0.4595


                                                             

📊 Validation ROUGE Scores - R1: 0.3566, R2: 0.1383, RL: 0.2460
💾 Model improved and saved to 'best_bart_model'
✅ Final model saved to 'bart_summarization_model'


In [11]:
from transformers import BartForConditionalGeneration, BartTokenizer

model = BartForConditionalGeneration.from_pretrained("best_bart_model")
tokenizer = BartTokenizer.from_pretrained("best_bart_model")

model.eval()


BartForConditionalGeneration(
  (model): BartModel(
    (shared): BartScaledWordEmbedding(50265, 768, padding_idx=1)
    (encoder): BartEncoder(
      (embed_tokens): BartScaledWordEmbedding(50265, 768, padding_idx=1)
      (embed_positions): BartLearnedPositionalEmbedding(1026, 768)
      (layers): ModuleList(
        (0-5): 6 x BartEncoderLayer(
          (self_attn): BartSdpaAttention(
            (k_proj): Linear(in_features=768, out_features=768, bias=True)
            (v_proj): Linear(in_features=768, out_features=768, bias=True)
            (q_proj): Linear(in_features=768, out_features=768, bias=True)
            (out_proj): Linear(in_features=768, out_features=768, bias=True)
          )
          (self_attn_layer_norm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
          (activation_fn): GELUActivation()
          (fc1): Linear(in_features=768, out_features=3072, bias=True)
          (fc2): Linear(in_features=3072, out_features=768, bias=True)
          (final_lay

In [12]:
def generate_summary(text, model, tokenizer, max_input_length=512, max_output_length=128):
    inputs = tokenizer(
        text,
        return_tensors="pt",
        max_length=max_input_length,
        truncation=True,
        padding="max_length"
    )
    input_ids = inputs["input_ids"]
    attention_mask = inputs["attention_mask"]

    with torch.no_grad():
        summary_ids = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=max_output_length,
            num_beams=4,
            early_stopping=True
        )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary


In [13]:
text = """The White House on Tuesday said it had nothing to do with a tabloid report claiming President Obama would soon be divorcing First Lady Michelle Obama..."""
summary = generate_summary(text, model, tokenizer)
print("📄 Original Text:\n", text)
print("\n📝 Generated Summary:\n", summary)


📄 Original Text:
 The White House on Tuesday said it had nothing to do with a tabloid report claiming President Obama would soon be divorcing First Lady Michelle Obama...

📝 Generated Summary:
 White House says it had nothing to do with a tabloid report claiming President Obama would soon be divorcing First Lady Michelle Obama .
