In [1]:
import os 

In [2]:
# Work from the project root so that the `src.*` imports and the relative
# config paths used below resolve.
# Guarded on the presence of `src/` so that re-running this cell (without a
# kernel restart) does not climb one directory further on every execution.
if not os.path.isdir("src"):
    os.chdir("../")

In [3]:
from transformers import Trainer, TrainingArguments
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from src.textsummarizer.logging import logger
from src.textsummarizer.entity.config_entity import DataTransformationConfig
from src.textsummarizer.config.configuration import ConfigurationManager

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass
class ModelTrainerConfig:
    """Settings for the model-training stage.

    The path/checkpoint fields are filled from the ``model_trainer`` section
    of the main config; the remaining fields are copied from the
    ``TrainingArguments`` section of the params file and forwarded to
    ``transformers.TrainingArguments`` by ``ModelTrainer.train``.
    """
    # Used as the Trainer ``output_dir``; the final model and tokenizer are
    # saved under ``root_dir / "final_model"``.
    root_dir: Path
    # Location handed to ``datasets.load_from_disk``.
    data_path: Path
    # Checkpoint identifier passed to ``from_pretrained`` for both the
    # tokenizer and the model.
    model_ckpt: str
    num_train_epochs: int
    warmup_steps: int
    per_device_train_batch_size: int
    weight_decay: float
    logging_steps: int
    # Forwarded to TrainingArguments under the keyword ``eval_strategy``.
    evaluation_strategy: str
    eval_steps: int
    # ModelTrainer coerces this with ``int(float(...))`` before use.
    # NOTE(review): presumably the YAML value can arrive as a string such as
    # "1e6" — confirm against params.yaml.
    save_steps: float
    gradient_accumulation_steps: int

In [5]:
from src.textsummarizer.utils.common import read_yaml, create_directories
from src.textsummarizer.constants import CONFIG_FILE_PATH, PARAMS_FILE_PATH

In [6]:
class ConfigurationManager:
    """Read the project's YAML files and build per-stage config objects.

    Construction parses both YAML files and passes the configured artifacts
    root to ``create_directories``.
    """

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
    ):
        """Load the main config and the params file.

        Args:
            config_filepath: Path of the main YAML config.
            params_filepath: Path of the YAML hyperparameter file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the ``ModelTrainerConfig`` for the training stage.

        Paths and the checkpoint name come from ``config.model_trainer``;
        every hyperparameter is copied unchanged from
        ``params.TrainingArguments``.

        Returns:
            A populated ``ModelTrainerConfig``.
        """
        stage_section = self.config.model_trainer
        hyperparams = self.params.TrainingArguments

        # Hyperparameters share their name between the params file and the
        # dataclass, so they are copied by name in declaration order.
        hyperparam_names = (
            "num_train_epochs",
            "warmup_steps",
            "per_device_train_batch_size",
            "weight_decay",
            "logging_steps",
            "evaluation_strategy",
            "eval_steps",
            "save_steps",
            "gradient_accumulation_steps",
        )

        return ModelTrainerConfig(
            root_dir=Path(stage_section.root_dir),
            data_path=Path(stage_section.data_path),
            model_ckpt=stage_section.model_ckpt,
            **{name: getattr(hyperparams, name) for name in hyperparam_names},
        )

In [7]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from transformers import Trainer, TrainingArguments
from transformers import DataCollatorForSeq2Seq
from datasets import load_from_disk
from src.textsummarizer.logging import logger
import torch


In [8]:
class ModelTrainer:
    """Fine-tune a seq2seq checkpoint with the Hugging Face ``Trainer``."""

    def __init__(self, config: ModelTrainerConfig):
        """Store the training configuration.

        Args:
            config: Paths, checkpoint name and hyperparameters for the run.
        """
        self.config = config

    def train(self):
        """Train the model and save the model and tokenizer.

        Loads the dataset from ``config.data_path``, trains on its ``"train"``
        split while evaluating on ``"validation"``, then writes the model and
        tokenizer to ``config.root_dir / "final_model"``.
        """
        # No manual device selection here: the previously computed `device`
        # string was never used, and Trainer handles device placement itself.
        logger.info("Loading the dataset from disk...")
        dataset = load_from_disk(str(self.config.data_path))

        logger.info("Loading the tokenizer and model...")
        tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        model = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt)

        logger.info("Setting up data collator...")
        data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

        logger.info("Setting up training arguments...")
        training_args = TrainingArguments(
            output_dir=str(self.config.root_dir),
            num_train_epochs=self.config.num_train_epochs,
            warmup_steps=self.config.warmup_steps,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            weight_decay=self.config.weight_decay,
            logging_steps=self.config.logging_steps,
            eval_strategy=self.config.evaluation_strategy,
            eval_steps=self.config.eval_steps,
            # Accepts ints, floats and numeric strings (e.g. "1e6") alike.
            save_steps=int(float(self.config.save_steps)),
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
        )

        logger.info("Initializing Trainer...")
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=dataset["train"],
            eval_dataset=dataset["validation"],
            data_collator=data_collator,
            # `tokenizer=` is deprecated on Trainer in recent transformers
            # releases; `processing_class` is its replacement.
            processing_class=tokenizer,
        )

        logger.info("Starting training...")
        trainer.train()
        logger.info("Training completed.")

        # Model and tokenizer are stored side by side in one directory.
        final_model_dir = self.config.root_dir / "final_model"
        model.save_pretrained(final_model_dir)
        tokenizer.save_pretrained(final_model_dir)

In [9]:
# Driver for the training stage: load the YAML settings, derive the trainer
# config from them, then launch training.
config = ConfigurationManager()  # parses both YAML files on construction
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
# Long-running call: loads the dataset and checkpoint, trains, then saves.
model_trainer.train()


2025-11-11 10:31:30,807 - INFO -  File contents : ]
2025-11-11 10:31:30,808 - INFO - YAML file: config\config.yaml loaded successfully]
2025-11-11 10:31:30,812 - INFO -  File contents : ]
2025-11-11 10:31:30,814 - INFO - YAML file: config\params.yaml loaded successfully]
2025-11-11 10:31:30,815 - INFO - Directory created at: artifacts]
2025-11-11 10:31:30,817 - INFO - Loading the dataset from disk...]
2025-11-11 10:31:30,858 - INFO - Loading the tokenizer and model...]


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


2025-11-11 10:31:39,397 - INFO - Setting up data collator...]
2025-11-11 10:31:39,398 - INFO - Setting up training arguments...]
2025-11-11 10:31:39,414 - INFO - Initializing Trainer...]
2025-11-11 10:31:39,445 - INFO - Starting training...]


  trainer = Trainer(
The tokenizer has new PAD/BOS/EOS tokens that differ from the model config and generation config. The model config and generation config were aligned accordingly, being updated with the tokenizer's values. Updated tokens: {'bos_token_id': None}.


Step,Training Loss,Validation Loss


KeyboardInterrupt: 

In [None]:
# Reinstall transformers and accelerate as a matched pair.
# - %pip (not !pip) installs into the environment of the running kernel.
# - The earlier standalone `--upgrade accelerate` step was dropped: it was
#   immediately undone by the uninstall that followed.
# - Versions are pinned to the ones this notebook last resolved, so a re-run
#   reproduces the same environment.
# Restart the kernel afterwards so the freshly installed packages are imported.
%pip uninstall -y transformers accelerate
%pip install transformers==4.57.1 accelerate==1.11.0

Found existing installation: transformers 4.57.1
Uninstalling transformers-4.57.1:
  Successfully uninstalled transformers-4.57.1
Found existing installation: accelerate 1.11.0
Uninstalling accelerate-1.11.0:
  Successfully uninstalled accelerate-1.11.0
Collecting transformers
  Using cached transformers-4.57.1-py3-none-any.whl.metadata (43 kB)
Collecting accelerate
  Using cached accelerate-1.11.0-py3-none-any.whl.metadata (19 kB)
Using cached transformers-4.57.1-py3-none-any.whl (12.0 MB)
Using cached accelerate-1.11.0-py3-none-any.whl (375 kB)
Installing collected packages: accelerate, transformers

   ---------------------------------------- 0/2 [accelerate]
   ---------------------------------------- 0/2 [accelerate]
   ---------------------------------------- 0/2 [accelerate]
   ---------------------------------------- 0/2 [accelerate]
   ---------------------------------------- 0/2 [accelerate]
   ---------------------------------------- 0/2 [accelerate]
   ---------------------