In [1]:
import os

In [2]:
%pwd

'C:\\Users\\godar\\Desktop\\Text-Summarize-Project\\research'

In [3]:
# Move from the notebook folder up to the project root so that the `src.*`
# imports and the relative config/artifact paths used below resolve.
# Guarded on the presence of `src/` so that re-running this cell does not
# climb one more directory level each time.
if not os.path.isdir("src"):
    os.chdir("../")

In [4]:
%pwd

'C:\\Users\\godar\\Desktop\\Text-Summarize-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path
# Entity: the return type of ConfigurationManager.get_model_trainer_config()
@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of paths and hyperparameters for the model-training stage.

    The annotations are documentation only: the dataclass does not validate or
    convert the values it is given.
    """

    # --- locations ---
    root_dir: Path      # directory the training stage writes into
    data_path: Path     # dataset location handed to the trainer
    model_ckpt: Path    # checkpoint identifier/path to start from

    # --- schedule and batching ---
    num_train_epochs: int
    warmup_steps: int
    per_device_train_batch_size: int
    per_device_eval_batch_size: int
    weight_decay: float

    # --- logging, evaluation and checkpointing cadence ---
    logging_steps: int
    evaluation_strategy: str
    eval_steps: int
    save_steps: float
    gradient_accumulation_steps: int

In [6]:
from src.textSummarizer.constants import *
from src.textSummarizer.utils.common import read_yaml,create_directories

In [7]:
class ConfigurationManager:
    """Loads the project config and params files and builds stage configs from them."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: path passed to read_yaml for the main config.
            params_filepath: path passed to read_yaml for the parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Build the ModelTrainerConfig for the training stage.

        Paths come from the `model_trainer` section of the config file and
        hyperparameters from the `Training_Arguments` section of the params
        file. The stage's root directory is created as a side effect.

        Returns:
            A ModelTrainerConfig populated from the two loaded files.
        """
        trainer_section = self.config.model_trainer
        training_args = self.params.Training_Arguments

        create_directories([trainer_section.root_dir])

        return ModelTrainerConfig(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            model_ckpt=trainer_section.model_ckpt,
            num_train_epochs=training_args.num_train_epochs,
            warmup_steps=training_args.warmup_steps,
            per_device_train_batch_size=training_args.per_device_train_batch_size,
            per_device_eval_batch_size=training_args.per_device_eval_batch_size,
            weight_decay=training_args.weight_decay,
            logging_steps=training_args.logging_steps,
            evaluation_strategy=training_args.evaluation_strategy,
            eval_steps=training_args.eval_steps,
            save_steps=training_args.save_steps,
            gradient_accumulation_steps=training_args.gradient_accumulation_steps,
        )

In [8]:
from transformers import DataCollatorForSeq2Seq

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
from transformers import TrainingArguments, Trainer
from transformers import DataCollatorForSeq2Seq
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset, load_from_disk
import torch

In [10]:
!pip install --upgrade accelerate
!pip uninstall -y transformers accelerate
!pip install transformers accelerate
!pip install transformers[sentencepiece] datasets sacrebleu rouge_score py7zr -q

Found existing installation: transformers 4.50.3
Uninstalling transformers-4.50.3:
  Successfully uninstalled transformers-4.50.3
Found existing installation: accelerate 1.5.2
Uninstalling accelerate-1.5.2:
  Successfully uninstalled accelerate-1.5.2
Collecting transformers
  Using cached transformers-4.50.3-py3-none-any.whl.metadata (39 kB)
Collecting accelerate
  Using cached accelerate-1.5.2-py3-none-any.whl.metadata (19 kB)
Using cached transformers-4.50.3-py3-none-any.whl (10.2 MB)
Using cached accelerate-1.5.2-py3-none-any.whl (345 kB)
Installing collected packages: accelerate, transformers
Successfully installed accelerate-1.5.2 transformers-4.50.3


In [11]:
class ModelTrainer:
    """Fine-tunes the seq2seq checkpoint named in the config and saves the result."""

    def __init__(self, config: ModelTrainerConfig):
        """Keep the training configuration for later use by train().

        Args:
            config: paths and hyperparameters for the training run.
        """
        print("Loaded Config:", config)
        self.config = config

    def train(self, train_split="test", eval_split="validation"):
        """Run one fine-tuning job and write the model and tokenizer to disk.

        Loads the tokenizer and model from `config.model_ckpt` (on CUDA when
        available, otherwise CPU), loads the dataset saved at
        `config.data_path`, trains with a Hugging Face `Trainer`, then saves
        the model to `<root_dir>/pegasus_samsum_model` and the tokenizer to
        `<root_dir>/tokenizer`.

        Args:
            train_split: key of the dataset split used for training.
                NOTE(review): the default stays "test" to preserve the
                existing behavior (presumably chosen to keep runs short);
                pass "train" for a real training run so the model is not
                fitted on the held-out split.
            eval_split: key of the dataset split used for evaluation.
        """
        device = "cuda" if torch.cuda.is_available() else "cpu"
        tokenizer = AutoTokenizer.from_pretrained(self.config.model_ckpt)
        model_pegasus = AutoModelForSeq2SeqLM.from_pretrained(self.config.model_ckpt).to(device)
        seq2seq_data_collator = DataCollatorForSeq2Seq(tokenizer, model=model_pegasus)

        dataset_samsum_pt = load_from_disk(self.config.data_path)

        # Honor the configured value instead of a hard-coded 1e6. The config
        # loader may deliver it as a string (e.g. '1e6'), so coerce it to a
        # number before handing it to TrainingArguments.
        save_steps = float(self.config.save_steps)

        trainer_args = TrainingArguments(
            output_dir=self.config.root_dir,
            num_train_epochs=self.config.num_train_epochs,
            warmup_steps=self.config.warmup_steps,
            per_device_train_batch_size=self.config.per_device_train_batch_size,
            per_device_eval_batch_size=self.config.per_device_eval_batch_size,
            weight_decay=self.config.weight_decay,
            logging_steps=self.config.logging_steps,
            evaluation_strategy=self.config.evaluation_strategy,
            eval_steps=self.config.eval_steps,
            save_steps=save_steps,
            gradient_accumulation_steps=self.config.gradient_accumulation_steps,
        )

        trainer = Trainer(
            model=model_pegasus,
            args=trainer_args,
            tokenizer=tokenizer,
            data_collator=seq2seq_data_collator,
            train_dataset=dataset_samsum_pt[train_split],
            eval_dataset=dataset_samsum_pt[eval_split],
        )

        trainer.train()

        # Persist the fine-tuned model and its tokenizer under the stage's root directory.
        model_pegasus.save_pretrained(os.path.join(self.config.root_dir, "pegasus_samsum_model"))
        tokenizer.save_pretrained(os.path.join(self.config.root_dir, "tokenizer"))

In [None]:
# Run the model-training stage: load configuration, build the trainer, train.
# The former `try/except Exception as e: raise e` wrapper only re-raised the
# same exception, so it is dropped; errors still propagate unchanged.
config = ConfigurationManager()
model_trainer_config = config.get_model_trainer_config()
model_trainer = ModelTrainer(config=model_trainer_config)
model_trainer.train()

[2025-03-31 16:07:14,584] : INFO : common : Loaded config\config.yaml successfully
[2025-03-31 16:07:14,587] : INFO : common : Loaded params.yaml successfully
[2025-03-31 16:07:14,589] : INFO : common : Created directory artifacts successfully
[2025-03-31 16:07:14,590] : INFO : common : Created directory artifacts/model_trainer successfully
Loaded Config: ModelTrainerConfig(root_dir='artifacts/model_trainer', data_path='artifacts/data_transformation/samsum_dataset', model_ckpt='google/pegasus-cnn_dailymail', num_train_epochs=1, warmup_steps=500, per_device_train_batch_size=1, per_device_eval_batch_size=1, weight_decay=0.01, logging_steps=10, evaluation_strategy='steps', eval_steps=500, save_steps='1e6', gradient_accumulation_steps=16)


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(model=model_pegasus, args=trainer_args,


Step,Training Loss,Validation Loss
