In [1]:
import torch
from torch.utils.data import Dataset
from transformers import PegasusForConditionalGeneration, PegasusTokenizer, Trainer, TrainingArguments
import pandas as pd

class CustomDataset(Dataset):
    def __init__(self, tokenizer, data_file, max_input_length, max_target_length):
        self.tokenizer = tokenizer
        self.data = pd.read_csv(data_file)
        self.max_input_length = max_input_length
        self.max_target_length = max_target_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        input_text = self.data.iloc[index]["text"]
        target_text = self.data.iloc[index]["titles"]

        # Tokenize the input and target texts
        inputs = self.tokenizer(input_text, padding="max_length", truncation=True, max_length=self.max_input_length, return_tensors="pt")
        targets = self.tokenizer(target_text, padding="max_length", truncation=True, max_length=self.max_target_length, return_tensors="pt")

        return {
            "input_ids": inputs.input_ids[0],
            "attention_mask": inputs.attention_mask[0],
            "labels": targets.input_ids[0],
        }

# Check if CUDA is available and use the GPU if so
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tokenizer and the Pegasus model pretrained on the GPU
tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
model_pegasus = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum").to(device)

# Create the training dataset
train_dataset = CustomDataset(tokenizer, "data/train.csv", max_input_length=128, max_target_length=128)

# Define the training arguments
training_args = TrainingArguments(
    output_dir="./pegasus_trained_model",
    num_train_epochs=3,  # Number of training epochs
    per_device_train_batch_size=4,  # Batch size per GPU
    save_steps=100,  # Model saving frequency
    logging_dir="./logs",
)

# Define the Trainer for training
trainer = Trainer(
    model=model_pegasus,
    args=training_args,
    train_dataset=train_dataset,
)

# Train the model
trainer.train()

# Save the trained model
model_pegasus.save_pretrained("trained_pegasus_model")


  from .autonotebook import tqdm as notebook_tqdm





Downloading: 100%|██████████| 87.0/87.0 [00:00<00:00, 94.6kB/s]
Downloading: 100%|██████████| 1.82M/1.82M [00:00<00:00, 7.86MB/s]
Downloading: 100%|██████████| 65.0/65.0 [00:00<00:00, 32.5kB/s]
Downloading: 100%|██████████| 3.36M/3.36M [00:00<00:00, 7.15MB/s]
Downloading: 100%|██████████| 1.36k/1.36k [00:00<00:00, 696kB/s]
Downloading: 100%|██████████| 2.12G/2.12G [29:16<00:00, 1.30MB/s]
***** Running training *****
  Num examples = 21401
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 16053
  1%|          | 100/16053 [03:08<8:13:49,  1.86s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-100
Configuration saved in ./pegasus_trained_model\checkpoint-100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-100\pytorch_model.bin
  1%|          | 200/16053 [06:20<8:10:29,  1.86s/it] Saving model checkpoint to ./pe

{'loss': 3.9945, 'learning_rate': 4.844265869307918e-05, 'epoch': 0.09}


Model weights saved in ./pegasus_trained_model\checkpoint-500\pytorch_model.bin
  4%|▎         | 600/16053 [19:08<7:58:09,  1.86s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-600
Configuration saved in ./pegasus_trained_model\checkpoint-600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-600\pytorch_model.bin
  4%|▍         | 700/16053 [22:20<7:57:23,  1.87s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-700
Configuration saved in ./pegasus_trained_model\checkpoint-700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-700\pytorch_model.bin
  5%|▍         | 800/16053 [25:33<7:53:08,  1.86s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-800
Configuration saved in ./pegasus_trained_model\checkpoint-800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-800\pytorch_model.bin
  6%|▌         | 900/16053 [28:48<7:53:31,  1.87s/it] Saving model checkpoint to ./pegasus_trained

{'loss': 1.8334, 'learning_rate': 4.688531738615835e-05, 'epoch': 0.19}


Model weights saved in ./pegasus_trained_model\checkpoint-1000\pytorch_model.bin
  7%|▋         | 1100/16053 [35:12<7:44:42,  1.86s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-1100
Configuration saved in ./pegasus_trained_model\checkpoint-1100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-1100\pytorch_model.bin
  7%|▋         | 1200/16053 [38:24<7:42:53,  1.87s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-1200
Configuration saved in ./pegasus_trained_model\checkpoint-1200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-1200\pytorch_model.bin
  8%|▊         | 1300/16053 [41:46<8:31:34,  2.08s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-1300
Configuration saved in ./pegasus_trained_model\checkpoint-1300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-1300\pytorch_model.bin
  9%|▊         | 1400/16053 [45:18<8:17:06,  2.04s/it] Saving model checkpoint to ./p

{'loss': 1.7429, 'learning_rate': 4.5327976079237525e-05, 'epoch': 0.28}


Model weights saved in ./pegasus_trained_model\checkpoint-1500\pytorch_model.bin
 10%|▉         | 1600/16053 [52:21<8:19:09,  2.07s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-1600
Configuration saved in ./pegasus_trained_model\checkpoint-1600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-1600\pytorch_model.bin
 11%|█         | 1700/16053 [55:50<7:54:37,  1.98s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-1700
Configuration saved in ./pegasus_trained_model\checkpoint-1700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-1700\pytorch_model.bin
 11%|█         | 1800/16053 [59:09<7:26:00,  1.88s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-1800
Configuration saved in ./pegasus_trained_model\checkpoint-1800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-1800\pytorch_model.bin
 12%|█▏        | 1900/16053 [1:02:20<7:18:40,  1.86s/it]Saving model checkpoint to ./

{'loss': 1.6752, 'learning_rate': 4.37706347723167e-05, 'epoch': 0.37}


Model weights saved in ./pegasus_trained_model\checkpoint-2000\pytorch_model.bin
 13%|█▎        | 2100/16053 [1:08:41<7:08:33,  1.84s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-2100
Configuration saved in ./pegasus_trained_model\checkpoint-2100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-2100\pytorch_model.bin
 14%|█▎        | 2200/16053 [1:12:04<7:30:41,  1.95s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-2200
Configuration saved in ./pegasus_trained_model\checkpoint-2200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-2200\pytorch_model.bin
 14%|█▍        | 2300/16053 [1:15:26<7:27:12,  1.95s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-2300
Configuration saved in ./pegasus_trained_model\checkpoint-2300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-2300\pytorch_model.bin
 15%|█▍        | 2400/16053 [1:18:50<7:32:56,  1.99s/it] Saving model checkpoin

{'loss': 1.6653, 'learning_rate': 4.2213293465395875e-05, 'epoch': 0.47}


Model weights saved in ./pegasus_trained_model\checkpoint-2500\pytorch_model.bin
 16%|█▌        | 2600/16053 [1:25:41<7:27:24,  2.00s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-2600
Configuration saved in ./pegasus_trained_model\checkpoint-2600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-2600\pytorch_model.bin
 17%|█▋        | 2700/16053 [1:29:06<7:23:37,  1.99s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-2700
Configuration saved in ./pegasus_trained_model\checkpoint-2700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-2700\pytorch_model.bin
 17%|█▋        | 2800/16053 [1:32:32<7:04:59,  1.92s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-2800
Configuration saved in ./pegasus_trained_model\checkpoint-2800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-2800\pytorch_model.bin
 18%|█▊        | 2900/16053 [1:36:07<7:40:36,  2.10s/it] Saving model checkpoin

{'loss': 1.6031, 'learning_rate': 4.0655952158475054e-05, 'epoch': 0.56}


Model weights saved in ./pegasus_trained_model\checkpoint-3000\pytorch_model.bin
 19%|█▉        | 3100/16053 [1:43:32<7:36:14,  2.11s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-3100
Configuration saved in ./pegasus_trained_model\checkpoint-3100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-3100\pytorch_model.bin
 20%|█▉        | 3200/16053 [1:47:14<7:29:43,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-3200
Configuration saved in ./pegasus_trained_model\checkpoint-3200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-3200\pytorch_model.bin
 21%|██        | 3300/16053 [1:50:55<7:26:28,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-3300
Configuration saved in ./pegasus_trained_model\checkpoint-3300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-3300\pytorch_model.bin
 21%|██        | 3400/16053 [1:54:38<7:19:46,  2.09s/it] Saving model checkpoin

{'loss': 1.5823, 'learning_rate': 3.909861085155423e-05, 'epoch': 0.65}


Model weights saved in ./pegasus_trained_model\checkpoint-3500\pytorch_model.bin
 22%|██▏       | 3600/16053 [2:02:04<7:12:44,  2.08s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-3600
Configuration saved in ./pegasus_trained_model\checkpoint-3600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-3600\pytorch_model.bin
 23%|██▎       | 3700/16053 [2:05:38<6:49:12,  1.99s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-3700
Configuration saved in ./pegasus_trained_model\checkpoint-3700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-3700\pytorch_model.bin
 24%|██▎       | 3800/16053 [2:09:04<6:57:02,  2.04s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-3800
Configuration saved in ./pegasus_trained_model\checkpoint-3800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-3800\pytorch_model.bin
 24%|██▍       | 3900/16053 [2:12:30<6:43:58,  1.99s/it] Saving model checkpoin

{'loss': 1.5779, 'learning_rate': 3.7541269544633404e-05, 'epoch': 0.75}


Model weights saved in ./pegasus_trained_model\checkpoint-4000\pytorch_model.bin
 26%|██▌       | 4100/16053 [2:19:24<7:00:28,  2.11s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-4100
Configuration saved in ./pegasus_trained_model\checkpoint-4100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-4100\pytorch_model.bin
 26%|██▌       | 4200/16053 [2:23:04<6:55:42,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-4200
Configuration saved in ./pegasus_trained_model\checkpoint-4200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-4200\pytorch_model.bin
 27%|██▋       | 4300/16053 [2:26:46<6:50:57,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-4300
Configuration saved in ./pegasus_trained_model\checkpoint-4300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-4300\pytorch_model.bin
 27%|██▋       | 4400/16053 [2:30:28<6:48:20,  2.10s/it] Saving model checkpoin

{'loss': 1.5465, 'learning_rate': 3.5983928237712576e-05, 'epoch': 0.84}


Model weights saved in ./pegasus_trained_model\checkpoint-4500\pytorch_model.bin
 29%|██▊       | 4600/16053 [2:37:51<6:42:30,  2.11s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-4600
Configuration saved in ./pegasus_trained_model\checkpoint-4600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-4600\pytorch_model.bin
 29%|██▉       | 4700/16053 [2:41:33<6:38:08,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-4700
Configuration saved in ./pegasus_trained_model\checkpoint-4700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-4700\pytorch_model.bin
 30%|██▉       | 4800/16053 [2:45:15<6:32:36,  2.09s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-4800
Configuration saved in ./pegasus_trained_model\checkpoint-4800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-4800\pytorch_model.bin
 31%|███       | 4900/16053 [2:48:56<6:31:01,  2.10s/it] Saving model checkpoin

{'loss': 1.5528, 'learning_rate': 3.4426586930791755e-05, 'epoch': 0.93}


Model weights saved in ./pegasus_trained_model\checkpoint-5000\pytorch_model.bin
 32%|███▏      | 5100/16053 [2:56:19<6:25:57,  2.11s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-5100
Configuration saved in ./pegasus_trained_model\checkpoint-5100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-5100\pytorch_model.bin
 32%|███▏      | 5200/16053 [3:00:00<6:19:06,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-5200
Configuration saved in ./pegasus_trained_model\checkpoint-5200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-5200\pytorch_model.bin
 33%|███▎      | 5300/16053 [3:03:43<6:17:28,  2.11s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-5300
Configuration saved in ./pegasus_trained_model\checkpoint-5300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-5300\pytorch_model.bin
 34%|███▎      | 5400/16053 [3:07:26<6:12:15,  2.10s/it] Saving model checkpoin

{'loss': 1.5249, 'learning_rate': 3.286924562387093e-05, 'epoch': 1.03}


Model weights saved in ./pegasus_trained_model\checkpoint-5500\pytorch_model.bin
 35%|███▍      | 5600/16053 [3:14:49<6:06:39,  2.10s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-5600
Configuration saved in ./pegasus_trained_model\checkpoint-5600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-5600\pytorch_model.bin
 36%|███▌      | 5700/16053 [3:18:30<5:59:13,  2.08s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-5700
Configuration saved in ./pegasus_trained_model\checkpoint-5700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-5700\pytorch_model.bin
 36%|███▌      | 5800/16053 [3:22:13<6:02:25,  2.12s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-5800
Configuration saved in ./pegasus_trained_model\checkpoint-5800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-5800\pytorch_model.bin
 37%|███▋      | 5900/16053 [3:25:57<5:57:27,  2.11s/it] Saving model checkpoin

{'loss': 1.4855, 'learning_rate': 3.1311904316950105e-05, 'epoch': 1.12}


Model weights saved in ./pegasus_trained_model\checkpoint-6000\pytorch_model.bin
 38%|███▊      | 6100/16053 [3:33:26<5:50:50,  2.12s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-6100
Configuration saved in ./pegasus_trained_model\checkpoint-6100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-6100\pytorch_model.bin
 39%|███▊      | 6200/16053 [3:37:10<5:54:15,  2.16s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-6200
Configuration saved in ./pegasus_trained_model\checkpoint-6200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-6200\pytorch_model.bin
 39%|███▉      | 6300/16053 [3:40:58<5:51:40,  2.16s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-6300
Configuration saved in ./pegasus_trained_model\checkpoint-6300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-6300\pytorch_model.bin
 40%|███▉      | 6400/16053 [3:44:44<5:45:45,  2.15s/it] Saving model checkpoin

{'loss': 1.4751, 'learning_rate': 2.975456301002928e-05, 'epoch': 1.21}


Model weights saved in ./pegasus_trained_model\checkpoint-6500\pytorch_model.bin
 41%|████      | 6600/16053 [3:52:19<5:37:21,  2.14s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-6600
Configuration saved in ./pegasus_trained_model\checkpoint-6600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-6600\pytorch_model.bin
 42%|████▏     | 6700/16053 [3:55:38<4:44:32,  1.83s/it] Saving model checkpoint to ./pegasus_trained_model\checkpoint-6700
Configuration saved in ./pegasus_trained_model\checkpoint-6700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-6700\pytorch_model.bin
 42%|████▏     | 6800/16053 [3:58:49<4:47:23,  1.86s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-6800
Configuration saved in ./pegasus_trained_model\checkpoint-6800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-6800\pytorch_model.bin
 43%|████▎     | 6900/16053 [6:03:22<4:44:11,  1.86s/it]     Saving model checkp

{'loss': 1.4699, 'learning_rate': 2.8197221703108455e-05, 'epoch': 1.31}


Model weights saved in ./pegasus_trained_model\checkpoint-7000\pytorch_model.bin
 44%|████▍     | 7100/16053 [6:10:12<4:59:47,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-7100
Configuration saved in ./pegasus_trained_model\checkpoint-7100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-7100\pytorch_model.bin
 45%|████▍     | 7200/16053 [6:13:38<4:56:33,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-7200
Configuration saved in ./pegasus_trained_model\checkpoint-7200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-7200\pytorch_model.bin
 45%|████▌     | 7300/16053 [6:17:04<4:52:46,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-7300
Configuration saved in ./pegasus_trained_model\checkpoint-7300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-7300\pytorch_model.bin
 46%|████▌     | 7400/16053 [6:20:29<4:50:07,  2.01s/it]Saving model checkpoint to

{'loss': 1.4441, 'learning_rate': 2.6639880396187627e-05, 'epoch': 1.4}


Model weights saved in ./pegasus_trained_model\checkpoint-7500\pytorch_model.bin
 47%|████▋     | 7600/16053 [6:27:21<4:43:00,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-7600
Configuration saved in ./pegasus_trained_model\checkpoint-7600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-7600\pytorch_model.bin
 48%|████▊     | 7700/16053 [6:30:46<4:38:26,  2.00s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-7700
Configuration saved in ./pegasus_trained_model\checkpoint-7700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-7700\pytorch_model.bin
 49%|████▊     | 7800/16053 [6:34:12<4:36:00,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-7800
Configuration saved in ./pegasus_trained_model\checkpoint-7800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-7800\pytorch_model.bin
 49%|████▉     | 7900/16053 [6:37:37<4:33:20,  2.01s/it]Saving model checkpoint to

{'loss': 1.4539, 'learning_rate': 2.5082539089266806e-05, 'epoch': 1.5}


Model weights saved in ./pegasus_trained_model\checkpoint-8000\pytorch_model.bin
 50%|█████     | 8100/16053 [6:44:29<4:26:24,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-8100
Configuration saved in ./pegasus_trained_model\checkpoint-8100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-8100\pytorch_model.bin
 51%|█████     | 8200/16053 [6:47:55<4:23:29,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-8200
Configuration saved in ./pegasus_trained_model\checkpoint-8200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-8200\pytorch_model.bin
 52%|█████▏    | 8300/16053 [6:51:22<4:20:15,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-8300
Configuration saved in ./pegasus_trained_model\checkpoint-8300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-8300\pytorch_model.bin
 52%|█████▏    | 8400/16053 [6:54:47<4:15:52,  2.01s/it]Saving model checkpoint to

{'loss': 1.4177, 'learning_rate': 2.352519778234598e-05, 'epoch': 1.59}


Model weights saved in ./pegasus_trained_model\checkpoint-8500\pytorch_model.bin
 54%|█████▎    | 8600/16053 [7:01:40<4:10:02,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-8600
Configuration saved in ./pegasus_trained_model\checkpoint-8600\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-8600\pytorch_model.bin
 54%|█████▍    | 8700/16053 [7:05:05<4:06:06,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-8700
Configuration saved in ./pegasus_trained_model\checkpoint-8700\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-8700\pytorch_model.bin
 55%|█████▍    | 8800/16053 [7:08:32<4:02:28,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-8800
Configuration saved in ./pegasus_trained_model\checkpoint-8800\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-8800\pytorch_model.bin
 55%|█████▌    | 8900/16053 [7:11:58<4:00:52,  2.02s/it]Saving model checkpoint to

{'loss': 1.4327, 'learning_rate': 2.1967856475425156e-05, 'epoch': 1.68}


Model weights saved in ./pegasus_trained_model\checkpoint-9000\pytorch_model.bin
 57%|█████▋    | 9100/16053 [7:18:51<3:53:10,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-9100
Configuration saved in ./pegasus_trained_model\checkpoint-9100\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-9100\pytorch_model.bin
 57%|█████▋    | 9200/16053 [7:22:19<3:49:56,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-9200
Configuration saved in ./pegasus_trained_model\checkpoint-9200\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-9200\pytorch_model.bin
 58%|█████▊    | 9300/16053 [7:25:45<3:46:43,  2.01s/it]Saving model checkpoint to ./pegasus_trained_model\checkpoint-9300
Configuration saved in ./pegasus_trained_model\checkpoint-9300\config.json
Model weights saved in ./pegasus_trained_model\checkpoint-9300\pytorch_model.bin


RuntimeError: [enforce fail at inline_container.cc:424] . unexpected pos 3628774272 vs 3628774160

In [None]:

# Generate summaries for the test data
batch_size = 4  # Adjust batch size as needed
test_data = pd.read_csv("data/test_text.csv")
num_batches = (len(test_data) + batch_size - 1) // batch_size

generated_summaries = []

# Set the model to evaluation mode
model_pegasus.eval()

with torch.no_grad():
    for i in range(num_batches):
        batch_data = test_data.iloc[i * batch_size: (i + 1) * batch_size]

        # Tokenize the input texts
        input_encodings = tokenizer(batch_data["text"].tolist(), truncation=True, padding=True, return_tensors="pt").to(device)

        # Generate summaries
        outputs = model_pegasus.generate(input_encodings["input_ids"], max_length=50, num_beams=4, early_stopping=True)

        # Decode the generated summaries
        decoded_summaries = tokenizer.batch_decode(outputs, skip_special_tokens=True)

        generated_summaries.extend(decoded_summaries)

# Create a DataFrame for the submission
submission_df = pd.DataFrame({"ID": test_data["ID"], "titles": generated_summaries})

# Save the DataFrame to a CSV file
submission_df.to_csv("pegasus.csv", index=False)