In [1]:
# Just use "Run All" -- that is all that is needed.

In [2]:
import os
import re

# Directory that holds the "checkpoint-<step>" sub-folders.
base_dir = "results"

# Every entry found directly under base_dir.
folders = os.listdir(base_dir)

# Accept only names of the exact form "checkpoint-<digits>" (the trailing "$"
# rejects things like "checkpoint-80000.bak") and capture the step number.
checkpoint_name = re.compile(r"checkpoint-(\d+)$")

# (folder_name, step) for every matching *directory*.
checkpoints = []
for folder in folders:
    found = checkpoint_name.match(folder)
    if found and os.path.isdir(os.path.join(base_dir, folder)):
        checkpoints.append((folder, int(found.group(1))))

# Fail with an actionable message instead of max()'s "empty sequence" error.
if not checkpoints:
    raise FileNotFoundError(
        f"No 'checkpoint-<step>' directories found in {base_dir!r}; "
        "nothing to resume from."
    )

# Folder with the highest step number.
max_checkpoint = max(checkpoints, key=lambda x: x[1])[0]

# Path handed to from_pretrained / resume_from_checkpoint further down;
# built from base_dir so the two cannot drift apart.
checkpoint_path = f"./{base_dir}/{max_checkpoint}"

print("Max checkpoint path:", checkpoint_path)
# Manual override, if a specific checkpoint is wanted instead:
# checkpoint_path = "./results/checkpoint-41000"


Max checkpoint path: ./results/checkpoint-80000


In [3]:
# !pip install transformers==4.41.1 datasets==2.18.0 evaluate==0.4.1 torch==2.3.0 accelerate==0.31.0

In [4]:
import os
# Opt in to the "expandable_segments" setting of PyTorch's CUDA allocator.
# NOTE(review): presumably meant to reduce fragmentation-related OOMs and must
# be set before CUDA is initialised -- confirm against the PyTorch docs.
os.environ.update({"PYTORCH_CUDA_ALLOC_CONF": "expandable_segments:True"})

In [5]:

from datasets import load_dataset

# Fetch the arXiv summarization corpus (articles paired with their abstracts).
dataset = load_dataset("ccdv/arxiv-summarization")
# dataset = dataset.select(range(1000))   # remove later

# Peek at the first training example: first 500 characters of the article,
# followed by the full abstract.
sample = dataset["train"][0]
article_preview = sample["article"][:500]
abstract_text = sample["abstract"]
print("Article:", article_preview)
print("Summary:", abstract_text)


  from .autonotebook import tqdm as notebook_tqdm


Article: additive models @xcite provide an important family of models for semiparametric regression or classification . some reasons for the success of additive models are their increased flexibility when compared to linear or generalized linear models and their increased interpretability when compared to fully nonparametric models . 
 it is well - known that good estimators in additive models are in general less prone to the curse of high dimensionality than good estimators in fully nonparametric models
Summary: additive models play an important role in semiparametric statistics . 
 this paper gives learning rates for regularized kernel based methods for additive models . 
 these learning rates compare favourably in particular in high dimensions to recent results on optimal learning rates for purely nonparametric regularized kernel based quantile regression using the gaussian radial basis function kernel , provided the assumption of an additive model is valid . 
 additionally , a conc

In [6]:

# from transformers import LongT5ForConditionalGeneration, AutoTokenizer

# model_name = "google/long-t5-tglobal-base"
# tokenizer = AutoTokenizer.from_pretrained(model_name)
# model = LongT5ForConditionalGeneration.from_pretrained(model_name)
# model.config.use_cache = False
# model.gradient_checkpointing_enable() 


In [7]:
from transformers import LongT5ForConditionalGeneration, AutoTokenizer

# Tokenizer and weights both come from the checkpoint directory stored in
# `checkpoint_path`.
tokenizer = AutoTokenizer.from_pretrained(checkpoint_path)

# NOTE(review): ignore_mismatched_sizes=True keeps loading going when a weight
# shape differs from the checkpoint; confirm that silently skipping such
# weights is really wanted when resuming a fine-tuned model.
load_options = {
    "local_files_only": True,
    "ignore_mismatched_sizes": True,
}
model = LongT5ForConditionalGeneration.from_pretrained(checkpoint_path, **load_options)

# Turn off the generation cache, then enable gradient checkpointing.
model.config.use_cache = False
model.gradient_checkpointing_enable()


In [8]:
def preprocess_function(batch):
    """Tokenize a batch of articles and abstracts for seq2seq fine-tuning.

    Args:
        batch: Mapping with "article" and "abstract" keys, each holding a list
            of strings (one entry per example).

    Returns:
        The tokenizer output for the "summarize: "-prefixed articles
        (truncated/padded to 4096 tokens) with an extra "labels" entry: the
        abstract token ids (truncated/padded to 256 tokens) in which every
        padding position has been replaced by -100.
    """
    inputs = ["summarize: " + doc for doc in batch["article"]]
    model_inputs = tokenizer(inputs, max_length=4096, truncation=True, padding="max_length")

    # `text_target=` supersedes the deprecated `as_target_tokenizer()` context manager.
    labels = tokenizer(
        text_target=batch["abstract"],
        max_length=256,
        truncation=True,
        padding="max_length",
    )

    # The labels are padded to a fixed length here, so the padding ids must be
    # masked explicitly; -100 is the same "ignore" value the data collator in
    # this notebook uses (label_pad_token_id=-100). Without this, the loss is
    # also computed on padding tokens.
    pad_id = tokenizer.pad_token_id
    model_inputs["labels"] = [
        [(token if token != pad_id else -100) for token in sequence]
        for sequence in labels["input_ids"]
    ]
    return model_inputs


In [9]:
# Assuming your dataset has a 'train' split
# tokenized_dataset = dataset["train"].map(
#     preprocess_function,
#     batched=True,
#     remove_columns=["article", "abstract"]
# )

# tokenized_dataset.save_to_disk("tokenized_dataset")
from datasets import load_from_disk

# Reuse the tokenized dataset cached on disk (see the commented-out map/save
# code above). The cache reflects whatever preprocess_function did when it was
# written, so it must be regenerated if that function changes.
tokenized_dataset = load_from_disk("tokenized_dataset")

# Train/eval split on the tokenized data. The seed is fixed so that every
# kernel restart produces the *same* split: training is resumed from a
# checkpoint, which only makes sense if the training set is unchanged, and a
# fresh random split each run would also leak former eval rows into training.
split_dataset = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]
# 53 minutes

In [10]:
from transformers import DataCollatorForSeq2Seq
from rouge_score import rouge_scorer

# ROUGE-1 / ROUGE-2 / ROUGE-L scorer with stemming enabled; used by
# compute_metrics below.
rouge_variants = ['rouge1', 'rouge2', 'rougeL']
scorer = rouge_scorer.RougeScorer(rouge_variants, use_stemmer=True)


def compute_metrics(eval_pred):
    """Compute average ROUGE F-measures (as percentages) for an evaluation pass.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` may be
            token ids, raw logits with a trailing vocabulary axis (3-D), or a
            tuple whose first element is one of those. ``labels`` are token
            ids in which ignored positions may be marked with -100.

    Returns:
        Dict with keys 'rouge1', 'rouge2' and 'rougeL' holding the mean
        F-measure over all examples multiplied by 100, or an empty dict when
        there are no examples.
    """
    import numpy as np  # local import so the function does not rely on another cell

    predictions, labels = eval_pred

    # Some models hand back a tuple of outputs; the first entry carries the predictions.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.asarray(predictions)
    labels = np.asarray(labels)

    # Raw logits -> most likely token id per position.
    if predictions.ndim == 3:
        predictions = predictions.argmax(axis=-1)

    # -100 is only a masking marker, not a vocabulary id; swap it for the pad
    # id so batch_decode receives valid ids (the pad token is then dropped by
    # skip_special_tokens=True).
    pad_id = tokenizer.pad_token_id
    predictions = np.where(predictions != -100, predictions, pad_id)
    labels = np.where(labels != -100, labels, pad_id)

    decoded_preds = tokenizer.batch_decode(predictions, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    metric_names = ('rouge1', 'rouge2', 'rougeL')
    accumulated_fmeasures = dict.fromkeys(metric_names, 0.0)
    num_samples = 0

    for pred_text, ref_text in zip(decoded_preds, decoded_labels):
        # rouge_score convention: score(target, prediction).
        scores = scorer.score(ref_text, pred_text)
        for key in metric_names:
            if key in scores:
                accumulated_fmeasures[key] += scores[key].fmeasure
        num_samples += 1

    if num_samples == 0:
        return {}
    return {
        key: (total_fmeasure / num_samples) * 100
        for key, total_fmeasure in accumulated_fmeasures.items()
    }

# Seq2seq batch collator; label padding it adds is filled with -100.
data_collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    label_pad_token_id=-100,
)

In [11]:
import torch
# Environment report: torch build, the CUDA version it was built against,
# CUDA availability, the name of GPU 0 and the cuDNN version.
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
# Querying a device name raises when no CUDA device is usable, so guard it
# to keep the cell runnable on CPU-only machines.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("CUDA not available.")
print(torch.backends.cudnn.version())

2.5.1+cu121
12.1
True
NVIDIA GeForce RTX 3060
90100


In [12]:
import torch

def print_gpu_memory():
    """Print the CUDA memory currently allocated and reserved, in GB.

    Values come from ``torch.cuda.memory_allocated()`` and
    ``torch.cuda.memory_reserved()`` divided by 1024**3. When CUDA is not
    available a short notice is printed instead.
    """
    if not torch.cuda.is_available():
        print("CUDA not available.")
        return

    bytes_per_gb = 1024**3
    allocated_gb = torch.cuda.memory_allocated() / bytes_per_gb
    reserved_gb = torch.cuda.memory_reserved() / bytes_per_gb
    print("Allocated:", allocated_gb, "GB")
    print("Cached:   ", reserved_gb, "GB")


print_gpu_memory()

Allocated: 0.0 GB
Cached:    0.0 GB


In [13]:
from transformers import TrainingArguments
import os

# Create the output directory up front if it does not exist yet.
os.makedirs("./results", exist_ok=True)

# Training configuration, grouped by concern. Evaluation is switched off
# ("no"), so the eval-related settings below are inert for this run.
training_args = TrainingArguments(
    # --- where things are written ---
    output_dir="./results",
    logging_dir="./logs",
    report_to=[],                         # no external experiment trackers
    # --- checkpointing ---
    save_strategy="steps",
    save_steps=1000,
    save_total_limit=2,
    save_safetensors=True,
    # --- evaluation / model selection ---
    evaluation_strategy="no",
    # eval_steps=5000,
    load_best_model_at_end=False,
    metric_for_best_model="rougeL",
    greater_is_better=True,
    per_device_eval_batch_size=1,
    eval_accumulation_steps=4,
    # --- optimisation ---
    learning_rate=1e-5,
    weight_decay=0.01,
    max_grad_norm=1.0,
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=2,
    # fp16=torch.cuda.is_available(),
    fp16=False,
    # --- progress reporting ---
    logging_steps=200,
    disable_tqdm=False,                   # keep the live progress bar
)




In [14]:
# import torch
# torch.cuda.empty_cache()
# from transformers import TrainingArguments


# training_args = TrainingArguments(
#     output_dir="./results",
#     evaluation_strategy="steps",
#     eval_steps=1000,
#     save_strategy="steps",
#     save_steps=1000, 
#     save_total_limit=2,
#     load_best_model_at_end=True,
#     metric_for_best_model="rougeL",
#     greater_is_better=True,
#     learning_rate=3e-5,
#     per_device_train_batch_size=1,
#     per_device_eval_batch_size=1,
#     eval_accumulation_steps=4, 
#     num_train_epochs=1,
#     weight_decay=0.01,
#     logging_dir="./logs",
#     logging_steps=200,

#     fp16=torch.cuda.is_available(),
#     gradient_accumulation_steps=2,
#     max_grad_norm=1.0,
# )




In [15]:
from transformers import TrainerCallback
import torch

class ClipNanGradientsCallback(TrainerCallback):
    """Trainer callback that clips the model's gradient norm at each step end.

    Despite the name, nothing here inspects gradients for NaN values; the
    callback only calls ``clip_grad_norm_`` with ``max_norm=1.0``.

    NOTE(review): ``training_args`` already sets ``max_grad_norm=1.0``, and
    ``on_step_end`` presumably fires after the optimizer step, so this may
    have no practical effect -- confirm against the Trainer implementation.
    """

    def on_step_end(self, args, state, control, model=None, **kwargs):
        """Clip the gradient norm of ``model``'s parameters to 1.0, if a model is given."""
        if model is None:
            return
        trainable_params = model.parameters()
        torch.nn.utils.clip_grad_norm_(trainable_params, max_norm=1.0)


In [16]:

from transformers import Trainer, EarlyStoppingCallback

from transformers import Trainer

# Wire model, data and callbacks into the Trainer. Metric computation is
# deliberately disabled (compute_metrics=None).
trainer_callbacks = [ClipNanGradientsCallback()]

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    data_collator=data_collator,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    callbacks=trainer_callbacks,
    compute_metrics=None,
)

  return t.to(


In [17]:
# Alternatives kept for reference:
#   trainer.train()                                   -> start from scratch
#   trainer.train(resume_from_checkpoint=True)        -> resume, letting the Trainer pick the checkpoint
#   checkpoint_path = "./results/checkpoint-7000"     -> pin a specific checkpoint first
#
# Resume from the checkpoint directory stored in `checkpoint_path`.
train_output = trainer.train(resume_from_checkpoint=checkpoint_path)
train_output



There were missing keys in the checkpoint model loaded: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight'].
  torch.load(os.path.join(checkpoint, OPTIMIZER_NAME), map_location=map_location)
  checkpoint_rng_state = torch.load(rng_file)
 88%|████████▊ | 80200/91366 [09:15<8:31:54,  2.75s/it] 

{'loss': 1.6468, 'grad_norm': 1.8130735158920288, 'learning_rate': 1.222117636757656e-06, 'epoch': 0.88}


 88%|████████▊ | 80400/91366 [18:27<8:32:18,  2.80s/it]

{'loss': 1.5932, 'grad_norm': 1.369865894317627, 'learning_rate': 1.2002276558019397e-06, 'epoch': 0.88}


 88%|████████▊ | 80600/91366 [27:38<8:02:15,  2.69s/it]

{'loss': 1.6272, 'grad_norm': 1.2533254623413086, 'learning_rate': 1.178337674846223e-06, 'epoch': 0.88}


 88%|████████▊ | 80800/91366 [36:47<8:00:49,  2.73s/it]

{'loss': 1.6607, 'grad_norm': 1.7352581024169922, 'learning_rate': 1.1564476938905064e-06, 'epoch': 0.88}


 89%|████████▊ | 81000/91366 [45:57<8:05:29,  2.81s/it]

{'loss': 1.7128, 'grad_norm': 1.3939071893692017, 'learning_rate': 1.1345577129347898e-06, 'epoch': 0.89}


 89%|████████▉ | 81200/91366 [55:46<7:55:25,  2.81s/it] 

{'loss': 1.6437, 'grad_norm': 1.634934902191162, 'learning_rate': 1.1126677319790732e-06, 'epoch': 0.89}


 89%|████████▉ | 81400/91366 [1:05:08<7:43:45,  2.79s/it]

{'loss': 1.6044, 'grad_norm': 1.3535339832305908, 'learning_rate': 1.0907777510233566e-06, 'epoch': 0.89}


 89%|████████▉ | 81600/91366 [1:14:28<7:33:05,  2.78s/it]

{'loss': 1.5926, 'grad_norm': 1.5656934976577759, 'learning_rate': 1.0688877700676402e-06, 'epoch': 0.89}


 90%|████████▉ | 81800/91366 [1:23:49<7:25:55,  2.80s/it]

{'loss': 1.641, 'grad_norm': 1.3374749422073364, 'learning_rate': 1.0469977891119236e-06, 'epoch': 0.9}


 90%|████████▉ | 82000/91366 [1:33:16<7:28:29,  2.87s/it]

{'loss': 1.6186, 'grad_norm': 1.4941370487213135, 'learning_rate': 1.025107808156207e-06, 'epoch': 0.9}


 90%|████████▉ | 82200/91366 [1:43:24<7:24:25,  2.91s/it] 

{'loss': 1.687, 'grad_norm': 1.304487943649292, 'learning_rate': 1.0032178272004903e-06, 'epoch': 0.9}


 90%|█████████ | 82400/91366 [1:53:00<7:13:16,  2.90s/it]

{'loss': 1.6383, 'grad_norm': 1.5726165771484375, 'learning_rate': 9.813278462447737e-07, 'epoch': 0.9}


 90%|█████████ | 82600/91366 [2:02:37<7:01:06,  2.88s/it]

{'loss': 1.6089, 'grad_norm': 1.3715708255767822, 'learning_rate': 9.594378652890573e-07, 'epoch': 0.9}


 91%|█████████ | 82800/91366 [2:12:12<6:55:54,  2.91s/it]

{'loss': 1.6628, 'grad_norm': 1.4764100313186646, 'learning_rate': 9.375478843333408e-07, 'epoch': 0.91}


 91%|█████████ | 83000/91366 [2:21:50<6:45:12,  2.91s/it]

{'loss': 1.6189, 'grad_norm': 1.687374234199524, 'learning_rate': 9.156579033776242e-07, 'epoch': 0.91}


 91%|█████████ | 83200/91366 [2:31:57<6:33:06,  2.89s/it] 

{'loss': 1.6254, 'grad_norm': 1.6155369281768799, 'learning_rate': 8.937679224219076e-07, 'epoch': 0.91}


 91%|█████████▏| 83400/91366 [2:41:34<6:19:46,  2.86s/it]

{'loss': 1.6239, 'grad_norm': 1.486980676651001, 'learning_rate': 8.718779414661909e-07, 'epoch': 0.91}


 92%|█████████▏| 83600/91366 [2:51:11<6:15:31,  2.90s/it]

{'loss': 1.6357, 'grad_norm': 1.3865106105804443, 'learning_rate': 8.499879605104744e-07, 'epoch': 0.91}


 92%|█████████▏| 83800/91366 [3:00:48<6:01:25,  2.87s/it]

{'loss': 1.5772, 'grad_norm': 1.5937201976776123, 'learning_rate': 8.280979795547578e-07, 'epoch': 0.92}


 92%|█████████▏| 84000/91366 [3:10:24<5:55:44,  2.90s/it]

{'loss': 1.6719, 'grad_norm': 1.3160320520401, 'learning_rate': 8.062079985990413e-07, 'epoch': 0.92}


 92%|█████████▏| 84200/91366 [3:20:33<5:46:36,  2.90s/it] 

{'loss': 1.6569, 'grad_norm': 1.6407986879348755, 'learning_rate': 7.843180176433248e-07, 'epoch': 0.92}


 92%|█████████▏| 84400/91366 [3:30:11<5:36:36,  2.90s/it]

{'loss': 1.627, 'grad_norm': 1.079128384590149, 'learning_rate': 7.624280366876082e-07, 'epoch': 0.92}


 93%|█████████▎| 84600/91366 [3:39:48<5:24:44,  2.88s/it]

{'loss': 1.6153, 'grad_norm': 1.6658878326416016, 'learning_rate': 7.405380557318915e-07, 'epoch': 0.93}


 93%|█████████▎| 84800/91366 [3:49:24<5:14:19,  2.87s/it]

{'loss': 1.7049, 'grad_norm': 1.5624920129776, 'learning_rate': 7.186480747761749e-07, 'epoch': 0.93}


 93%|█████████▎| 85000/91366 [3:59:00<5:04:51,  2.87s/it]

{'loss': 1.6698, 'grad_norm': 1.0417295694351196, 'learning_rate': 6.967580938204583e-07, 'epoch': 0.93}


 93%|█████████▎| 85200/91366 [4:09:05<4:56:12,  2.88s/it] 

{'loss': 1.6684, 'grad_norm': 1.2860658168792725, 'learning_rate': 6.748681128647419e-07, 'epoch': 0.93}


 93%|█████████▎| 85400/91366 [4:18:42<4:49:32,  2.91s/it]

{'loss': 1.6486, 'grad_norm': 1.6418567895889282, 'learning_rate': 6.529781319090253e-07, 'epoch': 0.93}


 94%|█████████▎| 85600/91366 [4:28:19<4:38:33,  2.90s/it]

{'loss': 1.6406, 'grad_norm': 1.2053253650665283, 'learning_rate': 6.310881509533087e-07, 'epoch': 0.94}


 94%|█████████▍| 85800/91366 [4:37:56<4:28:23,  2.89s/it]

{'loss': 1.6254, 'grad_norm': 1.635663390159607, 'learning_rate': 6.091981699975922e-07, 'epoch': 0.94}


 94%|█████████▍| 86000/91366 [4:47:32<4:18:25,  2.89s/it]

{'loss': 1.6341, 'grad_norm': 1.610552430152893, 'learning_rate': 5.873081890418756e-07, 'epoch': 0.94}


 94%|█████████▍| 86200/91366 [4:57:42<4:09:37,  2.90s/it] 

{'loss': 1.633, 'grad_norm': 1.496829628944397, 'learning_rate': 5.65418208086159e-07, 'epoch': 0.94}


 95%|█████████▍| 86400/91366 [5:07:21<3:58:57,  2.89s/it]

{'loss': 1.749, 'grad_norm': 1.4909229278564453, 'learning_rate': 5.435282271304424e-07, 'epoch': 0.95}


 95%|█████████▍| 86600/91366 [5:16:58<3:49:06,  2.88s/it]

{'loss': 1.6694, 'grad_norm': 1.6730926036834717, 'learning_rate': 5.216382461747259e-07, 'epoch': 0.95}


 95%|█████████▌| 86800/91366 [5:26:34<3:39:41,  2.89s/it]

{'loss': 1.6696, 'grad_norm': 1.9060451984405518, 'learning_rate': 4.997482652190093e-07, 'epoch': 0.95}


 95%|█████████▌| 87000/91366 [5:36:10<3:30:16,  2.89s/it]

{'loss': 1.6269, 'grad_norm': 1.4555584192276, 'learning_rate': 4.778582842632927e-07, 'epoch': 0.95}


 95%|█████████▌| 87200/91366 [5:46:16<3:20:36,  2.89s/it] 

{'loss': 1.6387, 'grad_norm': 1.6321263313293457, 'learning_rate': 4.5596830330757615e-07, 'epoch': 0.95}


 96%|█████████▌| 87400/91366 [5:55:55<3:13:39,  2.93s/it]

{'loss': 1.635, 'grad_norm': 1.9227468967437744, 'learning_rate': 4.340783223518596e-07, 'epoch': 0.96}


 96%|█████████▌| 87600/91366 [6:05:31<3:01:02,  2.88s/it]

{'loss': 1.6246, 'grad_norm': 1.3618534803390503, 'learning_rate': 4.1218834139614297e-07, 'epoch': 0.96}


 96%|█████████▌| 87800/91366 [6:15:08<2:52:51,  2.91s/it]

{'loss': 1.5836, 'grad_norm': 1.379206895828247, 'learning_rate': 3.9029836044042646e-07, 'epoch': 0.96}


 96%|█████████▋| 88000/91366 [6:24:42<2:41:07,  2.87s/it]

{'loss': 1.6663, 'grad_norm': 1.9321445226669312, 'learning_rate': 3.684083794847099e-07, 'epoch': 0.96}


 97%|█████████▋| 88200/91366 [6:34:52<2:29:44,  2.84s/it] 

{'loss': 1.6345, 'grad_norm': 1.5059466361999512, 'learning_rate': 3.465183985289933e-07, 'epoch': 0.97}


 97%|█████████▋| 88400/91366 [6:44:29<2:23:35,  2.90s/it]

{'loss': 1.6238, 'grad_norm': 0.7463447451591492, 'learning_rate': 3.2462841757327676e-07, 'epoch': 0.97}


 97%|█████████▋| 88600/91366 [6:54:07<2:13:43,  2.90s/it]

{'loss': 1.6203, 'grad_norm': 1.728411316871643, 'learning_rate': 3.0273843661756014e-07, 'epoch': 0.97}


 97%|█████████▋| 88800/91366 [7:03:43<2:03:04,  2.88s/it]

{'loss': 1.5867, 'grad_norm': 1.6074610948562622, 'learning_rate': 2.808484556618436e-07, 'epoch': 0.97}


 97%|█████████▋| 89000/91366 [7:13:20<1:52:54,  2.86s/it]

{'loss': 1.662, 'grad_norm': 1.4596309661865234, 'learning_rate': 2.58958474706127e-07, 'epoch': 0.97}


 98%|█████████▊| 89200/91366 [7:23:28<1:44:44,  2.90s/it]

{'loss': 1.648, 'grad_norm': 1.2075036764144897, 'learning_rate': 2.3706849375041047e-07, 'epoch': 0.98}


 98%|█████████▊| 89400/91366 [7:33:05<1:35:10,  2.90s/it]

{'loss': 1.668, 'grad_norm': 1.3779888153076172, 'learning_rate': 2.1517851279469388e-07, 'epoch': 0.98}


 98%|█████████▊| 89600/91366 [7:42:42<1:24:55,  2.89s/it]

{'loss': 1.6368, 'grad_norm': 1.5803570747375488, 'learning_rate': 1.9328853183897732e-07, 'epoch': 0.98}


 98%|█████████▊| 89800/91366 [7:52:19<1:15:05,  2.88s/it]

{'loss': 1.659, 'grad_norm': 1.4741765260696411, 'learning_rate': 1.7139855088326075e-07, 'epoch': 0.98}


 99%|█████████▊| 90000/91366 [8:01:55<1:05:49,  2.89s/it]

{'loss': 1.5966, 'grad_norm': 1.3723140954971313, 'learning_rate': 1.495085699275442e-07, 'epoch': 0.99}


 99%|█████████▊| 90200/91366 [8:12:06<55:09,  2.84s/it]  

{'loss': 1.6433, 'grad_norm': 1.2734404802322388, 'learning_rate': 1.276185889718276e-07, 'epoch': 0.99}


 99%|█████████▉| 90400/91366 [8:21:43<46:18,  2.88s/it]

{'loss': 1.6783, 'grad_norm': 1.3360705375671387, 'learning_rate': 1.0572860801611103e-07, 'epoch': 0.99}


 99%|█████████▉| 90600/91366 [8:31:20<36:59,  2.90s/it]

{'loss': 1.6005, 'grad_norm': 1.5221649408340454, 'learning_rate': 8.383862706039447e-08, 'epoch': 0.99}


 99%|█████████▉| 90800/91366 [8:40:55<27:14,  2.89s/it]

{'loss': 1.6632, 'grad_norm': 1.6639751195907593, 'learning_rate': 6.194864610467789e-08, 'epoch': 0.99}


100%|█████████▉| 91000/91366 [8:50:29<17:35,  2.88s/it]

{'loss': 1.6077, 'grad_norm': 5.3683390617370605, 'learning_rate': 4.005866514896132e-08, 'epoch': 1.0}


100%|█████████▉| 91200/91366 [9:00:36<07:58,  2.88s/it]  

{'loss': 1.6016, 'grad_norm': 1.4305497407913208, 'learning_rate': 1.8168684193244754e-08, 'epoch': 1.0}


100%|██████████| 91366/91366 [9:08:35<00:00,  2.78it/s]

{'train_runtime': 32915.9273, 'train_samples_per_second': 5.552, 'train_steps_per_second': 2.776, 'train_loss': 0.2038577084595285, 'epoch': 1.0}





TrainOutput(global_step=91366, training_loss=0.2038577084595285, metrics={'train_runtime': 32915.9273, 'train_samples_per_second': 5.552, 'train_steps_per_second': 2.776, 'total_flos': 1.001058031632384e+18, 'train_loss': 0.2038577084595285, 'epoch': 0.9999945275347091})

In [18]:

# Persist the trained model and its tokenizer side by side so the directory
# can be loaded later with from_pretrained.
best_model_dir = "./longt5_best_model"
trainer.save_model(best_model_dir)
tokenizer.save_pretrained(best_model_dir)


('./longt5_best_model\\tokenizer_config.json',
 './longt5_best_model\\special_tokens_map.json',
 './longt5_best_model\\tokenizer.json')

In [None]:
# Evaluation

In [3]:
from transformers import LongT5ForConditionalGeneration, AutoTokenizer
from datasets import load_dataset
import torch

# Dataset (all splits) and the fine-tuned model/tokenizer saved earlier.
dataset = load_dataset("ccdv/arxiv-summarization")
model = LongT5ForConditionalGeneration.from_pretrained("./longt5_best_model")
tokenizer = AutoTokenizer.from_pretrained("./longt5_best_model")

# Run on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# First test article, with the task prefix in front, truncated to 4096 tokens.
text = dataset["test"][0]["article"]
input_text = "summarize: " + text
encoded = tokenizer(
    input_text,
    return_tensors="pt",
    max_length=4096,
    truncation=True
)

# Put every input tensor on the same device as the model.
inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

# Beam-search decoding settings.
generation_kwargs = {
    "max_length": 256,
    "min_length": 30,
    "length_penalty": 2.0,
    "num_beams": 4,
    "early_stopping": True,
}
summary_ids = model.generate(inputs["input_ids"], **generation_kwargs)

# Back to text, dropping special tokens.
summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print("\nGenerated Summary:\n", summary)






Generated Summary:
 in this paper the problem of the existence of the periodicity of about 155 days during the maximum activity period for sunspot data from 1923 - 1933 ( cycle 16 ) is considered. the daily sunspot areas, the mean sunspot areas per carrington rotation, the monthly sunspot numbers and their fluctuations, which are obtained after removing the 11-year cycle are analysed. a new method of the diagnosis of an echo - effect in the power spectrum is presented. numerical results of the new method are presented.


In [9]:
from transformers import LongT5ForConditionalGeneration, AutoTokenizer
from datasets import load_dataset
from rouge_score import rouge_scorer
import torch
from tqdm import tqdm

# Test split of the arXiv summarization dataset.
dataset = load_dataset("ccdv/arxiv-summarization", split="test")

# Fine-tuned model and tokenizer saved earlier.
model = LongT5ForConditionalGeneration.from_pretrained("./longt5_best_model")
tokenizer = AutoTokenizer.from_pretrained("./longt5_best_model")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# ROUGE-1 / ROUGE-2 / ROUGE-L with stemming enabled.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

# Number of test articles to score. Capped at the dataset size so indexing
# can never run past the end; raise the first argument (or use len(dataset))
# for a fuller evaluation.
n_samples = min(100, len(dataset))
scores = {"rouge1": [], "rouge2": [], "rougeL": []}

for i in tqdm(range(n_samples), desc="Evaluating"):
    # Fetch the row once; each dataset[i] access materialises the whole example.
    example = dataset[i]
    article = example["article"]
    reference = example["abstract"]

    input_text = "summarize: " + article
    inputs = tokenizer(input_text, return_tensors="pt", max_length=4096, truncation=True).to(device)

    summary_ids = model.generate(
        inputs["input_ids"],
        max_length=256,
        min_length=30,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True
    )

    predicted = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # rouge_score convention: score(target, prediction); keep the F-measure.
    score = scorer.score(reference, predicted)
    for key in scores:
        scores[key].append(score[key].fmeasure)

# Mean F-measure per metric; 0.0 when nothing was scored (avoids dividing by zero).
avg_scores = {
    key: (sum(values) / len(values) if values else 0.0)
    for key, values in scores.items()
}
print("\nAverage ROUGE Scores on Test Set:")
for key, value in avg_scores.items():
    print(f"{key}: {value:.4f}")


Evaluating: 100%|██████████| 100/100 [10:21<00:00,  6.21s/it]


Average ROUGE Scores on Test Set:
rouge1: 0.4047
rouge2: 0.1505
rougeL: 0.2413





In [None]:
from transformers import LongT5ForConditionalGeneration, AutoTokenizer
from rouge_score import rouge_scorer
import torch


text_to_summarize = """
In this paper the problem of the existence of the periodicity of about 155 days during the maximum activity period 
for sunspot data from 1923 - 1933 (cycle 16) is considered. The daily sunspot areas, the mean sunspot areas per 
Carrington rotation, the monthly sunspot numbers and their fluctuations, which are obtained after removing the 11-year 
cycle are analysed. A new method of the diagnosis of an echo-effect in the power spectrum is presented. Numerical results 
of the new method are presented.
"""

reference_summary = """The paper explores the periodicity of approximately 155 days in sunspot activity during 1923–1933, using various data and a new diagnostic method for echo effects in power spectra."""


# Fine-tuned model and tokenizer saved earlier.
model = LongT5ForConditionalGeneration.from_pretrained("./longt5_best_model")
tokenizer = AutoTokenizer.from_pretrained("./longt5_best_model")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()

# Prefix the text with the task marker and tokenize (truncated to 4096 tokens).
input_text = "summarize: " + text_to_summarize
inputs = tokenizer(input_text, return_tensors="pt", max_length=4096, truncation=True).to(device)

# Beam search; unlike the other evaluation cells this one also applies a
# repetition penalty.
generation_kwargs = {
    "max_length": 256,
    "min_length": 30,
    "length_penalty": 2.0,
    "repetition_penalty": 1.2,
    "num_beams": 4,
    "early_stopping": True,
}
summary_ids = model.generate(inputs["input_ids"], **generation_kwargs)

generated_summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

print("\nGenerated Summary:\n", generated_summary)

# Score against the reference only when one was supplied.
if reference_summary:
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
    score = scorer.score(reference_summary, generated_summary)

    print("\nROUGE Scores:")
    for metric_name, result in score.items():
        print(f"{metric_name}: {result.fmeasure:.4f}")



Generated Summary:
 sunspot data from 1923 - 1933 (cycle 16) are analysed. a new method of the diagnosis of an echo-effect in the power spectrum is presented. numerical results of the new method are presented.

ROUGE Scores:
rouge1: 0.4516
rouge2: 0.1333
rougeL: 0.3226


In [6]:
! pip list

Package                      Version
---------------------------- ------------
absl-py                      2.3.1
accelerate                   0.31.0
aiohappyeyeballs             2.6.1
aiohttp                      3.12.15
aiosignal                    1.4.0
asttokens                    3.0.0
astunparse                   1.6.3
async-timeout                5.0.1
attrs                        25.3.0
certifi                      2025.7.14
charset-normalizer           3.4.2
click                        8.2.1
colorama                     0.4.6
comm                         0.2.2
datasets                     2.18.0
debugpy                      1.8.14
decorator                    5.2.1
dill                         0.3.8
evaluate                     0.4.1
exceptiongroup               1.3.0
executing                    2.2.0
filelock                     3.18.0
flatbuffers                  25.2.10
frozenlist                   1.7.0
fsspec                       2024.2.0
gast                         0