# In [None]:
import os
import torch
from transformers import T5Tokenizer, T5ForConditionalGeneration
from torch.utils.data import DataLoader, Dataset
import torch.optim as optim

# Dataset Class
class SummarizationDataset(Dataset):
    """Paired judgement/summary texts, tokenized on demand.

    Every regular file in ``<data_path>/train-data/judgement`` is paired with
    the file of the same name in ``<data_path>/train-data/summary``. Item ``i``
    is the pair of token-id tensors for the i-th judgement and its summary,
    with files ordered by name.

    Args:
        tokenizer: Object exposing ``encode_plus(text, max_length=...,
            padding=..., truncation=..., return_tensors=...)`` whose result
            maps ``'input_ids'`` to a tensor; it is expected to carry a
            leading batch dimension of size 1, which is removed here.
        data_path: Dataset root that contains the ``train-data`` directory.
        max_length: Length passed to the tokenizer for padding/truncation of
            both the input and the target.
    """

    def __init__(self, tokenizer, data_path, max_length=256):
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.inputs, self.targets = self._load_dataset(data_path)

    def _load_dataset(self, path):
        """Read all judgement files and their same-named summary files.

        Args:
            path: Dataset root that contains ``train-data``.

        Returns:
            ``(inputs, targets)``: two lists of strings aligned by index and
            ordered by file name.

        Raises:
            FileNotFoundError: If a directory is missing or a judgement file
                has no summary file with the same name.
        """
        judgement_path = os.path.join(path, "train-data", "judgement")
        summary_path = os.path.join(path, "train-data", "summary")

        inputs, targets = [], []
        # os.listdir returns entries in arbitrary order; sorting keeps sample
        # indices reproducible across runs and machines.
        for fname in sorted(os.listdir(judgement_path)):
            judgement_file = os.path.join(judgement_path, fname)
            if not os.path.isfile(judgement_file):
                # Skip stray sub-directories (e.g. notebook checkpoint
                # folders) instead of failing when trying to open them.
                continue
            with open(judgement_file, 'r', encoding='utf-8') as f:
                judgement_text = f.read()
            with open(os.path.join(summary_path, fname), 'r', encoding='utf-8') as f:
                summary_text = f.read()
            # Append only once both reads succeeded so the lists stay aligned.
            inputs.append(judgement_text)
            targets.append(summary_text)
        return inputs, targets

    def __len__(self):
        """Return the number of judgement/summary pairs."""
        return len(self.inputs)

    def _encode(self, text):
        """Tokenize ``text`` to ``max_length`` and return its 1-D id tensor."""
        encoded = self.tokenizer.encode_plus(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # Drop only the batch dimension. A bare squeeze() would also collapse
        # the sequence dimension when max_length == 1 and return a 0-d tensor.
        return encoded['input_ids'].squeeze(0)

    def __getitem__(self, idx):
        """Return ``(input_ids, target_ids)`` for the pair at ``idx``."""
        return self._encode(self.inputs[idx]), self._encode(self.targets[idx])

# Pretrained checkpoint shared by the tokenizer and the model
MODEL_NAME = "t5-small"
tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
model = T5ForConditionalGeneration.from_pretrained(MODEL_NAME)

# Dataset root and a fixed-order loader yielding batches of two samples
# NOTE(review): the raw-string prefix combined with doubled backslashes keeps
# both backslashes in the path; this presumably still resolves on Windows -
# confirm before reusing the path elsewhere.
DATASET_PATH = r"C:\\Users\\Ramachandra\\OneDrive\\Desktop\\FYP\\dataset (3)\\dataset\\IN-Abs"
dataset = SummarizationDataset(tokenizer=tokenizer, data_path=DATASET_PATH)
dataloader = DataLoader(dataset=dataset, batch_size=2, shuffle=False)

# Use the GPU when CUDA is available, otherwise fall back to the CPU,
# then move the model onto the chosen device
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = model.to(device)

# Evaluation: teacher-forced, token-level accuracy (no training is performed)
total_correct = 0
total_tokens = 0
pad_id = tokenizer.pad_token_id

model.eval()  # evaluation mode; no parameter updates happen below
with torch.no_grad():
    for src, tgt in dataloader:
        src, tgt = src.to(device), tgt.to(device)

        # The inputs are padded to a fixed length, so mark the padding
        # positions explicitly instead of letting the encoder attend to them.
        src_mask = (src != pad_id).long()

        # When `labels` is given, T5 builds the decoder inputs itself by
        # shifting the labels one step to the right, so logits[:, i] is the
        # prediction for tgt[:, i]. Pass the full target and compare
        # position-for-position; shifting manually as well would misalign
        # predictions and references by one token.
        outputs = model(input_ids=src, attention_mask=src_mask, labels=tgt)
        predictions = outputs.logits.argmax(dim=-1)

        # Padding positions in the target do not count towards the accuracy.
        mask = tgt != pad_id
        correct = (predictions == tgt) & mask
        total_correct += correct.sum().item()
        total_tokens += mask.sum().item()

# Calculate and Print Accuracy
# With no counted tokens (e.g. an empty dataset) the accuracy is undefined;
# report NaN rather than failing with a ZeroDivisionError.
accuracy = (total_correct / total_tokens) * 100 if total_tokens else float("nan")
print(f"Final Accuracy: {accuracy:.2f}%")
