In [8]:
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from datasets import load_dataset
import evaluate
import torch.nn.functional as F

# Ensure GPU usage if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Load pre-trained BART model and tokenizer
model = BartForConditionalGeneration.from_pretrained("facebook/bart-large").to(device)
tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")

# Load the XSum dataset (subset for demonstration)
dataset = load_dataset("xsum", split='train[:100]')
rouge = evaluate.load("rouge")

# Compute reward using ROUGE scores
def compute_reward(preds, references):
    """
    Compute the reward using ROUGE scores.
    """
    rouge_scores = rouge.compute(predictions=preds, references=references)
    reward = rouge_scores["rouge1"]  # Use ROUGE-1 F1 score as reward
    return reward

# Supervised fine-tuning function (optional warm-up)
def supervised_fine_tune(model, tokenizer, dataset, epochs=10, lr=5e-5):
    """
    Supervised fine-tuning of the model using labeled data.
    """
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    model.train()
    
    for epoch in range(epochs):
        total_loss = 0
        for i, data in enumerate(dataset):
            input_text = data['document']
            reference_summary = data['summary']

            # Prepare inputs and labels
            inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True, padding=True).to(device)
            labels = tokenizer(reference_summary, return_tensors="pt", max_length=128, truncation=True, padding=True).input_ids.to(device)

            # Forward pass
            outputs = model(input_ids=inputs['input_ids'], labels=labels)
            loss = outputs.loss
            total_loss += loss.item()

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 10 == 0:
                print(f"Step {i}, Loss: {loss.item()}")

        print(f"Epoch {epoch + 1} completed. Total Loss: {total_loss}")

# Fine-tuning with REINFORCE and Baseline Reward Smoothing
def fine_tune_with_reinforce(model, tokenizer, dataset, epochs=3, lr=5e-5, baseline_reward=0.1):
    """
    Fine-tune the model using REINFORCE with smoothed rewards and baseline subtraction.
    """
    optimizer = torch.optim.AdamW(model.parameters(), lr=lr)
    model.train()

    for epoch in range(epochs):
        total_loss = 0
        print(f"Starting Epoch {epoch + 1}/{epochs}")
        
        for i, data in enumerate(dataset):
            input_text = data['document']
            reference_summary = data['summary']

            # Tokenize inputs
            inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True, padding=True).to(device)

            # Generate summary
            generated_ids = model.generate(
                inputs['input_ids'], max_length=60, min_length=10, length_penalty=2.0, num_beams=4
            )
            generated_summary = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

            # Compute reward
            reward = compute_reward([generated_summary], [reference_summary])
            smoothed_reward = max(reward - baseline_reward, 1e-3)  # Reward smoothing and baseline subtraction

            # Compute log probabilities of the generated summary
            outputs = model(input_ids=inputs['input_ids'], labels=generated_ids)
            logits = outputs.logits
            log_probs = F.log_softmax(logits, dim=-1)

            # Policy gradient loss
            loss = -smoothed_reward * log_probs.mean()
            total_loss += loss.item()

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 10 == 0:
                print(f"Step {i}, Loss: {loss.item()}, Reward: {reward}, Smoothed Reward: {smoothed_reward}")

        print(f"Epoch {epoch + 1} completed. Total Loss: {total_loss}")

# Evaluate the model
def evaluate_model(model, tokenizer, dataset):
    """
    Evaluate the fine-tuned model using ROUGE scores.
    """
    model.eval()
    predictions = []
    references = []

    for data in dataset:
        input_text = data['document']
        reference_summary = data['summary']

        # Tokenize input
        inputs = tokenizer(input_text, return_tensors="pt", max_length=1024, truncation=True, padding=True).to(device)

        # Generate summary
        generated_ids = model.generate(
            inputs['input_ids'], max_length=60, min_length=10, length_penalty=2.0, num_beams=4
        )
        generated_summary = tokenizer.decode(generated_ids[0], skip_special_tokens=True)

        predictions.append(generated_summary)
        references.append(reference_summary)

    # Compute ROUGE scores
    rouge_scores = rouge.compute(predictions=predictions, references=references)
    print("Evaluation ROUGE Scores:")
    for key, value in rouge_scores.items():
        print(f"{key}: {value}")

# Run the training process
train_data = dataset.select(range(50))  # Subset for fine-tuning
val_data = dataset.select(range(50, 100))  # Subset for evaluation

# Optional supervised fine-tuning
supervised_fine_tune(model, tokenizer, train_data, epochs=1, lr=5e-5)

# Reinforce fine-tuning with baseline and smoothing
fine_tune_with_reinforce(model, tokenizer, train_data, epochs=10, lr=5e-5, baseline_reward=0.1)

# Evaluate the fine-tuned model
evaluate_model(model, tokenizer, val_data)


Using device: cuda
Step 0, Loss: 2.5957894325256348
Step 10, Loss: 1.980647087097168
Step 20, Loss: 1.6816656589508057
Step 30, Loss: 2.129777431488037
Step 40, Loss: 1.3703655004501343
Epoch 1 completed. Total Loss: 144.40478575229645
Starting Epoch 1/10
Step 0, Loss: 7.667163848876953, Reward: 0.4864864864864865, Smoothed Reward: 0.38648648648648654
Step 10, Loss: 0.011640017852187157, Reward: 0.0, Smoothed Reward: 0.001
Step 20, Loss: 0.011866576038300991, Reward: 0.0, Smoothed Reward: 0.001
Step 30, Loss: 0.011917616240680218, Reward: 0.0, Smoothed Reward: 0.001
Step 40, Loss: 0.011769535019993782, Reward: 0.0, Smoothed Reward: 0.001
Epoch 1 completed. Total Loss: 21.425146535038948
Starting Epoch 2/10
Step 0, Loss: 0.011574244126677513, Reward: 0.0, Smoothed Reward: 0.001
Step 10, Loss: 0.011576224118471146, Reward: 0.0, Smoothed Reward: 0.001
Step 20, Loss: 0.011369286105036736, Reward: 0.0, Smoothed Reward: 0.001
Step 30, Loss: 0.011839451268315315, Reward: 0.0, Smoothed Reward: