In [12]:
from transformers import BartForConditionalGeneration, BartTokenizer, Trainer, TrainingArguments
from datasets import load_dataset, DatasetDict

# Load the Pre-Trained Model

In [2]:
model_name = "facebook/bart-large-cnn"
model = BartForConditionalGeneration.from_pretrained(model_name)
tokenizer = BartTokenizer.from_pretrained(model_name)

# Prepare the Dataset
Load your dataset and tokenize it:

In [31]:
dataset = load_dataset("json", data_files="loan_servicing_data.json")


Generating train split: 200 examples [00:00, 37967.81 examples/s]


In [32]:
dataset

DatasetDict({
    train: Dataset({
        features: ['input_text', 'summary'],
        num_rows: 200
    })
})

# Split the dataset into train and validation sets

In [33]:
split_dataset = dataset["train"].train_test_split(test_size=0.1)  # 10% for validation
tokenized_dataset = DatasetDict({
    "train": split_dataset["train"],
    "validation": split_dataset["test"]
})

In [34]:
# Preprocess the dataset
def preprocess_function(examples):
    inputs = [doc for doc in examples["input_text"]]
    model_inputs = tokenizer(inputs, max_length=1024, truncation=True, padding="max_length")

    with tokenizer.as_target_tokenizer():
        labels = tokenizer(examples["summary"], max_length=128, truncation=True, padding="max_length")

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

In [35]:
tokenized_dataset = tokenized_dataset.map(preprocess_function, batched=True)

Map: 100%|██████████| 180/180 [00:00<00:00, 731.07 examples/s]
Map: 100%|██████████| 20/20 [00:00<00:00, 1260.86 examples/s]


In [36]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['input_text', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 180
    })
    validation: Dataset({
        features: ['input_text', 'summary', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 20
    })
})

# Set Up Training Arguments
Define the training parameters:

In [44]:
training_args = TrainingArguments(
    output_dir="./results",
    eval_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=2,  # Reduce batch size (e.g., from 4 to 2)
    per_device_eval_batch_size=2,   # Reduce batch size for evaluation
    gradient_accumulation_steps=4,  # Gradient accumulation
    num_train_epochs=3,
    weight_decay=0.01,
    save_total_limit=2,
    fp16=True,  # Enable mixed precision training if using a GPU
)

# Train the Model
Use the Trainer class to fine-tune the model:

In [45]:
import torch

# Clear GPU memory
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    print("GPU memory cleared.")
else:
    print("No GPU available.")

GPU memory cleared.


In [46]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
)

trainer.train()

OutOfMemoryError: CUDA out of memory. Tried to allocate 100.00 MiB. GPU 0 has a total capacity of 3.81 GiB of which 10.31 MiB is free. Including non-PyTorch memory, this process has 3.74 GiB memory in use. Of the allocated memory 3.62 GiB is allocated by PyTorch, and 27.48 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Save the Fine-Tuned Model
After training, save the model and tokenizer:

In [None]:
model.save_pretrained("./fine-tuned-bart-loan-servicing")
tokenizer.save_pretrained("./fine-tuned-bart-loan-servicing")