In [1]:
import torch

# Prefer Apple's Metal (MPS) backend when PyTorch reports it as available;
# otherwise fall back to the CPU.
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Smoke test: allocate a random 4x4 tensor directly on the selected device ...
x = torch.rand(4, 4, device=device)

# ... and multiply it by its own transpose there.
y = x @ x.t()

print(y)

tensor([[0.9956, 1.0258, 0.8264, 0.7040],
        [1.0258, 1.2741, 0.9438, 0.7908],
        [0.8264, 0.9438, 1.1586, 0.5190],
        [0.7040, 0.7908, 0.5190, 0.9811]], device='mps:0')


In [2]:
# Display the torch.device selected in the first cell (mps when available, else cpu).
device

device(type='mps')

In [3]:
from datasets import load_dataset
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np

In [4]:
# Hugging Face Hub checkpoint shared by the tokenizer and the base model.
model_name = "google/flan-t5-base"

# Load the tokenizer and the pretrained seq2seq model from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
original_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [5]:
# Dialogue-summarization dataset, identified by its Hugging Face Hub name.
huggingface_dataset_name = "knkarthick/dialogsum"

# Fetch every split that the dataset provides.
dataset = load_dataset(path=huggingface_dataset_name)

In [6]:
def tokenize_function(example):
    """Tokenize a batch of dialogues and summaries for seq2seq fine-tuning.

    Each dialogue is wrapped in a summarization prompt and tokenized with the
    module-level ``tokenizer`` using ``padding="max_length"`` and
    ``truncation=True``; the summaries are tokenized the same way to build the
    labels.

    Args:
        example: A batch mapping (as passed by ``Dataset.map(batched=True)``)
            with a ``"dialogue"`` and a ``"summary"`` entry, each a list of
            strings.

    Returns:
        The same mapping with three entries added:
        ``input_ids`` (prompt token ids), ``attention_mask`` (1 for real
        tokens, 0 for padding) and ``labels`` (summary token ids with padding
        positions replaced by -100).
    """
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]

    model_inputs = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt")
    example['input_ids'] = model_inputs.input_ids
    # Keep the mask so the encoder does not attend to the padding positions.
    example['attention_mask'] = model_inputs.attention_mask

    labels = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").input_ids
    # Padding in the labels must not contribute to the loss: -100 is the index
    # the Hugging Face seq2seq cross-entropy loss ignores.
    example['labels'] = labels.masked_fill(labels == tokenizer.pad_token_id, -100)

    return example

# `dataset` holds three splits (train, validation, test). Mapping with
# batched=True runs tokenize_function over every split in batches; the raw
# text/metadata columns are dropped afterwards so only the tokenized fields remain.
tokenized_datasets = dataset.map(tokenize_function, batched=True).remove_columns(
    ["id", "topic", "dialogue", "summary"]
)

In [7]:
from torch.utils.data import DataLoader

# Batches of the training split for manual iteration.
# - with_format("torch") makes each column come back as a tensor, so the default
#   collate stacks them into (batch, seq_len) tensors instead of transposing the
#   Python lists into one small tensor per token position.
# - Pinned host memory only speeds up host-to-CUDA copies; requesting it on
#   MPS/CPU just triggers a warning, so enable it only when CUDA is present.
train_dataloader = DataLoader(
    tokenized_datasets["train"].with_format("torch"),
    batch_size=8,
    shuffle=True,
    pin_memory=torch.cuda.is_available(),
)


In [8]:
# Show the DataLoader object's repr; this does not iterate over any batches.
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x352ae6130>

In [9]:
from peft import LoraConfig, get_peft_model, TaskType

# LoRA adapter settings applied on top of the FLAN-T5 base model.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,  # FLAN-T5 is a sequence-to-sequence LM
    target_modules=["q", "v"],        # modules that receive adapters
    r=32,                             # rank of the low-rank update matrices
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [10]:
# Wrap the base model with the LoRA adapters described by lora_config.
peft_model = get_peft_model(original_model, lora_config)

# print_trainable_parameters() prints its own summary and returns None, so it
# must not be wrapped in print() (that emitted a stray "None" line).
peft_model.print_trainable_parameters()

trainable params: 3538944 || all params: 251116800 || trainable%: 1.4092820552029972
None


In [11]:
# Move the LoRA-wrapped model onto the selected device (mps when available, else cpu).
peft_model = peft_model.to(device)

In [13]:
# Timestamped (whole seconds since the epoch) output directory, so each run
# writes to its own folder.
output_dir = f'./peft-dialogue-summary-training-{int(time.time())}'

In [18]:
# Hyperparameters for the LoRA fine-tuning run.
peft_training_args = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=4,
    learning_rate=1e-3,  # Higher learning rate than full fine-tuning.
    # NOTE(review): a value below 1 is presumably read as a fraction of the
    # total training steps -- confirm against the installed transformers version.
    logging_steps=0.1,
    auto_find_batch_size=True,
)

# The Trainer receives the LoRA-wrapped model and the tokenized train split.
# No explicit device placement of the dataset happens in this cell, and no
# evaluation dataset is supplied.
peft_trainer = Trainer(
    args=peft_training_args,
    model=peft_model,
    train_dataset=tokenized_datasets["train"],
)

In [19]:
# Launch the fine-tuning run configured in peft_trainer.
# The output recorded for this cell ends in KeyboardInterrupt, i.e. the saved
# run was stopped manually before any loss rows were logged.
peft_trainer.train()

Step,Training Loss


KeyboardInterrupt: 