In [1]:
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, TrainingArguments, Trainer
import torch
import time
import evaluate
import pandas as pd
import numpy as np
from peft import PeftModel, PeftConfig


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from accelerate import infer_auto_device_map, init_empty_weights
from transformers import AutoConfig, AutoModelForSeq2SeqLM 

config = AutoConfig.from_pretrained("declare-lab/flan-alpaca-large",torch_dtype=torch.bfloat16)

with init_empty_weights():
  model = AutoModelForSeq2SeqLM.from_config(config)


In [3]:
model

T5ForConditionalGeneration(
  (shared): Embedding(32128, 1024)
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 1024)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=1024, out_features=1024, bias=False)
              (k): Linear(in_features=1024, out_features=1024, bias=False)
              (v): Linear(in_features=1024, out_features=1024, bias=False)
              (o): Linear(in_features=1024, out_features=1024, bias=False)
              (relative_attention_bias): Embedding(32, 16)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (DenseReluDense): T5DenseGatedActDense(
              (wi_0): Linear(in_features=1024, out_features=2816, bias=False)
              (wi_1): Linear(in_features=1024, out_features=2816, bias=False)
       

In [3]:
device_map = infer_auto_device_map(model)

In [4]:
device_map

{'': 0}

In [5]:

model = AutoModelForSeq2SeqLM.from_pretrained("declare-lab/flan-alpaca-large",torch_dtype=torch.bfloat16, device_map=device_map, resume_download=True,offload_folder="offload")
tokenizer = AutoTokenizer.from_pretrained('declare-lab/flan-alpaca-large')

Some weights of T5ForConditionalGeneration were not initialized from the model checkpoint at declare-lab/flan-alpaca-large and are newly initialized: ['encoder.embed_tokens.weight', 'decoder.embed_tokens.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [6]:
lab_dataset_name = "knkarthick/dialogsum"
samsum_dataset = "samsum"
# dataset = load_dataset(huggingface_dataset_name)
dataset = load_dataset(lab_dataset_name)
dataset

DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 12460
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 500
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic'],
        num_rows: 1500
    })
})

In [7]:
def print_number_of_trainable_model_parameters(model):
    trainable_model_params = 0
    all_model_params = 0
    for _, param in model.named_parameters():
        all_model_params += param.numel()
        if param.requires_grad:
            trainable_model_params += param.numel()
    return f"trainable model parameters: {trainable_model_params}\nall model parameters: {all_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"

print(print_number_of_trainable_model_parameters(model))

trainable model parameters: 783150080
all model parameters: 783150080
percentage of trainable model parameters: 100.00%


In [8]:
def tokenize_function(example):
    start_prompt = 'Summarize the following conversation.\n\n'
    end_prompt = '\n\nSummary: '
    prompt = [start_prompt + dialogue + end_prompt for dialogue in example["dialogue"]]
    example['input_ids'] = tokenizer(prompt, padding="max_length", truncation=True, return_tensors="pt").to('cuda').input_ids
    example['labels'] = tokenizer(example["summary"], padding="max_length", truncation=True, return_tensors="pt").to('cuda').input_ids
    
    return example

# The dataset actually contains 3 diff splits: train, validation, test.
# The tokenize_function code is handling all data across all splits in batches.
tokenized_datasets = dataset.map(tokenize_function, batched=True)


In [9]:
tokenized_datasets = tokenized_datasets.filter(lambda example, index: index % 2 == 0, with_indices=True)
tokenized_datasets

Filter: 100%|██████████| 1500/1500 [00:00<00:00, 3685.70 examples/s]


DatasetDict({
    train: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'labels'],
        num_rows: 6230
    })
    validation: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'labels'],
        num_rows: 250
    })
    test: Dataset({
        features: ['id', 'dialogue', 'summary', 'topic', 'input_ids', 'labels'],
        num_rows: 750
    })
})

In [10]:

tokenized_datasets = tokenized_datasets.remove_columns(['id', 'dialogue', 'summary',])

In [11]:
output_dir = f'./dialogue-summary-training-{str(int(time.time()))}'
training_args = TrainingArguments(
    output_dir=output_dir,
    learning_rate=1e-5,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_steps=1,
    max_steps=3
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation']
)

In [18]:
# trainer.train() will run out of mem

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmiklpuerto69[0m. Use [1m`wandb login --relogin`[0m to force relogin


OutOfMemoryError: CUDA out of memory. Tried to allocate 64.00 MiB (GPU 0; 15.99 GiB total capacity; 29.29 GiB already allocated; 0 bytes free; 29.56 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [19]:
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []

for _, dialogue in enumerate(dialogues):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

    input_ids = tokenizer(prompt, return_tensors="pt").to('cuda').input_ids
    original_model_outputs = model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)
    original_model_summaries.append(original_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries))#, instruct_model_summaries))
 
df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,Person1 and Person2 discuss the new policy reg...
1,In order to prevent employees from wasting tim...,Person1 and Person2 discuss the new policy reg...
2,Ms. Dawson takes a dictation for #Person1# abo...,Person1 and Person2 discuss the new policy reg...
3,#Person2# arrives late because of traffic jam....,Person2 is going to quit driving to work due t...
4,#Person2# decides to follow #Person1#'s sugges...,Person2 is going to quit driving to work due t...
5,#Person2# complains to #Person1# about the tra...,Person2 is going to quit driving to work due t...
6,#Person1# tells Kate that Masha and Hero get d...,Masha and Hero are getting divorced early in t...
7,#Person1# tells Kate that Masha and Hero are g...,Masha and Hero are getting divorced early in t...
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting divorced early in t...
9,#Person1# and Brian are at the birthday party ...,Person1 and Person2 are celebrating Brian's bi...


In [20]:
rouge = evaluate.load('rouge')
original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)
print('ORIGINAL MODEL:')
print(original_model_results)

ORIGINAL MODEL:
{'rouge1': 0.3555259036116062, 'rouge2': 0.11784704735385937, 'rougeL': 0.2942657485579847, 'rougeLsum': 0.2936949640709209}


In [12]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=32, # Rank
    lora_alpha=32,
    target_modules=["q", "v"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM # FLAN-T5
)

In [13]:
peft_model = get_peft_model(model, 
                            lora_config)
print(print_number_of_trainable_model_parameters(peft_model))

trainable model parameters: 9437184
all model parameters: 792587264
percentage of trainable model parameters: 1.19%


In [14]:
output_dir = f'./peft-dialogue-summary-training-{str(int(time.time()))}'

peft_training_args = TrainingArguments(
    output_dir=output_dir,
    auto_find_batch_size=True,
    learning_rate=1e-5, # Higher learning rate than full fine-tuning.
    num_train_epochs=10,
    logging_steps=1,
    max_steps=20    
)
    
peft_trainer = Trainer(
    model=peft_model,
    args=peft_training_args,
    train_dataset=tokenized_datasets["train"],
)

In [15]:
peft_trainer.train()

peft_model_path="./peft-dialogue-summary-checkpoint-local"

peft_trainer.model.save_pretrained(peft_model_path)
tokenizer.save_pretrained(peft_model_path)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mmiklpuerto69[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
1,36.0
2,35.75
3,36.5
4,36.0
5,35.75
6,34.5
7,34.75
8,35.0
9,34.5
10,35.5


('./peft-dialogue-summary-checkpoint-local/tokenizer_config.json',
 './peft-dialogue-summary-checkpoint-local/special_tokens_map.json',
 './peft-dialogue-summary-checkpoint-local/tokenizer.json')

In [30]:
from peft import PeftModel, PeftConfig
peft_model_base = AutoModelForSeq2SeqLM.from_pretrained("declare-lab/flan-alpaca-large", torch_dtype=torch.bfloat16)
tokenizer = AutoTokenizer.from_pretrained("declare-lab/flan-alpaca-large")
peft_model = PeftModel.from_pretrained(peft_model_base, 
                                       './peft-dialogue-summary-checkpoint-local/', 
                                       torch_dtype=torch.bfloat16,
                                       is_trainable=False).to('cuda')

In [31]:
peft_model.device

device(type='cuda', index=0)

In [32]:
index = 200
dialogue = dataset['test'][index]['dialogue']
baseline_human_summary = dataset['test'][index]['summary']

prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """

input_ids = tokenizer(prompt, return_tensors="pt").to('cuda').input_ids

original_model_outputs = model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)


peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200, num_beams=1))
peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)
dash_line = "-----------"*10
print(dash_line)
print(f'BASELINE HUMAN SUMMARY:\n{baseline_human_summary}')
print(dash_line)
print(f'ORIGINAL MODEL:\n{original_model_text_output}')
print(dash_line)
print(f'PEFT MODEL: {peft_model_text_output}')

--------------------------------------------------------------------------------------------------------------
BASELINE HUMAN SUMMARY:
#Person1# teaches #Person2# how to upgrade software and hardware in #Person2#'s system.
--------------------------------------------------------------------------------------------------------------
ORIGINAL MODEL:
Person1 suggests upgrading their system to make it more modern and efficient.
--------------------------------------------------------------------------------------------------------------
PEFT MODEL: Person2 is considering upgrading her system. Person1 suggests adding a painting program to the software, upgrading the hardware, and adding a CD-ROM drive.


In [33]:
dialogues = dataset['test'][0:10]['dialogue']
human_baseline_summaries = dataset['test'][0:10]['summary']

original_model_summaries = []
instruct_model_summaries = []
peft_model_summaries = []

for idx, dialogue in enumerate(dialogues):
    prompt = f"""
Summarize the following conversation.

{dialogue}

Summary: """
    
    input_ids = tokenizer(prompt, return_tensors="pt").to('cuda').input_ids

    human_baseline_text_output = human_baseline_summaries[idx]
    
    original_model_outputs = model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    original_model_text_output = tokenizer.decode(original_model_outputs[0], skip_special_tokens=True)

    peft_model_outputs = peft_model.generate(input_ids=input_ids, generation_config=GenerationConfig(max_new_tokens=200))
    peft_model_text_output = tokenizer.decode(peft_model_outputs[0], skip_special_tokens=True)

    original_model_summaries.append(original_model_text_output)
    # instruct_model_summaries.append(instruct_model_text_output)
    peft_model_summaries.append(peft_model_text_output)

zipped_summaries = list(zip(human_baseline_summaries, original_model_summaries, peft_model_summaries))
 
df = pd.DataFrame(zipped_summaries, columns = ['human_baseline_summaries', 'original_model_summaries', 'peft_model_summaries'])
df

Unnamed: 0,human_baseline_summaries,original_model_summaries,peft_model_summaries
0,Ms. Dawson helps #Person1# to write a memo to ...,Employees are required to stop using Instant M...,Person1 and Person2 discuss the new policy reg...
1,In order to prevent employees from wasting tim...,This intra-office memo outlines a new policy r...,Person1 and Person2 discuss the new policy reg...
2,Ms. Dawson takes a dictation for #Person1# abo...,Person1: All office communications are restric...,Person1 and Person2 discuss the new policy reg...
3,#Person2# arrives late because of traffic jam....,Person2 is stuck in traffic and is considering...,Person2 is going to quit driving to work due t...
4,#Person2# decides to follow #Person1#'s sugges...,Person2 is considering taking the public trans...,Person2 is going to quit driving to work due t...
5,#Person2# complains to #Person1# about the tra...,"Person2 is going to quit driving to work, beca...",Person2 is going to quit driving to work due t...
6,#Person1# tells Kate that Masha and Hero get d...,Person1: Masha and Hero are getting divorced. ...,Masha and Hero are getting divorced early in t...
7,#Person1# tells Kate that Masha and Hero are g...,Person1 and Hero are getting divorced.,Masha and Hero are getting divorced early in t...
8,#Person1# and Kate talk about the divorce betw...,Masha and Hero are getting a divorce.,Masha and Hero are getting divorced early in t...
9,#Person1# and Brian are at the birthday party ...,Person1 and Person2 are celebrating Brian's bi...,Person1 and Person2 are celebrating Brian's bi...


In [34]:
rouge = evaluate.load('rouge')

original_model_results = rouge.compute(
    predictions=original_model_summaries,
    references=human_baseline_summaries[0:len(original_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

peft_model_results = rouge.compute(
    predictions=peft_model_summaries,
    references=human_baseline_summaries[0:len(peft_model_summaries)],
    use_aggregator=True,
    use_stemmer=True,
)

print('ORIGINAL MODEL:')
print(original_model_results)
print(dash_line)
print('PEFT MODEL:')
print(peft_model_results)

ORIGINAL MODEL:
{'rouge1': 0.3217981141980334, 'rouge2': 0.10913502599809125, 'rougeL': 0.25168251357213534, 'rougeLsum': 0.25222310023454186}
--------------------------------------------------------------------------------------------------------------
PEFT MODEL:
{'rouge1': 0.35288076534605095, 'rouge2': 0.11493664411813231, 'rougeL': 0.2844235151687794, 'rougeLsum': 0.283610472781488}
