In [1]:
from peft import LoraConfig, get_peft_model
import torch

from transformers import AutoModelForSequenceClassification, AutoTokenizer
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig
)
output_dir = f'./peft-dialogue-summary-training-'
import transformers
from transformers import Trainer, TrainingArguments
compute_dtype = getattr(torch, "float16")
bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=compute_dtype,
        bnb_4bit_use_double_quant=False,
    )
# Load the Phi2 model and tokenizer
model_name = "microsoft/phi-2"  # Replace with the actual name of the Phi2 model on Hugging Face
tokenizer = AutoTokenizer.from_pretrained(model_name,trust_remote_code=True,padding_side="left",add_eos_token=True,add_bos_token=True,use_fast=False)
tokenizer.pad_token = tokenizer.eos_token
model_name='microsoft/phi-2'
device_map = {"": 0}
model = AutoModelForCausalLM.from_pretrained(model_name, 
                                                      device_map=device_map,
                                                      quantization_config=bnb_config,
                                                      trust_remote_code=True,
                                                      use_auth_token=True) # 3 classes: entailment, contradiction, neutral
#model = prepare_model_for_kbit_training(model)
# Set up LoRA configuration
original_model = prepare_model_for_kbit_training(model)

lora_config =  LoraConfig(
    r=16, #Rank
    lora_alpha=32,
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'dense'
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)
original_model.gradient_checkpointing_enable()
# Apply the LoRA configuration to the model
model = get_peft_model(original_model, lora_config)



  from .autonotebook import tqdm as notebook_tqdm
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.20s/it]


In [2]:
from datasets import load_dataset, Dataset

# Load SNLI dataset
snli = load_dataset("snli")

# Selecting samples and converting them to `Dataset` objects
train_data = Dataset.from_dict(snli['train'].select(range(0, len(snli['train']), 550))[:1000])
test_data = Dataset.from_dict(snli['test'].select(range(0, len(snli['test']), 100))[:100])
val_data = Dataset.from_dict(snli['validation'].select(range(0, len(snli['validation']), 100))[:100])

tokenizer.pad_token = tokenizer.eos_token
def preprocess_data(examples):
    # Concatenate premise and hypothesis into a single prompt for NLI
    inputs = ["Premise: " + p + " Hypothesis: " + h for p, h in zip(examples['premise'], examples['hypothesis'])]
    print(inputs)
    # Tokenize the concatenated prompt
    return tokenizer(inputs, truncation=True, padding='max_length', max_length=128)

# Apply preprocessing to each split
train_data = train_data.map(preprocess_data, batched=True)
test_data = test_data.map(preprocess_data, batched=True)
val_data = val_data.map(preprocess_data, batched=True)



Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

['Premise: A person on a horse jumps over a broken down airplane. Hypothesis: A person is training his horse for a competition.', 'Premise: A white bike is tied to a street sign. Hypothesis: the car is parked at the sign', 'Premise: Two women are walking down a dirt path carrying loads on their heads. Hypothesis: Two beautiful women wearing white are walking down a dirt path carrying loads of nuts on their heads.', 'Premise: A young man wearing a backpack and dark glasses approaches the brick building where there is a bit of graffiti on the wall. Hypothesis: The young man has glasses on his face.', 'Premise: a woman walking in front of an outdoor mural, prominently featuring a portrait of a bald man. Hypothesis: a lady walks past a large painting outside', 'Premise: Black and White Team rugby players are struggling over the ball in front of a white wall that has blue writing. Hypothesis: The words on the wall are wrote in blue.', 'Premise: A man plays the piano, while a second man play

Map: 100%|██████████| 1000/1000 [00:00<00:00, 4721.27 examples/s]
Map: 100%|██████████| 100/100 [00:00<00:00, 3290.81 examples/s]


['Premise: This church choir sings to the masses as they sing joyous songs from the book at a church. Hypothesis: The church has cracks in the ceiling.', 'Premise: A woman within an orchestra is playing a violin. Hypothesis: A woman is playing the violin.', 'Premise: Two men climbing on a wooden scaffold. Hypothesis: Two sad men climbing on a wooden scaffold.', 'Premise: A man in a black shirt, in a commercial kitchen, holding up meat he took out of a bag. Hypothesis: A man in a black shirt, in a commercial kitchen, holding up the old meat he took out of a bag.', 'Premise: a woman in a black shirt looking at a bicycle. Hypothesis: A woman dressed in black shops for a bicycle.', 'Premise: many children play in the water. Hypothesis: The children are playing mini golf.', 'Premise: A group of people stand near and on a large black square on the ground with some yellow writing on it. Hypothesis: a group of people wait', 'Premise: A female softball player wearing blue and red crouches in th

Map: 100%|██████████| 100/100 [00:00<00:00, 3290.91 examples/s]

['Premise: Two women are embracing while holding to go packages. Hypothesis: The sisters are hugging goodbye while holding to go packages after just eating lunch.', 'Premise: A girl swings high in the air. Hypothesis: A girl is laying in the pool', 'Premise: A lady with bright orange hair walking in a crowd. Hypothesis: The street performer is entertaining the tourists.', 'Premise: A goalie in white runs for an approaching ball while the opponent in red who kicked it waits. Hypothesis: A person wearing white is running towards a ball that was kicked from another person.', 'Premise: Four young men sit on the floor close to a television that is showing Elmo from Sesame Street. Hypothesis: Four males are playing out in the yard.', 'Premise: Five people are sitting on horses at a rodeo. Hypothesis: A group of cowboys are sitting on horses.', 'Premise: A man rides a kicking bull in a bullpen. Hypothesis: The man is riding a sheep.', 'Premise: The quarterback of the UTEP football team is abo




In [3]:
from transformers import TrainerCallback

class ProgressCallback(TrainerCallback):
    def on_log(self, args, state, control, logs=None, **kwargs):
        if logs is not None:
            print(logs)
            


In [4]:
from transformers import Trainer, TrainingArguments
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# Define training arguments
training_args = TrainingArguments(
    output_dir="./phi2_nli_finetune",
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    report_to="none",
    logging_dir="./logs",
    logging_strategy="steps",
    logging_steps=10,  # Adjust this for more or fewer updates
    log_level="info",
    evaluation_strategy="steps",
    eval_steps=25,
    do_eval=True,
    save_strategy="steps",
    load_best_model_at_end=True,
)

# Create the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=val_data,
    tokenizer=tokenizer,
    callbacks=[ProgressCallback()],
    #data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

# Start training
trainer.train()


Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
The following columns in the training set don't have a corresponding argument in `PeftModelForCausalLM.forward` and have been ignored: premise, hypothesis. If premise, hypothesis are not expected by `PeftModelForCausalLM.forward`,  you can safely ignore this message.
    There is an imbalance between your GPUs. You may want to exclude GPU 1 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.
***** Running training *****
  Num examples = 1,000
  Num Epochs = 5
  Instantaneous batch size per device = 8
  Training with DataParallel so batch size has been adjusted to: 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulat

TypeError: Caught TypeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/peft/peft_model.py", line 1644, in forward
    return self.base_model(
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/peft/tuners/tuners_utils.py", line 197, in forward
    return self.model.forward(*args, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/accelerate/hooks.py", line 170, in new_forward
    output = module._old_forward(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'label'


In [5]:
output_dir = f'./peft-dialogue-summary-training-'
import transformers
from transformers import Trainer, TrainingArguments
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=1,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=25,
    report_to="none",
    group_by_length=True,
    
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=5,
    logging_dir="./logs",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True
)

model.config.use_cache = False

peft_trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=peft_training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [6]:
peft_trainer.train()

    There is an imbalance between your GPUs. You may want to exclude GPU 1 which
    has less than 75% of the memory or cores of GPU 0. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.


TypeError: Caught TypeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/peft/peft_model.py", line 1644, in forward
    return self.base_model(
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/peft/tuners/tuners_utils.py", line 197, in forward
    return self.model.forward(*args, **kwargs)
  File "/home/nilanjanac/experiments/nilanjanac/lib/python3.8/site-packages/accelerate/hooks.py", line 170, in new_forward
    output = module._old_forward(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'label'
