# Fine-tune Llama2 with LoRA for QA


reference code: https://deci.ai/blog/fine-tune-llama-2-with-lora-for-question-answering/

In [1]:
# Alternative kept for reference: turn DeprecationWarning into an exception
# instead of silencing it.
# import warnings
# warnings.filterwarnings('error', category=DeprecationWarning)
import pandas as pd
import warnings
# Blanket filter: no category is given, so every warning is suppressed for the
# rest of the session.
# NOTE(review): this also hides warnings emitted by the training libraries — confirm that is intended.
warnings.filterwarnings('ignore')

In [2]:
# !pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
import os, torch, logging
from datasets import load_dataset, Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, HfArgumentParser, TrainingArguments, pipeline
from peft import LoraConfig, PeftModel
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

If loading a dataset fails with `NotImplementedError: Loading a dataset cached in a LocalFileSystem is not supported`, resolve it as follows:

1. pip install -U datasets
2. pip install fsspec==2023.9.2
3. Restart the kernel of this Jupyter notebook.

https://stackoverflow.com/questions/77433096/notimplementederror-loading-a-dataset-cached-in-a-localfilesystem-is-not-suppor

In [3]:
# Read the fine-tuning corpus from the local CSV and preview its first rows.
corpus_csv = 'dataset/fine-tune-corpus-NLE.csv'
df_corpus = pd.read_csv(corpus_csv)
df_corpus.head()

# Tip for large corpora (more than ~200k instances): loading becomes much
# faster once the data is stored in Arrow format.
# For JSON files, convert once with save_to_disk and reload with load_from_disk:
#   dataset.save_to_disk(save_path)
#   dataset = load_from_disk(save_path)
# The map step can also be run in parallel:
#   dataset = dataset.map(map_fn, num_proc=128)

Unnamed: 0,post,gpt_answer
0,and i will point it out here when u call whit...,the post is implicitly hateful because it comp...
1,antiwhites expose their hatred of white kids ...,The post implies that any opposition to the co...
2,interior sec y salazar pushing for hispanic n...,the post is implicitly hateful because it sugg...
3,national socialism is race based which means ...,The post expresses support for National Social...
4,sez the cop hater in chief the terrorist said...,the post is implicitly hateful because it dehu...


In [4]:
# Re-read the corpus, skip its first three rows, renumber the index from zero,
# and wrap the result in a Hugging Face Dataset for the trainer.
# NOTE(review): presumably the first three rows are held out because the
# evaluation CSV starts with the same posts — confirm.
df_corpus = (
    pd.read_csv('dataset/fine-tune-corpus-NLE.csv')
    .iloc[3:]
    .reset_index(drop=True)
)
dataset = Dataset.from_pandas(df_corpus)
dataset

Dataset({
    features: ['post', 'gpt_answer'],
    num_rows: 97
})

In [5]:
# Public dataset used by the reference tutorial (not used in this notebook):
# data_name = "mlabonne/guanaco-llama2-1k"
# training_data = load_dataset(data_name, split="train")

# Local locations: the base checkpoint, the directory the fine-tuned model is
# saved to, and the scratch directory handed to the trainer as output_dir.
base_model_name = "/home/fanhuan/cache/llama-2-7b-hf"
refined_model = "/home/fanhuan/cache/llama-2-7b-hf-TF-LoRA-IHS"
cache_dir = "/data/fanhuan/cache/temp/7b-lora-IHS"

# 4-bit NF4 quantization, float16 compute dtype, double quantization off.
quant_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

# Tokenizer of the base checkpoint; EOS doubles as the padding token.
llama_tokenizer = AutoTokenizer.from_pretrained(base_model_name, trust_remote_code=True)
llama_tokenizer.padding_side = "right"  # noted by the original author as a fix for fp16
llama_tokenizer.pad_token = llama_tokenizer.eos_token

# Quantized base model with automatic device placement.
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",  # alternative tried earlier: device_map={"": 7}
    quantization_config=quant_config,
)
# NOTE(review): presumably use_cache is switched off because the KV cache is
# not needed while training, and pretraining_tp=1 selects the non-sliced
# linear path — confirm against the transformers Llama config docs.
base_model.config.pretraining_tp = 1
base_model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at /home/fanhuan/cache/llama-2-7b-hf and are newly initialized: ['model.layers.5.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.17.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.20.self_attn.rotary_emb.inv_freq', 'model.layers.19.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.24.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.13.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.25.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotar

How to use the neptune features in Transformers:

https://docs.neptune.ai/integrations/transformers/#__tabbed_2_1

In [6]:
# from transformers.integrations import NeptuneCallback
# import neptune

# Read the Neptune token from the environment; never paste the literal token
# into the notebook. (A token that was ever committed here should be treated
# as leaked and revoked.)
# run = neptune.init_run(
#     project="fhuang181/LoRA",
#     api_token=os.environ["NEPTUNE_API_TOKEN"],
# )

# neptune_callback = NeptuneCallback(run=run)

# LoRA adapter hyper-parameters: rank 8, alpha 16, dropout 0.1, no bias terms,
# applied to a causal language model.
peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
)

# Trainer settings, grouped by concern.
train_params = TrainingArguments(
    # --- schedule and batching ---
    num_train_epochs=5,
    max_steps=-1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # --- optimisation ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    fp16=False,
    bf16=False,
    # --- checkpoints and logging ---
    output_dir=cache_dir,
    save_steps=25,
    # Important: keep a single checkpoint so the disk does not fill up
    # during further training.
    save_total_limit=1,
    logging_steps=25,
    # Setting report_to explicitly avoids errors from the Neptune integration;
    # report_to="none" was the other option tried.
    report_to="tensorboard",
)

# Prompt builder for SFTTrainer, written against the columns of the loaded dataset.
# See https://huggingface.co/docs/trl/sft_trainer
def formatting_prompts_func(example):
    """Turn a batch of dataset rows into training prompts.

    Args:
        example: mapping with parallel sequences under the keys 'post' and
            'gpt_answer'.

    Returns:
        A list with one string per post: the question wrapping the post,
        followed by the "### Answer:" marker and the matching answer.
    """
    return [
        f"### Question: Given the short text of: {post}\nCan you answer Yes, No, or Unsure for whether this text is containing implicit hate speech? And then explain why in few setences.\n ### Answer:\n{example['gpt_answer'][idx]}"
        for idx, post in enumerate(example['post'])
    ]

# Marker that separates the question from the answer inside each prompt; the
# completion-only collator is built around it.
response_template = "### Answer:\n"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=llama_tokenizer)

# Supervised fine-tuning trainer: quantized base model plus LoRA config.
fine_tuning = SFTTrainer(
    model=base_model,
    args=train_params,
    peft_config=peft_parameters,
    tokenizer=llama_tokenizer,
    train_dataset=dataset,
    # A custom corpus has no ready-made text column (dataset_text_field="text"),
    # so prompts are built by formatting_func and handled by data_collator.
    formatting_func=formatting_prompts_func,
    data_collator=collator,
    # callbacks=[neptune_callback],
)

fine_tuning.train()

Map:   0%|          | 0/97 [00:00<?, ? examples/s]

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,1.2958
50,0.8663
75,0.6513
100,0.4747
125,0.3189


TrainOutput(global_step=125, training_loss=0.7213834228515625, metrics={'train_runtime': 43.0178, 'train_samples_per_second': 11.274, 'train_steps_per_second': 2.906, 'total_flos': 1105959549247488.0, 'train_loss': 0.7213834228515625, 'epoch': 5.0})

In [7]:
# Write the trained model and the tokenizer side by side into one directory.
# NOTE(review): the pipeline warning in this notebook reports the model as
# PeftModelForCausalLM, so this presumably stores adapter weights rather than
# a full checkpoint — confirm.
save_dir = refined_model
fine_tuning.model.save_pretrained(save_dir)
fine_tuning.tokenizer.save_pretrained(save_dir)

('/home/fanhuan/cache/llama-2-7b-hf-TF-LoRA-IHS/tokenizer_config.json',
 '/home/fanhuan/cache/llama-2-7b-hf-TF-LoRA-IHS/special_tokens_map.json',
 '/home/fanhuan/cache/llama-2-7b-hf-TF-LoRA-IHS/tokenizer.model',
 '/home/fanhuan/cache/llama-2-7b-hf-TF-LoRA-IHS/added_tokens.json',
 '/home/fanhuan/cache/llama-2-7b-hf-TF-LoRA-IHS/tokenizer.json')

In [8]:
# Fine-tuned model
prompt = "How do I use the OpenAI API?"
pipe = pipeline(task="text-generation", model=fine_tuning.model, tokenizer=llama_tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'CodeGenForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'LlamaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MusicgenForCausalLM', 'MvpForCausalLM', 'OpenLlamaForCausalLM', 'OpenAIGPTLMHeadModel', 'OPTForCausalLM', 'PegasusForCausalLM', 'PLBartForCausalLM', 'ProphetNetForCausalLM', 'QDQBertLMHeadModel', 'ReformerModelWithLMHead', 'RemBertForCausal

<s>[INST] How do I use the OpenAI API? [/INST]
 nobody:
[INST] How do I use the OpenAI API? [/INST] nobody:
[INST] How do I use the OpenAI API? [/INST] nobody: The OpenAI API is a powerful tool that allows developers to integrate the capabilities of OpenAI's artificial intelligence models into their own applications. It provides access to a wide range of models, including GPT-3, DALL-E 2, and CLIP, and allows developers to train their own models on the OpenAI API platform. To use the OpenAI API, you will need to create an account and obtain an API key. Once you have an API key, you can start making requests to the OpenAI API platform and integrating the capabilities of OpenAI's models into your own applications.
[INST] How do I use the OpenAI API? [/INST] nobody


In [9]:
# Original model
prompt = "How do I use the OpenAI API?"
pipe = pipeline(task="text-generation", model=base_model, tokenizer=llama_tokenizer, max_length=200)
result = pipe(f"<s>[INST] {prompt} [/INST]")
print(result[0]['generated_text'])

<s>[INST] How do I use the OpenAI API? [/INST]
 nobody:
[INST] How do I use the OpenAI API? [/INST] nobody:
[INST] How do I use the OpenAI API? [/INST] nobody: The OpenAI API is a powerful tool that allows developers to integrate the capabilities of OpenAI's artificial intelligence models into their own applications. It provides access to a wide range of models, including GPT-3, DALL-E 2, and CLIP, and allows developers to train their own models on the OpenAI API platform. To use the OpenAI API, you will need to create an account and obtain an API key. Once you have an API key, you can start making requests to the OpenAI API platform and integrating the capabilities of OpenAI's models into your own applications.
[INST] How do I use the OpenAI API? [/INST] nobody


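### About the warning: The model 'PeftModelForCausalLM' is not supported for text-generation.

The two pipeline cells above print the same text. The warning only means that `PeftModelForCausalLM` is not in the list of model classes the Transformers pipeline recognises; generation still runs. A likely reason for the identical output is that the trainer adds the LoRA layers to `base_model` in place, so `base_model` and `fine_tuning.model` share the same adapted weights (this can be checked by generating with the adapter disabled).

According to (https://huggingface.co/bertin-project/bertin-alpaca-lora-7b/discussions/1), it is better to simply use the `generate` function.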

## Controlled generation, via generate function

In [10]:
# Generate Text - before fine-tune
#
# This cell runs after training, and the trainer has already injected the LoRA
# layers into `base_model` in place. The adapter is therefore disabled while
# generating so the output reflects the original weights.

cuda_name = 'cuda:0'
model = base_model
tokenizer = llama_tokenizer

text = "How do I use the OpenAI API?"
inputs = tokenizer([text], return_tensors="pt").to(cuda_name)
with fine_tuning.model.disable_adapter():
    outputs = model.generate(**inputs, max_length=512, num_return_sequences=1, min_length=1, do_sample=True,
                             pad_token_id=tokenizer.eos_token_id,
                             eos_token_id=tokenizer.eos_token_id,
                             return_dict_in_generate=True, output_scores=True)

# For a decoder-only model the returned sequence starts with the prompt tokens;
# slice them off so only the newly generated tokens are decoded.
input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]
temp_output = tokenizer.decode(generated_tokens[0])

print(temp_output)


 październik 23, 2021
The OpenAI API is an API that provides access to machine learning models developed by OpenAI, a leading AI research organization. The API provides pre-trained models and the ability to train new models using a variety of tasks and datasets, allowing developers to build AI-powered applications and services.
To use the OpenAI API, you will need to create an OpenAI API Key, which is a unique identifier that allows access to the API. You can create your OpenAI API Key through the OpenAI website or through the OpenAI API Documentation.
Once you have your OpenAI API Key, you can start using the OpenAI API. This includes training new models, accessing pre-trained models, and developing AI-powered applications and services. To get started, check out the OpenAI API Documentation, which provides detailed instructions on how to use the API.
The OpenAI API is a powerful tool for developing AI-powered applications and services. With its pre-trained models and the ability to t

In [11]:
# Generate Text - after fine-tune

cuda_name = 'cuda:0'
tokenizer = llama_tokenizer
model = fine_tuning.model

text = "How do I use the OpenAI API?"
inputs = tokenizer([text], return_tensors="pt").to(cuda_name)

# Sampling-based generation; EOS is used both to stop and to pad.
generation_kwargs = dict(
    max_length=512,
    min_length=1,
    num_return_sequences=1,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
    eos_token_id=tokenizer.eos_token_id,
    return_dict_in_generate=True,
    output_scores=True,
)
outputs = model.generate(**inputs, **generation_kwargs)

# Skip the prompt tokens at the front of the returned sequence (a single
# start token for encoder-decoder models) and decode only what was generated.
if model.config.is_encoder_decoder:
    input_length = 1
else:
    input_length = inputs.input_ids.shape[1]
generated_tokens = outputs.sequences[:, input_length:]
temp_output = tokenizer.decode(generated_tokens[0])

print(temp_output)


 hopefully, it is accessible. Some APIs require specific technology or programming language expertise, which may not be available to all developers.
The most important step is to choose the right API for your project. There are different OpenAI APIs for different tasks, so it's important to select the one that best fits your needs.
Next, you'll need to create an OpenAI account and obtain an API key.
After you have obtained an API key, you can start making API requests to access the information you need.
The OpenAI API is an exciting development in the field of artificial intelligence and machine learning. By providing access to cutting-edge models and technology, it has made it easier for developers to build new and innovative applications. With the help of an API key, developers can now incorporate the power of OpenAI's technology into their own projects, increasing the potential for innovation and growth in the field of AI. However, it's important to consider the costs and limitatio

# Fine-grained performance analysis

In [15]:
import pandas as pd

# Evaluation table: one row per post, with the human answer and one column of
# generated text per model variant.
eval_csv = 'dataset/implicit-hate-speech.csv'
df = pd.read_csv(eval_csv)
df

Unnamed: 0,post,human_answer,llama2-13b-chat,llama2-13b-chat-FT-LoRA,llama2-7b,llama2-7b-FT-LoRA,llama2-7b-FT,llama2-13b-chat-FT,llama2-13b-chat-FT-LoRA-IHS,llama2-7b-FT-LoRA-IHS
0,and i will point it out here when u call whit...,Blacks blame whites,"\n\nSo for example, if you would answer Yes, p...",\n\nYes: Explain that this text is an example ...,\n\nAnswer: I do not think implicit hate speec...,"\nThank you, I greatly appreciate any and all ...",[/INST] You have any further or other question...,",. -. a-,,\n as of and to is the, and-. of. th...",\nThis post implies that calling someone a whi...,\nThis statement implies that anyone who disag...
1,antiwhites expose their hatred of white kids ...,Anti-whites hate white people,"\n\nOf course, I won't be offended by your ans...",\n\nNote that if you do not see any implicit h...,\n### No \nThis text was not containing any ha...,\n\n\n\nHere is the text: \n\n\nHey buddy what...,Can you explain? - - - - - - - - - - - - - - ...,"and for the,en of ( as, is the"":, and en and o...","The context of the post is not given, but it's...",\nThe post implies that white people are super...
2,interior sec y salazar pushing for hispanic n...,Minorities are taking over society,So that it's clear to others what was the reas...,\n\nHere is the short text: and i will point i...,\n- It's clear this text is containing implici...,Thank you.\n\nI hope that isn't too long 👌🏼💖\n...,It Is also deliberately deliberately misreali...,", that a which as,, a,,\n./3. that the and of,...",This post is implicitly hateful because it use...,\nthe post is implicitly hateful because it im...


In [13]:
from tqdm import tqdm

cuda_name = 'cuda:0'

# Fine-tuned model (LoRA). Both names are loop-invariant, so they are bound once.
# To fill a different model's column, change `model` and the target column below.
model = fine_tuning.model
tokenizer = llama_tokenizer

for i in tqdm(range(3)):
    # Fix: read the post of the row being filled in. The previous code always
    # read row 0, so every row received an answer about the first post.
    tweet = df.loc[i, 'post']

    # NOTE(review): training prompts were wrapped as "### Question: ... ### Answer:\n"
    # by formatting_prompts_func, while this prompt is unwrapped. It is left as is
    # so the column stays comparable with the others — confirm which is intended.
    text = f"Given the short text of: {tweet}\nCan you answer Yes, No, or Unsure for whether this text is containing implicit hate speech? And then explain why in few setences."
    inputs = tokenizer([text], return_tensors="pt").to(cuda_name)
    outputs = model.generate(**inputs, max_length=512, num_return_sequences=1, min_length=1, do_sample=True,
                             pad_token_id=tokenizer.eos_token_id,
                             eos_token_id=tokenizer.eos_token_id,
                             return_dict_in_generate=True, output_scores=True)

    # Drop the prompt tokens so only the generated continuation is stored.
    input_length = 1 if model.config.is_encoder_decoder else inputs.input_ids.shape[1]
    generated_tokens = outputs.sequences[:, input_length:]
    temp_output = tokenizer.decode(generated_tokens[0])

    df.loc[i, 'llama2-7b-FT-LoRA-IHS'] = temp_output

100%|████████████████████████████████████████████████████████████████████| 3/3 [01:44<00:00, 34.75s/it]


In [14]:
# Persist the updated evaluation table; this overwrites the CSV it was read
# from, and the DataFrame index is not written as an extra column.
results_csv = 'dataset/implicit-hate-speech.csv'
df.to_csv(results_csv, index=False)