# Train llama-2-7b-fb-chat model

In [None]:
!pip install "transformers==4.31.0" "peft==0.4.0" "accelerate==0.21.0" "bitsandbytes==0.40.2" "trl==0.4.7" "safetensors>=0.3.1" --upgrade!pip install torch --upgrade
!pip install evaluate==0.4.0 rouge_score==0.1.2
!pip install datasets
!pip install torch
!pip install sentencepiece
# !pip install onnx
# !pip install tvm

In [None]:
# Standard library
import locale
import os
import re
import time

# Force the preferred encoding to UTF-8 before the third-party imports below.
# NOTE(review): presumably a workaround for hosted runtimes that report a non-UTF-8
# locale — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"

# Third-party
import evaluate
import pandas as pd
import torch
# `DatsetDict` (misspelled) was removed from this import: it does not exist in
# `datasets` and made the whole cell fail with ImportError.
from datasets import DatasetDict, load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from trl import SFTTrainer

In [None]:
# Never hardcode the access token in the notebook. Reuse a token that is already exported
# in the environment (the original unconditionally overwrote it with a placeholder string);
# otherwise ask for it interactively without echoing it to the output.
if not os.environ.get("HUGGING_FACE_HUB_TOKEN"):
    from getpass import getpass

    os.environ["HUGGING_FACE_HUB_TOKEN"] = getpass("Hugging Face access token: ")

In [None]:
# Read the previously saved DatasetDict back from its on-disk directory.
dataset_dir = './small_7400_dataset_dict'
loaded_dataset_dict = DatasetDict.load_from_disk(dataset_dir)


In [None]:
# Pull out the two splits used by this notebook; a 'validation' split is not read here.
train_dataset, test_dataset = (
    loaded_dataset_dict[split_name] for split_name in ('train', 'test')
)

In [None]:
# Display the training split summary (feature names and row count).
train_dataset

Dataset({
    features: ['Question', 'Answer'],
    num_rows: 6161
})

In [None]:
# Display the test split summary (feature names and row count).
test_dataset


Dataset({
    features: ['Question', 'Answer'],
    num_rows: 183
})

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Hugging Face model id of the gated Llama-2 chat checkpoint.
# A non-gated alternative: "NousResearch/Llama-2-7b-hf".
model_id = "meta-llama/Llama-2-7b-chat-hf"

# Place the entire model on GPU 0 (alternative: device_map="auto").
device_map = {"": 0}

# 4-bit (NF4) bitsandbytes quantisation: float16 compute dtype, no double quantisation.
quantization_settings = {
    "load_in_4bit": True,
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_compute_dtype": "float16",
    "bnb_4bit_use_double_quant": False,
}
bnb_config = BitsAndBytesConfig(**quantization_settings)


In [None]:
# Load the quantised model onto the configured device. The generation cache is switched
# off (use_cache=False) for training.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    use_cache=False,
    device_map=device_map,
)
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Pad with <unk> instead of </s>. The language-modeling data collator replaces every label
# equal to pad_token_id with -100; if EOS doubles as the pad token, the genuine
# end-of-sequence token is masked out of the loss too and the model is never trained to
# stop generating.
tokenizer.pad_token = tokenizer.unk_token
tokenizer.padding_side = "right"


Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

# Preprocess Fine-tuning Data

In [None]:
def preprocess_function(examples):
    """Build the tokenised and prompt-formatted fields for one Question/Answer example.

    Args:
        examples: mapping with string entries under "Question" and "Answer".

    Returns:
        dict with three keys:
          - "input_ids": token ids of "[INST]<question>[/INST]", truncated/padded to 512.
          - "labels": token ids of the answer, truncated/padded to 512.
          - "inputs_text": the full training string
            "<s>[INST] <question>[/INST]<answer></s>".
    """
    question = examples["Question"]
    answer = examples["Answer"]

    # Both tokenizer calls share the same length handling.
    length_options = {"truncation": True, "max_length": 512, "padding": "max_length"}

    prompt_ids = tokenizer("[INST]" + question + "[/INST]", **length_options)["input_ids"]
    answer_ids = tokenizer(answer, **length_options)["input_ids"]

    # NOTE(review): the literal "<s>"/"</s>" markers are part of the text; if the tokenizer
    # also adds its own BOS token the sequence may start with two BOS tokens — confirm.
    training_text = "<s>" + "[INST]" + " " + question + "[/INST]" + answer + "</s>"

    return {
        "input_ids": prompt_ids,
        "labels": answer_ids,
        "inputs_text": training_text,
    }

# Run preprocess_function over the training split; the returned keys become new columns.
processed_dataset = train_dataset.map(preprocess_function)

Map:   0%|          | 0/6161 [00:00<?, ? examples/s]

In [None]:
# Display the processed training split (original columns plus the added ones).
processed_dataset

Dataset({
    features: ['Question', 'Answer', 'input_ids', 'labels', 'inputs_text'],
    num_rows: 6161
})

In [None]:
# Apply the same preprocessing to the test split, which is used as the evaluation set.
processed_dataset_validation = test_dataset.map(preprocess_function)

Map:   0%|          | 0/183 [00:00<?, ? examples/s]

In [None]:
# Display the processed evaluation split.
processed_dataset_validation

Dataset({
    features: ['Question', 'Answer', 'input_ids', 'labels', 'inputs_text'],
    num_rows: 183
})

In [None]:
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# LoRA hyper-parameters (based on the QLoRA paper): rank 64, alpha 16, 10% dropout,
# no bias terms trained, causal language-modeling task.
peft_config = LoraConfig(
    r=64,
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)

# Prepare the quantised model for k-bit training, then wrap it with the LoRA adapter.
# `model` is rebound here, so re-running this cell would wrap the already wrapped model.
model = get_peft_model(prepare_model_for_kbit_training(model), peft_config)


In [None]:
# Directory that receives the training checkpoints.
output_dir = '/content/drive/MyDrive/llama-2-chat--output_dir_final'

training_arguments = TrainingArguments(
    output_dir=output_dir,
    # --- run length and batching ---
    num_train_epochs=6,
    max_steps=-1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    group_by_length=True,
    # --- optimiser and schedule ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    weight_decay=0.001,
    max_grad_norm=0.3,
    # --- numeric precision ---
    bf16=False,
    tf32=False,
    # --- logging and checkpointing ---
    logging_steps=100,
    save_strategy="epoch",
    # NOTE(review): presumably not consulted while save_strategy is "epoch" — confirm.
    save_steps=0,
)


In [None]:
from transformers import DataCollatorForLanguageModeling

# Collator for causal language modeling: masked-LM is switched off (mlm=False) and the
# tokenizer configured above is used when batches are assembled.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)


In [None]:
# Build the supervised fine-tuning trainer. The training text is read from the
# "inputs_text" column; sequences are capped at 512 tokens and packing is disabled.


trainer = SFTTrainer(
    model=model,
    train_dataset=processed_dataset,
    eval_dataset=processed_dataset_validation,
    dataset_text_field="inputs_text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_arguments,
    data_collator=data_collator,
    packing=False,
)

# Run training from scratch. To continue an interrupted run, use the commented call
# below with the checkpoint directory to resume from.

# trainer.train(resume_from_checkpoint="/content/drive/MyDrive/llama-2-chat--output_dir_final/checkpoint-3850")

trainer.train()

# Save the trained model and the tokenizer side by side in one directory.
# The return value of the last call (the written tokenizer files) is shown as cell output.
llama2_model_path="/content/drive/MyDrive/llama-2-chat-model-final"
trainer.model.save_pretrained(llama2_model_path)
tokenizer.save_pretrained(llama2_model_path)

Step,Training Loss
3900,1.2205
4000,1.2325
4100,1.2028
4200,1.233
4300,1.248
4400,1.2513
4500,1.2606
4600,1.1821


('/content/drive/MyDrive/llama-2-chat-model-final/tokenizer_config.json',
 '/content/drive/MyDrive/llama-2-chat-model-final/special_tokens_map.json',
 '/content/drive/MyDrive/llama-2-chat-model-final/tokenizer.json')

In [None]:
# Zip the saved model directory
!zip -r my_model.zip /content/drive/MyDrive/llama_2_model

  adding: content/drive/MyDrive/llama_2_model/ (stored 0%)
  adding: content/drive/MyDrive/llama_2_model/README.md (deflated 40%)
  adding: content/drive/MyDrive/llama_2_model/adapter_model.bin (deflated 7%)
  adding: content/drive/MyDrive/llama_2_model/adapter_config.json (deflated 43%)
  adding: content/drive/MyDrive/llama_2_model/tokenizer_config.json (deflated 67%)
  adding: content/drive/MyDrive/llama_2_model/special_tokens_map.json (deflated 73%)
  adding: content/drive/MyDrive/llama_2_model/tokenizer.model (deflated 55%)
  adding: content/drive/MyDrive/llama_2_model/tokenizer.json (deflated 74%)


# Test Model and run Inference

In [None]:

# Reload the base checkpoint in float16 (not quantised) so the LoRA weights can be
# attached to it and merged later.
fp16_load_options = dict(
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16,
    device_map=device_map,
)
base_model = AutoModelForCausalLM.from_pretrained(model_id, **fp16_load_options)

Downloading (…)lve/main/config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

In [None]:

# Attach the trained LoRA adapter (checkpoint-4620) to the float16 base model for inference.
# The dtype is float16 so it agrees with how base_model was loaded; the original asked for
# bfloat16 here, which did not match the base weights.
peft_model = PeftModel.from_pretrained(base_model,
                                       '/content/drive/MyDrive/llama-2-chat--output_dir_final/checkpoint-4620',
                                       torch_dtype=torch.float16,
                                       is_trainable=False)

In [None]:
# Load the tokenizer files stored in the checkpoint-4620 directory.
tokenizer_checkpoint = '/content/drive/MyDrive/llama-2-chat--output_dir_final/checkpoint-4620'
tokenizer = AutoTokenizer.from_pretrained(tokenizer_checkpoint, trust_remote_code=True)

# EOS doubles as the pad token here, and padding is added on the right.
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

### Evaluate the Model Quantitatively (with ROUGE Metric)
Run inference on a small sample of the test dataset (only 10 question–answer pairs, to save time).

In [None]:
# Take one test example and wrap its question in the [INST] ... [/INST] prompt template.
sample = test_dataset[10]
prompt_answer = sample['Answer'].strip()
prompt_question = f"[INST] {sample['Question'].strip()} [/INST]"

print(prompt_question)
print(prompt_answer)


[INST] what are the eligibility requirements for receiving cat relief and how do criminal convictions affect eligibility? [/INST]
to be eligible for cat relief the person must show that it is more likely than not that they will face torture at the hands of their home government or by a nonstate actor with the consent or acquiescence of the home government. criminal convictions can affect eligibility for cat relief as certain crimes may bar a person from receiving asylum or wor and therefore cat relief. however if a person is ineligible for asylum or wor due to a conviction they may still be eligible for cat relief.


In [None]:
# Tokenise the prompt (truncated to at most 512 tokens) and move the ids to the GPU.
encoded_prompt = tokenizer(prompt_question, return_tensors="pt", truncation=True, max_length=512)
input_ids = encoded_prompt.input_ids.cuda()

# Total length budget: prompt length plus up to 150 further tokens.
max_length = input_ids.shape[1] + 150

outputs = peft_model.generate(
    input_ids=input_ids,
    max_length=max_length,
    top_k=200,
    top_p=0.6,
    temperature=1.0,
)

# Decode the whole returned sequence (prompt included) and show it as the cell output.
text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)
text_output


'[INST] what are the eligibility requirements for receiving cat relief and how do criminal convictions affect eligibility? [/INST]the eligibility requirements for receiving cat relief include being a citizen or national of a designated country being a bona fide resident of the united states and meeting one of the following requirements being subject to persecution in their home country or on account of race nationality religion political opinion or particular social group and having a fear of imperilment on account of that persecution.Ъ relates to the 5th step in the asylum application process where an individual must demonstrate that they have been present in the united states for at least one year prior to filing their asylum application. however if an individual has a criminal conviction they may be ineligible for cat relief. even if an individual is'

In [None]:

def _generate_text(model, input_ids, max_length):
    """Generate with `model` and return the decoded sequence (prompt plus continuation)."""
    generated = model.generate(input_ids=input_ids, max_length=max_length, top_k=200, top_p=0.6, temperature=1.0)
    return tokenizer.decode(generated[0], skip_special_tokens=True)


# First 10 test questions and their human-written reference answers.
questions = test_dataset[0:10]['Question']
human_baseline_answers = test_dataset[0:10]['Answer']

original_model_summaries = []
peft_model_summaries = []

for question in questions:
    prompt = f"[INST] {question.strip()} [/INST]"

    input_ids = tokenizer(prompt, return_tensors="pt", max_length=512, truncation=True).input_ids.cuda()
    # Allow up to 100 tokens on top of the prompt length.
    max_length = input_ids.shape[1] + 100

    # Baseline output. PeftModel.from_pretrained injects the LoRA layers into `base_model`
    # in place, so calling base_model.generate() would still run WITH the adapter and the
    # "original" column would not be a real baseline. Temporarily disabling the adapter
    # gives the behaviour of the untouched base weights.
    with peft_model.disable_adapter():
        original_model_text_output = _generate_text(peft_model, input_ids, max_length)

    # Fine-tuned output (adapter active).
    peft_model_text_output = _generate_text(peft_model, input_ids, max_length)

    # NOTE(review): the decoded texts still contain the prompt, which also enters the
    # ROUGE computation later on — consider decoding only the newly generated tokens.
    original_model_summaries.append(original_model_text_output)
    peft_model_summaries.append(peft_model_text_output)



In [None]:
# One row per evaluated question: reference answer, base-model output, fine-tuned output.
zipped_summaries = list(zip(human_baseline_answers, original_model_summaries, peft_model_summaries))

result_columns = ['human_baseline_answers', 'original_model_summaries', 'peft_model_summaries']
df = pd.DataFrame(data=zipped_summaries, columns=result_columns)

# Persist the comparison table as CSV (without the index column).
df.to_csv(
    '/content/drive/MyDrive/llama-2-chat--output_dir/llam2_prediction_result_trl.csv',
    index=False,
)

In [None]:
# Display the comparison table of reference answers and model outputs.
df

Unnamed: 0,human_baseline_answers,original_model_summaries,peft_model_summaries
0,we have seen people wait 3 to 5 months. in the...,[INST] hola. i just have a quick question. how...,[INST] hola. i just have a quick question. how...
1,sometimes there is a second officer in the roo...,[INST] dear i filed my asylum case in june 201...,[INST] dear i filed my asylum case in june 201...
2,addressing the problem of inconsistent decisio...,[INST] what can be done to address the problem...,[INST] what can be done to address the problem...
3,the study found that refugees contributed an e...,[INST] what were the results of the study on t...,[INST] what were the results of the study on t...
4,answer the author does not seem to have a favo...,[INST] what is the authors opinion on clients ...,[INST] what is the authors opinion on clients ...
5,i think you take a risk if you go to your coun...,[INST] i have a green card based on an asylum ...,[INST] i have a green card based on an asylum ...
6,i would think they would be happy to get rid o...,[INST] i have been asking the arlington asylum...,[INST] i have been asking the arlington asylum...
7,most likely this would require you to leave th...,[INST] this is my first time leaving a message...,[INST] this is my first time leaving a message...
8,if the judge or dhs asks you about inconsisten...,[INST] what should you do if the judge or dhs ...,[INST] what should you do if the judge or dhs ...
9,only about 1 in 5 applicants received a decisi...,[INST] what is the percentage of cases that re...,[INST] what is the percentage of cases that re...


In [None]:

def remove_incomplete_last_sentence(text):
    """Drop a trailing sentence fragment that was cut off mid-sentence.

    The text is split after every '.', '!' or '?' that is followed by whitespace. If the
    final piece does not itself end with one of those terminators it is treated as
    incomplete and discarded. (The original only accepted '.', so a complete final
    sentence ending in '?' or '!' was wrongly removed.)

    Args:
        text: the string to clean.

    Returns:
        The remaining sentences joined by single spaces; an empty string when the text
        contains no complete sentence.
    """
    sentence_terminators = ('.', '!', '?')

    # Split on whitespace that follows a sentence terminator.
    sentences = re.split(r'(?<=[.!?])\s+', text)

    # Trailing whitespace yields an empty last piece, which is dropped by the same check.
    if sentences and not sentences[-1].endswith(sentence_terminators):
        sentences.pop()

    return ' '.join(sentences)


# Strip the dangling last fragment from every fine-tuned-model output and store the result
# in a new column, then show raw and cleaned text side by side.
raw_peft_outputs = df['peft_model_summaries']
df['peft_model_summaries_cleaned'] = raw_peft_outputs.map(remove_incomplete_last_sentence)
df[['peft_model_summaries', 'peft_model_summaries_cleaned']]

In [None]:
rouge = evaluate.load('rouge')

# Both systems must get the same post-processing, otherwise the scores are not comparable.
# The original cleaned only the fine-tuned outputs and scored the baseline on raw text.
baseline_predictions = [remove_incomplete_last_sentence(text) for text in original_model_summaries]
peft_predictions = df['peft_model_summaries_cleaned'].tolist()


def _rouge_scores(predictions):
    """Aggregated, stemmed ROUGE of `predictions` against the human reference answers."""
    return rouge.compute(
        predictions=predictions,
        references=human_baseline_answers[0:len(predictions)],
        use_aggregator=True,
        use_stemmer=True,
    )


original_model_results = _rouge_scores(baseline_predictions)
peft_model_results = _rouge_scores(peft_predictions)

print('baseline MODEL:')
print(original_model_results)
print('PEFT MODEL:')
print(peft_model_results)

baseline MODEL:
{'rouge1': 0.3220285089342042, 'rouge2': 0.12070167004584209, 'rougeL': 0.20584913643158553, 'rougeLsum': 0.20491461479007506}
PEFT MODEL:
{'rouge1': 0.35144601395365105, 'rouge2': 0.16637426900861157, 'rougeL': 0.24598628670844075, 'rougeLsum': 0.2424330985390268}


# Push Model to Hugging Face Hub

We now merge the LoRA weights into the base model and push the merged model, together with the reloaded tokenizer, to the Hugging Face Hub.



In [None]:
!huggingface-cli login

# Merge LoRA and base model
peft_model = peft_model.merge_and_unload()


# push merged model to the hub

peft_model.push_to_hub("KedirAhmed/Llama-2-7b-chat-finetune", check_pr=True)

tokenizer.push_to_hub("KedirAhmed/Llama-2-7b-chat-finetune",check_pr=True)



    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|
    
    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Token: 
Add token as git credential? (Y/n) Y
Token is valid (permission: write).
[1m[31mCannot authenticate thro

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/KedirAhmed/Llama-2-7b-chat-finetune/commit/7ae71a8d307b931e3d95fa64884672ad8d7d1f35', commit_message='Upload tokenizer', commit_description='', oid='7ae71a8d307b931e3d95fa64884672ad8d7d1f35', pr_url=None, pr_revision=None, pr_num=None)