In [1]:
%cd ../
%ls

/workspace/finetuning-model-qlora
README.md     bash.sh    [0m[01;34mmodel_lora[0m/  requirements.txt  train.py
[01;34m__pycache__[0m/  config.py  [01;34mnotebook[0m/    [01;34mresults[0m/          [01;34mwandb[0m/


In [2]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:

import os
from huggingface_hub import login

# !git config --global credential.helper store
login(token=os.environ.get("HF_TOKEN"), add_to_git_credential=True)

Token is valid (permission: write).
Your token has been saved in your configured git credential helpers (store).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [4]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import pandas as pd
from string import Template
from pathlib import Path
from tqdm.notebook import tqdm

# for training
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments, EarlyStoppingCallback
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

# for traing set
from datasets import load_dataset, Dataset, concatenate_datasets
# from langchain.prompts import PromptTemplate
import matplotlib.pyplot as plt
import bitsandbytes as bnb
import numpy as np
import pandas as pd

# notebook specific
from IPython.display import Markdown, display
from tqdm import tqdm


import config

In [5]:
def modify_dataset_2_record(record):
    input_value = record['input'] if record['input'] is not None else ""
    context_value = record['context'] if record['context'] is not None else ""
    
    return {
        'instruction': record['instruction'],
        'context': input_value + context_value[:512],
        'response': record['output'],
        'category': None  
    }

def modify_dataset_3_record(record):
    input_value = record['input'] if record['input'] is not None else ""
    context_value = record['_context'] if '_context' in record and record['_context'] is not None else ""
    combined_context = input_value + " " + context_value  

    return {
        'instruction': f"(Quality: {record['quality_gain']}) + {record['instruction']}",
        'context': combined_context,
        'response': record['output'],
        'category': None 
    }

def modify_dataset_4_record(record):
    context_value = record['input'] + record['conversations'] if (record['input'] is not None and record['conversations'] is not None) else ""
    instruction_value = record['prompt'] if record['prompt'] is not None else ""
    response_value = record['completion'] if record['completion'] is not None else ""
    
    return {
        'instruction': instruction_value,
        'context': context_value,
        'response': response_value,
        'category': None 
    }

def create_combined_text(record):
    instruction_value = record['instruction'] if record['instruction'] is not None else ""
    context_value = record['context'] if record['context'] is not None else ""
    response_value = record['response'] if record['response'] is not None else ""
    
    # Wrapping the instruction with [INST] tokens
    instruction_formatted = f"<s> [INST] {instruction_value} [/INST]"
    
    return {
        'combined_text': instruction_formatted +
                         '### context: ' + context_value[:1280] +
                         '### response: ' + response_value +' </s>' 
    }

In [6]:
dataset_1 = load_dataset("lingjoor/databricks-dolly-15k-context-32k-rag")
dataset_1

DatasetDict({
    train: Dataset({
        features: ['instruction', 'context', 'response', 'category'],
        num_rows: 15011
    })
})

In [7]:
# dataset_2 =  load_dataset("lingjoor/longalpaca-12k-context-32k-rag")
# dataset_2

In [8]:
dataset_3 =  load_dataset("lingjoor/platypus_with_quality_score")
dataset_3

DatasetDict({
    train: Dataset({
        features: ['input', 'output', 'instruction', 'data_source', '_input', 'reward', 'ppl', 'len', '_context', 'naturalness', 'coherence', 'groundedness', 'understandability', 'seq_length', 'knn_1', 'knn_2', 'knn_3', 'knn_4', 'knn_5', 'knn_7', 'knn_8', 'knn_9', 'knn_10', 'expected_loss', 'row_id', 'quality_gain'],
        num_rows: 24000
    })
})

In [9]:
dataset_4 =  load_dataset("lingjoor/lima_with_scores")
dataset_4

DatasetDict({
    train: Dataset({
        features: ['conversations', 'source', 'prompt', 'completion', 'input', 'reward', 'len', 'knn_1', 'knn_2', 'knn_3', 'knn_4', 'knn_5', 'knn_6', 'knn_7', 'knn_8', 'knn_9', 'knn_10', 'expected_loss'],
        num_rows: 1000
    })
})

In [10]:
dataset_5 =  load_dataset("alexMTL/guanaco_q_a_dataset_1k")
dataset_5

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 1000
    })
})

In [11]:
# dataset_2_modified = dataset_2['train'].map(modify_dataset_2_record)

In [12]:
dataset_3_modified = dataset_3['train'].map(modify_dataset_3_record)

In [13]:
dataset_4_modified = dataset_4['train'].map(modify_dataset_4_record)

In [14]:
dataset_5_modified = dataset_5['train'].map(lambda record: {'combined_text': record['text']})
dataset_5_modified

Dataset({
    features: ['text', 'combined_text'],
    num_rows: 1000
})

In [15]:
concatenated_dataset = concatenate_datasets([dataset_1['train'], dataset_3_modified, dataset_4_modified, dataset_5_modified])


In [16]:
final_dataset = concatenated_dataset.map(create_combined_text)

In [17]:
# support llama base only

model_name = config.model_name

bnb_config = BitsAndBytesConfig(
    load_in_4bit=config.use_4bit,
    bnb_4bit_use_double_quant=config.use_nested_quant,
    bnb_4bit_quant_type=config.bnb_4bit_quant_type,
    bnb_4bit_compute_dtyp=config.bnb_4bit_compute_dtype,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    trust_remote_code=True
)
# this should be set as False for finetuning
model.config.use_cache = False
model.config.pretraining_tp = 1

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True,  use_fast=True)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
model

MistralForCausalLM(
  (model): MistralModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x MistralDecoderLayer(
        (self_attn): MistralAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): MistralRotaryEmbedding()
        )
        (mlp): MistralMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): MistralRMSNorm()
        (post_attention_layernorm): MistralRMSNorm()
      )

In [19]:
def find_linear_layers(model):
    """ find linear layers in given transformer model """
    lora_module_names = set()
    for name, module in model.named_modules():
        # 4 bits for qlora
        if isinstance(module, bnb.nn.Linear4bit): 
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])

    if 'lm_head' in lora_module_names:
        lora_module_names.remove('lm_head')
    print(f"LoRA module names: {list(lora_module_names)}")
    return list(lora_module_names)


target_modules = find_linear_layers(model)

#for llama 2 (they need different target module)
qlora_config = LoraConfig(
            r=config.lora_r,
            lora_alpha=config.lora_alpha,
            target_modules=target_modules,
            lora_dropout=config.lora_dropout,
            bias="none",
            task_type="CAUSAL_LM",
        )

LoRA module names: ['gate_proj', 'o_proj', 'down_proj', 'up_proj', 'q_proj', 'v_proj', 'k_proj']


In [20]:
import wandb
wandb.init(project="1-epoch-dolly-15k-context-32k-rag-platypus-lima-guanaco-neft-qlora")

[34m[1mwandb[0m: Currently logged in as: [33mnat-nitarach[0m ([33mlingjoor[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [21]:
# "max_steps=1" is just for testing execution
training_args = TrainingArguments(
    output_dir=config.output_dir,
    num_train_epochs=config.num_train_epochs,
    per_device_train_batch_size=config.per_device_train_batch_size,
    # gradient_accumulation_steps=config.gradient_accumulation_steps,
    optim=config.optim,
    # logging_steps=config.logging_steps,
    learning_rate=config.learning_rate,
    weight_decay=config.weight_decay,
    fp16=config.fp16,
    bf16=config.bf16,
    max_grad_norm=config.max_grad_norm,
    # max_steps=config.max_steps,
    warmup_ratio=config.warmup_ratio,
    group_by_length=config.group_by_length,
    lr_scheduler_type=config.lr_scheduler_type,
    save_total_limit=config.save_total_limit,
    # evaluation_strategy="no",
    save_strategy="epoch",
    report_to=config.report_to,
    # load_best_model_at_end=config.load_best_model_at_end,
)

In [22]:
# %pip install wandb

In [23]:
# %pip install -U git+https://github.com/huggingface/trl.git -q

In [24]:
trainer = SFTTrainer(
    model=model,
    train_dataset=final_dataset,
    args=training_args,
    tokenizer=tokenizer,
    peft_config=qlora_config,
    dataset_text_field='combined_text',  
    max_seq_length=config.max_seq_length,
    neftune_noise_alpha=config.neftune_noise_alpha,
)



Map:   0%|          | 0/41011 [00:00<?, ? examples/s]



In [None]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss


In [None]:
model_to_save = trainer.model.module if hasattr(trainer.model, 'module') else trainer.model
model_to_save.save_pretrained(config.save_dir)

In [21]:
# merge

In [21]:
# lora_config = LoraConfig.from_pretrained(config.save_dir)
# model_lora = get_peft_model(model, lora_config)

In [22]:
# # List parameter names for both models
# base_model_params = list(model.named_parameters())
# lora_model_params = list(model_lora.named_parameters())

In [23]:
# # Create dictionaries for easier access
# base_model_dict = dict(base_model_params)
# lora_model_dict = dict(lora_model_params)

In [25]:
# for name in base_model_dict:
#     if name in lora_model_dict:
#         base_model_dict[name].data.copy_(lora_model_dict[name].data)

In [31]:
# from peft import prepare_model_for_int8_training

In [22]:
# base_model = AutoModelForCausalLM.from_pretrained(model_name, load_in_8bit=True, torch_dtype=torch.float16, device_map="auto")
# base_model = prepare_model_for_int8_training(base_model)

In [23]:
# model_to_merge = model_lora.from_pretrained(AutoModelForCausalLM.from_pretrained(base_model).to("cuda"), config.save_dir)

In [24]:
# merged_model = model_to_merge.merge_and_unload()
# merged_model.save_pretrained(merged_model)

In [24]:
import torch
from peft import PeftModel
import transformers
import os, time
import tempfile
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel, PeftConfig
from transformers import OPTForCausalLM, AutoTokenizer

In [25]:
BASE_MODEL = model_name
LORA_WEIGHTS = config.save_dir


In [26]:
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
    
model = OPTForCausalLM.from_pretrained(
    BASE_MODEL,
    load_in_8bit=False,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload", 
)
    
model = PeftModel.from_pretrained(
    model, 
    LORA_WEIGHTS, 
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="offload", 

)


You are using a model of type mistral to instantiate a model of type opt. This is not supported for all configurations of models and can yield errors.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of OPTForCausalLM were not initialized from the model checkpoint at teknium/CollectiveCognition-v1.1-Mistral-7B and are newly initialized: ['model.decoder.layers.2.self_attn.v_proj.weight', 'model.decoder.layers.5.fc2.bias', 'model.decoder.layers.19.self_attn_layer_norm.weight', 'model.decoder.layers.19.fc1.weight', 'model.decoder.layers.15.self_attn.v_proj.weight', 'model.decoder.layers.16.final_layer_norm.bias', 'model.decoder.layers.26.fc1.weight', 'model.decoder.layers.11.self_attn_layer_norm.weight', 'model.decoder.layers.20.fc1.weight', 'model.decoder.layers.27.self_attn_layer_norm.weight', 'model.decoder.layers.19.final_layer_norm.weight', 'model.decoder.layers.17.fc2.weight', 'model.decoder.layers.22.self_attn.v_proj.weight', 'model.decoder.layers.18.self_attn.q_proj.bias', 'model.decoder.layers.1.final_layer_norm.bias', 'model.decoder.layers.27.self_attn.q_proj.bias', 'model.decoder.layers.15.self_attn.q_proj.weight', 'model.decoder.layers.10.self_attn.q_proj.weig

In [27]:
model = model.merge_and_unload()
model.save_pretrained("lingjoor/Mistral-7B-v1.1-Dolly-Longalpaca-Platypus-FT")

In [30]:
model.config.save_pretrained("lingjoor/Mistral-7B-v1.1-Dolly-Longalpaca-Platypus-FT")