In [1]:
from random import randrange
import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    set_seed,
    pipeline,
    TrainerCallback,
    AutoModelForSequenceClassification,
)
from trl import SFTTrainer
from huggingface_hub import login
import os, gc, time, evaluate
import numpy as np
import warnings
from sklearn.metrics import f1_score
import logging
# Suppress every Python warning for the rest of the run.
warnings.filterwarnings("ignore")

# Root logging at INFO; `logger` is the module-level logger used by the
# helper functions defined later in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def clear_memory() -> None:
    """Run the Python garbage collector, then empty PyTorch's CUDA cache."""
    gc.collect()
    torch.cuda.empty_cache()
# Start from a clean state before anything is loaded.
clear_memory()

# Weights & Biases run labelling. The key must be spelled exactly
# "WANDB_NOTEBOOK_NAME": a key with a trailing space is a different
# environment variable and leaves the intended one unset.
os.environ["WANDB_NOTEBOOK_NAME"] = "CHiLPhi3m4"
os.environ["WANDB_PROJECT"] = "CHiLPhi3m4"

# SECURITY: an access token must never be hard-coded in source. Any token that
# was previously committed in this file has to be treated as leaked and
# revoked in the Hugging Face account settings. The token is read from the
# environment instead: HF_HUB_TOKEN is the name this notebook has always used,
# HF_TOKEN is accepted as a fallback.
hf_token = os.getenv("HF_HUB_TOKEN") or os.getenv("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "No Hugging Face token found: set HF_HUB_TOKEN (or HF_TOKEN) in the "
        "environment before running this notebook."
    )
# Keep HF_HUB_TOKEN populated for any later code that reads it.
os.environ.setdefault("HF_HUB_TOKEN", hf_token)

login(token=hf_token)
local_model_dir = "./CHiLPhi3m4"

# Hugging Face Hub identifier of the pre-trained checkpoint to fine-tune.
# 'model_id' and 'model_name' both hold the same identifier.
model_id = "microsoft/Phi-3-mini-4k-instruct"
model_name = model_id
new_model = "CHiLPhi3m4"
hf_model_repo = f"nguyenkhanh87/{new_model}"
dataset_name = "lex_glue"

# Device placement passed to `from_pretrained` (everything on device 0).
device_map = {"": 0}

# bitsandbytes quantization settings.
use_4bit, use_double_quant = True, True
bnb_4bit_compute_dtype = "bfloat16"
bnb_4bit_quant_type = "nf4"

# LoRA adapter settings.
lora_r = lora_alpha = 16
lora_dropout = 0.05
target_modules = [
    "k_proj",
    "q_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "down_proj",
    "up_proj",
]
set_seed(1234)

# Flip to True for a quick smoke test: every size/interval collapses to 1.
isSetupParams = False

# The same dataset configuration is used in both modes.
dataset = load_dataset("lex_glue", "case_hold")

if isSetupParams:
    patience = train_batch_size = eval_batch_size = 1
    evalsteps = logsteps = 1
    num_train_epochs = save_step = 1
else:
    patience, train_batch_size, eval_batch_size = 10, 4, 2
    evalsteps, logsteps = 500, 500
    num_train_epochs, save_step = 5, 2000

# First tokenizer instance, loaded with default options and right padding.
# The dataset chat-template formatting further down runs before `tokenizer`
# is re-created later in the file, so it uses this instance.
tokenizer_id = model_id
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
tokenizer.padding_side = 'right'

def _build_user_message(row):
    """Build the user-turn chat message for one multiple-choice example.

    Both the training and the validation formatter call this helper so the
    prompt wording can never drift between the two.

    Args:
        row: Mapping with a ``'context'`` string and an ``'endings'`` sequence
            of candidate options.

    Returns:
        A ``{"content": ..., "role": "user"}`` dict. Options are numbered from
        0, one per line.
    """
    options = "".join(f"{i}. {val}\n" for i, val in enumerate(row['endings']))
    return {
        "content": f"Choose best option to fill <HOLDING> base on this context, answer only with the number of option(for example: 0):\n{row['context']}\n Options to choose:\n {options}",
        "role": "user",
    }


def create_message_column(row):
    """Return the training conversation: the user prompt plus the gold answer.

    Args:
        row: Mapping with ``'context'``, ``'endings'`` and ``'label'``.

    Returns:
        ``{"messages": [user_message, assistant_message]}`` where the
        assistant content is ``row['label']`` rendered as a string.
    """
    assistant = {
        "content": f"{row['label']}",
        "role": "assistant",
    }
    return {"messages": [_build_user_message(row), assistant]}


def create_message_column_val(row):
    """Return the prompt-only conversation (no assistant turn).

    Args:
        row: Mapping with ``'context'`` and ``'endings'``.

    Returns:
        ``{"messages": [user_message]}``.
    """
    return {"messages": [_build_user_message(row)]}

def format_dataset_chatml(row):
    """Render ``row["messages"]`` with the tokenizer's chat template.

    Uses the module-level ``tokenizer``. The template is returned as plain
    text (not token ids) under the ``"text"`` key, and no generation prompt
    is appended.
    """
    rendered = tokenizer.apply_chat_template(
        row["messages"],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# In setup mode, keep only the first 20 examples of each split.
if isSetupParams:
    dataset['train'] = dataset['train'].select(list(range(20)))
    dataset['validation'] = dataset['validation'].select(list(range(20)))

# Training set: prompt + gold answer, rendered through the chat template.
dataset_chatml = (
    dataset['train']
    .map(create_message_column)
    .map(format_dataset_chatml)
)

# Evaluation set: first 20 validation rows, prompt only.
# NOTE(review): these rows contain no assistant turn, so any loss computed on
# them covers the prompt text only — confirm this is what evaluation should
# measure.
dataset_val = (
    dataset['validation']
    .take(20)
    .map(create_message_column_val)
    .map(format_dataset_chatml)
)

import importlib.util

# bf16 is used only when a CUDA device is present and reports bf16 support;
# otherwise fall back to fp16. Checking availability first avoids querying
# device capabilities on a machine with no usable GPU.
_bf16_ok = torch.cuda.is_available() and torch.cuda.is_bf16_supported()
compute_dtype = torch.bfloat16 if _bf16_ok else torch.float16

# FlashAttention-2 additionally requires the optional `flash_attn` package.
# Requesting it when the package is missing makes the model load fail, so
# fall back to PyTorch's built-in SDPA attention in that case.
_flash_attn_ok = _bf16_ok and importlib.util.find_spec("flash_attn") is not None
attn_implementation = 'flash_attention_2' if _flash_attn_ok else 'sdpa'

# Re-create the tokenizer that is handed to the trainer. The unknown token is
# reused as the padding token, and padding is applied on the left.
# NOTE(review): the tokenizer loaded earlier in this file used right padding;
# confirm left padding is intended for training.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, add_eos_token=True, use_fast=True)
tokenizer.pad_token = tokenizer.unk_token
tokenizer.pad_token_id = tokenizer.convert_tokens_to_ids(tokenizer.pad_token)
tokenizer.padding_side = 'left'

# Quantization settings; these repeat the values already assigned near the
# top of the file.
use_4bit = True
bnb_4bit_compute_dtype = "bfloat16"
bnb_4bit_quant_type = "nf4"
use_double_quant = True

# NOTE: the compute dtype passed below is `compute_dtype` (selected from the
# GPU's bf16 support), not the `bnb_4bit_compute_dtype` string above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_double_quant,
)

# Load the base causal LM with the quantization config, dtype, device map and
# attention implementation chosen above.
model = AutoModelForCausalLM.from_pretrained(
          model_name, torch_dtype=compute_dtype, trust_remote_code=True, quantization_config=bnb_config, device_map=device_map,
          attn_implementation=attn_implementation
)

# Disabled experiment: load the same checkpoint through the
# sequence-classification head and keep only its base model.
#model2 = AutoModelForSequenceClassification.from_pretrained(model_name, torch_dtype=compute_dtype, trust_remote_code=True, quantization_config=bnb_config, device_map=device_map).base_model

# PEFT helper that prepares a quantized model for adapter training.
model = prepare_model_for_kbit_training(model)

rouge = evaluate.load("rouge")


def compute_metrics(eval_pred):
    """Compute F1 scores from class scores and integer labels.

    Args:
        eval_pred: ``(predictions, labels)`` pair. ``predictions`` is expected
            to hold one score per class along axis 1 (assumes a
            ``(num_examples, num_classes)`` array — confirm against the
            caller); ``labels`` holds the gold class indices.

    Returns:
        ``{"f1": macro-averaged F1, "f1_micro": micro-averaged F1}``.

    Note:
        The answer is an option index, i.e. a multi-class target.
        ``f1_score`` defaults to ``average="binary"``, which raises on
        multi-class targets, so the averaging mode is passed explicitly.
    """
    predictions, labels = eval_pred
    logger.info('Predictions: %s', predictions)
    logger.info('Labels: %s', labels)

    predicted_classes = np.argmax(predictions, axis=1)
    return {
        "f1": f1_score(y_true=labels, y_pred=predicted_classes, average="macro"),
        "f1_micro": f1_score(y_true=labels, y_pred=predicted_classes, average="micro"),
    }

class CustomEarlyStoppingCallback(TrainerCallback):
    """Stop training when the logged training loss stops improving.

    After the first epoch, each *new* training-loss record found in
    ``state.log_history`` is compared with the best loss seen so far. A record
    that does not improve on the best loss by more than ``threshold`` counts
    as one strike; ``patience`` consecutive strikes stop training.

    Args:
        patience: Number of consecutive non-improving loss records tolerated.
        threshold: Minimum decrease in loss that counts as an improvement.
    """

    def __init__(self, patience=5, threshold=0.0):
        self.patience = patience
        self.threshold = threshold
        self.best_loss = float('inf')
        self.patience_counter = 0
        # Position in ``log_history`` of the last loss record already counted,
        # so the same record is never counted on several consecutive steps.
        self._last_seen_index = -1

    def on_train_begin(self, args, state, control, **kwargs):
        """Reset all tracking state at the start of a training run."""
        self.patience_counter = 0
        self.best_loss = float('inf')
        self._last_seen_index = -1

    @staticmethod
    def _latest_loss(log_history):
        """Return ``(index, loss)`` of the newest record with a 'loss' key, or None."""
        for index in range(len(log_history) - 1, -1, -1):
            loss = log_history[index].get('loss')
            if loss is not None:
                return index, loss
        return None

    def on_step_end(self, args, state, control, **kwargs):
        """Update the patience counter when a new loss record is available."""
        # Always train for at least one full epoch before judging progress.
        if state.epoch is None or state.epoch <= 1:
            return control

        latest = self._latest_loss(state.log_history)
        if latest is None:
            return control

        index, current_loss = latest
        # This hook fires on every step but losses are only logged
        # periodically; skip steps that bring no new record.
        if index == self._last_seen_index:
            return control
        self._last_seen_index = index

        if current_loss < self.best_loss - self.threshold:
            self.best_loss = current_loss
            self.patience_counter = 0
        else:
            self.patience_counter += 1

        if self.patience_counter >= self.patience:
            control.should_training_stop = True
            print("Early stopping triggered!")
        return control

early_stopping = CustomEarlyStoppingCallback(patience=patience)

# File and folder names used when writing checkpoints below.
TRAINER_STATE_NAME = "trainer_state.json"
PREFIX_CHECKPOINT_DIR = "checkpoint"

class CustomSFTTrainer(SFTTrainer):
    """SFTTrainer with a customised checkpoint-saving step.

    The best-model bookkeeping looks up ``args.metric_for_best_model`` in the
    metrics dict under its exact name; no ``eval_`` prefix is added.
    """

    def _save_checkpoint(self, model, trial=None, metrics=None):
        """Save a checkpoint and update best-model tracking.

        NOTE(review): this overrides a private Trainer method; its signature
        and internals may differ between transformers versions — confirm
        against the installed version.
        """
        # Adapted from Trainer: the two lines that prepend "eval_" to the
        # metric name were removed.
        checkpoint_folder = f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}"
        if self.hp_search_backend is None and trial is None:
            self.store_flos()
        run_dir = self._get_output_dir(trial=trial)
        output_dir = os.path.join(run_dir, checkpoint_folder)
        self.save_model(output_dir, _internal_call=True)
        if not self.args.save_only_model:
            self._save_optimizer_and_scheduler(output_dir)
            self._save_rng_state(output_dir)
        if metrics is not None and self.args.metric_for_best_model is not None:
            metric_to_check = self.args.metric_for_best_model
            print(metrics) # print metrics to check what is available
            # If the metric name is not a key of `metrics`, nothing below runs
            # and best_metric / best_model_checkpoint stay unchanged.
            if metric_to_check in metrics:
                metric_value = metrics[metric_to_check]
                operator = np.greater if self.args.greater_is_better else np.less
                if (
                    self.state.best_metric is None
                    or self.state.best_model_checkpoint is None
                    or operator(metric_value, self.state.best_metric)
                ):
                    self.state.best_metric = metric_value
                    self.state.best_model_checkpoint = output_dir
        if self.args.should_save:
            self.state.stateful_callbacks["TrainerControl"] = self.control.state()
            self.state.save_to_json(os.path.join(output_dir, TRAINER_STATE_NAME))
        if self.args.push_to_hub:
            self._push_from_checkpoint(output_dir)
        if self.args.should_save:
            self._rotate_checkpoints(use_mtime=False, output_dir=run_dir)

# Timestamped run name, e.g. "2024-06-01-13-45".
run_name = time.strftime("%Y-%m-%d-%H-%M")

args = TrainingArguments(
    output_dir=local_model_dir,
    # Destination repository for Trainer.push_to_hub(). Without it the Trainer
    # derives the repo name from output_dir instead of using hf_model_repo.
    hub_model_id=hf_model_repo,
    eval_strategy="steps",
    save_strategy="steps",
    do_eval=True,
    optim="adamw_torch",
    per_device_train_batch_size=train_batch_size,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=eval_batch_size,
    log_level="debug",
    learning_rate=1e-4,
    # Exactly one mixed-precision mode is enabled, matching GPU bf16 support.
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    eval_steps=evalsteps,
    logging_steps=logsteps,
    num_train_epochs=num_train_epochs,
    warmup_ratio=0.1,
    lr_scheduler_type="linear",
    # NOTE(review): None is not the same as "none"; confirm which reporting
    # integrations are meant to be active.
    report_to=None,
    # NOTE(review): differs from the seed passed to set_seed() earlier in the
    # file — confirm which one is intended.
    seed=342,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    run_name=run_name,
    save_steps=save_step,
)

# LoRA adapter configuration built from the settings defined near the top of
# the file.
peft_config = LoraConfig(
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    target_modules=target_modules,
)

# Trainer over the chat-formatted "text" column. The metric function and the
# early-stopping callback are currently disabled (commented out below).
trainer = CustomSFTTrainer(
    model=model,
    train_dataset=dataset_chatml,
    eval_dataset=dataset_val,
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=args,
    # compute_metrics=compute_metrics,
    # callbacks=[early_stopping],
)

trainer.train()
trainer.save_model(local_model_dir)

# Save the tokenizer and the model object next to the trainer output.
tokenizer.save_pretrained(local_model_dir)
model.save_pretrained(local_model_dir)

# Trainer.push_to_hub() takes a commit message as its first positional
# argument, not a repository id, so the repo id must not be passed here. The
# destination repository comes from the training arguments (hub_model_id, or
# a name derived from output_dir when that is unset).
trainer.push_to_hub()
tokenizer.push_to_hub(hf_model_repo)
model.push_to_hub(hf_model_repo)

# Drop the large objects, then collect garbage and empty the CUDA cache.
del model
del tokenizer
del trainer
gc.collect()
torch.cuda.empty_cache()



  from .autonotebook import tqdm as notebook_tqdm


The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/nhk/.cache/huggingface/token
Login successful


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Map: 100%|██████████| 200/200 [00:00<00:00, 3975.74 examples/s]
Map: 100%|██████████| 200/200 [00:00<00:00, 5091.16 examples/s]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:03<00:00,  1.88s/it]
Map: 100%|██████████| 200/200 [00:00<00:00, 2500.27 examples/s]
Using auto half precision backend
Currently training with a batch size of: 4
***** Running training *****
  Num examples = 45,000
  Num Epochs = 5
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 4
  Total optimization steps = 14,060
  Number of trainable parameters = 8,912,896
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"
ERROR:wandb.jupyter:Failed



Step,Training Loss,Validation Loss
500,1.7525,No log
1000,1.5256,No log
1500,1.513,No log


***** Running Evaluation *****
  Num examples = 200
  Batch size = 2
***** Running Evaluation *****
  Num examples = 200
  Batch size = 2
***** Running Evaluation *****
  Num examples = 200
  Batch size = 2
