In [1]:
import os
import torch
from datasets import load_dataset

from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig
from trl import SFTTrainer
def create_model(model_path):    
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path, device_map="auto",torch_dtype=torch.bfloat16).half()
    tokenizer.padding_side = 'left'
    return tokenizer, model


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model_path = "microsoft/Phi-3.5-mini-instruct"
tokenizer, model = create_model(model_path)

Downloading shards: 100%|██████████| 2/2 [11:19<00:00, 339.88s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00,  1.07it/s]


In [3]:
tokenizer, model = create_model(model_path)
data_file = "data/train_data.json"
eval_data_file = "data/dev_data.json"
dataset = load_dataset("json", data_files=data_file, split="train")
eval_dataset = load_dataset("json", data_files=eval_data_file, split="train")

Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.12s/it]


In [None]:
for name, param in model.named_parameters():
    print(name)

In [None]:
from peft import LoraConfig, TaskType, get_peft_model
new_model = "_lora_tuning"
peft_params = LoraConfig(
    target_modules=["q_proj", "v_proj"],
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)
peft_params

In [None]:
from peft import LoraConfig, TaskType, get_peft_model
model = get_peft_model(model, peft_params)
peft_params

In [None]:
training_params = TrainingArguments(
    output_dir="./results",
    num_train_epochs = 1,
    per_device_train_batch_size=2,
    per_device_eval_batch_size = 2,
    gradient_accumulation_steps=2,
    optim="paged_adamw_8bit",
    warmup_steps=1000,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=200,
    save_steps=2000,
    do_train = True,
    do_eval = True,
    evaluation_strategy ="steps",
    eval_steps = 2000,
    push_to_hub=False,
    report_to='wandb',
)

In [None]:
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [None]:
trainer = SFTTrainer(
        model=model,
        train_dataset=dataset,
        eval_dataset=eval_dataset,
        peft_config=peft_params,
        dataset_text_field="text",
        max_seq_length=2048,
        tokenizer=tokenizer,
        args=training_params,
        packing=False,
    )

In [None]:
trainer.train()

In [None]:
trainer.save_model(new_model)