In [6]:
# One-time setup: if unsloth is not installed, run `%pip install unsloth` in its own cell.

In [1]:
import json
from datasets import Dataset

# Load the raw prompt/response records. The encoding is pinned so the JSON
# decodes the same way on every platform instead of depending on the locale
# default.
with open("people_data.json", 'r', encoding='utf-8') as f:
    data = json.load(f)

# Render each record as a single training string: the user prompt, then the
# JSON-serialized response as the assistant turn, terminated by <|endoftext|>.
# This runs after the file has been closed; only `data` is needed from here on.
# NOTE(review): this template is hand-written, while the inference cell builds
# its prompt with tokenizer.apply_chat_template. Confirm both produce the same
# turn markers, otherwise the model is trained and queried on different formats.
tuning_examples = [
    f"<|user|>\n{example['prompt']}\n<|assistant|>\n{json.dumps(example['response'])}<|endoftext|>"
    for example in data
]

# Single-column dataset; the trainer reads the 'text' column.
dataset = Dataset.from_dict({'text': tuning_examples})

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from unsloth import FastLanguageModel

# Fetch the pre-quantized Phi-3 mini checkpoint together with its tokenizer.
# NOTE(review): dtype=None presumably lets Unsloth choose the compute dtype for
# the available hardware -- confirm against the Unsloth docs.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)

In [None]:
# Wrap the base model with LoRA adapters on the attention projections
# (q/k/v/o) and the MLP projections (gate/up/down). Rebinds `model` to the
# adapter-wrapped model, so re-running this cell without reloading the base
# model would wrap it a second time.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,
    target_modules=[
        'q_proj',
        'k_proj',
        'v_proj',
        'o_proj',
        'gate_proj',
        'up_proj',
        'down_proj',
    ],
    lora_alpha=128,  # kept at 2 * r
    lora_dropout=0,
    bias='none',
    use_gradient_checkpointing='unsloth',
)

In [None]:
from trl import SFTTrainer, SFTConfig

# Supervised fine-tuning over the 'text' column built in the data-loading cell.
# Effective batch size is 2 (per device) * 4 (accumulation steps) = 8 sequences
# per optimizer step.
trainer = SFTTrainer(
    model = model,
    train_dataset = dataset,
    tokenizer = tokenizer,
    # Fixed keyword: this was misspelled `dataset_text_filed`, so the trainer
    # was never told which column holds the training text.
    dataset_text_field = 'text',
    max_seq_length = 2048,
    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 10,
        # NOTE(review): a positive max_steps takes precedence over
        # num_train_epochs, so training stops after 60 optimizer steps and the
        # epoch count below has no effect. Keep whichever limit is intended.
        max_steps = 60,
        num_train_epochs = 3,
        logging_steps = 1,
        output_dir = 'outputs',
        optim = 'adamw_8bit',
    ),
)

In [None]:
# Run the fine-tuning loop configured on `trainer`; left as the cell's last
# expression so the returned value is displayed as the cell output.
trainer.train()

In [None]:
# Smoke-test the fine-tuned model on a single hand-written prompt.
FastLanguageModel.for_inference(model)

messages = [
    {'role': 'user', 'content': "Mike is a 30 year old programmer. He loves hiking."},
]

# Tokenize the conversation with the tokenizer's chat template and move the
# resulting tensor to the GPU.
inputs = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors='pt',
).to('cuda')

# Sampled decoding (temperature / top-p), capped at 512 new tokens.
outputs = model.generate(
    input_ids=inputs,
    max_new_tokens=512,
    use_cache=True,
    temperature=0.7,
    do_sample=True,
    top_p=0.9,
)

# Decode the first (only) returned sequence and print it.
response = tokenizer.batch_decode(outputs)[0]

print(response)

In [None]:
# Export the fine-tuned model and tokenizer as a q4_k_m-quantized GGUF.
# NOTE(review): 'fineturned_model' looks like a typo for 'finetuned_model'; the
# name is kept as-is because other tooling may already reference this path.
# NOTE(review): maximum_memory_usage=0.3 presumably caps the fraction of memory
# used during export -- confirm against the Unsloth docs.
model.save_pretrained_gguf(
    'fineturned_model',
    tokenizer,
    quantization_method='q4_k_m',
    maximum_memory_usage=0.3,
)