<a href="https://colab.research.google.com/github/kithiyonjacobCreations/customer_support/blob/main/LLM_finetne_pynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# FINE TUNE THE MODEL USING UNSLOTH


## Load the training data


In [None]:
import json

In [None]:
# Load the prompt/response records used for fine-tuning. The context manager
# closes the file handle as soon as parsing finishes instead of leaving it
# open for the lifetime of the kernel.
with open("/content/data.json", "r", encoding="utf-8") as data_fp:
    file = json.load(data_fp)

# Show one record as a quick sanity check of the data structure.
print(file[1])

{'prompt': 'I forgot my login details.', 'response': "No problem at all. If you've forgotten your password, you can use the 'Forgot Password' link on the login page. If you've forgotten your username or email, please contact our support team for assistance."}


## Set up the training environment

In [None]:
!pip install unsloth trl peft accelerate bitsandbytes



In [None]:
import torch
# Query the CUDA state once, then report whether a GPU is visible and its name.
has_cuda = torch.cuda.is_available()
device_name = torch.cuda.get_device_name(0) if has_cuda else 'None'

print(f"cuda is availabe {has_cuda}")
print(f"device is {device_name}")


cuda is availabe False
device is None


## Load the Llama 3.1 model for fine-tuning


In [None]:
from unsloth import FastLanguageModel
import torch
# Sequence-length limit; reused later when the trainer is configured.
max_seq_length = 2048
# None leaves the choice of dtype to Unsloth (presumably auto-detected — confirm in its docs).
dtype = None

# Collect the loader options in one place, then load the model and its tokenizer.
load_options = dict(
    model_name="unsloth/Meta-Llama-3.1-8B",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_options)

NotImplementedError: Unsloth currently only works on NVIDIA GPUs and Intel GPUs.

In [None]:
from datasets import Dataset
def format_data(data):
    """Render one record as a single training string.

    Args:
        data: Mapping with a 'prompt' entry and a 'response' entry.

    Returns:
        The text "### Input: <prompt>" followed by a newline and
        "### Response: <response>".

    Raises:
        KeyError: If 'prompt' or 'response' is missing from ``data``.
    """
    prompt_text = data['prompt']
    response_text = data['response']
    return "### Input: {}\n### Response: {}".format(prompt_text, response_text)
# Format every record and terminate it with the tokenizer's end-of-sequence
# token. Without EOS in the training text the model never learns where a
# response ends and tends to keep generating until max_new_tokens is reached.
# `tokenizer` comes from the model-loading cell above.
formated_data = [format_data(example) + tokenizer.eos_token for example in file]

# Single-column dataset; the trainer reads the "text" field.
dataset = Dataset.from_dict({"text": formated_data})


## Add LoRA adapters to the model for training


In [None]:
# Wrap the loaded base model with LoRA adapters so that only the adapter
# weights are trained. NOTE: this rebinds `model`; re-run the model-loading
# cell before re-running this one to avoid wrapping an already-wrapped model.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank
    # Attention and MLP projection layers that receive adapters.
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    # Fixed: the closing quote and trailing comma were missing here, which
    # made the whole cell a SyntaxError.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

In [None]:
from trl import SFTConfig, SFTTrainer
# Configure supervised fine-tuning on the "text" column of `dataset`.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    packing = False,
    args = SFTConfig(
        # Effective batch size per device: 2 * 4 = 8 sequences per optimizer step.
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # Training length is bounded by max_steps. The former
        # `num_train_epochs = 10` was removed because a positive max_steps
        # overrides it, so it had no effect. To train by epochs instead,
        # drop max_steps and set num_train_epochs.
        max_steps = 60,
        learning_rate = 2e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",  # no external experiment tracker
    ),
)

In [None]:
  # Run the fine-tuning loop and keep the returned training statistics.
  trainer_stats = trainer.train()

In [None]:
# Switch the fine-tuned model into Unsloth's inference mode.
FastLanguageModel.for_inference(model)

message = [{"role": "user", "content": "i forgot my password"}]

# Wrap the question in the same "### Input / ### Response" template the model
# was trained on; a bare question does not match the fine-tuning format. No
# trailing space after "Response:" so the first generated token carries its
# own leading space, as in the training text.
prompt = f"### Input: {message[0]['content']}\n### Response:"

# Place the tensors on whichever device the model lives on rather than
# hard-coding 'cuda'.
inputs = tokenizer(
    prompt,
    return_tensors="pt",
).to(model.device)

outputs = model.generate(
    **inputs,  # passes input_ids together with the attention_mask
    max_new_tokens=256,
    use_cache=True,
    temperature=0.7,
    do_sample=True,
    top_p=0.5,
)

# Decode the full sequence (prompt followed by the generated answer).
Response = tokenizer.batch_decode(outputs)[0]
print(Response)

In [None]:
 # Export the fine-tuned model to GGUF with q4_k_m quantization.
 model.save_pretrained_gguf(
     "customer-support",
     tokenizer,
     quantization_method="q4_k_m",
 )


In [None]:
from google.colab import files
import os

def pick_gguf_file(directory, preferred="q4_k_m"):
    """Choose which .gguf file in ``directory`` should be downloaded.

    Args:
        directory: Folder to search (not searched recursively).
        preferred: Case-insensitive substring identifying the wanted
            quantization; a file whose name contains it wins over others.

    Returns:
        Path of the chosen file, or None when the directory does not exist
        or contains no .gguf file. Candidates are sorted by name so the
        choice is deterministic (os.listdir order is unspecified).
    """
    if not os.path.isdir(directory):
        return None
    candidates = sorted(f for f in os.listdir(directory) if f.endswith('.gguf'))
    if not candidates:
        return None
    wanted = preferred.lower()
    matching = [f for f in candidates if wanted in f.lower()]
    chosen = matching[0] if matching else candidates[0]
    return os.path.join(directory, chosen)


# Prefer the q4_k_m export over any other .gguf that may sit in the folder.
gguf_file = pick_gguf_file("customer-support")
if gguf_file:
    print(f"Downloading {gguf_file}...")
    files.download(gguf_file)
else:
    print("No .gguf files found in the 'customer-support' directory.")