# Finetuned LLama3.2 

In [1]:
!pip install -q accelerate peft bitsandbytes transformers trl datasets wandb

In [None]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()

login(token='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')

# sample = "24f0457eb697c6e2bacede300d297a1b5cf2986c"

run = wandb.init(
    project='Fine-tune LLMs', 
    job_type="training", 
    anonymous="allow"
)

In [None]:
base_model = "meta-llama/Llama-3.2-1B-Instruct"
dataset_name = "/kaggle/input/llm-dataset/intents.json"
new_model = "llama-3.2-job-bot"

In [4]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [6]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)

# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model)
model, tokenizer = setup_chat_format(model, tokenizer)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

In [7]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)

In [8]:
import json
import pandas as pd

# Load JSON data from a file
with open(dataset_name) as file:
    data = json.load(file)

# Prepare the DataFrame
rows = []
for intent in data['intents']:
    tag = intent['tag']
    patterns = intent['patterns']
    responses = intent['responses']
    
    # Create combinations of patterns and responses
    for pattern in patterns:
        for response in responses:
            rows.append({'tag': tag, 'patterns': pattern, 'responses': response})

# Create DataFrame
df = pd.DataFrame(rows)

# Display the DataFrame
df.tail()


Unnamed: 0,tag,patterns,responses
1535,job,What jobs can I apply for?,I’d be glad to help. Let me look into some rol...
1536,job,What jobs can I apply for?,No problem! I’ll search for jobs that match yo...
1537,job,What jobs can I apply for?,I’m happy to assist you with this. I’ll start ...
1538,job,What jobs can I apply for?,Let’s find some job openings that align with y...
1539,job,What jobs can I apply for?,I’m here to help! Let’s begin the search for j...


In [11]:
# Assuming dataset is a pandas DataFrame
import pandas as pd


def format_chat_template(row):
    row_json = [
        {"role": "user", "content": row["patterns"]},
        {"role": "assistant", "content": row["responses"]}
    ]
    row["text"] = tokenizer.apply_chat_template(row_json, tokenize=False)
    return row

# Apply the function to each row in the DataFrame
dataset = df.apply(format_chat_template, axis=1)

# Access the text of the 4th item (index 3)
print(dataset['text'][99])


<|im_start|>user
Wassup<|im_end|>
<|im_start|>assistant
Hi! Feel free to share what’s on your mind today!<|im_end|>



In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Assuming 'dataset' is your DataFrame
train, test = train_test_split(dataset, test_size=0.2)

In [13]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)



In [14]:
from datasets import Dataset

# Assuming `train` and `test` are your DataFrames
train_dataset = Dataset.from_pandas(train)
eval_dataset = Dataset.from_pandas(test)


In [15]:
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,  
    eval_dataset=eval_dataset,   
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/1232 [00:00<?, ? examples/s]

Map:   0%|          | 0/308 [00:00<?, ? examples/s]

In [16]:
trainer.train()



Step,Training Loss,Validation Loss
124,1.0651,0.943128
248,0.7085,0.763315
372,0.6699,0.588758
496,0.4835,0.508631


We detected that you are passing `past_key_values` as a tuple and this is deprecated and will be removed in v4.43. Please use an appropriate `Cache` class (https://huggingface.co/docs/transformers/v4.41.3/en/internal/generation_utils#transformers.Cache)


TrainOutput(global_step=616, training_loss=0.8181127817793326, metrics={'train_runtime': 343.8537, 'train_samples_per_second': 3.583, 'train_steps_per_second': 1.791, 'total_flos': 201093033467904.0, 'train_loss': 0.8181127817793326, 'epoch': 1.0})

In [17]:
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▅▂▁
eval/runtime,█▁▂▆
eval/samples_per_second,▁█▇▃
eval/steps_per_second,▁█▇▃
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▇▅█▄▄▃▂▃▂▃▂▂▂▂▂▂▁▂▂▁▂▃▂▂▃▂▁▂▂▁▁▄▁▂▃▂▁▂▂▁
train/learning_rate,▇███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▃▃▃▃▃▂▂▂▂▂▂▁▁▁
train/loss,█▆▄▃▃▃▂▂▃▃▂▂▂▂▂▁▂▂▂▂▂▃▂▁▂▂▁▁▂▁▁▁▁▁▂▁▁▂▁▂

0,1
eval/loss,0.50863
eval/runtime,24.3792
eval/samples_per_second,12.634
eval/steps_per_second,12.634
total_flos,201093033467904.0
train/epoch,1.0
train/global_step,616.0
train/grad_norm,1.86327
train/learning_rate,0.0
train/loss,0.584


In [18]:
messages = [
    {
        "role": "user",
        "content": "你好，我正在找工作？"
    }
]

prompt = tokenizer.apply_chat_template(messages, tokenize=False, 
                                       add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt', padding=True, 
                   truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=150, 
                         num_return_sequences=1)

text = tokenizer.decode(outputs[0], skip_special_tokens=True)

output = text.split("assistant")

# Extracting the first assistant response
first_assistant_response = next((text.strip() for text in output if text.startswith('\n')), None)

print(first_assistant_response)

Sure thing, happy to help! I’ll begin searching for positions that would be a great fit for you. Let me know when you’re ready to receive the results.


In [19]:
trainer.model.save_pretrained(new_model)
trainer.model.push_to_hub(new_model, use_temp_dir=False)



adapter_model.safetensors:   0%|          | 0.00/1.10G [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/Kronze/llama-3.2-job-bot/commit/6c15b42c0badb74b98efdeacca5ce1ebaac13db8', commit_message='Upload model', commit_description='', oid='6c15b42c0badb74b98efdeacca5ce1ebaac13db8', pr_url=None, pr_revision=None, pr_num=None)

# Simple llama3.2 tinymodel

In [None]:
%cd /kaggle/working
!git clone --depth=1 https://github.com/ggerganov/llama.cpp.git
%cd /kaggle/working/llama.cpp
!sed -i 's|MK_LDFLAGS   += -lcuda|MK_LDFLAGS   += -L/usr/local/nvidia/lib64 -lcuda|' Makefile
!LLAMA_CUDA=1 conda run -n base make -j > /dev/null

/kaggle/working
fatal: destination path 'llama.cpp' already exists and is not an empty directory.
/kaggle/working/llama.cpp


In [None]:
!python convert-hf-to-gguf.py /kaggle/input/fine-tuned-adapter-to-full-model/llama-3-8b-chat-doctor/ \
    --outfile /kaggle/working/llama-3-8b-chat-doctor.gguf \
    --outtype f16

In [1]:
from transformers import AutoModel, AutoTokenizer

# Load the model and tokenizer
model1 = "Kronze/llama-3.2-job-bot"
model2 = ''
model1 = AutoModel.from_pretrained(model1)
tokenizer = AutoTokenizer.from_pretrained(model_name1)

# Prepare your input
input_text = "What job opportunities do you have?"
inputs = tokenizer(input_text, return_tensors="pt")

# Generate output
with torch.no_grad():  # Disable gradient calculation for inference
    outputs = model(**inputs)

# Print response (modify based on your output processing)
print(outputs)


OSError: Kronze/llama-3.2-job-bot does not appear to have a file named config.json. Checkout 'https://huggingface.co/Kronze/llama-3.2-job-bot/tree/main' for available files.