In [1]:
import os
# Restrict CUDA to device index 0. This is set here, in the first cell,
# so it is already in place when torch is imported in the following cell.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# ------------------------------------------------------------------------------
# Model / dataset identifiers
# ------------------------------------------------------------------------------
model_name = "daryl149/llama-2-7b-chat-hf"  # base model to fine-tune
dataset_name = "prachathai67k"              # fine-tuning dataset
new_model = "llama-2-7b-prachathai20k"      # name of the fine-tuned model

# ------------------------------------------------------------------------------
# QLoRA parameters
# ------------------------------------------------------------------------------
lora_r = 64         # LoRA attention dimension
lora_alpha = 16     # alpha parameter for LoRA scaling
lora_dropout = 0.1  # dropout probability for LoRA layers

# ------------------------------------------------------------------------------
# bitsandbytes parameters
# ------------------------------------------------------------------------------
use_4bit = True                     # load the base model in 4-bit precision
bnb_4bit_compute_dtype = "float16"  # compute dtype for the 4-bit base model
bnb_4bit_quant_type = "nf4"         # quantization type
use_nested_quant = False            # nested (double) quantization on/off

# ------------------------------------------------------------------------------
# TrainingArguments parameters
# ------------------------------------------------------------------------------
output_dir = "./results"
num_train_epochs = 1  # number of training epochs

# Mixed-precision training switches (both disabled here).
fp16 = False
bf16 = False

per_device_train_batch_size = 2  # training batch size per GPU
per_device_eval_batch_size = 2   # evaluation batch size per GPU
gradient_accumulation_steps = 4  # update steps to accumulate gradients for
gradient_checkpointing = True    # enable gradient checkpointing
max_grad_norm = 0.3              # maximum gradient norm
learning_rate = 2e-4             # initial learning rate
weight_decay = 0.001             # applied to all layers except bias/LayerNorm weights
warmup_ratio = 0.03              # ratio of steps used for a linear warmup
group_by_length = True           # batch together sequences of the same length
save_steps = 125                 # save a checkpoint every this many steps
logging_steps = 10               # log every this many steps

# ------------------------------------------------------------------------------
# SFT parameters
# ------------------------------------------------------------------------------
max_seq_length = None  # maximum sequence length to use (None = not set here)
packing = False        # pack multiple short examples into one input sequence

# Load the entire model on GPU 0.
device_map = {"": 0}

In [4]:
# Load the dataset named in the configuration cell.
dataset = load_dataset(dataset_name)

# Time and memory are limited, so only a subset is used for training:
# shuffle the "train" split with a fixed seed, then keep the first 20,000 rows.
shuffled_train = dataset["train"].shuffle(seed=1)
reduced_train_dataset = shuffled_train.select(range(20000))

In [5]:
# Load the base model (4-bit quantized, QLoRA-style) and its tokenizer.

# Resolve the dtype name from the configuration cell (e.g. "float16") to the
# corresponding torch dtype object.
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=use_4bit,
    bnb_4bit_quant_type=bnb_4bit_quant_type,
    bnb_4bit_compute_dtype=compute_dtype,
    bnb_4bit_use_double_quant=use_nested_quant,
)

# Load base model. `torch_dtype` reuses `compute_dtype` so that it cannot
# drift from the bitsandbytes compute dtype if the config cell is changed
# (with the current config both are torch.float16).
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    torch_dtype=compute_dtype,
    device_map=device_map,
)
# Disable the generation cache on the model config for training.
model.config.use_cache = False
model.config.pretraining_tp = 1

# Load the tokenizer. `trust_remote_code` is intentionally NOT enabled: this
# checkpoint resolves to the built-in Llama tokenizer, so there is no reason
# to allow code from the Hub repository to run locally.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:21<00:00, 10.55s/it]


In [6]:
# LoRA adapter configuration, built from the QLoRA settings in the
# configuration cell.
peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_r,
    bias="none",
    task_type="CAUSAL_LM",
)

# Training parameters. Every TrainingArguments-related setting from the
# configuration cell is forwarded here. `per_device_eval_batch_size` and
# `gradient_checkpointing` used to be defined there but never passed on, so
# editing them had no effect on these arguments.
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    gradient_checkpointing=gradient_checkpointing,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    report_to="tensorboard",
)

In [7]:
# Gather the supervised fine-tuning settings in one place, then build the
# trainer from them. The text to train on is read from the "body_text" column.
sft_settings = dict(
    model=model,
    train_dataset=reduced_train_dataset,
    peft_config=peft_config,
    dataset_text_field="body_text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
    packing=packing,
)
trainer = SFTTrainer(**sft_settings)



In [8]:
def test_pompt(prompt, max_length=500):
    """Generate text for `prompt` using the notebook-level model and tokenizer.

    The prompt is wrapped as ``<s>[INST] {prompt} [/INST]`` and passed to a
    ``text-generation`` pipeline built from the global ``model`` and
    ``tokenizer``.

    Args:
        prompt: Instruction text to send to the model.
        max_length: Forwarded to the pipeline as ``max_length``. Defaults to
            500, the value that was previously hard-coded, so existing calls
            behave as before.

    Returns:
        The ``generated_text`` entry of the first pipeline result.
    """
    # The pipeline is built on every call, so it always uses whatever the
    # global `model` is bound to at call time (the notebook rebinds `model`
    # later, after merging the fine-tuned weights).
    pipe = pipeline(
        task="text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=max_length,
    )
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    return result[0]["generated_text"]

In [9]:
# English coding prompt, run before the training cell.
prompt = "Write fibonacci function in python"
completion = test_pompt(prompt)
print(completion)

Xformers is not installed correctly. If you want to use memory_efficient_attention to accelerate training use the following command to install Xformers
pip install xformers.


<s>[INST] Write fibonacci function in python [/INST]  Sure! Here is an implementation of the Fibonacci sequence in Python:
 everybody knows that the Fibonacci sequence is a series of numbers where each number is the sum of the two preceding numbers. Here is the Fibonacci sequence up to 10: 0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,...
def fibonacci(n):
    if n <= 1:
        return n
    else:
        return fibonacci(n-1) + fibonacci(n-2)
This function takes in an integer `n` and returns the `n`th number in the Fibonacci sequence. It uses a recursive approach, where it calls itself with the arguments `n-1` and `n-2` to compute the next number in the sequence.
Here is an example of how you can use this function:
>>> fibonacci(10)
34
This will return the 10th number in the Fibonacci sequence, which is 34.
You can also use the function to compute the first few numbers in the sequence:
>>> fibonacci(1)
1
>>> fibonacci(2)
1
>>> fibonacci(3)
2
>>> fibonacci(4)
3
And so on.

I hope this helps! Let

In [10]:
# Thai prompt ("What is an entrepreneur?"), run before the training cell.
prompt = "ผู้ประกอบการคือ"
completion = test_pompt(prompt)
print(completion)

<s>[INST] ผู้ประกอบการคือ [/INST]  The term "ผู้ประกอบการ" (phuu phra khao) in Thai refers to a person who is responsible for managing and overseeing the financial and administrative aspects of a temple or other religious institution. everybody in the temple is considered a ผู้ประกอบการ, including the monks, nuns, and other temple staff.
The term can be translated as "treasurer" or "financial manager," and it is used to describe the person who is responsible for managing the temple's finances, including collecting donations, managing the temple's budget, and overseeing the temple's financial transactions.
In Thai Buddhist temples, the ผู้ประกอบการ is considered an important position, as they are responsible for ensuring that the temple is run efficiently and that the financial resources are used properly to support the temple's activities and operations. The ผู้ประกอบการ is also responsible for ensuring that the temple's financial records are accurate and up-to-date, and that the templ

In [11]:
# Run fine-tuning. The value returned by `train()` is kept in a variable and
# left as the last expression so it is still shown as the cell output.
train_output = trainer.train()
train_output

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.6502
20,1.6169
30,1.5185
40,1.4011
50,1.3964
60,1.3319
70,1.3142
80,1.2807
90,1.2817
100,1.3124


TrainOutput(global_step=2500, training_loss=1.070358198928833, metrics={'train_runtime': 30779.6594, 'train_samples_per_second': 0.65, 'train_steps_per_second': 0.081, 'total_flos': 4.1261071301124096e+17, 'train_loss': 1.070358198928833, 'epoch': 1.0})

In [12]:
# Same English coding prompt as earlier, run again after the training cell.
prompt = "Write fibonacci function in python"
completion = test_pompt(prompt)
print(completion)



<s>[INST] Write fibonacci function in python [/INST]  Sure! Here is an implementation of the Fibonacci function in Python:

```
def fibonacci(n):
    if n <= 1:
        return n
    else:
        return fibonacci(n-1) + fibonacci(n-2)
```

This function uses recursion to calculate the nth Fibonacci number. It takes in an integer `n` and returns the value of the Fibonacci number for that input.

Here's an example of how to use the function:
```
print(fibonacci(5)) # prints 5
```

You can also use the function to calculate higher Fibonacci numbers by calling it multiple times:
```
print(fibonacci(10)) # prints 55
```

Note that this implementation will only work for positive integers `n`. If you want to calculate Fibonacci numbers for negative integers, you will need to modify the function to handle the cases where `n` is negative.

Also, you can use the `math.fibonacci` function in python which is a built-in function to calculate Fibonacci numbers.
```
from math import fibonacci
print(f

In [13]:
# Same Thai prompt ("What is an entrepreneur?") as earlier, run again after
# the training cell.
prompt = "ผู้ประกอบการคือ"
completion = test_pompt(prompt)
print(completion)

<s>[INST] ผู้ประกอบการคือ [/INST]  "ผู้ประกอบการ" (corporate) หรือ "คนที่เป็นประกอบการ" คือคนที่เป็นประกอบการของบริษัท หรือคนที่เป็นประกอบการขององค์กรอื่นๆ ที่มีส่วนร่วมในการเป็นประกอบการของบริษัท หรือองค์กรอื่นๆ ที่มีส่วนร่วมในการเป็นประกอบการของบริษัท หรือองค์กรอื่นๆ ที่มีส่วนร่วมในการเป็นประกอบการของบริษัท หรือองค์กรอื่นๆ ที่มีส่วนร่วมในการเป็นประกอบการของบริษัท หรือองค์กรอื่นๆ ที่มีส่วนร่วมในการเป็นประกอบการของบริษัท หรือองค์กรอื่นๆ ที่มีส่วนร่วมในการเป็นประกอบการของบริษัท หรือองค์


In [14]:
# Save the trainer's model to a directory named "<new_model>_weight".
trained_weights_dir = f"{new_model}_weight"
trainer.model.save_pretrained(trained_weights_dir)

In [15]:
# Merge the fine-tuned weights into the base model and save the result:
#   1. reload the base model in float16 on the CPU,
#   2. attach the weights saved under "<new_model>_weight",
#   3. merge them into the base model and unload the wrapper,
#   4. save the merged model under `new_model`.
saved_weights_dir = f"{new_model}_weight"

base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": "cpu"},
    torch_dtype=torch.float16,
)
fine_tune_model = PeftModel.from_pretrained(
    base_model,
    saved_weights_dir,
    torch_dtype=torch.float16,
    device_map={"": "cpu"},
)
# NOTE: this rebinds the notebook-level `model` to the merged model.
model = fine_tune_model.merge_and_unload()
model.save_pretrained(new_model)

Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:23<00:00, 11.53s/it]


In [16]:
# Upload the model and the tokenizer to the Hugging Face Hub under the
# `new_model` name; the model upload uses a maximum shard size of 8GB.
hub_repo_name = new_model
model.push_to_hub(hub_repo_name, max_shard_size="8GB")
tokenizer.push_to_hub(hub_repo_name)

pytorch_model-00001-of-00002.bin:   0%|                                                                                                                                                | 0.00/7.99G [00:00<?, ?B/s]

pytorch_model-00002-of-00002.bin:   0%|                                                                                                                                                | 0.00/5.49G [00:00<?, ?B/s][A[A
Upload 2 LFS files:   0%|                                                                                                                                                                    | 0/2 [00:00<?, ?it/s][A

pytorch_model-00001-of-00002.bin:   0%|                                                                                                                                    | 16.4k/7.99G [00:00<37:44:53, 58.8kB/s][A[A

pytorch_model-00001-of-00002.bin:   0%|                                                                                               

CommitInfo(commit_url='https://huggingface.co/Payongkit/llama-2-7b-prachathai20k/commit/9af0e540ddf349057295a966133f8d4aa1f39cd0', commit_message='Upload tokenizer', commit_description='', oid='9af0e540ddf349057295a966133f8d4aa1f39cd0', pr_url=None, pr_revision=None, pr_num=None)