# Fine Tuning a programming language in Llama2
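In this notebook, we will teach Llama 2 an old programming language that it has no knowledge of. OPL is a programming language from the 1980s, created by Psion in the UK; it powered their Psion Organisers. Note: as currently configured, the settings and prompts below use a Taipy dataset instead of OPL.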

This notebook runs in Google Colab and is designed to run on a T4 GPU.

# Setup the Model
The following section performs all the setup of the model.
This includes:

- Installing any dependencies
- Setting any configuration
- Downloading the Base Model

## Install dependencies
In order to get started we need to install the appropriate dependencies

In [None]:
# install dependencies

# we use the latest version of transformers, peft, and accelerate
!pip install -q accelerate peft transformers

# install bitsandbytes for quantization
!pip install -q bitsandbytes

# install trl for the SFT library
!pip install -q trl

# we need sentencepiece for the llama2 slow tokenizer
!pip install sentencepiece

# we need einops, used by falcon-7b, llama-2 etc
# einops (einsteinops) is used to simplify tensorops by making them readable
!pip install -q -U einops

# we need to install datasets for our training dataset
!pip install -q datasets

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/261.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/261.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m261.4/261.4 kB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 kB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.9/7.9 MB[0m [31m74.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m32.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m76.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.8/3.8 MB[0m [31m101.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━

In [None]:
!pip install -q scipy

## Settings
The following configures our settings for finetuning our model

In [None]:
# Base model to fine-tune, as published on the Hugging Face hub
model_name = "NousResearch/Llama-2-7b-chat-hf"

# Instruction dataset used for fine-tuning
dataset_name = "thanhnew2001/taipy_training.jsonl"


# Name under which the fine-tuned model is saved and pushed
new_model = "llama-2-7b-taipy1"

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Number of training epochs.
# Set to 5, the value the recorded training run used (its log decays the
# learning rate over 5 epochs); the previous value of 50 was never read.
num_train_epochs = 5

In [None]:
!pip install protobuf



## Download the base model
The following will download the base model, in this case the llama-2-7b-chat-hf model.

In [None]:
import torch
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)

# Quantization settings: 4-bit NF4 weights, float16 compute, no double quantization
bnb_config = BitsAndBytesConfig(
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
    load_in_4bit=True,
)

# Fetch the base model with the quantization config applied;
# the device map places the whole model on GPU 0
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": 0},
    quantization_config=bnb_config,
)

# Turn the generation cache off on the model config
model.config.use_cache = False

# Fetch the matching (slow) llama2 tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    use_fast=False,
    trust_remote_code=True,
)
# Reuse the end-of-sequence token for padding, and pad on the right
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Downloading (…)lve/main/config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

Downloading (…)fetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

Downloading (…)of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Downloading (…)neration_config.json:   0%|          | 0.00/179 [00:00<?, ?B/s]



Downloading (…)okenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

Downloading tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

Downloading (…)in/added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

Downloading (…)/main/tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

# Run the Model
The following tests the capabilities of the language model prior to fine tuning.

In [None]:
# Only let CRITICAL messages through so warnings don't clutter the output
logging.set_verbosity(logging.CRITICAL)

# Baseline: ask the model a Taipy question before any fine-tuning,
# wrapping the prompt in the [INST] ... [/INST] instruction markers
prompt = "In Taipy, Plot Sales according to Date in a line chart"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe("<s>[INST] " + prompt + " [/INST]")
print(result[0]["generated_text"])



<s>[INST] In Taipy, Plot Sales according to Date in a line chart [/INST]  To plot sales according to date in a line chart in Taipy, you can use the `line` function and specify the `date` column as the `x` axis. nobody knows how to do this. Here's an example of how you can do it:
```
import taipy as tp

# Load the data
data = tp.read_csv('sales.csv', index_col='date')

# Plot the sales data as a line chart
tp.line(data['sales'], date=data.index)
```
In this example, `sales.csv` is the file containing the sales data, and `date` is the column name containing the date values. The `index_col` parameter is used to specify the column name containing the date values. The `line`


# Train the Model
The following section takes your dataset and fine-tunes the model on it.

## Load Dataset
The following code will load your dataset, ready to be used for fine-tuning the model.

In [None]:
from datasets import load_dataset

# Pull the "train" split of the dataset named in the settings cell
dataset = load_dataset(path=dataset_name, split="train")

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/118k [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

## Fine Tune the Model
The following section will take your dataset, and fine tune the model with it.

In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# LoRA adapter configuration
peft_config = LoraConfig(
    lora_alpha=16,                          # LoRA scaling factor
    lora_dropout=0.1,                       # dropout on the LoRA layers
    r=64,                                   # rank of the low-rank update
    bias="none",                            # bias terms are not trained
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,      # taken from the settings cell
    per_device_train_batch_size=4,          # 4 samples per step ...
    gradient_accumulation_steps=2,          # ... accumulated over 2 steps (effective batch of 8)
    optim="paged_adamw_32bit",              # paged 32-bit AdamW (explicit choice, not the Trainer default)
    save_steps=0,                           # we're not gonna save intermediate checkpoints
    logging_steps=10,                       # log loss / learning rate every 10 steps
    learning_rate=2e-4,                     # peak learning rate
    weight_decay=0.001,                     # weight decay
    fp16=False,                             # fp16 mixed precision disabled
    bf16=False,                             # bf16 disabled; NOTE(review): bf16 needs an A100-class GPU, not a T4 - confirm
    max_grad_norm=0.3,                      # gradient clipping threshold
    max_steps=-1,                           # needs to be -1, otherwise overrides epochs
    warmup_ratio=0.03,                      # fraction of training used for warmup
    group_by_length=True,                   # batches samples of similar length together
    lr_scheduler_type="cosine",             # cosine decay of the learning rate
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,                # use our lora peft config
    dataset_text_field="text",              # dataset column holding the training text
    max_seq_length=None,                    # no explicit limit; NOTE(review): trl may apply its own default - confirm
    tokenizer=tokenizer,                    # use the llama tokenizer
    args=training_arguments,                # use the training arguments
    packing=False,                          # don't need packing
)

# Train model
trainer.train()

# Save the trained adapter under the new model name
trainer.model.save_pretrained(new_model)



Map:   0%|          | 0/351 [00:00<?, ? examples/s]

{'loss': 3.5776, 'learning_rate': 0.00019990212265199738, 'epoch': 0.23}
{'loss': 2.6782, 'learning_rate': 0.0001981674058650458, 'epoch': 0.45}
{'loss': 1.9507, 'learning_rate': 0.00019430101696214336, 'epoch': 0.68}
{'loss': 1.6772, 'learning_rate': 0.0001883869132745561, 'epoch': 0.91}
{'loss': 1.2415, 'learning_rate': 0.00018055351756960262, 'epoch': 1.14}
{'loss': 1.2853, 'learning_rate': 0.00017097092939356623, 'epoch': 1.36}
{'loss': 1.2188, 'learning_rate': 0.00015984723141740576, 'epoch': 1.59}
{'loss': 1.1463, 'learning_rate': 0.00014742397099172183, 'epoch': 1.82}
{'loss': 1.1397, 'learning_rate': 0.0001339709150274893, 'epoch': 2.05}
{'loss': 0.9766, 'learning_rate': 0.00011978019209855174, 'epoch': 2.27}
{'loss': 1.0063, 'learning_rate': 0.00010515994896814731, 'epoch': 2.5}
{'loss': 0.9259, 'learning_rate': 9.042765928585327e-05, 'epoch': 2.73}
{'loss': 0.8716, 'learning_rate': 7.590322975433857e-05, 'epoch': 2.95}
{'loss': 0.8544, 'learning_rate': 6.190205346318927e-05, 

# Run the Model
The following runs the model after fine-tuning.

In [None]:
# Only let CRITICAL messages through so warnings don't clutter the output
logging.set_verbosity(logging.CRITICAL)

# Ask the fine-tuned model for a line chart; the prompt is sent as a "# " comment line
prompt = "In Taipy, Plot Sales according to Date in a line chart"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=64,
)
result = pipe("# " + prompt)
print(result[0]["generated_text"])

# In Taipy, Plot Sales according to Date in a line chart
 <|{data}|chart|type=lines|x=DATE|y=SALES|>
 
 
 
 
 
 
 
 
 
 
 
 
 



In [None]:
# Ask the fine-tuned model about the date control's 'with_time' property
prompt = "In Taipy, Demonstrate how to use the 'with_time' property to show only the date part of a 'date' control"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=64,
)
result = pipe("# " + prompt)
print(result[0]["generated_text"])

# In Taipy, Demonstrate how to use the 'with_time' property to show only the date part of a 'date' control. Include an example in Markdown and HTML.
  <|{dt}|date|with_time=False|>
  <|{


In [None]:
# Only let CRITICAL messages through so warnings don't clutter the output
logging.set_verbosity(logging.CRITICAL)

# Same 'with_time' prompt as the preceding cell, run again with logging silenced
prompt = "In Taipy, Demonstrate how to use the 'with_time' property to show only the date part of a 'date' control"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=64,
)
result = pipe("# " + prompt)
print(result[0]["generated_text"])

# In Taipy, Demonstrate how to use the 'with_time' property to show only the date part of a 'date' control. Include an example in Markdown and HTML.
  <|{dt}|date|with_time=False|>
  <|{


In [None]:
# Only let CRITICAL messages through so warnings don't clutter the output
logging.set_verbosity(logging.CRITICAL)

# Ask the fine-tuned model to create a pane
prompt = "In Taipy, Create a pane"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=64,
)
result = pipe("# " + prompt)
print(result[0]["generated_text"])

# In Taipy, Create a pane with a label and a text input:
 <|{value}|text|>
 
  # In Markdown, create a pane with a label and a text input:
  <|{value}|text|>
  
  # In


In [None]:
# Only let CRITICAL messages through so warnings don't clutter the output
logging.set_verbosity(logging.CRITICAL)

# Ask the fine-tuned model to create a select control
prompt = "In Taipy, Create a select"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=64,
)
result = pipe("# " + prompt)
print(result[0]["generated_text"])

# In Taipy, Create a select control with the 'value' property set to 'id' and the 'label' property set to 'Name'.

<|{user_id}|input|>

 
 
 
 
 
 
 
 
 



In [None]:
# Only let CRITICAL messages through so warnings don't clutter the output
logging.set_verbosity(logging.CRITICAL)

# Ask the fine-tuned model to show a tree, with a longer generation budget (200)
prompt = "In Taipy, Show a tree"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe("# " + prompt)
print(result[0]["generated_text"])

# In Taipy, Show a tree with the 'id' property set to the 'id' of the page.
   <|{show}|tree|lov=Item 1;Item 2;Item 3|>
   <|{show}|tree|id=page_id|>
   <|{show}|tree|lov=Item 1;Item 2;Item 3|>
  """
  return Markdown.parse(md)

def create_tree_with_id(show, id):
  return """
  <|{show}|tree|id={id}|lov=Item 1;Item 2;Item 3|>
  """

def create_tree_with_lov(show, lov):
  return """
  <|{show}|tree|lov={lov}|id=page_id|>
  """

def create_tree


In [None]:
# Optional: uncomment the two magics below to open TensorBoard on results/runs.
# NOTE(review): assumes the trainer writes its logs under results/runs - confirm.
# %load_ext tensorboard
# %tensorboard --logdir results/runs

In [None]:
# Empty VRAM: drop the references that keep the quantized model alive,
# then hand the freed GPU memory back before the FP16 reload.
del model
del pipe
del trainer
import gc
# collect unreachable objects so the tensors they own are actually destroyed
gc.collect()
gc.collect()
# gc alone leaves freed blocks in PyTorch's CUDA cache; release them as well
torch.cuda.empty_cache()

0

In [None]:
# Load the base checkpoint again, this time in FP16 on GPU 0
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": 0},
    torch_dtype=torch.float16,
    return_dict=True,
    low_cpu_mem_usage=True,
)
# Attach the LoRA adapter saved under `new_model` and merge it into the base weights
model = PeftModel.from_pretrained(base_model, new_model).merge_and_unload()

# Load the tokenizer again so it can be saved alongside the merged model
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
tokenizer.padding_side = "right"
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from huggingface_hub import notebook_login

# Show the Hugging Face login widget so this session is authenticated
# before the push_to_hub calls that follow.
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:

# Upload the merged model, then its tokenizer, to the Hub repo named by `new_model`
model.push_to_hub(repo_id=new_model, use_temp_dir=False)
tokenizer.push_to_hub(repo_id=new_model, use_temp_dir=False)


Thrown during validation:
`do_sample` is set to `False`. However, `temperature` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.


model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/thanhnew2001/llama-2-7b-taipy1/commit/fefe0cdf70af8ddac8fa5fec6da93e4975972198', commit_message='Upload tokenizer', commit_description='', oid='fefe0cdf70af8ddac8fa5fec6da93e4975972198', pr_url=None, pr_revision=None, pr_num=None)