In [None]:
!pip install transformers 
!pip install accelerate
!pip install bitsandbytes
!pip install peft
!pip install trl

In [2]:
from datasets import load_dataset
dataset = load_dataset('gsm8k', 'main')

In [3]:
dataset = load_dataset('gsm8k', 'main', split="train")

In [4]:
dataset

Dataset({
    features: ['question', 'answer'],
    num_rows: 7473
})

In [5]:
import textwrap
import pandas as pd

In [6]:
def llama_chat_format(question : str,answer:str) -> str:
    llama_template = textwrap.dedent(f"""\
    <s>[INST]
    <<SYS>> You are helpful assistant <<SYS>>
    ### Question: {question}
    ### Answer: {answer}
[/INST] </s>
    [/INST]
    """)
    return llama_template

In [7]:
llama_chat_format(dataset[1]['question'],dataset[1]['answer'])

'    <s>[INST]\n    <<SYS>> You are helpful assistant <<SYS>>\n    ### Question: Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?\n    ### Answer: Weng earns 12/60 = $<<12/60=0.2>>0.2 per minute.\nWorking 50 minutes, she earned 0.2 x 50 = $<<0.2*50=10>>10.\n#### 10\n[/INST] </s>\n    [/INST]\n'

In [8]:
formatted_dataset=[]
# Read all Q and A pairs and format them as llama chat instrunctions 
for example in range(len(dataset)):
    formatted_dataset.append({"text":llama_chat_format(dataset[example]['question'],dataset[example]['answer'])})

In [9]:
for i in range(0,2):
    print(formatted_dataset[i])

{'text': '    <s>[INST]\n    <<SYS>> You are helpful assistant <<SYS>>\n    ### Question: Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?\n    ### Answer: Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72\n[/INST] </s>\n    [/INST]\n'}
{'text': '    <s>[INST]\n    <<SYS>> You are helpful assistant <<SYS>>\n    ### Question: Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?\n    ### Answer: Weng earns 12/60 = $<<12/60=0.2>>0.2 per minute.\nWorking 50 minutes, she earned 0.2 x 50 = $<<0.2*50=10>>10.\n#### 10\n[/INST] </s>\n    [/INST]\n'}


In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer,TrainingArguments,BitsAndBytesConfig
from peft import LoraConfig, PeftModel
from trl import SFTTrainer



In [3]:
quant_config  = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False
)

peft_config= LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=8,
    task_type="CAUSAL_LM"
)

In [None]:
# Donwload llama artifacts
# mkdir llama2-7b-chat-hf
# gs://vertex-model-garden-public-us-central1/llama2/llama2-7b-chat-hf llama2-7b-chat-hf/

In [4]:
model_name = '/home/jupyter/llama2-7b-chat-hf/llama2-7b-chat-hf'
model = AutoModelForCausalLM.from_pretrained(model_name,quantization_config=quant_config,device_map={"": 0} )


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [5]:
model.config.use_cache = False
model.config.pretraining_tp = 1

In [6]:
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [7]:
training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    logging_steps=25,
    report_to="tensorboard",
    learning_rate=2e-4,
    weight_decay=0.001,
    fp16=False,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    optim="paged_adamw_32bit",
    save_steps=25,
    lr_scheduler_type="constant"
)

In [16]:
from datasets import load_dataset,Dataset
import pandas as pd
df=pd.DataFrame.from_dict(formatted_dataset)
dataset = Dataset.from_pandas(df)

In [17]:
trainer = SFTTrainer(
    model,
    train_dataset=dataset,
    dataset_text_field="text",
    tokenizer=tokenizer,
    peft_config=peft_config,
    args=training_args,
    max_seq_length=1024
)

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

In [None]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
25,1.3968
50,0.936
75,0.7694
100,0.7678
125,0.742
150,0.8455
175,0.7312
200,0.759
225,0.7576
250,0.7231


In [None]:
tuned="llama2-chat-tuned"
trainer.model.save_pretrained(tuned)

In [9]:
from transformers import pipeline
query = "Question: Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Answer:"
generate_text = pipeline(task="text-generation", model="llama2-chat-tuned", tokenizer=tokenizer, max_length=4096)
response = generate_text(f"<s>[INST] {query} [/INST]")
print(response[0]['generated_text'])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

<s>[INST] Question: Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May? Answer: [/INST]  To find out how many clips Natalia sold altogether in April and May, we need to use the information given in the problem.
In April, Natalia sold clips to 48 of her friends. So, she sold a total of 48 clips in April.
In May, Natalia sold half as many clips as she sold in April. Since she sold 48 clips in April, she sold half of that amount in May, which is 24 clips.
Therefore, Natalia sold a total of 48 + 24 = 72 clips in April and May together.


In [1]:
from tensorboard import notebook
log_dir = "results/runs"
notebook.start("--logdir {} --port 8081".format(log_dir))
