In [None]:
!pip install transformers 
!pip install accelerate
!pip install bitsandbytes
!pip install peft
!pip install trl

In [None]:
from datasets import load_dataset
# Load the 'main' configuration of GSM8K. No `split` is passed here; the
# next cell reloads the same configuration restricted to the train split.
dataset = load_dataset('gsm8k', 'main')

In [None]:
# Rebind `dataset` to only the "train" split of the GSM8K 'main' configuration.
dataset = load_dataset('gsm8k', 'main', split="train")

In [None]:
# Show the dataset's repr as the cell output.
dataset

In [None]:
import textwrap
import pandas as pd

In [None]:
def llama_chat_format(question: str, answer: str) -> str:
    """Render one question/answer pair as a Llama-2 chat training example.

    The returned text consists of, in order: the ``<s>[INST]`` opener, a
    system block delimited by ``<<SYS>>`` and ``<</SYS>>``, a blank line,
    the ``Question: ...`` / ``Answer:`` prompt, the ``[/INST]`` closer, the
    answer itself, and the ``</s>`` terminator.

    The answer is placed after ``[/INST]`` so the model is trained to
    produce it as the reply to the instruction rather than as part of it.

    The string is assembled line by line instead of dedenting an indented
    f-string: the inputs may span several lines, and a multi-line value
    interpolated before ``textwrap.dedent`` leaves the template lines with
    their source-code indentation.

    Args:
        question: Problem statement; may contain newlines.
        answer: Reference solution; may contain newlines.

    Returns:
        The formatted example, with no leading indentation on any template
        line and no trailing newline.
    """
    system_prompt = "You are helpful assistant."
    return (
        "<s>[INST] <<SYS>>\n"
        f"{system_prompt}\n"
        "<</SYS>>\n"
        "\n"
        f"Question: {question}\n"
        f"Answer: [/INST] {answer} </s>"
    )

In [None]:
# Preview the formatted text for the example at index 1.
llama_chat_format(dataset[1]['question'],dataset[1]['answer'])

In [None]:
# Wrap every question/answer pair in the Llama chat template and keep each
# result under a "text" key.
formatted_dataset = [
    {"text": llama_chat_format(row['question'], row['answer'])}
    for row in dataset
]

In [None]:
# Print the first two formatted records as a quick sanity check.
for idx in (0, 1):
    print(formatted_dataset[idx])

In [None]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer,TrainingArguments,BitsAndBytesConfig
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

In [None]:
# 4-bit quantization settings handed to the model loader: "nf4" quant type,
# float16 compute dtype, double quantization switched off.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
)

# LoRA adapter settings handed to the trainer: rank 8, alpha 16,
# dropout 0.1, for a causal language-modelling task.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
)

In [None]:
# Download the Llama 2 artifacts
# mkdir llama2-7b-chat-hf
# gsutil cp -r gs://vertex-model-garden-public-us-central1/llama2/llama2-7b-chat-hf llama2-7b-chat-hf/

In [None]:
# Local directory the base checkpoint is loaded from.
model_name = '/home/jupyter/llama2-7b-chat-hf/llama2-7b-chat-hf'

# Load the causal LM with the 4-bit quantization config; the empty-string
# key in `device_map` assigns the whole model to device 0.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map={"": 0},
    quantization_config=quant_config,
)


In [None]:
# Switch off the config's `use_cache` flag before training.
model.config.use_cache = False
# NOTE(review): pretraining_tp = 1 presumably selects the non-tensor-parallel
# computation path for Llama — confirm against the Llama config docs.
model.config.pretraining_tp = 1

In [None]:
# Load the tokenizer from the same directory as the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token, and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

In [None]:
training_args = TrainingArguments(
    # --- Output, logging and checkpointing ---
    output_dir="./results",
    report_to="tensorboard",
    logging_steps=25,
    save_steps=25,
    # --- Run length and batching ---
    num_train_epochs=1,
    max_steps=-1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=1,
    group_by_length=True,
    # --- Optimizer and learning-rate schedule ---
    optim="paged_adamw_32bit",
    learning_rate=2e-4,
    weight_decay=0.001,
    max_grad_norm=0.3,
    lr_scheduler_type="constant",
    # NOTE(review): with lr_scheduler_type="constant" this warmup ratio is
    # likely unused; "constant_with_warmup" would apply it — confirm intent.
    warmup_ratio=0.03,
    # --- Mixed precision: both half-precision modes are off ---
    fp16=False,
    bf16=False,
)

In [None]:
from datasets import load_dataset,Dataset
import pandas as pd
# Turn the list of {"text": ...} records into a DataFrame, then into a
# `Dataset`. This rebinds `dataset`, replacing the raw GSM8K split.
df = pd.DataFrame(formatted_dataset)
dataset = Dataset.from_pandas(df)

In [None]:
# Supervised fine-tuning trainer: quantized base model plus the LoRA config,
# trained on the "text" column with sequences capped at 1024 tokens.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    peft_config=peft_config,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=1024,
)

In [None]:
# Run training with the trainer configured in the previous cell.
trainer.train()

In [None]:
# Directory name the fine-tuned model is written to.
tuned="llama2-chat-tuned"
# Save the trainer's model into that directory.
# NOTE(review): since a peft_config was passed to the trainer, this presumably
# writes only the adapter weights, not the full base model — confirm.
trainer.model.save_pretrained(tuned)

In [None]:
from transformers import pipeline
# Sample word problem used to smoke-test the tuned model.
query = (
    "Question: Natalia sold clips to 48 of her friends in April, "
    "and then she sold half as many clips in May. "
    "How many clips did Natalia sell altogether in April and May? "
    "Answer:"
)

# Text-generation pipeline loaded from the saved output directory, reusing
# the tokenizer already in memory.
generate_text = pipeline(
    task="text-generation",
    model="llama2-chat-tuned",
    tokenizer=tokenizer,
    max_length=4096,
)

# Wrap the query in the instruction markers and print the generated text.
response = generate_text("<s>[INST] " + query + " [/INST]")
print(response[0]['generated_text'])

In [None]:
from tensorboard import notebook
# Launch TensorBoard inside the notebook on port 8081, pointed at the
# run logs under results/runs.
log_dir = "results/runs"
notebook.start(f"--logdir {log_dir} --port 8081")
