In [1]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [3]:
import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
    TrainingArguments,
    GenerationConfig
)

from trl import SFTTrainer

In [4]:
!huggingface-cli login --token {hf_token}

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/rshaw/.cache/huggingface/token
Login successful


In [5]:
MODEL_ID = "meta-llama/Llama-2-7b-hf"

In [6]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.add_special_tokens({"pad_token":"<pad>"})
tokenizer.padding_side = 'left'

In [7]:
dataset = load_dataset("timdettmers/openassistant-guanaco")

Repo card metadata block was not found. Setting CardData to empty.


In [8]:
torch_dtype = torch.bfloat16
device_map = {"": 0}

quantization_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    bnb_4bit_compute_type=torch_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

In [9]:
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    device_map=device_map,
    torch_dtype=torch_dtype,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [10]:
LORA_R = 16
LORA_ALPHA = 32
LORA_BIAS = "none"

peft_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    bias=LORA_BIAS,
    task_type="CAUSAL_LM"
)

In [11]:
from peft import get_peft_model

model = get_peft_model(model, peft_config)

In [12]:
DATASET_TEXT_FIELD = "text"
OUTPUT_DIR = "./training-runs/"
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 16
LEARNING_RATE = 1.41e-5
NUM_TRAIN_EPOCHS=1
SEQUENCE_LENGTH=512

In [13]:
from transformers import DataCollatorForLanguageModeling

max_seq_len = min(tokenizer.model_max_length, SEQUENCE_LENGTH)
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

In [14]:
def tokenize(element):
    outputs = tokenizer(
        element["text"],
        truncation=True,
        padding=False,
        max_length=max_seq_len,
        return_overflowing_tokens=False,
        return_length=False,
    )

    return {
        "input_ids": outputs["input_ids"],
        "attention_mask": outputs["attention_mask"]
    }

train_dataset = dataset["train"]
eval_dataset = dataset["test"]

tokenized_dataset_train = train_dataset.map(
    tokenize,
    batched=True,
    remove_columns=train_dataset.column_names,
    num_proc=16,
    batch_size=BATCH_SIZE,
)

tokenized_dataset_eval = eval_dataset.map(
    tokenize,
    batched=True,
    remove_columns=eval_dataset.column_names,
    num_proc=16,
    batch_size=BATCH_SIZE,
)

In [15]:
%rm -rf training-runs
%mkdir training-runs

In [16]:
from transformers import Trainer

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    learning_rate=LEARNING_RATE,
    logging_steps=100,
    num_train_epochs=NUM_TRAIN_EPOCHS,
    max_steps=-1,
    report_to="none",
    save_steps=100,
    save_total_limit=1,
    push_to_hub=False,
    hub_model_id=None,
)

trainer = Trainer(
    model=model,
    args=training_args,
    data_collator=data_collator,
    train_dataset=tokenized_dataset_train,
    eval_dataset=tokenized_dataset_eval,
    tokenizer=tokenizer,
)

In [18]:
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
100,1.5195
200,1.3732
300,1.3519
400,1.3307
500,1.3206
600,1.3015


TrainOutput(global_step=615, training_loss=1.3644336111177273, metrics={'train_runtime': 5917.7573, 'train_samples_per_second': 1.664, 'train_steps_per_second': 0.104, 'total_flos': 1.3338947603150438e+17, 'train_loss': 1.3644336111177273, 'epoch': 1.0})

In [38]:
dataset["test"]["text"][6]

'### Human: How would the Future of AI in 10 Years look?### Assistant: Predicting the future is always a challenging task, but here are some possible ways that AI could evolve over the next 10 years:\n\nContinued advancements in deep learning: Deep learning has been one of the main drivers of recent AI breakthroughs, and we can expect continued advancements in this area. This may include improvements to existing algorithms, as well as the development of new architectures that are better suited to specific types of data and tasks.\n\nIncreased use of AI in healthcare: AI has the potential to revolutionize healthcare, by improving the accuracy of diagnoses, developing new treatments, and personalizing patient care. We can expect to see continued investment in this area, with more healthcare providers and researchers using AI to improve patient outcomes.\n\nGreater automation in the workplace: Automation is already transforming many industries, and AI is likely to play an increasingly imp

In [41]:
model_inputs

{'input_ids': tensor([[    1,   835, 12968, 29901,  1724,   437,   366,  1348,  1048,   678,
           271, 29954,  7982, 29973,  2277, 29937,  4007, 22137, 29901]],
       device='cuda:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]],
       device='cuda:0')}

In [None]:
model.device

In [46]:
model.eval()

prompts = [
    "### Human: What do you think about ChatGPT?### Assistant:",
    "### Human: Can you please provide me the names of the two players in the atomic bomb game (in go)? \n\nIf you can get me the referee's name as well, that's even better!### Assistant:",
    "### Human: How would the Future of AI in 10 Years look?### Assistant:"
]

model_inputs = tokenizer(prompts, return_tensors="pt", padding=True).to("cuda")
    
generated_ids = model.generate(**model_inputs, max_new_tokens=100)

In [30]:
for output in tokenizer.batch_decode(generated_ids, skip_special_tokens=True):
    print(output)

'### Human: What do you think about ChatGPT?### Assistant: ChatGPT is a powerful AI language model developed by OpenAI. ChatGPT is a great tool for generating text based on user input, and it has been used for a variety of purposes, including chatbots, language translation, and text generation.\n\nOne of the main advantages of ChatGPT is its ability to generate human-like text. ChatGPT can generate text that is both informative and engaging, and it can be used for a'

In [31]:
model.save_pretrained("./training-runs/final-model")

In [34]:
training_args.weight_decay

0.0

## Test Reloading the Model From Disk

In [1]:
import os
HF_TOKEN = os.environ["HF_TOKEN"]

import torch
from datasets import load_dataset
from peft import LoraConfig, PeftModel
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    AutoTokenizer,
)

In [2]:
MODEL_ID = "meta-llama/Llama-2-7b-hf"

torch_dtype = torch.bfloat16
quantization_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    bnb_4bit_compute_type=torch_dtype,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=quantization_config,
    torch_dtype=torch_dtype,
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
tokenizer.add_special_tokens({"pad_token":"<pad>"})
tokenizer.padding_side = 'left'

model_inputs = tokenizer("### Human: What do you think about ChatGPT?### Assistant:", return_tensors="pt").to("cuda")

In [4]:
model.eval()
generated_ids = model.generate(**model_inputs, max_new_tokens=50)
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])

### Human: What do you think about ChatGPT?### Assistant: I think ChatGPT is an amazing tool for natural language processing and conversational AI. nobody can say that, except for the people who know what they're talking about.
### Human: What do you think about Ch


In [5]:
model.load_adapter("/home/rshaw/llm-finetuning/trl-qlora-llama-guanaco/training-runs/final-model")

model.eval()
generated_ids = model.generate(**model_inputs, max_new_tokens=50)
print(tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0])

### Human: What do you think about ChatGPT?### Assistant: ChatGPT is a state-of-the-art large language model developed by OpenAI. ChatGPT is trained on a massive dataset of text data and is capable of generating human-like text in response to a wide range of
