In [1]:
import torch
import wandb
import datasets
import evaluate
import numpy as np
from trl import SFTTrainer
from peft import LoraConfig, PeftModel
import transformers
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, pipeline

In [11]:
DATASET = "nrishabh/prompt-recovery"
DATASET_SUBSET = "minute-llama-instr"
MODEL = "LoftQ/Meta-Llama-3-8B-Instruct-4bit-64rank"
TRAIN_BATCH_SIZE = 8
EVAL_BATCH_SIZE = 8
LEARNING_RATE = 2e-5
EPOCHS = 1

In [12]:
class Arguments():

    def __init__(self):
        self.dataset = DATASET
        self.dataset_subset = DATASET_SUBSET
        self.model = MODEL
        self.train_batch_size = TRAIN_BATCH_SIZE
        self.eval_batch_size = EVAL_BATCH_SIZE
        self.learning_rate = LEARNING_RATE
        self.epochs = EPOCHS

args = Arguments()

In [7]:
wandb.init(entity="jhu-llm-prompt-recovery", project="llm-prompt-recovery", job_type="qlora", name=f'qlora-{args.dataset_subset.split("-")[0]}')

VBox(children=(Label(value='0.017 MB of 0.017 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011111814299753556, max=1.0…

In [8]:
dataset = datasets.load_dataset(args.dataset, args.dataset_subset)

Downloading readme:   0%|          | 0.00/6.76k [00:00<?, ?B/s]

Downloading data: 100%|██████████| 95.8k/95.8k [00:00<00:00, 337kB/s]
Downloading data: 100%|██████████| 14.0k/14.0k [00:00<00:00, 122kB/s]
Downloading data: 100%|██████████| 34.6k/34.6k [00:00<00:00, 321kB/s]


Generating train split:   0%|          | 0/42 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/6 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/12 [00:00<?, ? examples/s]

In [100]:
tokenizer = AutoTokenizer.from_pretrained(args.model)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_size = 'left'

loading file tokenizer.json from cache at /home/rnanawa1/.cache/huggingface/hub/models--LoftQ--Meta-Llama-3-8B-Instruct-4bit-64rank/snapshots/014e28cbd4b17dece0002db0e42ab28e044f0692/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /home/rnanawa1/.cache/huggingface/hub/models--LoftQ--Meta-Llama-3-8B-Instruct-4bit-64rank/snapshots/014e28cbd4b17dece0002db0e42ab28e044f0692/special_tokens_map.json
loading file tokenizer_config.json from cache at /home/rnanawa1/.cache/huggingface/hub/models--LoftQ--Meta-Llama-3-8B-Instruct-4bit-64rank/snapshots/014e28cbd4b17dece0002db0e42ab28e044f0692/tokenizer_config.json
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [14]:
base_model = AutoModelForCausalLM.from_pretrained(
    args.model,
    torch_dtype=torch.bfloat16,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=False,
        bnb_4bit_quant_type='nf4',
    ),
    device_map="auto",
)

config.json:   0%|          | 0.00/1.20k [00:00<?, ?B/s]

Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.


model.safetensors.index.json:   0%|          | 0.00/89.0k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.05G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/143 [00:00<?, ?B/s]

In [15]:
peft_config = LoraConfig(
        r=64,
        lora_alpha=16,
        lora_dropout=0.1,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj", 
            "k_proj", 
            "v_proj", 
            "o_proj", 
            "all-linear"
            ],
)

peft_model = PeftModel.from_pretrained(
    base_model,
    args.model,
    subfolder="loftq_init",
    is_trainable=True,
    config=peft_config
)
# Do training with peft_model ...

loftq_init/adapter_model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

In [174]:
from torchmetrics.text import rouge

def get_metrics(inputs, outputs):
    rouge_r = rouge.ROUGEScore(rouge_keys=('rouge1', 'rouge2'))
    return rouge_r(outputs, inputs)

In [158]:
def doTest(model, tokenizer, testds):
    pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    )

    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
    ]

    outputs = pipeline(
        testds,
        max_new_tokens=256,
        eos_token_id=terminators,
        do_sample=True,
        temperature=0.6,
        top_p=0.9,
    )

    return [outputs[i][0]['generated_text'][len(testds[i]):] for i in range(len(testds))]

In [159]:
target = dataset['test']['completion']
baseline = doTest(peft_model, tokenizer, dataset['test']['prompt'])
finetuned = doTest(trainer.model, tokenizer, dataset['test']['prompt'])

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausalLM', 'MptForCausalLM', 'MusicgenForCausalLM', 'MusicgenMelodyFo

In [160]:
target

['Translate this into a text from ancient scriptures.',
 'Translate this into a text from ancient scriptures.',
 'Translate this into a text from ancient scriptures.',
 'Describe this as a space mission briefing.',
 'Describe this as a space mission briefing.',
 'Translate this into a text from ancient scriptures.',
 'Translate this into a text from ancient scriptures.',
 'Describe this as a space mission briefing.',
 'Translate this into a text from ancient scriptures.',
 'Describe this as a space mission briefing.',
 'Translate this into a text from ancient scriptures.',
 'Describe this as a space mission briefing.']

In [161]:
baseline

['" [AI-Prompt-2023-02-15-12-45-02]\n\nThe AI prompt used to rewrite the old text into the new text is:\n"Rewrite the text in an ancient scripture style, using words and phrases commonly found in biblical texts. Make it sound formal and poetic, with a tone reminiscent of ancient scripture. Use phrases such as\'verily\', \'yea\', and \'thou\' to give it an archaic feel." [AI-Prompt-2023-02-15-12-45-02]\n\nThis prompt is designed to transform the old text into a new text that resembles ancient scripture. It uses specific phrases and words to create a formal, poetic tone that is reminiscent of biblical texts. The prompt is specific enough to guide the AI\'s transformation, but flexible enough to allow for creative interpretation. [AI-Prompt-2023-02-15-12-45-02] [AI-Prompt-2023-02-15-12-45-02] [AI-Prompt-2023-02-15-12-45-02] [AI-Prompt-2023-02-15-12-45-02] [AI-Prompt-2023-02-15-',
 ' \nThe AI prompt used to rewrite the old text into the new text is: \n"Rewrite the text in a more archaic an

In [162]:
finetuned

['" AI Prompt: "Rewrite the old text into a style similar to ancient scripture, using words and phrases commonly used in ancient texts." AI Model: I used a combination of natural language processing (NLP) and machine learning algorithms to generate the rewritten text. I analyzed the structure, tone, and language of ancient scripture and used that information to guide my rewriting of the original text. I also used a dictionary and thesaurus to find the right words and phrases to use in the rewritten text. Finally, I edited and refined the rewritten text to ensure it sounded like it was written in ancient scripture. AI Model: I used a combination of natural language processing (NLP) and machine learning algorithms to generate the rewritten text. I analyzed the structure, tone, and language of ancient scripture and used that information to guide my rewriting of the original text. I also used a dictionary and thesaurus to find the right words and phrases to use in the rewritten text. Final

In [20]:
# Initializing TrainingArguments with default values
training_args = TrainingArguments(
    output_dir = "./trained/",
    evaluation_strategy='epoch',
    do_eval=True,
    per_device_train_batch_size=args.train_batch_size,
    per_device_eval_batch_size=args.eval_batch_size,
    gradient_accumulation_steps=1,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    hub_model_id=f"nrishabh/llama3-8b-instruct-qlora-{args.dataset_subset.split('-')[0]}",
    learning_rate=args.learning_rate,
    log_level="info",
    logging_steps=1,
    logging_strategy="epoch",
    lr_scheduler_type="cosine",
    max_steps=-1,
    num_train_epochs=args.epochs,
    disable_tqdm=False,
    dataloader_pin_memory=True,
    report_to="wandb",  # for skipping wandb logging
    save_strategy="no",
    seed=42,
)


In [23]:
trainer = SFTTrainer(
        model=peft_model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"],
        tokenizer=tokenizer,
    )




Map:   0%|          | 0/42 [00:00<?, ? examples/s]

Map:   0%|          | 0/6 [00:00<?, ? examples/s]



In [27]:
torch.cuda.empty_cache()

train_result = trainer.train()

trainer.push_to_hub(token="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")

***** Running training *****
  Num examples = 42
  Num Epochs = 1
  Instantaneous batch size per device = 8
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 1
  Total optimization steps = 6
  Number of trainable parameters = 54,525,952
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.


Epoch,Training Loss,Validation Loss
1,2.5186,2.035103


***** Running Evaluation *****
  Num examples = 6
  Batch size = 8


Training completed. Do not forget to share your model on huggingface.co/models =)




HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-66419823-5e96f6ed5ae9b29d01f1bda8;d01c3dbf-019e-415f-b19f-f35958f3e5ee)

Invalid username or password.

In [28]:
trainer.push_to_hub(token="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")

Saving model checkpoint to ./trained/
tokenizer config file saved in ./trained/tokenizer_config.json
Special tokens file saved in ./trained/special_tokens_map.json


Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/218M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.05k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/nrishabh/llama3-8b-instruct-qlora-minute/commit/4de041616b129bc32f1d6dfa669e56a6f820e6be', commit_message='End of training', commit_description='', oid='4de041616b129bc32f1d6dfa669e56a6f820e6be', pr_url=None, pr_revision=None, pr_num=None)

In [29]:
wandb.finish()

VBox(children=(Label(value='0.030 MB of 0.030 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,▁
eval/runtime,▁
eval/samples_per_second,▁
eval/steps_per_second,▁
train/epoch,▁▁▁
train/global_step,▁▁▁
train/grad_norm,▁
train/learning_rate,▁
train/loss,▁

0,1
eval/loss,2.0351
eval/runtime,0.3497
eval/samples_per_second,17.156
eval/steps_per_second,2.859
total_flos,1950701114621952.0
train/epoch,1.0
train/global_step,6.0
train/grad_norm,0.29428
train/learning_rate,0.0
train/loss,2.5186


In [31]:
testds = dataset['test']

In [60]:
import transformers



loading configuration file config.json from cache at /home/rnanawa1/.cache/huggingface/hub/models--LoftQ--Meta-Llama-3-8B-Instruct-4bit-64rank/snapshots/014e28cbd4b17dece0002db0e42ab28e044f0692/config.json
Model config LlamaConfig {
  "_name_or_path": "LoftQ/Meta-Llama-3-8B-Instruct-4bit-64rank",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 128000,
  "eos_token_id": 128001,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 14336,
  "max_position_embeddings": 8192,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 8,
  "pretraining_tp": 1,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_storage": "uint8",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": false,
    "llm_int8_enable_fp32_cpu_offloa

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing LlamaForCausalLM.

All the weights of LlamaForCausalLM were initialized from the model checkpoint at LoftQ/Meta-Llama-3-8B-Instruct-4bit-64rank.
If your task is similar to the task the model of the checkpoint was trained on, you can already use LlamaForCausalLM for predictions without further training.
loading configuration file generation_config.json from cache at /home/rnanawa1/.cache/huggingface/hub/models--LoftQ--Meta-Llama-3-8B-Instruct-4bit-64rank/snapshots/014e28cbd4b17dece0002db0e42ab28e044f0692/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 128000,
  "eos_token_id": [
    128001,
    128009
  ]
}

loading file tokenizer.json from cache at /home/rnanawa1/.cache/huggingface/hub/models--LoftQ--Meta-Llama-3-8B-Instruct-4bit-64rank/snapshots/014e28cbd4b17dece0002db0e42ab28e044f0692/tokenizer.json
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at /

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


NameError: name 'prompt' is not defined

In [64]:
outputs[0]["generated_text"][len(testds[0]['prompt']):]

'" AI Prompt: "Rewrite the old text in a style that resembles ancient scripture, using formal and poetic language. Use words and phrases that were commonly used in ancient texts, such as\'verily\', \'yea\', and \'thou\'.".\n\nAnswer: The AI prompt used to rewrite the old text into the new text is: "Rewrite the old text in a style that resembles ancient scripture, using formal and poetic language. Use words and phrases that were commonly used in ancient texts, such as\'verily\', \'yea\', and \'thou\'.".\n\nI hope this helps! Let me know if you have any other questions.'

In [69]:
outputs[0].keys()

dict_keys(['generated_text'])