In [None]:
import torch
import peft
import transformers
from datasets import load_dataset
from huggingface_hub import login

# Log in to the Hugging Face Hub. `login` is called without arguments, so no
# access token is hard-coded in the notebook. A later cell pushes the model to the Hub.
login()

In [None]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs, where every parameter provides
            ``numel()`` and a ``requires_grad`` flag.

    Prints one line containing the trainable parameter count, the total
    parameter count and the trainable share in percent. Returns nothing.
    A model without any parameters is reported with a share of 0.0.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the division: a model with no parameters would otherwise raise ZeroDivisionError.
    trainable_p = 100 * trainable_params / all_param if all_param else 0.0
    print(f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_p}")

In [None]:
# Load base model and tokenizer

# The LoRA target module ("q_proj"), the CAUSAL_LM task type and the pad/eos token
# assignment below all require a Llama-style decoder-only checkpoint.
# "bert-base-uncased" has no "q_proj" modules (so get_peft_model fails) and no EOS
# token (so pad_token would be set to None).
# NOTE(review): the exact Llama 2 variant is an assumption; confirm which checkpoint was intended.
base_model_id = "meta-llama/Llama-2-7b-hf"

# 4-bit NF4 quantization with double quantization; matmuls are computed in bfloat16.
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

lora_config = peft.LoraConfig(
    r=16, # dimension of the updated matrices
    lora_alpha=32, # parameter for scaling
    target_modules=["q_proj"], # , "up_proj", "o_proj", "k_proj", "down_proj", "gate_proj", "v_proj"
    lora_dropout=0.05, # dropout probability for layers
    bias="none",
    task_type="CAUSAL_LM",
)

model = transformers.AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")
tokenizer = transformers.AutoTokenizer.from_pretrained(base_model_id, trust_remote_code=True)

model.gradient_checkpointing_enable()
model = peft.prepare_model_for_kbit_training(model)
# Reuse the EOS token for padding so batches can be padded by the data collator.
tokenizer.pad_token = tokenizer.eos_token

# Wrap the quantized base model with the LoRA adapters described by lora_config.
model = peft.get_peft_model(model, lora_config)
print("------------------------------------------------------------------------------------------------------------------")
print_trainable_parameters(model)
print("------------------------------------------------------------------------------------------------------------------")
print("model:\n" + str(model))

In [None]:
# Load the dataset [TODO: check that the dataset is loaded correctly]

data = load_dataset("AdiOO7/llama-2-finance")

# Tokenize the "text" column in batches; the "text" column itself is still read below.
data = data.map(lambda samples: tokenizer(samples["text"]), batched=True)

print("Row count: " + str(len(data["train"])))
print("Sample row: " + data["train"][0]['text'])
# str() is required here: concatenating a str with the dataset object raises TypeError.
print("Train dataset:\n" + str(data['train']))

Row count: 4846
Sample row: ### Instruction: What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive} ### Human: Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said . ### Assistant: neutral.


In [None]:
# Train the model

# Run configuration: 1000 optimizer steps with 100 warmup steps, logging every step.
# NOTE(review): fp16=True is used here while the quantization config computes in
# bfloat16 — confirm the mixed-precision choice is intentional.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    max_steps=1000,
    warmup_steps=100,
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    fp16=True,
    optim="paged_adamw_8bit",
    logging_steps=1,
)

# Language-modeling collator with masked-LM disabled (mlm=False).
lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=data["train"],
    data_collator=lm_collator,
)

# silence the warnings. Please re-enable for inference!
model.config.use_cache = False
trainer.train()

You're using a LlamaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
1,0.0
2,0.9063
3,3.2727
4,2.44
5,0.0
6,0.0
7,0.0
8,0.0
9,3.5127
10,3.0439


TrainOutput(global_step=1000, training_loss=1.9945557520389556, metrics={'train_runtime': 3141.9474, 'train_samples_per_second': 5.092, 'train_steps_per_second': 0.318, 'total_flos': 2.970115920396288e+16, 'train_loss': 1.9945557520389556, 'epoch': 3.3})

In [None]:
# Upload the model to the Hugging Face Hub under this repository name.
hub_repo_name = "llama2-qlora-finetuned-sentiment-1000s-4086r"
model.push_to_hub(hub_repo_name)



adapter_model.bin:   0%|          | 0.00/160M [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/160M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/magnus42/llama2-qlora-finetuned-sentiment-1000s-4086r/commit/69148d96011ce013004f29bdeaf788081b28944d', commit_message='Upload model', commit_description='', oid='69148d96011ce013004f29bdeaf788081b28944d', pr_url=None, pr_revision=None, pr_num=None)

In [None]:
# Load finetuned model

peft_model_id = "magnus42/llama2-qlora-finetuned-sentiment-1000s-4086r"

# The adapter config names the base checkpoint that the adapter was trained on.
config = peft.PeftConfig.from_pretrained(peft_model_id)
base_checkpoint = config.base_model_name_or_path

# Tokenizer and base model both come from that same base checkpoint.
tokenizer = transformers.AutoTokenizer.from_pretrained(base_checkpoint, trust_remote_code=True)
base_model = transformers.AutoModelForCausalLM.from_pretrained(base_checkpoint)

# Attach the finetuned adapter weights to the base model.
model = peft.PeftModel.from_pretrained(base_model, peft_model_id)
model.config.use_cache = True

In [None]:
# Generate text

# Prompt in the "### Instruction / ### Human / ### Assistant" layout, ending at the
# assistant marker so the model is asked to continue with the answer.
prompt = "### Instruction: What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive} ### Human: Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said . ### Assistant:"
batch = tokenizer(prompt, return_tensors='pt', return_token_type_ids=False)

# Generate at most 30 new tokens under CUDA autocast.
with torch.cuda.amp.autocast():
    output_tokens = model.generate(**batch, max_new_tokens=30)

# Decode the first (only) sequence, dropping special tokens.
generated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
print('\n\n', generated_text)



 ### Instruction: What is the sentiment of this tweet? Please choose an answer from {negative/neutral/positive} ### Human: Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working in computer technologies and telecommunications , the statement said . ### Assistant: Technopolis plans to develop in stages an area of no less than 100,000 square meters in order to host companies working
