## 1. Downloading the pre-trained model

In [None]:
from huggingface_hub import login
import dotenv
import os 

token = os.getenv("HUGGINGFACE_TOKEN")

login(token=token)

In [7]:
from transformers import AutoTokenizer, AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
    "utter-project/EuroLLM-1.7B-Instruct",
    device_map="auto"
)

tokenizer = AutoTokenizer.from_pretrained("utter-project/EuroLLM-1.7B-Instruct")
tokenizer.pad_token = tokenizer.eos_token

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


## 2. Downloading and formating the dataset

In [17]:
from datasets import load_dataset

dataset = load_dataset("ai-forever/POLLUX", split="test[:20000]")

def format(example):
  text = f"<s>[INST] {example['instruction']}[/INST] {example['answer']}</s>"

  tokens = tokenizer(
    text,
    max_length=512,
    truncation=True,
    padding="max_length",
    return_tensors="pt"
  )

  labels = tokens['input_ids'].clone()

  inst_token_id = tokenizer.convert_tokens_to_ids("[/INST]")
  if inst_token_id in tokens["input_ids"][0]:
    end_idx = (tokens["input_ids"][0] == inst_token_id).nonzero(as_tuple=True)[0][-1].item()
    labels[0, :end_idx+1] = -100

  tokens['labels'] = labels

  return {k: v[0] for k, v in tokens.items()}

dataset = dataset.map(format, remove_columns=dataset.column_names)

Map: 100%|██████████| 20000/20000 [00:23<00:00, 846.91 examples/s]


## 3. Preparating for the finetuning

In [18]:
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=['q_proj', 'v_proj'],
    lora_dropout=0.05,
    bias='none',
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 11,010,048 || all params: 1,667,860,480 || trainable%: 0.6601


## 4. Train the model

In [22]:
from transformers import TrainingArguments
import torch

torch.backends.cuda.enable_flash_sdp(False)
torch.backends.cuda.enable_mem_efficient_sdp(False)
torch.backends.cuda.enable_math_sdp(True)

training_args = TrainingArguments(
    output_dir="./EURO_LLM_LORA",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    logging_steps=10,
    save_steps=100,
    num_train_epochs=3,
    fp16=True,
    save_total_limit=2,
    report_to="none",
)

from transformers import Trainer, DataCollatorForLanguageModeling

trainer = Trainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

trainer.train()

Step,Training Loss
10,2.0247
20,2.0801
30,2.0113
40,1.9356
50,1.8629
60,1.8557
70,1.9324
80,1.8463
90,1.7953
100,1.8068


TrainOutput(global_step=3750, training_loss=1.7333391621907552, metrics={'train_runtime': 5767.8067, 'train_samples_per_second': 10.403, 'train_steps_per_second': 0.65, 'total_flos': 2.591016615936e+17, 'train_loss': 1.7333391621907552, 'epoch': 3.0})

___

## 5. Test the model and print graphs