In [1]:
# instalar librerias

!pip install torch==2.2.2
!pip install -q git+https://github.com/huggingface/transformers
!pip install -q bitsandbytes accelerate xformers einops
!pip install -q hf_transfer
!pip install trl
!pip install peft

Collecting torch==2.2.2
  Downloading torch-2.2.2-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch==2.2.2)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch==2.2.2)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch==2.2.2)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch==2.2.2)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch==2.2.2)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch==2.2.2)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-m

In [3]:
# cargar modelo
model_name = 'meta-llama/Meta-Llama-3-8B'
access_token = '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'

# load LoRA configuration
from peft import LoraConfig
lora_config = LoraConfig(
    lora_alpha = 16,
    r = 32,
    lora_dropout = 0.1,
    bias = 'none',
)

# training arguments
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
training_arguments = TrainingArguments(
    output_dir = './results',
    num_train_epochs = 1,
    per_device_train_batch_size = 4,
    gradient_accumulation_steps = 1,
    optim = 'paged_adamw_32bit',
    save_steps = 25,
    logging_steps = 25,
    learning_rate = 2e-4,
    weight_decay = 0.001,
    fp16 = False,
    bf16 = False,
    max_grad_norm = 0.3,
    warmup_ratio = 0.03,
    group_by_length = True,
    lr_scheduler_type = 'constant',
    report_to = 'tensorboard'
)

# bits and bytes configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type = 'nf4',
    bnb_4bit_compute_dtype = 'float16',
    bnb_4bit_use_double_quant = False,
)

# tokenizer
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token = access_token,
    trust_remote_code = True
    )

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_size = 'right'

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/73.0 [00:00<?, ?B/s]

In [4]:
# load data and metrics
import pandas as pd
from sklearn.model_selection import train_test_split
from trl import SFTTrainer
import torch
import transformers
import numpy as np
from sklearn.model_selection import KFold
import datasets
from datasets import Dataset

data = pd.read_csv('training_test_data_textFormat_es.csv')

# cross validation

n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=False) # create folds

real_phrases = []
generated_phrases = []
lemmas = []

for train_index, test_index in kf.split(data):
    training_data, test_data = data['text'][train_index], data['text'][test_index]

    training_data = Dataset.from_pandas(training_data.to_frame().reset_index())
    test_data = Dataset.from_pandas(test_data.to_frame().reset_index())

    # model
    model = AutoModelForCausalLM.from_pretrained(model_name,
                                                device_map='auto',
                                                quantization_config=bnb_config,
                                                token = access_token)

    model.config.use_cache = False
    model.config.pretraining_tp = 1

    trainer = SFTTrainer(
      model = model,
      train_dataset = training_data,
      peft_config = lora_config,
      dataset_text_field = 'text',
      max_seq_length = 30,
      tokenizer = tokenizer,
      args = training_arguments,
      packing = True
    )

    trainer.train()

    for p in test_data:
      text = p['text'].split('Phrase: ')
      prompt = text[0]
      ph = text[1]
      real_phrases.append(ph)
      lemmas.append(prompt.split('Lemmas: ')[1].strip('\n'))

      inputs = tokenizer(prompt, return_tensors = 'pt')
      generate_ids = model.generate(inputs.input_ids, max_length = 50)
      output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True, clean_up_tokenization_spaces=False)[0]

      try:
        generated_phrases.append(output.split('Phrase: ')[1])
      except:
        generated_phrases.append('')

config.json:   0%|          | 0.00/654 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/177 [00:00<?, ?B/s]


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Generating train split: 0 examples [00:00, ? examples/s]

Step,Training Loss
25,2.8984
50,1.9007



Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B.

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B.

Cannot access gated repo for url https://huggingface.co/meta-llama/Meta-Llama-3-8B/resolve/main/config.json.
Access to model meta-llama/Meta-Llama-3-8B is restricted. You must be authenticated to access it. - silently ignoring the lookup for the file config.json in meta-llama/Meta-Llama-3-8B.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please

ValueError: Some modules are dispatched on the CPU or the disk. Make sure you have enough GPU RAM to fit the quantized model. If you want to dispatch the model on the CPU or the disk while keeping these modules in 32-bit, you need to set `load_in_8bit_fp32_cpu_offload=True` and pass a custom `device_map` to `from_pretrained`. Check https://huggingface.co/docs/transformers/main/en/main_classes/quantization#offload-between-cpu-and-gpu for more details. 

In [None]:
from bs4 import BeautifulSoup

clean = []

for i in generated_phrases:
  clean_text = BeautifulSoup(i, "lxml").text.rstrip()
  clean.append(clean_text)

results = pd.DataFrame()
results['lemmas'] = lemmas
results['real phrases'] = real_phrases
results['generated phrases'] = clean

results.to_csv(f'llama3_results_alpha_{lora_alpha}_r_{lora_r}_n{n_splits}.csv')

In [None]:
for i in generated_phrases:
  print(i)