In [None]:
# Sanity check: display whether PyTorch can see a CUDA device before running the rest of the notebook.
import torch
torch.cuda.is_available() # NOTE: This should be True for everything to work smoothly.

In [None]:
from typing import Any, Optional
from transformers import (
    TrainerCallback,
    PreTrainedModel,
    PreTrainedTokenizer,
    BitsAndBytesConfig,
    Trainer,
    PreTrainedModel,
)
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
from peft import prepare_model_for_kbit_training, get_peft_model, PeftModel, LoraConfig, LoftQConfig
import torch
from peft.tuners.lora import LoraLayer
import os
from functools import wraps
import random
from datasets import load_dataset


In [None]:
# Jinja chat template used to render a list of `messages` into one training string.
# It iterates over `messages` and emits a different wrapper depending on `message['role']`:
#   - 'user'      -> "<user> ... </user>"            (content is stripped)
#   - 'system'    -> "<system>\n...\n</system>\n\n"  (content is stripped)
#   - 'assistant' -> "... </assistant>" followed by `eos_token` (content is stripped)
#   - 'input'     -> "<input> ... </input>"          (content is NOT stripped)
# Messages with any other role produce no output (there is no `else` branch).
# The `\\n` sequences below are a backslash followed by `n` in the template text; presumably the
# template engine resolves them to newlines inside its own string literals — confirm when changing.
# NOTE(review): the template is passed to `modify_tokenizer` further down; keep it byte-identical
# unless the prompt format is meant to change.
CHAT_TEMPLATE= """{% for message in messages %}
    {% if message['role'] == 'user' %}
        {{'<user> ' + message['content'].strip() + ' </user>' }}
    {% elif message['role'] == 'system' %}
        {{'<system>\\n' + message['content'].strip() + '\\n</system>\\n\\n' }}
    {% elif message['role'] == 'assistant' %}
        {{ message['content'].strip() + ' </assistant>' + eos_token }}
    {% elif message['role'] == 'input' %}
        {{'<input> ' + message['content'] + ' </input>' }}
    {% endif %}
{% endfor %}"""

In [None]:
class QLoraWrapperModelInit:
    """
    Wrap a ``model_init`` callable so the model it builds is prepared for QLoRA training.

    Calling an instance builds the model through ``model_init`` and then, in order:

    1. for architectures not listed in ``_SKIP_RESIZE_AND_CHECKPOINTING``, tries to
       resize the token embeddings to ``len(tokenizer)`` and to enable gradient
       checkpointing (both are best-effort: failures are printed, not raised);
    2. sets ``config.pretraining_tp = 1`` for ``LlamaForCausalLM``;
    3. runs ``prepare_model_for_kbit_training`` and ``get_peft_model`` with
       ``model_config.peft_config``;
    4. sets ``config.use_cache = False``;
    5. applies NEFTune when ``model_config.neftune_noise_alpha`` is set;
    6. casts selected layers to float32 (see ``change_layer_types_for_stability``).

    Parameters
    ----------
    model_init : callable
        Zero-argument function that builds the transformer-based model for training.
    model_config : Any
        Model configuration. ``peft_config`` is read from it, and
        ``neftune_noise_alpha`` is read when the attribute exists.
    tokenizer : Any
        Tokenizer; only ``len(tokenizer)`` is used, to resize the token embeddings.
    """

    # Architectures for which embedding resizing and gradient checkpointing are not attempted.
    _SKIP_RESIZE_AND_CHECKPOINTING = (
        "MPTForCausalLM",
        "MixFormerSequentialForCausalLM",
    )

    def __init__(self, model_init: Any, model_config: Any, tokenizer: Any) -> None:
        self.model_init = model_init
        self.model_config = model_config
        self.tokenizer = tokenizer

    def __call__(self) -> "PreTrainedModel":
        """
        Build the model and apply QLoRA, plus gradient checkpointing when it can be enabled.

        Returns
        -------
        PreTrainedModel
            The PEFT-wrapped model, with selected layers cast to float32.
        """
        model = self.model_init()
        architecture = model.__class__.__name__
        has_gradient_checkpointing = False
        if architecture not in self._SKIP_RESIZE_AND_CHECKPOINTING:
            # Both steps are deliberately best-effort: a failure is reported and training continues.
            try:
                model.resize_token_embeddings(len(self.tokenizer))
            except Exception as e:
                print(
                    f"Could not resize token embeddings due to {e}, but will continue anyway..."
                )
            try:
                model.gradient_checkpointing_enable()
                has_gradient_checkpointing = True
            except Exception as e:
                print(f"Model checkpointing did not work: {e}")
        if architecture == "LlamaForCausalLM":
            model.config.pretraining_tp = 1
        # Only request checkpointing support if enabling it above actually succeeded.
        model = prepare_model_for_kbit_training(
            model, use_gradient_checkpointing=has_gradient_checkpointing
        )
        model = get_peft_model(model, self.model_config.peft_config)
        model.config.use_cache = False
        # Tolerate configuration objects that do not define the NEFTune field at all.
        neftune_noise_alpha = getattr(self.model_config, "neftune_noise_alpha", None)
        if neftune_noise_alpha is not None:
            # NOTE(review): `activate_neftune` is not defined or imported in the visible
            # notebook cells — confirm it is in scope before setting neftune_noise_alpha.
            model = activate_neftune(model, neftune_noise_alpha)
        return self.change_layer_types_for_stability(model)

    def change_layer_types_for_stability(
        self, model: "PreTrainedModel"
    ) -> "PreTrainedModel":
        """
        Cast selected sub-modules of the model to float32.

        A sub-module is cast when it is a ``LoraLayer``, when its qualified name
        contains ``"norm"``, or when its name contains ``"lm_head"`` or
        ``"embed_tokens"`` and it has a ``weight`` attribute.

        Parameters
        ----------
        model : PreTrainedModel
            The model whose sub-modules are inspected; it is modified in place.

        Returns
        -------
        PreTrainedModel
            The same model object that was passed in.
        """
        for name, module in model.named_modules():
            is_lora = isinstance(module, LoraLayer)
            is_norm = "norm" in name
            is_head_or_embedding = (
                "lm_head" in name or "embed_tokens" in name
            ) and hasattr(module, "weight")
            if is_lora or is_norm or is_head_or_embedding:
                # Module.to() converts the module in place, so the result need not be rebound.
                module.to(torch.float32)
        return model

In [None]:
from autotransformers import AutoTrainer, DatasetConfig, ModelConfig
from autotransformers.llm_templates import instructions_to_chat, NEFTuneTrainer, modify_tokenizer, SavePeftModelCallback
from functools import partial
from peft import LoraConfig, LoftQConfig
from datasets import load_dataset

In [None]:
# Load the conversations dataset and build the splits handed to the trainer.
# Both splits are seeded so that a fresh "Restart & Run All" trains on the same
# examples every time (previously the first split was unseeded).
SPLIT_SEED = 203984

ds_raw = load_dataset("somosnlp/Conversaciones_terapeuticas_espanol", split="train")
# The rest of the notebook refers to the conversation column as "messages".
ds_renamed = ds_raw.rename_column("chat", "messages")

# First split: set aside 10% that is not passed to the trainer. It is kept under
# its own name instead of being silently discarded.
ds_outer = ds_renamed.train_test_split(test_size=0.1, seed=SPLIT_SEED)
ds_holdout = ds_outer["test"]

# Second split: the remaining 90% becomes the train/test pair used below as `ds`.
ds = ds_outer["train"].train_test_split(test_size=0.2, seed=SPLIT_SEED)

In [None]:
# Training arguments that stay fixed for the run (no hyperparameter search over them).
fixed_train_args = dict(
    # One sequence per device per step; gradients are accumulated over 16 steps.
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=16,
    # NOTE(review): the scheduler below is plain "constant"; presumably warmup_ratio
    # only takes effect with a warmup-aware scheduler — confirm intent.
    warmup_ratio=0.03,
    learning_rate=2e-4,
    # fp16 mixed precision is enabled.
    fp16=True,
    logging_steps=50,
    lr_scheduler_type="constant",
    weight_decay=0.001,
    # Evaluate every 200 steps, checkpoint every 50 steps (both strategies are "steps").
    eval_steps=200,
    save_steps=50,
    num_train_epochs=1,
    logging_first_step=True,
    evaluation_strategy="steps",
    save_strategy="steps",
    max_grad_norm=0.3,
    optim="paged_adamw_32bit",
    gradient_checkpointing=True,
    group_by_length=False,
    save_total_limit=50,
    adam_beta2=0.999,
)


In [None]:
# Keyword arguments for the dataset configuration (wrapped into a DatasetConfig in the next cell).
model_config = dict(
    seed=9834,
    # Model selection criterion: minimize the evaluation loss.
    direction_optimize="minimize",
    metric_optimize="eval_loss",
    callbacks=[SavePeftModelCallback],
    fixed_training_args=fixed_train_args,
    dataset_name="Conversaciones_terapeuticas_espanol",
    alias="terapia",
    retrain_at_end=False,
    task="chatbot",
    # The conversation column serves as both the text field and the label column.
    text_field="messages",
    label_col="messages",
    num_proc=4,
    # Already-loaded splits are passed in directly.
    loaded_dataset=ds,
    # Per the original note: used to create a validation split.
    partial_split=True,
)

In [None]:
# Wrap the keyword dictionary into a DatasetConfig. The name `model_config` is
# rebound from a dict to a DatasetConfig here, so guard the conversion: re-running
# this cell on its own is then a no-op instead of unpacking an already-built config.
if not isinstance(model_config, DatasetConfig):
    model_config = DatasetConfig(**model_config)

In [None]:
# LoRA adapter settings handed to the model configuration as `peft_config`.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    # Adapt every linear layer; the original note lists "query_key_value" and
    # "Wqkv" as alternative targets.
    target_modules="all-linear",
    r=256,
    lora_alpha=32,
    use_rslora=True,
    # Original guidance: 0.1 for <13B models, 0.05 otherwise.
    lora_dropout=0.1,
    bias="none",
    # NOTE(review): `init_lora_weights="loftq"` is not set here, so this LoftQ
    # configuration may have no effect — confirm against the PEFT documentation.
    loftq_config=LoftQConfig(loftq_bits=4),
)

In [None]:
from transformers import BitsAndBytesConfig
# 4-bit quantization settings used when loading the base model:
# NF4 quantization type, double quantization enabled, float16 compute dtype.
funciona_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

In [None]:
# Model configuration for fine-tuning google/gemma-2b-it with the QLoRA wrapper defined above.
# NEFTune is left disabled; the original cell kept `neftune_noise_alpha=10` and
# `custom_trainer_cls=NEFTuneTrainer` commented out as the way to turn it on.
gemma_config = ModelConfig(
    name="google/gemma-2b-it",
    save_dir="./gemma_2b_terapia_1",
    save_name="gemma_2b",
    # Quantization and adapter settings defined in the previous cells.
    quantization_config=funciona_config,
    peft_config=lora_config,
    model_init_wrap_cls=QLoraWrapperModelInit,
    # The empty-string key maps the whole model to device 0.
    custom_params_model={"trust_remote_code": True, "device_map": {"": 0}},
    func_modify_tokenizer=partial(
        modify_tokenizer,
        # Add the new chat template including the system and input roles.
        chat_template=CHAT_TEMPLATE,
        # Add a pad token.
        add_special_tokens={"pad_token": "[PAD]"},
        # Lower the maximum sequence length to 4096 instead of 8192 to fit in Google Colab GPUs.
        new_model_seq_length=4096,
    ),
)

In [None]:
# Assemble the trainer from the single model configuration and the single dataset
# configuration built above (note: the dataset configuration is stored in `model_config`).
autotrainer = AutoTrainer(
    dataset_configs=[model_config],
    model_configs=[gemma_config],
    hp_search_mode="fixed",
    metrics_dir="./chaterapia",
    metrics_cleaner="tmp_metrics_cleaner",
    clean=True,
    use_auth_token=True,
)

In [None]:
# Invoke the configured AutoTrainer and keep whatever it returns in `result`.
# NOTE(review): presumably this launches the full training run — expect it to be long-running.
result = autotrainer()