In [None]:
!nvidia-smi

**En este notebook y tutorial, realizaremos un fine-tune [Llama-8k](https://huggingface.co/microsoft/Phi-3-small-8k-instruct) modelo relativamente pequeño de 7 mil millones de parametros - que ha 'demostrado un rendimiento casi de última generación entre los modelos con menos de 13 mil millones de parámetros' - *en tus propios datos!!***

**Aqui usaremos [QLoRA (Efficient Finetuning of Quantized LLMs)](https://arxiv.org/abs/2305.14314), una técnica de fine-tunning altamente eficiente que consiste en cuantizar un LLM preentrenado a solo 4 bits y agregar pequeños 'Adaptadores de Bajo Rango'. Este enfoque único permite realizar el fine-tunning de LLMs utilizando solo una GPU. Esta técnica está respaldada por el/la... [PEFT library](https://huggingface.co/docs/peft/index).**

# Tabla de Contenido

- [1- Instalar librerias requeridas](#1)
- [ 2 - Cargar dataset](#2)
- [ 3 - Crear configuración de bitsandbytes](#3)
- [ 4 - Cargar Modelo Base](#4)
- [ 5 - Tokenizar](#5)
- [ 6 - Testear el modelo con Zero Shot Inferencing](#6)
- [ 7 - Pre-procesando el dataset](#7)
- [ 8 - Configurar el modelo PEFT/LoRA para realizar Fine-Tuning](#8)
- [ 9 - Entrenar Adaptador PEFT](#9)
- [ 10 - Evaluar el Modelo Qualitativamente (Evaluacion Humana)](#10)
- [ 11 - Evaluar el Modelo Quantitaviamente (con Metrica ROUGE)](#11)

<a name='1'></a>
#### 1. Instalar librerias requeridas

In [1]:
%%time
!pip install -U transformers
!pip install -U bitsandbytes
!pip install -U peft
!pip install -U accelerate
!pip install -U datasets
!pip install -U scipy
!pip install -U einops
!pip install -U evaluate
!pip install -U trl
!pip install -U rouge_score
!pip install -U torch

Collecting transformers
  Downloading transformers-4.45.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.4/44.4 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers<0.21,>=0.20 (from transformers)
  Downloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading transformers-4.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m78.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hDownloading tokenizers-0.20.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m73.8 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: tokenizers, transformers
  Attempting uninstall: tokenizers
    Found existing installation: tokenizers 0.19.1
    Uninstalling tokenizers-0.19.1:
      Successful

In [2]:
import torch
print(torch.__version__)

2.4.1+cu121


In [3]:
%%time
import os
import shutil
import zipfile
import gc
import torch
import time
import pandas as pd
import numpy as np
import transformers
import multiprocessing
import psutil
import requests
import tarfile
import json
import evaluate
import datetime, os

from trl import SFTTrainer
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    GenerationConfig,
    EarlyStoppingCallback
)
from transformers.integrations import TensorBoardCallback
from tqdm import tqdm
from huggingface_hub import interpreter_login
from pynvml import *
from functools import partial
from transformers import set_seed
from datasets import load_dataset, DatasetDict, load_from_disk
from peft import LoraConfig, PeftModel, get_peft_model, prepare_model_for_kbit_training
from IPython.display import FileLink
from urllib.request import urlopen
from io import BytesIO
from subprocess import Popen
from os import chmod
from os.path import isfile

#interpreter_login()

  warn(
2024-09-30 19:34:03.582467: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-30 19:34:03.582560: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-30 19:34:03.715598: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


CPU times: user 9.36 s, sys: 1.22 s, total: 10.6 s
Wall time: 16.5 s


In [4]:
# Habilitar los permisos necesarios para acceder a google-drive
import os

PROJECT_NAME = 'drugs-generative'

try:
    from google.colab import drive
    ROOT = '/content/drive/'
    drive.mount(ROOT, force_remount=True)
    IN_COLAB = True
    BASE_FOLDER = ROOT + 'MyDrive/' + PROJECT_NAME
    DATASET_FOLDER = BASE_FOLDER
except:
    #ROOT = '/kaggle/input/drugs-data'
    ROOT = '/kaggle'
    IN_COLAB = False
    BASE_FOLDER = os.path.join("/kaggle/working", PROJECT_NAME)
    DATASET_FOLDER = os.path.join("/kaggle/input", PROJECT_NAME)
    TOKENIZER_FOLDER = os.path.join("/kaggle/input", 'drugs-tokenizer')
    
    

In [36]:

USE_ALL_DATASET = False
NUMBER_ELEMENT = 10000
SAVE_TOKENIZER = True
LOAD_TOKENIZER = False
PROCESS_SAMPLE = False
NGROK_TOKEN = '2mfZzvcUfXHZqEB2Cc3REgZQ3eG_8a2WJJCc9vp9UpVV3AFVT'
HUGGING_TOKEN = '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'

#model_name='meta-llama/Meta-Llama-3-8B'
model_name = 'meta-llama/Llama-2-7b-hf'
name_zip_tokenizer = 'tokenizer.zip'
log_name_directory = 'logs'
ngrok_url = 'https://bin.equinox.io/c/bNyj1mQVY4c/ngrok-v3-stable-linux-amd64.tgz'
train_dataset = None
eval_dataset = None
seed = 42

DATASET_PATH = os.path.join(DATASET_FOLDER, "drugs_data.parquet")
TOKENIZER_PATH = os.path.join(TOKENIZER_FOLDER, "tokenizer.zip")
LOG_TRAIN_PATH = os.path.join(BASE_FOLDER, log_name_directory)

# Valida directorio principal del proyecto
if not(os.path.exists(BASE_FOLDER)):
    !mkdir -p {BASE_FOLDER}
    print('Directorio proyecto creado exitosamente!!')

    
# Valida directorio en donde se almacenan los logs del entrenamiento
if not(os.path.exists(LOG_TRAIN_PATH)):
    !mkdir -p {LOG_TRAIN_PATH}
    print('Directorio para almacenar logs creado exitosamente!!')    
    
# Valida descarga dataset del Proyecto
if not (os.path.exists(DATASET_PATH)):
    print('Dataset no existe!!')


################################################################################
# Model parameters
padding_side = "right"

################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 16

# Dropout probability for LoRA layers
lora_dropout = 0.1

################################################################################
# bitsandbytes parameters
################################################################################

# Activate 4-bit precision base model loading
use_4bit = True

# Compute dtype for 4-bit base models
bnb_4bit_compute_dtype = "float16"

# Quantization type (fp4 or nf4)
bnb_4bit_quant_type = "nf4"

# Activate nested quantization for 4-bit base models (double quantization)
use_nested_quant = False

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = LOG_TRAIN_PATH

# Number of training epochs
num_train_epochs = 10

# Enable fp16/bf16 training (set bf16 to True with an A100)
fp16 = False
bf16 = False

# Batch size per GPU for training
per_device_train_batch_size = 1

# Batch size per GPU for evaluation
per_device_eval_batch_size = 1

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Enable gradient checkpointing
gradient_checkpointing = True

# Maximum gradient normal (gradient clipping)
max_grad_norm = 0.3

# Initial learning rate (AdamW optimizer)
learning_rate = 2e-4

# Weight decay to apply to all layers except bias/LayerNorm weights
weight_decay = 0.001

# Optimizer to use
optim = "paged_adamw_32bit"

# Learning rate schedule (constant a bit better than cosine)
lr_scheduler_type = "constant"

# Number of training steps (overrides num_train_epochs)
max_steps = -1

# Ratio of steps for a linear warmup (from 0 to learning rate)
warmup_ratio = 0.03

# Group sequences into batches with same length
# Saves memory and speeds up training considerably
group_by_length = True

# Save checkpoint every X updates steps
save_steps = 100

# Log every X updates steps
logging_steps = 25


eval_strategy = 'steps'


eval_steps = 25


################################################################################
# SFT parameters
################################################################################

# Maximum sequence length to use
max_seq_length = None

# Pack multiple short examples in the same input sequence to increase efficiency
packing = False

# Load the entire model on the GPU 0
# device_map = {"": 0}
device_map = "auto"
    
################################################################################
compute_dtype = getattr(torch, bnb_4bit_compute_dtype)

# Check GPU compatibility with bfloat16
if compute_dtype == torch.float16 and use_4bit:
    major, _ = torch.cuda.get_device_capability()
    if major >= 8:
        print("=" * 80)
        print("Your GPU supports bfloat16: accelerate training with bf16=True")
        bf16 = True
        print("=" * 80)



<a name='2'></a>
#### 2. Definición de Funciones 

In [7]:
# Funcion para imprimir la utilización de la memoria de la GPU
def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")


# Función para reemplazar NaN con cadena vacía
def replace_nan_with_empty_string(example):
    for key, value in example.items():
        if value is None or pd.isna(value) or (value == 'nan'):
            example[key] = ''
    return example


def create_prompt_formats_llama3(sample):
    '''
    
    '''
    #===========================================================================================
    try:
        # Construir las partes iniciales
        instruct_key = '### Instruct: Generate a detailed description of the medication for healthcare professionals and patients. Maintain a professional and concise tone throughout all responses. Do not fabricate information, and if a specific field regarding the safety in sensitive groups (pregnant women, children, elderly) is not present, simply state "No specific information available."'
        context_key = '### Context: You are a pharmaceutical chemist specialized in the in-depth understanding of drug descriptions. Your task is to generate a professional and accurate response based on the information provided. If a specific field lacks information, state "No specific information available" instead of providing unconfirmed details.'
        input_key = f"### Input: Provide a detailed description of the medication {sample.get('generic_name', '')} using the available data."
        end_key = "### End"

        # Lista de campos a procesar
        fields = [
            ("brand_name", "Brand Name", "What is the brand name of the medication?"),
            ("generic_name", "Generic Name", "What is the generic name of the medication?"),
            ("substance_name", "Active Ingredient", "What is the active ingredient of the medication?"),
            ("manufacturer_name", "Manufacturer Name", "Who is the manufacturer of the medication?"),
            ("product_type", "Product Type", None),
            ("route", "Route of Administration", None),
            ("dosage_and_administration", "Dosage and Administration", "What is the recommended dosage for this medication?"),
            ("indications_and_usage", "Indications and Usage", "What is this medication used for?"),
            ("contraindications", "Contraindications", "What are the contraindications of the medication?"),
            ("warnings", "Warnings", "What warnings are associated with this medication?"),
            ("precautions", "Precautions", None),
            ("adverse_reactions", "Adverse Reactions", "What adverse reactions are associated with this medication?"),
            ("controlled_substance", "Controlled Substance", None),
            ("active_ingredient", "Chemical Substance", None),
            ("last_update", "Last Update", None)
        ]

        drugs = []
        questions = []

        # Procesar los campos
        for field, label_name, question_text in fields:
            field_value = sample.get(field)
            if field_value:
                drugs.append(f'<{field}> {label_name}: {field_value} </{field}>')
                if question_text:
                    questions.append(f'<question> {question_text}</question><answer> {field_value}</answer>')

        # Construir las partes finales
        output_key = f"### Output: {sample.get('description', '')}"
        if drugs:
            output_key += "\n" + "\n".join(drugs)

        question_key = '### Questions: ' + ("\n".join(questions) if questions else "")

        # Construir el texto final
        parts = [instruct_key, context_key, input_key, output_key, question_key, end_key]
        sample["text"] = "\n\n".join(parts)

    except Exception as ex:
        raise Exception(f'Ocurrió un error inesperado al cargar el prompt [line: {ex.__traceback__.tb_lineno}] - {ex}')
        
    return sample

def create_prompt_formats_llama2(sample):
    '''

    '''
    #===========================================================================================
    try:
        # Lista de campos a procesar: campo en el dataset, nombre a mostrar, pregunta asociada
        fields = [
            ("brand_name", "Brand Name", "What is the brand name of the medication?"),
            ("generic_name", "Generic Name", "What is the generic name of the medication?"),
            ("substance_name", "Active Ingredient", "What is the active ingredient of the medication?"),
            ("manufacturer_name", "Manufacturer Name", "Who is the manufacturer of the medication?"),
            ("product_type", "Product Type", None),
            ("route", "Route of Administration", None),
            ("dosage_and_administration", "Dosage and Administration", "What is the recommended dosage for this medication?"),
            ("indications_and_usage", "Indications and Usage", "What is this medication used for?"),
            ("contraindications", "Contraindications", "What are the contraindications of the medication?"),
            ("warnings", "Warnings", "What warnings are associated with this medication?"),
            ("precautions", "Precautions", None),
            ("adverse_reactions", "Adverse Reactions", "What adverse reactions are associated with this medication?"),
            ("controlled_substance", "Controlled Substance", None),
            ("active_ingredient", "Chemical Substance", None),
            ("last_update", "Last Update", None)
        ]

        drugs = []
        questions = []

        # Procesar los campos y construir las secciones de descripción y preguntas/respuestas
        for field, label_name, question_text in fields:
            field_value = sample.get(field)
            if field_value:
                # Añadir al bloque de descripción del medicamento en formato simple
                drugs.append(f'{label_name}: {field_value}')
                # Si hay una pregunta asociada al campo, añadirla también
                if question_text:
                    questions.append(f'Question: {question_text}\nAnswer: {field_value}')

        # Mensaje del sistema con el prompt mejorado
        system_message = f"""You are a helpful Medical Assistant. Your task is to generate descriptions of medications or respond to questions related to them, depending on the user's request.

        If the user requests a **medication description**, follow this structure:
        - Brand Name: [Brand Name]
        - Generic Name: [Generic Name]
        - Active Ingredient: [Active ingredients]
        - Indications: [Uses]
        - Dosage: [Recommended dosage]
        - Side Effects: [Common side effects]
        - Contraindications: [When the medication should not be used]
        - Interactions: [Drugs or substances that interact with this medication]

        If the user asks a **direct question about the medication**, answer based on the provided information and the medication's context.

        Please use only the available information in the provided context.

        Guidelines:
        - Maintain a professional, precise, and concise tone in all responses.
        - Do not fabricate information. If a field lacks data, state "No specific information available."
        - Ensure the information is understandable for both healthcare professionals and patients.
        """

        # Construir la descripción y preguntas de manera directa sin etiquetas adicionales
        description = "\n".join(drugs)

        # Agregar las preguntas si hay
        if questions:
            questions_block = "\n\n".join(questions)
        else:
            questions_block = "No additional questions provided."

        # Crear el prompt completo combinando todo dentro de [INST]
        sample['text'] = f"""<s>[INST] <<SYS>>
        {system_message}
        <</SYS>>
        {description}

        {questions_block}

        [/INST]</s>"""
    except Exception as ex:
        raise Exception(f'Ocurrió un error inesperado al cargar el prompt [line: {ex.__traceback__.tb_lineno}] - {ex}')

    return sample
    

def preprocess_batch(batch, tokenizer, max_length):
    """
    Tokenizing a batch
    """
    return tokenizer(
        batch["text"],
        max_length=max_length,
        truncation=True,
    )

    
def preprocess_dataset(tokenizer: AutoTokenizer, max_length: int, seed, dataset):
    """Format & tokenize it so it is ready for training
    :param tokenizer (AutoTokenizer): Model Tokenizer
    :param max_length (int): Maximum number of tokens to emit from tokenizer
    """
    
    try:
        # Añadir un prompt a cada muestra
        print("Preprocessing dataset...")
        
        create_prompt_formats = None
        if(model_name == 'meta-llama/Meta-Llama-3-8B'):
            print("Create prompt llama3...")
            create_prompt_formats = create_prompt_formats_llama3
        elif(model_name == 'meta-llama/Llama-2-7b-hf'):
            print("Create prompt llama2...")
            create_prompt_formats = create_prompt_formats_llama2
        
        
        num_cores = multiprocessing.cpu_count()
        print(f"Número de núcleos de la CPU disponibles: {num_cores}")
        
        # Usar todos menos uno o dos núcleos para no sobrecargar el sistema
        num_proc = max(1, num_cores - 1)
        
        dataset = dataset.map(create_prompt_formats
                              #num_proc=num_proc
                             )#, batched=True)
        
        _preprocessing_function = partial(preprocess_batch,
                                          max_length = max_length,
                                          tokenizer = tokenizer
                                         )

        dataset = dataset.map(_preprocessing_function, 
                              remove_columns=[col for col in dataset.column_names if col != "text"],
                              #num_proc=num_proc
                             )

        # Filtrar las muestras que tienen input_ids que exceden la longitud máxima (max_length).
        dataset = dataset.filter(lambda sample: len(sample["input_ids"]) < max_length)

        # Shuffle dataset
        dataset = dataset.shuffle(seed=seed)

        return dataset
    except Exception as ex:
        raise Exception(f'Ocurrió un error inesperado al pre-procesar el dataset [line: {ex.__traceback__.tb_lineno}] - {ex}')

        
def preprocess_dataset_sample(seed, dataset):
    """Format & tokenize it so it is ready for training
    :param tokenizer (AutoTokenizer): Model Tokenizer
    :param max_length (int): Maximum number of tokens to emit from tokenizer
    """
    
    try:
        # Añadir un prompt a cada muestra
        print("Preprocessing dataset...")
        
        create_prompt_formats = None
        if(model_name == 'meta-llama/Meta-Llama-3-8B'):
            print("Create prompt llama3...")
            create_prompt_formats = create_prompt_formats_llama3
        elif(model_name == 'meta-llama/Llama-2-7b-hf'):
            print("Create prompt llama2...")
            create_prompt_formats = create_prompt_formats_llama2
        
        num_cores = multiprocessing.cpu_count()
        print(f"Número de núcleos de la CPU disponibles: {num_cores}")
        
        # Usar todos menos uno o dos núcleos para no sobrecargar el sistema
        num_proc = max(1, num_cores - 1)
        
        dataset = dataset.map(create_prompt_formats
                              #num_proc=num_proc
                             )#, batched=True)
        
        # Shuffle dataset
        dataset = dataset.shuffle(seed=seed)

        return dataset
    except Exception as ex:
        raise Exception(f'Ocurrió un error inesperado al pre-procesar el dataset [line: {ex.__traceback__.tb_lineno}] - {ex}')

        
def print_number_of_trainable_model_parameters(model):
    try:
        trainable_model_params = 0
        all_model_params = 0
        for _, param in model.named_parameters():
            all_model_params += param.numel()
            if param.requires_grad:
                trainable_model_params += param.numel()
        return f"all model parameters: {all_model_params}\ntrainable model parameters: {trainable_model_params}\npercentage of trainable model parameters: {100 * trainable_model_params / all_model_params:.2f}%"
    except Exception as ex:
        print(f'Ocurrió un error inesperado al imprimir los parametros del modelo [line: {ex.__traceback__.tb_lineno}] - {ex}')


def launch_tensorboard():
    tb_process, ngrok_process = None, None
    
    # Launch TensorBoard
    if not is_process_running('tensorboard'):
        tb_command = f'tensorboard --logdir {LOG_TRAIN_PATH}/runs/ --host 0.0.0.0 --port 6006'
        tb_process = run_cmd_async_unsafe(tb_command)
    
    # Install ngrok
    if not isfile('./ngrok'):
        #ngrok_url = 'https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip'
        print('Inicia descarga de ngrok....')
        download_and_extract(ngrok_url)
        chmod('./ngrok', 0o755)
        
        #Registra token de autorizacion
        tb_command = f'./ngrok config add-authtoken {NGROK_TOKEN}'
        tb_process = run_cmd_async_unsafe(tb_command)

    # Create ngrok tunnel and print its public URL
    if not is_process_running('ngrok'):
        ngrok_process = run_cmd_async_unsafe('./ngrok http 6006')
        time.sleep(1) # Waiting for ngrok to start the tunnel
    
    ngrok_api_res = urlopen('http://127.0.0.1:4040/api/tunnels', timeout=10)
    ngrok_api_res = json.load(ngrok_api_res)
    assert len(ngrok_api_res['tunnels']) > 0, 'ngrok tunnel not found'
    tb_public_url = ngrok_api_res['tunnels'][0]['public_url']
    print(f'TensorBoard URL: {tb_public_url}')

    return tb_process, ngrok_process


def download_and_extract(url, extract_to='.'):
    try:
        # Descargar el archivo
        response = requests.get(url, stream=True)
        response.raise_for_status()  # Lanza una excepción si la respuesta tiene un error

        # Detectar el tipo de archivo a partir de la URL
        if url.endswith('.zip'):
            # Si es un archivo ZIP, utilizar ZipFile
            with ZipFile(BytesIO(response.content)) as zip_file:
                zip_file.extractall(path=extract_to)
                print(f'Archivo ZIP extraído en: {os.path.abspath(extract_to)}')

        elif url.endswith('.tgz') or url.endswith('.tar.gz'):
            # Si es un archivo .tgz o .tar.gz, utilizar tarfile
            with tarfile.open(fileobj=BytesIO(response.content), mode='r:gz') as tar_file:
                tar_file.extractall(path=extract_to)
                print(f'Archivo TGZ extraído en: {os.path.abspath(extract_to)}')

        else:
            print("Formato de archivo no soportado.")
            return

    except requests.exceptions.RequestException as e:
        print(f"Error en la descarga: {e}")
    except Exception as e:
        print(f"Ocurrió un error inesperado: {e}, {e.__traceback__.tb_lineno}")


def run_cmd_async_unsafe(cmd):
    return Popen(cmd, shell=True)


def is_process_running(process_name):
    running_process_names = (proc.name() for proc in psutil.process_iter())
    return process_name in running_process_names
        

def compute_perplexity(eval_pred):
    logits, labels = eval_pred
    loss = eval_pred.loss
    perplexity = math.exp(loss)
    return {"perplexity": perplexity}


def compute_rouge(eval_pred):
    rouge_metric = evaluate.load_metric("rouge")
    predictions, labels = eval_pred
    predictions = [pred.strip() for pred in predictions]
    references = [label.strip() for label in labels]
    rouge_scores = rouge_metric.compute(predictions=predictions, references=references)
    return {
        "rouge1": rouge_scores["rouge1"].mid.fmeasure,
        "rouge2": rouge_scores["rouge2"].mid.fmeasure,
        "rougeL": rouge_scores["rougeL"].mid.fmeasure
    }


def compute_meteor(eval_pred):
    meteor_metric = evaluate.load_metric("meteor")
    predictions, labels = eval_pred
    predictions = [pred.strip() for pred in predictions]
    references = [label.strip() for label in labels]
    meteor_score = meteor_metric.compute(predictions=predictions, references=references)
    return {"meteor": meteor_score["meteor"]}


def compute_exact_match(eval_pred):
    predictions, labels = eval_pred
    exact_matches = sum([1 if pred == ref else 0 for pred, ref in zip(predictions, labels)]) / len(labels)
    return {"exact_match": exact_matches}


# Definir las métricas de evaluación
def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = [logit.argmax(dim=-1) for logit in logits]

    # Calcular todas las métricas
    perplexity = compute_perplexity((logits, labels))["perplexity"]
    rouge = compute_rouge((predictions, labels))
    meteor = compute_meteor((predictions, labels))["meteor"]
    exact_match = compute_exact_match((predictions, labels))["exact_match"]

    # Guardar métricas en un archivo CSV
    with open('metrics.csv', mode='a') as metrics_file:
        metrics_writer = csv.writer(metrics_file)
        metrics_writer.writerow([perplexity, rouge["rouge1"], rouge["rouge2"], rouge["rougeL"], meteor, exact_match])

    # Retornar todas las métricas
    return {
        "perplexity": perplexity,
        "rouge1": rouge["rouge1"],
        "rouge2": rouge["rouge2"],
        "rougeL": rouge["rougeL"],
        "meteor": meteor,
        "exact_match": exact_match
    }

In [9]:
class ModelAnalizer:
    '''
    '''
    
    def __init__(self, model_name_or_path):
        self.model_name_or_path = model_name_or_path
        self.model = None
        self.tokenizer = None
        self._load_qtz_config()
    
    
    def _load_qtz_config(self):
        try:
            #compute_dtype = getattr(torch, "float16")
            
            # Load tokenizer and model with QLoRA configuration
            self.bnb_config = BitsAndBytesConfig(load_in_4bit=use_4bit, #True,
                                                 bnb_4bit_quant_type=bnb_4bit_quant_type, #'nf4',
                                                 bnb_4bit_compute_dtype=compute_dtype,
                                                 bnb_4bit_use_double_quant=use_nested_quant #False,
                                                )
        except Exception as ex:
            raise Exception(f'Ocurrió un error inesperado al cargar quantization-config [line: {ex.__traceback__.tb_lineno}] - {ex}')
        
        
    def _load_model(self):
        try:
            #device_map = {"": 0}
            self.model = AutoModelForCausalLM.from_pretrained(self.model_name_or_path, 
                                                              device_map=device_map,
                                                              quantization_config=self.bnb_config,
                                                              trust_remote_code=True,
                                                              token=HUGGING_TOKEN
                                                              )
            self.model.config.use_cache = False
            self.model.config.pretraining_tp = 1
            # Carga el tokenizador
            self._tokenizer()
        except Exception as ex:
            raise Exception(f'Ocurrió un error inesperado al cargar el modelo [line: {ex.__traceback__.tb_lineno}] - {ex}')
            
    
    def _tokenizer(self):
        # https://ai.stackexchange.com/questions/41485/while-fine-tuning-a-decoder-only-llm-like-llama-on-chat-dataset-what-kind-of-pa
        try:
            print(f'self.model_name_or_path : {self.model_name_or_path}')
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, 
                                                          trust_remote_code=True, 
                                                          add_bos_token=True,
                                                          use_fast=False, 
                                                          add_eos_token=True, 
                                                          padding_side=padding_side, #"left",
                                                          token=HUGGING_TOKEN
                                                         )
            if not(self.tokenizer):
                raise Exception(f'No se ha definido el atributo self.tokenizer')
            
            self.tokenizer.pad_token = self.tokenizer.eos_token
            
        except Exception as ex:
            raise Exception(f'Ocurrió un error inesperado al cargar el tokenizador [line: {ex.__traceback__.tb_lineno}] - {ex}')
        
    
    
    def gen(self, prompt, maxlen=512, sample=True):
        try:
            '''
            eval_tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path,
                                                           trust_remote_code=True,
                                                           add_bos_token=True,
                                                           use_fast=False
                                                          )
            eval_tokenizer.pad_token = eval_tokenizer.eos_token
            
            toks = eval_tokenizer(p, return_tensors="pt")
            '''
            
            toks = self.tokenizer(prompt, return_tensors="pt").to("cuda")
            res = self.model.generate(**toks.to("cuda"), 
                                      max_new_tokens=maxlen,
                                      do_sample=sample,
                                      num_return_sequences=1,
                                      temperature=0.7,
                                      num_beams=1,
                                      top_p=0.95
                                     ).to('cpu')
            return self.tokenizer.batch_decode(res, skip_special_tokens=True)
        
        except Exception as ex:
            raise Exception(f'Ocurrió un error inesperado al procesar la inferencia en el modelo [line: {ex.__traceback__.tb_lineno}] - {ex}')
    
    
    def get_max_length(self):
        try:
            max_length = None
            for length_setting in ["n_positions", "max_position_embeddings", "seq_length"]:
                max_length = getattr(self.model.config, length_setting, None)
                if max_length:
                    print(f"Found max length: {max_length}")
                    break
            if not max_length:
                max_length = 1024
                print(f"Using default max length: {max_length}")
            return max_length
        
        except Exception as ex:
            raise Exception(f'Ocurrió un error inesperado al obtener tamaño del modelo [line: {ex.__traceback__.tb_lineno}] - {ex}')
    


<a name='3'></a>
#### 3. Cargar el dataset

In [42]:
%%time
#Cargar tu dataset
dataset = load_dataset('parquet', data_files=DATASET_PATH)


# Tomar una muestra aleatoria de x cantidad de registros de forma aleatoria)
if not(USE_ALL_DATASET):
    sampled_dataset = dataset['train'].shuffle(seed=42).select(range(NUMBER_ELEMENT))
else:
    sampled_dataset = dataset['train']

# Dividir en 70% train y 30% (test + validation)
train_test_valid = sampled_dataset.train_test_split(test_size=0.3, seed=42)

# Dividir el 30% restante en 15% test y 15% validation
test_valid = train_test_valid['test'].train_test_split(test_size=0.5, seed=42)

# Reunir los conjuntos en un DatasetDict
dataset = DatasetDict({
    'train': train_test_valid['train'],
    'test': test_valid['test'],
    'validation': test_valid['train']
})

dataset


CPU times: user 191 ms, sys: 40.3 ms, total: 231 ms
Wall time: 409 ms


DatasetDict({
    train: Dataset({
        num_rows: 7000
    })
    test: Dataset({
        num_rows: 1500
    })
    validation: Dataset({
        num_rows: 1500
    })
})

In [43]:
dataset['train'][26]

{'abuse': '',
 'abuse_table': '',
 'active_ingredient': 'Active ingredient Benzalkonium Chloride 0.13%',
 'active_ingredient_table': '',
 'adverse_reactions': '',
 'adverse_reactions_table': '',
 'alarms': '',
 'ask_doctor_or_pharmacist': '',
 'ask_doctor_or_pharmacist_table': '',
 'ask_doctor_table': '',
 'brand_name': '',
 'carcinogenesis_and_mutagenesis_and_impairment_of_fertility': '',
 'clinical_pharmacology_table': '',
 'clinical_studies_table': '',
 'components_table': '',
 'contraindications': '',
 'controlled_substance': '',
 'dependence': '',
 'dependence_table': '',
 'description': '',
 'description_table': '',
 'do_not_use_table': '',
 'dosage_and_administration': 'Directions Pump onto hands as needed. Rub briskly together until dry. Pump onto wounds 3 times a day after cleaning. Allow foam to dissipate. Wipe excess with sterile gauze. May be bandaged once dry.',
 'dosage_and_administration_table': '',
 'drug_abuse_and_dependence': '',
 'drug_abuse_and_dependence_table': ''

#### Imprime el consumo de GPU antes de cargar el modelo pre-entrenado

In [16]:
print_gpu_utilization()

GPU memory occupied: 2105 MB.


In [13]:
%%time
try:
    llm = ModelAnalizer(model_name)
    llm._load_model()
except Exception as ex:
    print(f"Ocurrió un error inesperado [line: {ex.__traceback__.tb_lineno}] - {ex}")
    


config.json:   0%|          | 0.00/609 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

self.model_name_or_path : meta-llama/Llama-2-7b-hf


tokenizer_config.json:   0%|          | 0.00/776 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

CPU times: user 23.2 s, sys: 21.2 s, total: 44.4 s
Wall time: 1min 9s


In [20]:
print_gpu_utilization()

GPU memory occupied: 2373 MB.


#### 6. Prueba el modelo con inferencia Zero Shot

In [18]:
%%time
seed = 42
index = 26
set_seed(seed)
max_tokens = 500

try:
    prompt = dataset['train'][index]
    
    prompt_description = """Generate a detailed description for the following medication: [Name, Composition, Indications, Dosage, Side Effects, Contraindications, Interactions]
    Medication: Ibuprofen
    """

    # Pregunta al modelo o instrucciones para preguntas y respuestas
    prompt_question = "Using the context of the medication Ibuprofen, answer the following question: What are the common side effects of Ibuprofen?"
    
    # Mensaje del sistema
    system_message = """Generate a detailed description of the medication for healthcare professionals and patients. Maintain a professional and concise tone throughout all responses. Do not fabricate information, and if a specific field regarding the safety in sensitive groups (pregnant women, children, elderly) is not present, simply state "No specific information available.
    You must follow the following structure for descriptions:
    - Name: [Medication Name]
    - Composition: [Active ingredients]
    - Indications: [Uses]
    - Dosage: [Recommended dosage]
    - Side Effects: [Common side effects]
    - Contraindications: [When the medication should not be used]
    - Interactions: [Drugs or substances that interact with this medication]
    Please use only the information available in the context provided."""

    # Template del prompt completo con instancias y sistema
    formatted_prompt = f'''<s>[INST] <<SYS>>
    {system_message}
    <</SYS>>
    {prompt_description} [/INST]</s>'''

    # Template para preguntas y respuestas
    question_template = f'''<s>[INST] <<SYS>>
    {system_message}
    <</SYS>>
    {prompt_question} [/INST]</s>'''
    
    output = llm.gen(formatted_prompt, max_tokens)
    #print(res[0])

    dash_line = '-'.join('' for x in range(100))
    print(dash_line)
    print(f'Input Prompt:\n{formatted_prompt}')
    print(dash_line)
    print(f'Model Generation - Zero Shot:\n{output}')

except Exception as ex:
    print(f"Ocurrió un error inesperado [line: {ex.__traceback__.tb_lineno}] - {ex}")

Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


---------------------------------------------------------------------------------------------------
Input Prompt:
<s>[INST] <<SYS>>
    Generate a detailed description of the medication for healthcare professionals and patients. Maintain a professional and concise tone throughout all responses. Do not fabricate information, and if a specific field regarding the safety in sensitive groups (pregnant women, children, elderly) is not present, simply state "No specific information available.
    You must follow the following structure for descriptions:
    - Name: [Medication Name]
    - Composition: [Active ingredients]
    - Indications: [Uses]
    - Dosage: [Recommended dosage]
    - Side Effects: [Common side effects]
    - Contraindications: [When the medication should not be used]
    - Interactions: [Drugs or substances that interact with this medication]
    Please use only the information available in the context provided.
    <</SYS>>
    Generate a detailed description for the fo

#### 7. Pre-procesando el dataset

In [None]:
%%time
try:
    
    path_tokenizer = os.path.join(BASE_FOLDER, "dataset")
    train_dataset_path = os.path.join(path_tokenizer, "train_dataset")
    eval_dataset_path = os.path.join(path_tokenizer, "eval_dataset")
    
    if(False):
        if not(os.path.exists(path_tokenizer)):
            #!mkdir -p {tokenizer_path_folder}
            os.makedirs(path_tokenizer)
            print('Directorio para almacenar dataset creado exitosamente!')

        if (os.path.exists(TOKENIZER_PATH)):
            with zipfile.ZipFile(TOKENIZER_PATH, 'r') as zip_ref:
                zip_ref.extractall(path_tokenizer)
            print(f"Tokenizador cargado desde {TOKENIZER_PATH}")
        else:
            raise FileNotFoundError(f"El tokenizador no existe en la ruta {TOKENIZER_PATH}")

    train_dataset = load_from_disk(train_dataset_path)
    eval_dataset = load_from_disk(eval_dataset_path)
     
    
except Exception as ex:
    print(f"Error [line: {ex.__traceback__.tb_lineno}] - {ex}")

FileLink(f'./{PROJECT_NAME}/dataset/{name_zip_tokenizer}')

In [44]:
%%time
try:
    
    path_tokenizer = os.path.join(BASE_FOLDER, "dataset")
    path_save_tokenizer = os.path.join(path_tokenizer, name_zip_tokenizer)
    #path_load_tokenizer = os.path.join(DATASET_FOLDER, name_zip_tokenizer)
    
    train_dataset_path = os.path.join(path_tokenizer, "train_dataset")
    eval_dataset_path = os.path.join(path_tokenizer, "eval_dataset")
    
    if not(os.path.exists(path_tokenizer)):
        #!mkdir -p {tokenizer_path_folder}
        os.makedirs(path_tokenizer)
        print('Directorio para almacenar dataset creado exitosamente!')
    
     
    if (LOAD_TOKENIZER):
        if (os.path.exists(TOKENIZER_PATH)):
            with zipfile.ZipFile(TOKENIZER_PATH, 'r') as zip_ref:
                zip_ref.extractall(path_tokenizer)
            print(f"Tokenizador cargado desde {TOKENIZER_PATH}")
        else:
            raise FileNotFoundError(f"El tokenizador no existe en la ruta {TOKENIZER_PATH}")
        
        train_dataset = load_dataset(train_dataset_path)
        eval_dataset = load_dataset(eval_dataset_path)
        
    else:
        if not(PROCESS_SAMPLE):
            max_length = llm.get_max_length()
            print('Inicia pre-procesamiento')
            train_dataset = preprocess_dataset(tokenizer=llm.tokenizer, 
                                               max_length=max_length,
                                               seed=seed,
                                               dataset=dataset['train']
                                              )

            eval_dataset = preprocess_dataset(tokenizer=llm.tokenizer, 
                                              max_length=max_length,
                                              seed=seed,
                                              dataset=dataset['validation']
                                             )
        else:
            train_dataset = preprocess_dataset_sample(seed=seed,
                                                      dataset=dataset['train']
                                                     )

            eval_dataset = preprocess_dataset_sample(seed=seed,
                                                     dataset=dataset['validation']
                                                     )
            
        
        
        if(SAVE_TOKENIZER):
            # save in disk
            train_dataset.save_to_disk(train_dataset_path)
            eval_dataset.save_to_disk(eval_dataset_path)
            
            file_path = None
            
            if (os.path.exists(path_save_tokenizer)):
                os.remove(path_save_tokenizer)  # Eliminar el archivo existente
                print(f'Archivo zip existente, eliminado desde la ruta : {path_tokenizer}')
                
            with zipfile.ZipFile(path_save_tokenizer, 'w') as zipf:
                for folder, subfolders, files in os.walk(path_tokenizer):
                    for file in files:
                        file_path = os.path.join(folder, file)
                        zipf.write(file_path, os.path.relpath(file_path, path_tokenizer))
            
            if(os.path.exists(path_save_tokenizer)):
                print('Process file zip tokenizer...')
except Exception as ex:
    print(f"Error [line: {ex.__traceback__.tb_lineno}] - {ex}")

FileLink(f'./{PROJECT_NAME}/dataset/{name_zip_tokenizer}')

Found max length: 4096
Inicia pre-procesamiento
Preprocessing dataset...
Create prompt llama2...
Número de núcleos de la CPU disponibles: 4


Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Map:   0%|          | 0/7000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/7000 [00:00<?, ? examples/s]

Preprocessing dataset...
Create prompt llama2...
Número de núcleos de la CPU disponibles: 4


Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

Filter:   0%|          | 0/1500 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/5039 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/1051 [00:00<?, ? examples/s]

Archivo zip existente, eliminado desde la ruta : /kaggle/working/drugs-generative/dataset
Process file zip tokenizer...
CPU times: user 4min 49s, sys: 3.45 s, total: 4min 53s
Wall time: 4min 58s


In [45]:
print(f"Shapes of the datasets:")
print(f"Training: {train_dataset.shape}")
print(f"Validation: {eval_dataset.shape}")
print(train_dataset)





Shapes of the datasets:
Training: (5039, 3)
Validation: (1051, 3)
Dataset({
    features: ['text', 'input_ids', 'attention_mask'],
    num_rows: 5039
})


#### 8. Configura el modelo PEFT/LoRA para el Fine-Tuning
Ahora, vamos a realizar un ajuste fino eficiente en parámetros (PEFT). PEFT es una forma de ajuste fino por instrucciones que es mucho más eficiente que el ajuste fino completo. PEFT es un término genérico que incluye Adaptación de Bajo Rango (LoRA) y ajuste por indicaciones (¡que NO ES LO MISMO que la ingeniería de prompts!). En la mayoría de los casos, cuando alguien menciona PEFT, generalmente se refieren a LoRA. LoRA, en esencia, permite un ajuste fino eficiente del modelo utilizando menos recursos computacionales, a menudo realizable con solo una GPU. Después del ajuste fino con LoRA para una tarea o caso de uso específico, el resultado es un LLM original sin cambios y la aparición de un "adaptador LoRA" considerablemente más pequeño, que a menudo representa un porcentaje de un solo dígito del tamaño del LLM original (en MBs en lugar de GBs).

Durante la inferencia, el adaptador LoRA debe combinarse con su LLM original. La ventaja radica en la capacidad de muchos adaptadores LoRA para reutilizar el LLM original, reduciendo así los requisitos generales de memoria cuando se manejan múltiples tareas y casos de uso.

Nota el hiperparámetro de rango (r), que define el rango/dimensión del adaptador a ser entrenado. r es el rango de la matriz de bajo rango utilizada en los adaptadores, lo que controla el número de parámetros entrenados. Un rango mayor permitirá mayor expresividad, pero hay una compensación en términos de cómputo.

alpha es el factor de escalado para los pesos aprendidos. La matriz de pesos se escala por alpha/r, y por lo tanto, un valor más alto de alpha asigna más peso a las activaciones de LoRA.

In [23]:
print(print_number_of_trainable_model_parameters(llm.model))

all model parameters: 3500412928
trainable model parameters: 262410240
percentage of trainable model parameters: 7.50%


In [24]:
print(llm.model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear4bit(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps=

In [25]:
peft_config = LoraConfig(lora_alpha=lora_alpha, #16,
                         lora_dropout=lora_dropout, #0.1,
                         r=lora_r, #64,
                         bias="none",
                         task_type="CAUSAL_LM",
                         target_modules=['q_proj','k_proj','v_proj','o_proj'], #dense
                        )


# 2 - Utilizando el método prepare_model_for_kbit_training de PEFT.
llm.model = prepare_model_for_kbit_training(llm.model)
peft_model = get_peft_model(llm.model, peft_config)

Una vez que todo esté configurado y el modelo base esté preparado, podemos utilizar la función auxiliar print_trainable_parameters() para ver cuántos parámetros entrenables hay en el modelo.

In [26]:
print(print_number_of_trainable_model_parameters(peft_model))

all model parameters: 3567521792
trainable model parameters: 67108864
percentage of trainable model parameters: 1.88%


In [27]:
# Observa cómo se ve diferente el modelo ahora, con los adaptadores LoRA añadidos:
print(peft_model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(32000, 4096)
        (layers): ModuleList(
          (0-31): 32 x LlamaDecoderLayer(
            (self_attn): LlamaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=4096, out_features=4096, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=4096, out_features=64, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=64, out_features=4096, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): l

#### 9. Entrenando el Adaptador PEFT

Define los argumentos de entrenamiento y crea una instancia de Trainer.

In [51]:
tensorboard_callback = TensorBoardCallback()
tb_process, ngrok_process = launch_tensorboard()


TensorBoard URL: https://4027-34-168-146-28.ngrok-free.app


In [52]:
from torch import amp

'''
peft_training_args = TrainingArguments(
    output_dir = output_dir,
    warmup_steps=1,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    max_steps=1000,
    learning_rate=2e-4,
    optim="paged_adamw_8bit",
    logging_steps=25,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=25,
    evaluation_strategy="steps",
    eval_steps=25,
    do_eval=True,
    gradient_checkpointing=True,
    report_to="none",
    overwrite_output_dir = 'True',
    group_by_length=True,
)
'''
'''
peft_training_args = TrainingArguments(
    output_dir = LOG_TRAIN_PATH,
    do_eval=True,
    eval_strategy="steps",
    fp16=False,
    gradient_accumulation_steps=4,
    gradient_checkpointing=True,
    gradient_checkpointing_kwargs={"use_reentrant": False},
    learning_rate=2.0e-04,
    logging_steps=25,
    log_level="info",
    logging_strategy="steps",
    lr_scheduler_type="cosine",
    max_steps=1000,
    #num_train_epochs=1,
    overwrite_output_dir = True,
    per_device_eval_batch_size=1,
    per_device_train_batch_size=1,
    save_strategy="steps",
    eval_steps=25,
    group_by_length=True,
    logging_dir=LOG_PATH,
    optim="paged_adamw_8bit",
    save_steps=25,
    warmup_steps=50,
    save_total_limit=None,
    seed=42,
    report_to="tensorboard",
)

peft_trainer = transformers.Trainer(model=peft_model,
                                    train_dataset=train_dataset,
                                    eval_dataset=eval_dataset,
                                    args=peft_training_args,
                                    data_collator=transformers.DataCollatorForLanguageModeling(llm.tokenizer, mlm=False),
                                    )
'''

llm.model.train()
# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    per_device_eval_batch_size=per_device_eval_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    #group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    seed=42,
    eval_strategy=eval_strategy,
    eval_steps=eval_steps,
    load_best_model_at_end=True,
    report_to="tensorboard"
)

# Configurar early stopping
early_stopping = EarlyStoppingCallback(
    early_stopping_patience=3,  # Número de evaluaciones sin mejora antes de detener el entrenamiento
    early_stopping_threshold=0.001  # Mejora mínima requerida para continuar el entrenamiento
)


# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model=llm.model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    #dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=llm.tokenizer,
    args=training_arguments,
    #compute_metrics=compute_metrics,
    callbacks=[early_stopping],
    packing=packing,
)



In [55]:
#training_args.device
torch.cuda.empty_cache()
print(f"GPUs disponibles: {torch.cuda.device_count()}")

GPUs disponibles: 2


In [56]:
trainer.train()

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
25,0.7115,0.3509
50,0.3502,0.31524
75,0.3345,0.302872
100,0.2922,0.296332
125,0.2635,0.288717
150,0.2762,0.282655
175,0.2714,0.280138
200,0.2367,0.275439
225,0.3251,0.276044
250,0.2455,0.271233



Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-2-7b-hf.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]

Cannot access gated repo for url https://huggingface.co/meta-llama/Llama-2-7b-hf/resolve/main/config.json.
Access to model meta-llama/Llama-2-7b-hf is restricted. You must have access to it and be authenticated to access it. Please log in. - silently ignoring the lookup for the file config.json in meta-llama/Llama-2-7b-hf.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]

Cannot access gated repo for 

KeyboardInterrupt: 

In [None]:
print_gpu_utilization()

In [None]:
# Liberar memoria para la fusión de pesos


import gc
gc.collect()
torch.cuda.empty_cache()

In [None]:
print_gpu_utilization()

#### 10. Evaluar el modelo cualitativamente (Evaluación Humana)

In [None]:
try:
    llm = ModelAnalizer(model_name)
    llm._load_model()
except Exception as ex:
    print(f"Ocurrió un error inesperado [line: {ex.__traceback__.tb_lineno}] - {ex}")
    


In [None]:

ft_model = PeftModel.from_pretrained(llm.model, 
                                     "/kaggle/working/peft-dialogue-summary-training/final-checkpoint/checkpoint-1000",
                                     torch_dtype=torch.float16,
                                     is_trainable=False
                                    )

In [None]:
%%time
seed = 42
index = 120
set_seed(seed)
max_tokens = 512

try:
    prompt = dataset['train'][index]

    # Instrucción: Resume la siguiente conversación
    formatted_prompt = f'Instruct: Generate a detailed description of the medication for healthcare professionals and patients. Maintain a professional and concise tone throughout all responses. Do not fabricate information, and if a specific field regarding the safety in sensitive groups (pregnant women, children, elderly) is not present, simply state "No specific information available".\n Provide a detailed description of the medication {prompt["generic_name"]} using the available data.\n Output:\n'
    res = ft_model.gen(formatted_prompt, max_tokens)
    #print(res[0])
    output = res[0].split('Output:\n')[1]

    dash_line = '-'.join('' for x in range(100))
    print(dash_line)
    print(f'Input Prompt:\n{formatted_prompt}')
    print(dash_line)
    print(f'Peft Model Generation:\n{output}')

except Exception as ex:
    print(f"Ocurrió un error inesperado [line: {ex.__traceback__.tb_lineno}] - {ex}")

#### 10. Evaluar el modelo cuantitativamente (con la Metrica ROUGE)

In [None]:
def data_process(dataset):
    try:
        # Añadir un prompt a cada muestra
        print("Preprocessing dataset...")

        num_cores = multiprocessing.cpu_count()
        print(f"Número de núcleos de la CPU disponibles: {num_cores}")

        # Usar todos menos uno o dos núcleos para no sobrecargar el sistema
        num_proc = max(1, num_cores - 1)

        dataset = dataset.map(create_prompt_formats_v1,
                              num_proc=num_proc
                             )#, batched=True)
    except Exception as ex:
        raise Exception(f'Ocurrió un error inesperado al pre-procesar el dataset [line: {ex.__traceback__.tb_lineno}] - {ex}')

    return dataset

In [None]:
%%time
try:
    train_dataset=data_process(dataset['train'])
    eval_dataset=data_process(dataset['validation'])

    print(f"Shapes of the datasets:")
    print(f"Training: {train_dataset.shape}")
    print(f"Validation: {eval_dataset.shape}")
    
except Exception as ex:
    print(f'Ocurrió un error inesperado [line: {ex.__traceback__.tb_lineno}] - {ex}')



In [None]:
display(train_dataset[120])

In [None]:
eval_dataset[120]