## Setting up the Environment 🛠️

Before diving into the core machine learning tasks, it's essential to set up the environment with the necessary libraries and tools. This code cell takes care of installing and upgrading various Python packages that are crucial for this project.

In [1]:

!python -m pip install --upgrade pip
!pip install -q -U bitsandbytes einops
!pip install -q -U git+https://github.com/huggingface/transformers.git
!pip install -q -U git+https://github.com/huggingface/peft.git

!pip install -q -U git+https://github.com/huggingface/accelerate.git

!pip install -q -U accelerate
!pip install -q -U datasets

!pip install nvidia-ml-py3

Collecting pip
  Downloading pip-24.0-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-24.0
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.8/119.8 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m410.6/410.6 MB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m14.1/14.1 MB[0m [31m66.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.7/23.7 MB[0m [31m52.3 MB/s[0m eta [36m0:00:00[0m
[2K 

In [2]:
#!pip install transformers==4.29.2
#!pip install tokenizers==0.13.3
#!pip install peft==0.5.0
#!pip install accelerate=0.23.0

## Importing Frameworks and Checking Its Version 🎛️

``.__version__`` is a way to check versions

In [3]:
import torch
torch.__version__

'2.2.1+cu121'

In [4]:
import transformers
transformers.__version__

'4.41.0.dev0'

This cell deals with configuring the environment for GPU usage.

In [5]:
import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

In [6]:
# Import necessary libraries
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training, LoraConfig, get_peft_model
from datasets import load_dataset
import transformers


## GPU Utilization and Summary Printing Functions 💻

This cell defines two functions for monitoring GPU utilization and printing a summary of training results.

In [7]:
from pynvml import *


def print_gpu_utilization():
    nvmlInit()
    handle = nvmlDeviceGetHandleByIndex(0)
    info = nvmlDeviceGetMemoryInfo(handle)
    print(f"GPU memory occupied: {info.used//1024**2} MB.")


def print_summary(result):
    print(f"Time: {result.metrics['train_runtime']:.2f}")
    print(f"Samples/second: {result.metrics['train_samples_per_second']:.2f}")
    print_gpu_utilization()

## Model and Dataset Preparation, Training, and Summary 🚀

This cell is quite dense and contains several steps, from loading the model and dataset to training the model and printing the summary.

## QLORA 4 Bit Training


In [8]:
%%time
# Define the model name
model_name = "EleutherAI/gpt-j-6b"

# Load the model's tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Define the configuration for the quantizer
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the model in 4-bit
model = AutoModelForCausalLM.from_pretrained(model_name,trust_remote_code=True, device_map={"":0})


# Prepare the model for LoRa, adding trainable adapters for each layer
model = prepare_model_for_kbit_training(model)

# Configuration for LoRa
config = LoraConfig(
    r=8,
    lora_alpha=32,
    #target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",

)

model = get_peft_model(model, config)
model.config.use_cache = False

# Load the dataset
data = load_dataset("Abirate/english_quotes")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

# Pad tokens to max length
tokenizer.pad_token = tokenizer.eos_token

# Training configuration
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        warmup_steps=2,
        max_steps=20,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        fp16_full_eval=True,
        output_dir="outputs",
        half_precision_backend = True,
        # optim="paged_adamw_8bit",

    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Train the model
result = trainer.train()

print(print_summary(result))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/619 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.37M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/4.04k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/357 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/930 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/24.2G [00:00<?, ?B/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 256.00 MiB. GPU 0 has a total capacity of 14.75 GiB of which 127.06 MiB is free. Process 6572 has 14.62 GiB memory in use. Of the allocated memory 14.52 GiB is allocated by PyTorch, and 1.76 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

## Free up GPU memory

In [9]:
import gc
del trainer, model, config, quant_config, data
gc.collect()
torch.cuda.empty_cache()

NameError: name 'trainer' is not defined

In [None]:
!nvidia-smi

In [None]:
import gc
del trainer, model, config, quant_config, data
gc.collect()
torch.cuda.empty_cache()

## Without Quantize + Gradient Checkpointing

In [None]:


# Define the model name
model_name = "EleutherAI/gpt-j-6b"

# Load the model's tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)


# Load the model in 4-bit
model = AutoModelForCausalLM.from_pretrained(model_name,trust_remote_code=True, device_map={"":0})

# Enable gradient checkpointing
#model.gradient_checkpointing_enable()

# Prepare the model for LoRa, adding trainable adapters for each layer
model = prepare_model_for_kbit_training(model)

# Configuration for LoRa
config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",

)

model = get_peft_model(model, config)
model.config.use_cache = False

# Load the dataset
data = load_dataset("Abirate/english_quotes")
data = data.map(lambda samples: tokenizer(samples["quote"]), batched=True)

# Pad tokens to max length
tokenizer.pad_token = tokenizer.eos_token

# Training configuration
trainer = transformers.Trainer(
    model=model,
    train_dataset=data["train"],
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=8,
        warmup_steps=2,
        max_steps=20,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        fp16_full_eval=True,
        output_dir="outputs",
        half_precision_backend = True,
        # optim="paged_adamw_8bit",
        gradient_checkpointing=True,

    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

# Train the model
result = trainer.train()

print(print_summary(result))
