# Setup the Model
This section performs all the setup needed for the model.
This includes:

- Installing any dependencies
- Setting any configuration
- Downloading the Base Model

## Install dependencies
In order to get started we need to install the appropriate dependencies

In [1]:
# # install dependencies

# # we use the latest version of transformers, peft, and accelerate
# !pip install -q accelerate peft transformers

# # install bitsandbytes for quantization
# !pip install -q bitsandbytes

# # install trl for the SFT library
# !pip install -q trl

# # we need sentencepiece for the slow tokenizer
# !pip install sentencepiece

# # we need einops, used by falcon-7b, llama-2 etc
# # einops (einsteinops) is used to simplify tensorops by making them readable
# !pip install -q -U einops

# # we need to install datasets for our training dataset
# !pip install -q datasets

In [2]:
import torch

# --- Inputs ---------------------------------------------------------------
# Hugging Face Hub id of the base checkpoint that gets fine-tuned
model_name = "mistralai/Mistral-7B-Instruct-v0.2"
# Location of the instruction dataset
dataset_name = "../dataset/"

# --- Training schedule ----------------------------------------------------
# How many passes over the training data to run
num_train_epochs = 30

# --- Outputs --------------------------------------------------------------
# Where model predictions and checkpoints are written
output_dir = "./results"
# Name given to the fine-tuned model
new_model = "ReqBrain-Mistral-7B-Instruct-v0.2"

In [3]:
# Report which CUDA devices this kernel can see.
if not torch.cuda.is_available():
    print("No GPUs available.")
else:
    # Number of devices PyTorch reports
    gpu_count = torch.cuda.device_count()
    print(f"Number of available GPUs: {gpu_count}")

    # Per device: the raw properties object, then name, compute capability and memory
    for i in range(gpu_count):
        gpu_device = torch.cuda.get_device_properties(i)
        print(gpu_device)
        print(f"GPU {i + 1}: {gpu_device.name}")
        print(f"\tCompute Capability: {gpu_device.major}.{gpu_device.minor}")
        print(f"\tMemory: {gpu_device.total_memory / (1024 ** 3):.2f} GB")

Number of available GPUs: 1
_CudaDeviceProperties(name='Tesla V100-SXM2-32GB', major=7, minor=0, total_memory=32500MB, multi_processor_count=80)
GPU 1: Tesla V100-SXM2-32GB
	Compute Capability: 7.0
	Memory: 31.74 GB


## Download the base model
The following will download the base model.

In [4]:
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    pipeline,
    logging,
)

# Quantization settings: load the weights in 4-bit NF4 and compute in float16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

# Load the base model with the quantization settings above
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    # map the whole model onto device 0
    device_map={"": 0}
)

# don't use the cache
model.config.use_cache = False

# Load the slow (non-fast) tokenizer that belongs to the base model.
# `trust_remote_code` is intentionally not passed: the model load above does not
# pass it either, and enabling it would allow Python code shipped in the Hub
# repository to run locally.
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# Reuse the end-of-sequence token for padding and pad on the right.
# NOTE(review): with pad == eos, some data collators mask EOS positions out of the
# loss -- confirm the fine-tuned model still learns to stop generating.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

2023-12-17 18:03:51.393280: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-17 18:03:52.253145: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-17 18:03:52.253186: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-17 18:03:52.253217: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-17 18:03:52.318968: I tensorflow/core/platform/cpu_feature_g

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Train the Model
This section takes your dataset and fine-tunes the model on it.

## Load Dataset
The following code loads your dataset so that it is ready to be used for fine-tuning the model.

In [5]:
import datasets

def get_dataset_by_model_format(dataset, split, ds_format):
    """Select one split and keep only the examples written for a given format.

    Args:
        dataset: Mapping from split name to an object exposing ``filter(predicate)``.
        split: Key of the split to select from ``dataset``.
        ds_format: Value an example's ``'ds_format'`` field must equal to be kept.

    Returns:
        The result of calling ``filter`` on the chosen split with that predicate.
    """
    def has_requested_format(example):
        return example['ds_format'] == ds_format

    selected_split = dataset[split]
    return selected_split.filter(has_requested_format)

# Read the saved dataset from disk
instruct_dataset = datasets.load_from_disk(dataset_name)

# Keep only the 'llama'-formatted examples of the train and test splits
dataset, dataset_test = (
    get_dataset_by_model_format(instruct_dataset, split=split_name, ds_format='llama')
    for split_name in ('train', 'test')
)

In [6]:
# Show the text of the training example at index 50
print(dataset[50]['text'])

<s>[INST] Could you craft a requirement that includes suggestions or allowances? Remember, these are non-mandatory, non-binding provisions, and should utilize the term 'may' in accordance with ISO 29148 guidelines. [/INST] To settle an exposure, the user may select the optional acceptance that should be used as the basis for the liability calculation for the exposure. </s>


## Fine Tune the Model
The following section takes your dataset and fine-tunes the model on it.

In [7]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer

# LoRA adapter configuration passed to the trainer below
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
)

# Set training parameters
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,      # epoch count defined in the configuration cell
    per_device_train_batch_size=2,          # 2 samples per device per step
    gradient_accumulation_steps=2,          # accumulate 2 steps (2 x 2 = 4 samples per update per device)
    optim="paged_adamw_32bit",              # paged 32-bit AdamW variant
    save_steps=0,                           # intent: no intermediate checkpoints -- TODO confirm 0 disables saving in the installed version
    logging_steps=10,                       # report the training loss every 10 steps
    learning_rate=2e-4,                     # learning rate
    weight_decay=0.001,                     # weight decay coefficient
    fp16=False,                             # fp16 mixed precision disabled
    bf16=False,                             # bf16 mixed precision disabled; NOTE(review): earlier note said to enable on A100
    max_grad_norm=0.3,                      # gradient clipping threshold
    max_steps=-1,                           # needs to be -1, otherwise overrides epochs
    warmup_ratio=0.03,                      # warmup ratio
    group_by_length=True,                   # group samples of similar length (intended to speed up training)
    lr_scheduler_type="cosine",           # cosine schedule is what runs; NOTE(review): earlier note said "constant seems better than cosine" -- confirm intent
    report_to="tensorboard"
)

# Set supervised fine-tuning parameters
# NOTE(review): no eval_dataset is passed, so `dataset_test` is not used here.
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=peft_config,                # use our lora peft config
    dataset_text_field="text",
    max_seq_length=None,                    # no explicit limit given; NOTE(review): the library may apply its own default cap -- confirm
    tokenizer=tokenizer,                    # use the mistralai tokenizer
    args=training_arguments,                # use the training arguments
    packing=False,                          # don't need packing
)

# Train model
trainer.train()

# Save the trained model under `new_model`; the merge cell later reloads this
# path with PeftModel.from_pretrained, i.e. it is consumed as a PEFT adapter.
trainer.model.save_pretrained(new_model)



Map:   0%|          | 0/132 [00:00<?, ? examples/s]

Detected kernel version 4.18.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


Step,Training Loss
10,4.7227
20,4.3115
30,3.1152
40,2.5663
50,2.2938
60,2.1142
70,2.0666
80,1.8814
90,1.7326
100,1.7915


# Merging LoRA

In [None]:
# %load_ext tensorboard
# %tensorboard --logdir results/runs

In [13]:
# Empty VRAM
import gc

# Drop the notebook-level references that keep the training objects alive.
# Each name is removed only if it exists: `pipe` is not defined by any cell that
# is still in this notebook, so a plain `del pipe` raises NameError when the
# notebook is run top to bottom on a fresh kernel.
for _name in ("model", "pipe", "trainer"):
    globals().pop(_name, None)

# Collect the now-unreachable objects first, then ask PyTorch to release the
# CUDA memory it is still caching so the FP16 reload has room.
gc.collect()
if torch.cuda.is_available():
    torch.cuda.empty_cache()

0

In [14]:
# Reload the base model in FP16 (unquantized) so the LoRA weights can be merged into it
base_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    low_cpu_mem_usage=True,
    return_dict=True,
    torch_dtype=torch.float16,
    device_map={"": 0},
)
# Attach the adapter saved under `new_model`, then fold it into the base weights
model = PeftModel.from_pretrained(base_model, new_model)
model = model.merge_and_unload()

# Reload tokenizer to save it.
# `trust_remote_code` is intentionally not passed: the model load above does not
# pass it either, and enabling it would allow Python code shipped in the Hub
# repository to run locally.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

# Pushing to Hugging Face Hub

In [15]:
# Set the git author identity via shell escapes.
# NOTE(review): `--global` changes the identity for every repository of the current
# user on this machine, and the e-mail address is hard-coded in the notebook --
# consider a repository-local setting or environment variables.
! git config --global user.email "kasrahabib@gmail.com"
! git config --global user.name "Mohammad Kasra Habib"

In [16]:
from huggingface_hub import notebook_login

# Open the interactive Hugging Face login widget; the access token is entered
# there at run time rather than being stored in the notebook.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [17]:
# Publish the merged model and then its tokenizer to the Hub under `new_model`.
# NOTE(review): the recorded output of this cell points at a repository whose name
# differs from the current `new_model` value -- confirm which name is intended.
model.push_to_hub(new_model, use_temp_dir=False)
tokenizer.push_to_hub(new_model, use_temp_dir=False)

model-00002-of-00003.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/kasrahabib/Mistral-7B-Instruct-v0.2-ReqBrain/commit/4dd9963faa287318443e87832afeec6971fc458e', commit_message='Upload tokenizer', commit_description='', oid='4dd9963faa287318443e87832afeec6971fc458e', pr_url=None, pr_revision=None, pr_num=None)