In [1]:
!pip install transformers==4.36.2 trl accelerate==0.25.0 torch bitsandbytes peft datasets huggingface-hub -qU

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.2/8.2 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.3/155.3 kB[0m [31m18.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m30.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.5/755.5 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.0/105.0 MB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m183.4/183.4 kB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m536.7/536.7 kB[0m [31m37.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.8/79.8 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━

In [None]:
import torch
from trl import SFTTrainer
from datasets import load_dataset
from transformers import TrainingArguments
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [3]:
# device configuration
device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
print('Using device:', device)
if device == 'cuda':
    print(torch.cuda.get_device_name(0))

Using device: cuda
Tesla T4


In [4]:
# loading dataset
dataset = load_dataset("mosaicml/dolly_hhrlhf")
dataset

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading readme:   0%|          | 0.00/2.35k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/23.1M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/1.77M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/59310 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/5129 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response'],
        num_rows: 59310
    })
    test: Dataset({
        features: ['prompt', 'response'],
        num_rows: 5129
    })
})

In [5]:
dataset["train"] = dataset["train"].select(range(5000))
dataset["test"] = dataset["test"].select(range(1000))
dataset

DatasetDict({
    train: Dataset({
        features: ['prompt', 'response'],
        num_rows: 5000
    })
    test: Dataset({
        features: ['prompt', 'response'],
        num_rows: 1000
    })
})

In [6]:
def generate_prompt(sample):
  """
  Combine both the prompt and input into a single column.
  """

  bos_token = "<s>"
  org_sys_msg = "Below is an instruction that describes a task. Write a response that appropriately completes the request."
  sys_msg = "Use the provided input to create an instruction that could have been used to generate the response with an LLM."
  response = sample["prompt"].replace(org_sys_msg, "").replace("\n### Instruction:\n", "").replace("\n\n### Response:\n", "").strip()
  input = sample["response"]
  eos_token = "</s>"

  prompt = ""
  prompt += bos_token
  prompt += "### Instruction:"
  prompt += "\n" + sys_msg
  prompt += "\n\n### Input:"
  prompt += "\n" + input
  prompt += "\n\n### Response:"
  prompt += "\n" + response
  prompt += eos_token

  return prompt

In [7]:
generate_prompt(dataset["train"][2])

'<s>### Instruction:\nUse the provided input to create an instruction that could have been used to generate the response with an LLM.\n\n### Input:\nFish oil is produced from the tissues of fish.  Fish that contain omega-3 fatty acids are the ones that are used to produce fish oil supplements.  These fatty acids have been show to reduce inflammation in the body and may reduce your risk for heart disease, high blood pressure, and rheumatoid arthritis.  Fish oil can be consumed daily as a morning or nightly supplement for support or enhance your immune health.\n\n### Response:\nwhat is fish oil?</s>'

In [9]:
# loading model and tokenizer

# qunatization config
nf4_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

# model
model = AutoModelForCausalLM.from_pretrained(
    "mistralai/Mistral-7B-Instruct-v0.1",
    device_map='auto',
    quantization_config=nf4_config,
    use_cache=False
)

# tokenizer
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/967 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/72.0 [00:00<?, ?B/s]

In [17]:
# prediction function

def generate_response(prompt):
    input = tokenizer(prompt, return_tensors="pt").to(device)

    outputs = model.generate(
        **input,
        do_sample=True,
        max_new_tokens=1000,
        pad_token_id=tokenizer.eos_token_id
    )
    response_ids = outputs[0]
    response_text = tokenizer.decode(response_ids, skip_special_tokens=True)
    return response_text

In [18]:
prompt = "### Instruction:\nUse the provided input to create an instruction that could have been used to generate the response with an LLM.### Input:\nThere are more than 12,000 species of grass. The most common is Kentucky Bluegrass, because it grows quickly, easily, and is soft to the touch. Rygrass is shiny and bright green colored. Fescues are dark green and shiny. Bermuda grass is harder but can grow in drier soil.\n\n### Response:"
generate_response(prompt)

'### Instruction:\nUse the provided input to create an instruction that could have been used to generate the response with an LLM.### Input:\nThere are more than 12,000 species of grass. The most common is Kentucky Bluegrass, because it grows quickly, easily, and is soft to the touch. Rygrass is shiny and bright green colored. Fescues are dark green and shiny. Bermuda grass is harder but can grow in drier soil.\n\n### Response:\nThe most common species of grass is Kentucky Bluegrass. It is easily grown, quick to mature, and has a soft touch. Another species that is commonly known for its green shade and shiny appearance is Rygrass. Fescues are also famous for their dark green and shiny color. Compared to these, Bermuda grass is considered harder but can still grow well in drier soil.'

In [27]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [28]:
!pip install --upgrade torch



In [29]:
!nvidia-smi

Thu Feb 22 10:38:07 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   70C    P0              30W /  70W |  10847MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [31]:
# tarining config

# peft config
peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM"
)

# prepare model for 4bit training
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

# training args
args = TrainingArguments(
  output_dir = "./finetuned_mistral",
  max_steps = 100,
  per_device_train_batch_size = 4,
  warmup_steps = 0.03,
  logging_steps=10,
  save_strategy="epoch",
  evaluation_strategy="steps",
  eval_steps=20,
  learning_rate=2e-4,
  bf16=True,
  lr_scheduler_type='constant',
)

max_seq_length = 2048

trainer = SFTTrainer(
  model=model,
  peft_config=peft_config,
  max_seq_length=max_seq_length,
  tokenizer=tokenizer,
  packing=True,
  formatting_func=generate_prompt,
  args=args,
  train_dataset=dataset["train"],
  eval_dataset=dataset["test"]
)

Generating train split: 0 examples [00:00, ? examples/s]

Generating train split: 0 examples [00:00, ? examples/s]



In [None]:
# training
trainer.train()

In [None]:
# saving model
trainer.save_model("./finetuned_mistral")

In [33]:
!pip install huggingface-hub -qU

In [34]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [None]:
trainer.push_to_hub("parthsolanke/fine-tuned-mistral")