In [None]:
!pip3 install -q -U bitsandbytes
!pip3 install -q -U peft
!pip3 install -q -U trl
!pip3 install -q -U accelerate
!pip3 install -q -U datasets
!pip3 install -q -U transformers

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model

# Quantization settings handed to `from_pretrained` further down:
# 4-bit loading with the "nf4" quantization type, float16 as compute dtype.
bnb_config = BitsAndBytesConfig(
  load_in_4bit=True,
  bnb_4bit_quant_type="nf4",
  bnb_4bit_compute_dtype=torch.float16,
)

Now we specify the model ID and then we load it with our previously defined quantization configuration.

In [None]:

import os
from huggingface_hub import login
# Optionally paste a Hugging Face token here for a throw-away session; leave it
# empty (and never commit a real token) to fall back to the HF_TOKEN env var.
access_token = ''
# Bug fix: the original called os.environ.get(access_token), i.e. it used the
# token *value* (an empty string) as the environment-variable *name*, which
# always produced None. Prefer the explicit token, else read HF_TOKEN.
HUGGINGFACE_TOKEN = access_token or os.environ.get("HF_TOKEN")
# With token=None, `login` falls back to its own interactive prompt.
login(token=HUGGINGFACE_TOKEN)


# Model loading
We'll load the model with 4-bit (QLoRA-style) quantization to reduce memory usage.


In [None]:
# Load the tokenizer for the instruction-tuned Gemma 2B checkpoint and make it
# pad on the right-hand side of each sequence.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2b-it")
tokenizer.padding_side = 'right'
# Load the causal LM with the 4-bit quantization config defined earlier;
# device_map="auto" leaves device placement to the library.
model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2b-it",
    device_map="auto",
    quantization_config=bnb_config
)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
`config.hidden_act` is ignored, you should use `config.hidden_activation` instead.
Gemma's activation function will be set to `gelu_pytorch_tanh`. Please, use
`config.hidden_activation` if you want to override this behaviour.
See https://github.com/huggingface/transformers/pull/29402 for more details.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
from google.colab import drive
# Mount Google Drive at /LLM; the dataset path used later lives under /LLM/MyDrive.
drive.mount("/LLM")

Drive already mounted at /LLM; to attempt to forcibly remount, call drive.mount("/LLM", force_remount=True).


In [None]:
def get_completion(query: str, model, tokenizer) -> str:
  """Generate a sampled response to ``query`` with a Gemma-style chat prompt.

  :param query: str: The user instruction to insert into the prompt template.
  :param model: Object exposing ``generate(**inputs, ...)``; if it has a
      ``device`` attribute the inputs are moved there.
  :param tokenizer: Callable tokenizer exposing ``eos_token_id`` and ``decode``.
  :return: str: The decoded first sequence returned by ``model.generate``
      (special tokens skipped). NOTE(review): for decoder-only models this
      presumably contains the prompt followed by the completion - confirm.
  """
  # Fix: follow the device the model actually lives on instead of assuming
  # "cuda:0"; the old value is kept only as a fallback for objects that do not
  # expose a `device` attribute.
  device = getattr(model, "device", None)
  if device is None:
    device = "cuda:0"

  # NOTE(review): the indentation inside this literal is part of the prompt
  # (every line starts with two spaces). Left byte-identical on purpose;
  # verify it matches the format used for the training prompts.
  prompt_template = """
  <start_of_turn>user
  Below is an instruction that describes a task. Write a response that appropriately completes the request.
  {query}
  <end_of_turn>\n<start_of_turn>model


  """
  prompt = prompt_template.format(query=query)

  encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
  model_inputs = encodeds.to(device)

  # Sampling is enabled, so repeated calls may return different text.
  # EOS doubles as the pad token id during generation.
  generated_ids = model.generate(
      **model_inputs,
      max_new_tokens=1000,
      do_sample=True,
      pad_token_id=tokenizer.eos_token_id,
  )
  decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
  return decoded

In [None]:

# result = get_completion(query="Describe about SR Tendulkar", model=model, tokenizer=tokenizer)
# print(result)


# Load dataset for fine-tuning

In [None]:
from datasets import load_dataset

# Read the "train" split of the dataset stored under the mounted Drive folder.
dataset = load_dataset("/LLM/MyDrive/Generative_AI/gemma_2", split="train")
# Bare expression: displays the dataset summary as the cell output.
dataset

Dataset({
    features: ['instruction', 'input', 'output'],
    num_rows: 117
})

In [None]:
# Convert to a pandas DataFrame purely for inspection and preview the first 10 rows.
df = dataset.to_pandas()
df.head(10)

Unnamed: 0,instruction,input,output
0,Give three tips for staying healthy.,,1.Eat a balanced diet and make sure to include...
1,"Tips for Becoming a Better Badminton Player, E...",,"1.Most of the time, try to hit the center of t..."
2,Essential Badminton Tips,,"1.Most of the time, try to hit the center of t..."
3,Basics of badminton.,,"1.Most of the time, try to hit the center of t..."
4,How to play better badminton.,,"1.Most of the time, try to hit the center of t..."
5,What are the three primary colors?,,"The three primary colors are red, blue, and ye..."
6,"Exploiting Your Opponent's Weaknesses, How to ...",,1.Understand your opponent's game. \n2. Make y...
7,How to find out Opponent's Weaknesses.,,1.Understand your opponent's game. \n2. Make y...
8,Find out Opponent's Weaknesses.,,1.Understand your opponent's game. \n2. Make y...
9,How can i get Opponent's Weaknesses,,1.Understand your opponent's game. \n2. Make y...


In [None]:
def generate_prompt(data_point):
    """Build one Gemma-formatted training example from a dataset row.

    The result is a single user turn (generic preamble + instruction, plus the
    optional context) followed by a model turn holding the reference answer.

    :param data_point: dict: Row with the keys "instruction", "input" and
        "output". A falsy "input" (e.g. None or "") means "no extra context".
    :return: str: The formatted prompt text (not tokenized).
    """
    prefix_text = ('Below is an instruction that describes a task. Write a response that '
                   'appropriately completes the request.\n\n')

    user_message = f'{prefix_text}{data_point["instruction"]}'
    # Samples that carry additional context get it appended to the user turn.
    if data_point['input']:
        user_message += f' here are the inputs {data_point["input"]}'

    # Fix: each role tag must be followed by a newline. The original emitted
    # "<start_of_turn>model{output}", gluing the role name to the first word of
    # the answer, and separated "user" from its content with a space.
    return (
        f'<start_of_turn>user\n{user_message}<end_of_turn>\n'
        f'<start_of_turn>model\n{data_point["output"]}<end_of_turn>'
    )

In [None]:
# add the "prompt" column in the dataset
# Render every row into its training prompt and attach it as a "prompt" column.
text_column = list(map(generate_prompt, dataset))
dataset = dataset.add_column("prompt", text_column)

# Shuffle with a fixed seed so the row order is repeatable.
dataset = dataset.shuffle(seed=1234)


def _tokenize_prompts(samples):
    """Tokenize a batch of rows by their "prompt" text."""
    return tokenizer(samples["prompt"])


# Batched tokenization of the prompt column.
dataset = dataset.map(_tokenize_prompts, batched=True)

In [None]:
dataset

Dataset({
    features: ['instruction', 'input', 'output', 'prompt', 'input_ids', 'attention_mask'],
    num_rows: 117
})

In [None]:
# Hold out 20% of the rows for evaluation. A fixed seed (the same value used
# for the shuffle) makes the split reproducible across kernel restarts; the
# original call was unseeded, so every run trained/evaluated on different rows.
# NOTE: `dataset` is rebound to the result of train_test_split here, so this
# cell is meant to be run once per kernel session.
dataset = dataset.train_test_split(test_size=0.2, seed=1234)
train_data = dataset["train"]
test_data = dataset["test"]

In [None]:
print(test_data)

Dataset({
    features: ['instruction', 'input', 'output', 'prompt', 'input_ids', 'attention_mask'],
    num_rows: 24
})


# Apply LoRA
Here comes the magic with PEFT! We describe the low-rank adapters (LoRA) we want with a `LoraConfig`. PEFT also offers the `get_peft_model` utility function and the `prepare_model_for_kbit_training` method for wrapping a model by hand; in this notebook the config is instead passed to the trainer via `peft_config`.

In [None]:
from peft import LoraConfig, get_peft_model
# LoRA adapter settings: rank 16, alpha 16, 10% dropout, for a causal LM task.
peft_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=16,
    lora_dropout=0.1,
)


In [None]:
# model__ = get_peft_model(model, lora_config)

In [None]:

import numpy as np

def perplexity1(eval_pred):
    """Compute next-token perplexity over a whole evaluation set in one pass.

    :param eval_pred: Object with ``predictions`` (logits) and ``label_ids``,
        either both NumPy arrays or both torch tensors.
    :return: dict: ``{"perplexity": tensor}`` - exp of the mean cross-entropy
        between each position's logits and the following token's label.
    """
    raw_logits = eval_pred.predictions
    raw_labels = eval_pred.label_ids

    # NumPy inputs are wrapped as tensors; anything else is used as-is.
    if isinstance(raw_logits, np.ndarray):
        raw_logits = torch.from_numpy(raw_logits)
        raw_labels = torch.from_numpy(raw_labels)

    # Align position t's prediction with the label at position t + 1.
    shifted_logits = raw_logits[..., :-1, :].contiguous()
    shifted_labels = raw_labels[..., 1:].contiguous()

    vocab_size = shifted_logits.size(-1)
    mean_nll = torch.nn.functional.cross_entropy(
        shifted_logits.view(-1, vocab_size),
        shifted_labels.view(-1),
    )
    return {"perplexity": torch.exp(mean_nll)}


In [None]:
import numpy as np
import torch

def perplexity(eval_pred, batch_size=8):
    """Compute next-token perplexity, accumulating the loss in mini-batches.

    :param eval_pred: Object with ``predictions`` (logits) and ``label_ids``,
        either both NumPy arrays or both torch tensors.
    :param batch_size: int: Number of sequences scored per chunk.
    :return: dict: ``{"perplexity": tensor}`` - exp of the summed cross-entropy
        divided by the number of scored tokens; NaN when no token is scored.
    """
    if isinstance(eval_pred.predictions, np.ndarray):
        logits = torch.from_numpy(eval_pred.predictions)
        labels = torch.from_numpy(eval_pred.label_ids)
    else:
        logits = eval_pred.predictions
        labels = eval_pred.label_ids

    # Align position t's prediction with the label at position t + 1.
    shift_logits = logits[..., :-1, :].contiguous()
    shift_labels = labels[..., 1:].contiguous()

    # Labels equal to ignore_index are skipped by the loss.
    ignore_index = -100
    loss_fct = torch.nn.CrossEntropyLoss(reduction='sum', ignore_index=ignore_index)
    total_loss = 0.0
    num_tokens = 0

    for start in range(0, shift_logits.size(0), batch_size):
        batch_logits = shift_logits[start:start + batch_size]
        batch_labels = shift_labels[start:start + batch_size]

        # Cast defensively: the loss needs floating-point logits and int64 targets.
        loss = loss_fct(
            batch_logits.view(-1, batch_logits.size(-1)).float(),
            batch_labels.view(-1).long(),
        )
        total_loss += loss.item()
        # Fix: only count tokens that contributed to the summed loss. The
        # original used numel(), which also counted ignored (-100) labels and
        # therefore under-reported the perplexity.
        num_tokens += int((batch_labels != ignore_index).sum().item())

    if num_tokens == 0:
        # Nothing to score (empty input or every label ignored).
        return {"perplexity": torch.tensor(float("nan"))}

    avg_loss = total_loss / num_tokens
    perplexity_score = torch.exp(torch.tensor(avg_loss))
    return {"perplexity": perplexity_score}


In [None]:

import transformers
training_arguments = transformers.TrainingArguments(
    # Where checkpoints and logs are written (placeholder path).
    output_dir="/PATH_TO_TRAINING_OUTPUT",
    # Training length: max_steps takes precedence over num_train_epochs
    # (the trainer prints a message saying so when both are given).
    max_steps=301,
    num_train_epochs=1,
    # Batching.
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,
    eval_accumulation_steps=4,
    group_by_length=True,
    # Optimisation.
    optim="paged_adamw_32bit",
    lr_scheduler_type="constant",
    fp16=True,
    # Step-based logging, evaluation and checkpointing cadence.
    logging_strategy="steps",
    logging_steps=20,
    evaluation_strategy="steps",
    eval_steps=10,
    save_steps=20,
)




In [None]:

from trl import SFTTrainer
# Supervised fine-tuning: LoRA adapters from `peft_config`, NEFTune noise on
# the embeddings, and the custom perplexity metric reported at each evaluation.
trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=test_data,
    tokenizer=tokenizer,
    peft_config=peft_config,
    max_seq_length=500,
    neftune_noise_alpha=5,
    compute_metrics=perplexity,
)
# Last expression of the cell, so the returned training summary is displayed.
trainer.train()



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss,Validation Loss,Perplexity
10,No log,5.344881,24.565641
20,5.477500,4.971535,19.729317
30,5.477500,4.501366,15.009045
40,4.603900,4.048098,11.583596
50,4.603900,3.659714,9.304506
60,3.722800,3.314383,7.631872
70,3.722800,2.992007,6.316905
80,3.082500,2.751907,5.470194
90,3.082500,2.529774,4.766413
100,2.556900,2.320354,4.186594


TrainOutput(global_step=101, training_loss=3.871960935026112, metrics={'train_runtime': 346.4662, 'train_samples_per_second': 2.332, 'train_steps_per_second': 0.292, 'total_flos': 1131700735107072.0, 'train_loss': 3.871960935026112, 'epoch': 8.595744680851064})

In [None]:

from trl import SFTTrainer
# NOTE(review): this cell is a verbatim copy of the earlier SFTTrainer cell.
# Running both builds a second trainer around the same `model` object and
# trains again - presumably a leftover from re-running; consider keeping one.
trainer = SFTTrainer(
    model,
    tokenizer=tokenizer,
    train_dataset=train_data,
    eval_dataset=test_data,
    peft_config=peft_config,
    neftune_noise_alpha=5,
    compute_metrics=perplexity,
    max_seq_length=500,
    args = training_arguments
)
trainer.train()



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.
max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss,Validation Loss,Perplexity
10,No log,5.350356,24.630093
20,5.497100,4.984956,19.863136
30,5.497100,4.524763,15.192521
40,4.629800,4.079262,11.777215
50,4.629800,3.695662,9.488052
60,3.770300,3.348761,7.781404
70,3.770300,3.020453,6.417722
80,3.121600,2.775558,5.543382
90,3.121600,2.553068,4.830646
100,2.594100,2.353992,4.272769


TrainOutput(global_step=301, training_loss=2.3304970137700685, metrics={'train_runtime': 1001.2272, 'train_samples_per_second': 2.405, 'train_steps_per_second': 0.301, 'total_flos': 3349456632877056.0, 'train_loss': 2.3304970137700685, 'epoch': 25.617021276595743})

# Calculate perplexity

In [None]:

# Run one more evaluation pass and convert its mean loss into a perplexity.
eval_output = trainer.evaluate()
loss = eval_output["eval_loss"]
print(" loss : ", loss)

# perplexity = exp(mean cross-entropy loss)
loss_tensor = torch.tensor(loss)
perplexity_score = loss_tensor.exp()
print(f"Final perplexity: {perplexity_score:.2f}")


Step,Training Loss,Validation Loss
7,No log,5.43176


 loss :  5.431760311126709
Final perplexity: 228.55
