In [None]:
!pip install -qqq bitsandbytes==0.39.0
!pip install -qqq torch==2.0.1
!pip install -qqq -U git+https://github.com/huggingface/transformers.git@e03a9cc
!pip install -qqq -U git+https://github.com/huggingface/peft.git@42a184f
!pip install -qqq -U git+https://github.com/huggingface/accelerate.git@c9fbb71
!pip install -qqq datasets==2.12.0
!pip install -qqq loralib==0.1.1
!pip install -qqq einops==0.6.1

In [None]:
import json
import os
import pandas as pd
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers

from huggingface_hub import notebook_login
from datasets import Dataset
from pprint import pprint
from peft import (
    LoraConfig,
    PeftConfig,
    PeftModel,
    get_peft_model,
    prepare_model_for_kbit_training
)

from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig
)

# Expose only GPU 0 to CUDA for this process.
# NOTE(review): this runs after torch and bitsandbytes have been imported; confirm
# neither has already enumerated CUDA devices, or move this above those imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [None]:
# Log in to the Hugging Face Hub from inside the notebook.
# NOTE(review): nothing visible in this notebook pushes to the Hub — confirm the login is still required.
notebook_login()

In [None]:
# Sharded Falcon-7B-Instruct checkpoint, chosen so that it can be loaded in
# low-RAM environments such as Colab.
CHECKPOINT = 'vilsonrodrigues/falcon-7b-instruct-sharded'

# bitsandbytes quantization config: weights are loaded in 4-bit NF4 (NormalFloat4)
# with double quantization, and bfloat16 is used as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the quantized base model. `device_map="auto"` lets the library decide where
# the weights are placed.
# NOTE: `trust_remote_code=True` executes model code shipped with the checkpoint
# repository — only use it with checkpoints you trust.
model = AutoModelForCausalLM.from_pretrained(
    CHECKPOINT,
    device_map="auto",
    trust_remote_code=True,
    quantization_config=bnb_config
)

tokenizer = AutoTokenizer.from_pretrained(CHECKPOINT)
# Reuse the EOS token for padding (presumably this tokenizer defines no pad token
# of its own — TODO confirm).
tokenizer.pad_token = tokenizer.eos_token

In [5]:
def print_trainable_parameters(model):
  """
  Print how many of the model's parameters are trainable.

  Args:
    model: any object exposing ``named_parameters()`` that yields ``(name, param)``
      pairs, where each ``param`` provides ``numel()`` and ``requires_grad``.

  Prints a single line with the trainable count, the total count and the
  trainable percentage. A model without any parameters reports 0.0% instead of
  raising ZeroDivisionError.
  """
  trainable_params = 0
  all_param = 0
  for _, param in model.named_parameters():
    count = param.numel()
    all_param += count
    if param.requires_grad:
      trainable_params += count
  # Guard the division so that a parameterless model does not crash the cell.
  trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
  print(
      f"trainable params: {trainable_params} || all params: {all_param} || trainables%: {trainable_pct}"
  )

In [6]:
# Turn on gradient checkpointing, then let PEFT prepare the quantized model for
# k-bit training.
# NOTE(review): prepare_model_for_kbit_training may enable gradient checkpointing
# itself — confirm whether the explicit call is redundant.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [7]:
# LoRA adapter configuration for causal language modelling.
config = LoraConfig(
    r=16,  # can be raised (to e.g. 64)
    lora_alpha=32,  # can be lowered (to e.g. 16)
    target_modules=["query_key_value"],  # can include more layers
    lora_dropout=0.05,  # can be raised (to e.g. 0.1)
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the base model with the LoRA adapters defined above.
model = get_peft_model(model, config)
print_trainable_parameters(model)  # with low-rank adaptation only ~0.13% (not 13%) of the counted parameters are trainable — see the output below

trainable params: 4718592 || all params: 3613463424 || trainables%: 0.13058363808693696


In [8]:
# Decoding settings used to produce the sentiment score.
generation_config = model.generation_config
# The expected answer is "-1", "0" or "1", so three new tokens (tokens, not characters) are ample.
generation_config.max_new_tokens = 3
# Greedy decoding: always emit the highest-probability token. `temperature` is
# deliberately not set to 0 — zero is not a valid sampling temperature, and the
# value is ignored by greedy decoding anyway.
generation_config.do_sample = False
generation_config.num_return_sequences = 1
# Pad with, and stop on, the tokenizer's EOS token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [None]:
# Candidate instruction prompts. Neither variable is referenced elsewhere in the
# visible notebook.
# prompt1 frames the model as a sentiment-analysis system.
prompt1 = """You are a sentiment analysis model. Your task is to evaluate the sentiment
of the given text and output a sentiment score between -1 and 1, where -1 signifies
a negative sentiment, 0 indicates neutral, and 1 represents positive sentiment."""


# prompt2 carries the same instruction text that generate_prompt embeds in every
# training example (including the "Anlyze" spelling, which must stay in sync with it).
prompt2 = """Anlyze the sentiment of the given text and output a sentiment score
between -1 and 1, where -1 signifies a negative sentiment, 0 indicates neutral,
and 1 represents positive sentiment."""


In [9]:
def generate_prompt(data_point):
  """
  Build the training prompt for one example.

  Args:
    data_point: mapping with a 'User' entry (the text to classify) and a
      'Prompt' entry (its sentiment label).

  Returns:
    The instruction, a blank line, a ``<text>:`` line and a ``<sentiment>:`` line,
    with leading and trailing whitespace stripped from the whole string.
  """
  # NOTE: the "Anlyze" spelling is kept on purpose — the inference prompt must
  # match the training prompt byte-for-byte.
  instruction = (
      "Anlyze the sentiment of the given text and output a sentiment score\n"
      "between -1 and 1, where -1 signifies a negative sentiment, 0 indicates neutral,\n"
      "and 1 represents positive sentiment."
  )
  text_line = f"<text>: {data_point['User']}"
  sentiment_line = f"<sentiment>: {data_point['Prompt']}"
  # The trailing empty element reproduces the final newline that strip() then removes.
  assembled = "\n".join([instruction, "", text_line, sentiment_line, ""])
  return assembled.strip()

def generate_and_tokenize_prompt(data_point):
  """
  Render the prompt for one example and tokenize it.

  Args:
    data_point: mapping accepted by ``generate_prompt``.

  Returns:
    Whatever the module-level ``tokenizer`` returns for the rendered prompt when
    called with ``padding=True`` and ``truncation=True``.
  """
  return tokenizer(generate_prompt(data_point), padding=True, truncation=True)

In [14]:
# Our custom dataset: read the labelled examples from CSV and rename the columns
# to the keys that generate_prompt reads ('User' = text, 'Prompt' = label).
SHUFFLE_SEED = 42  # fixed so the example order is reproducible across runs

# rename() returns a new frame, so re-running this cell always starts from the CSV.
train_data = (
    pd.read_csv("train_data.csv")
    .rename(columns={"text": "User", "label": "Prompt"})
)

# Fail early with a clear message instead of a KeyError inside the map() call below.
missing_columns = {"User", "Prompt"} - set(train_data.columns)
assert not missing_columns, (
    f"train_data is missing columns required by generate_prompt: {sorted(missing_columns)}"
)

dataset = Dataset.from_pandas(train_data)
# Shuffle deterministically, then render and tokenize one prompt per example.
dataset = dataset.shuffle(seed=SHUFFLE_SEED).map(generate_and_tokenize_prompt)

Map:   0%|          | 0/203 [00:00<?, ? examples/s]

## Finetuning the model

In [None]:
# Training hyper-parameters. With a per-device batch size of 1 and 4 gradient
# accumulation steps, each optimizer step sees 4 examples per device.
# NOTE(review): fp16=True is used here while the quantization config sets
# bfloat16 as the compute dtype — confirm that the mix is intended.
training_args = transformers.TrainingArguments(
      per_device_train_batch_size=1,
      gradient_accumulation_steps=4,
      num_train_epochs=1,
      learning_rate=2e-4,
      fp16=True,
      save_total_limit=3,
      logging_steps=1,
      output_dir="experiments",
      optim="paged_adamw_8bit",
      lr_scheduler_type="cosine",
      warmup_ratio=0.05,
)

# The collator is configured with mlm=False, i.e. for causal (not masked)
# language modelling.
trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset,
    args=training_args,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
# Disable the generation KV cache while training (presumably because it is
# incompatible with gradient checkpointing — TODO confirm).
model.config.use_cache = False
trainer.train()

In [19]:
# Save the fine-tuned PEFT model to the local "trained-model" directory; the next
# cell reloads it from there. (Presumably only the LoRA adapter weights and config
# are written, not the base model — TODO confirm.)
model.save_pretrained("trained-model")

In [None]:
# Reload for inference: read the saved adapter config, load the base model it
# points to (quantized with the same bnb_config), then attach the trained adapter.
# NOTE(review): `config` and `model` are rebound here, shadowing the LoraConfig
# and the training model; the previous model may still occupy GPU memory —
# confirm there is enough headroom or free it first.
config = PeftConfig.from_pretrained("trained-model")
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)

tokenizer=AutoTokenizer.from_pretrained(config.base_model_name_or_path)
# Reuse the EOS token for padding, as was done for training.
tokenizer.pad_token = tokenizer.eos_token

# Wrap the base model with the adapter weights stored in "trained-model".
model = PeftModel.from_pretrained(model, "trained-model")

In [22]:
# Decoding settings for the reloaded model.
generation_config = model.generation_config
# The expected answer is "-1", "0" or "1", so three new tokens are ample.
generation_config.max_new_tokens = 3
# Greedy decoding: always emit the highest-probability token. `temperature` is
# deliberately not set to 0 — zero is not a valid sampling temperature, and the
# value is ignored by greedy decoding anyway.
generation_config.do_sample = False
generation_config.num_return_sequences = 1
# Pad with, and stop on, the tokenizer's EOS token.
generation_config.pad_token_id = tokenizer.eos_token_id
generation_config.eos_token_id = tokenizer.eos_token_id

In [26]:
%%time
# Device the tokenized prompt is moved to before generation.
device = "cuda:0"

# Same instruction template as generate_prompt, with the <sentiment> slot left
# empty for the model to fill in. strip() removes the trailing newline, so the
# prompt ends with "<sentiment>:".
prompt = """Anlyze the sentiment of the given text and output a sentiment score
between -1 and 1, where -1 signifies a negative sentiment, 0 indicates neutral,
and 1 represents positive sentiment.

<text>: the arp extension discussed in this document is a good addition to the label-distribution toolkit and can potentially be the simplest option available for certain deployments. i support wg adoption.
<sentiment>:
""".strip()

encoding = tokenizer(prompt, return_tensors="pt").to(device)
# Generate under inference mode. Only input_ids and attention_mask are forwarded
# from the tokenizer output.
with torch.inference_mode():
  outputs = model.generate(
      input_ids = encoding.input_ids,
      attention_mask = encoding.attention_mask,
      generation_config = generation_config
  )

# Decode the first (and only) returned sequence; as the output below shows, it
# contains the prompt followed by the generated score.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Anlyze the sentiment of the given text and output a sentiment score 
between -1 and 1, where -1 signifies a negative sentiment, 0 indicates neutral, 
and 1 represents positive sentiment.

<text>: the arp extension discussed in this document is a good addition to the label-distribution toolkit and can potentially be the simplest option available for certain deployments. i support wg adoption.
<sentiment>: 0

CPU times: user 1.21 s, sys: 0 ns, total: 1.21 s
Wall time: 2.05 s
