<a href="https://colab.research.google.com/github/rawatnikhil857/knightRiders-hackon/blob/main/Simple_LoRA_Implementation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Simplified LoRA Implementation

#### Install Dependencies

In [1]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git git+https://github.com/huggingface/transformers.git

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m11.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m493.7/493.7 kB[0m [31m36.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m258.1/258.1 kB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m33.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wh

#### Confirm CUDA

In [2]:
import torch
# Ask PyTorch whether a CUDA device is usable; the value is shown as the cell output.
torch.cuda.is_available()

True

#### Load Base Model

In [3]:
import os
# Restrict this process to GPU 0.
# NOTE(review): torch was already imported (and CUDA queried) by the "Confirm CUDA" cell,
# so confirm this setting still takes effect when the notebook is run top to bottom.
os.environ["CUDA_VISIBLE_DEVICES"]="0"
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM

# Load the bloom-1b7 checkpoint with fp16 weights; device placement is delegated to device_map='auto'.
model = AutoModelForCausalLM.from_pretrained(
    "bigscience/bloom-1b7",
    torch_dtype=torch.float16,
    device_map='auto',
)

# NOTE(review): the tokenizer is loaded from a different repo id than the model, while the
# inference cell later loads it from the adapter's base model — confirm the two are equivalent.
tokenizer = AutoTokenizer.from_pretrained("bigscience/tokenizer")

Downloading (…)lve/main/config.json:   0%|          | 0.00/715 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/227 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

##### View Model Summary

In [4]:
# Print the module tree; the LoRA cell further down targets the `query_key_value` layers listed here.
print(model)

BloomForCausalLM(
  (transformer): BloomModel(
    (word_embeddings): Embedding(250880, 2048)
    (word_embeddings_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
    (h): ModuleList(
      (0-23): 24 x BloomBlock(
        (input_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (self_attention): BloomAttention(
          (query_key_value): Linear(in_features=2048, out_features=6144, bias=True)
          (dense): Linear(in_features=2048, out_features=2048, bias=True)
          (attention_dropout): Dropout(p=0.0, inplace=False)
        )
        (post_attention_layernorm): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
        (mlp): BloomMLP(
          (dense_h_to_4h): Linear(in_features=2048, out_features=8192, bias=True)
          (gelu_impl): BloomGelu()
          (dense_4h_to_h): Linear(in_features=8192, out_features=2048, bias=True)
        )
      )
    )
    (ln_f): LayerNorm((2048,), eps=1e-05, elementwise_affine=True)
  )
  (

In [5]:
for weight in model.parameters():
    # The base model stays frozen; only the adapters added later are trained.
    weight.requires_grad_(False)
    if weight.ndim == 1:
        # 1-D parameters (e.g. layernorm) are kept in fp32 for stability.
        weight.data = weight.data.to(torch.float32)

# Checkpoint activations so fewer of them have to be stored.
model.gradient_checkpointing_enable()
model.enable_input_require_grads()

class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward result is converted to float32."""

    def forward(self, x):
        # Run the wrapped modules as usual, then cast the result.
        result = super().forward(x)
        return result.to(torch.float32)
# Wrap the LM head so that its output is cast to float32.
# NOTE(review): re-running this cell wraps the already-wrapped head a second time.
model.lm_head = CastOutputToFloat(model.lm_head)

#### Helper Function

In [6]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Args:
        model: any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` provides
            ``numel()`` and ``requires_grad``.

    Prints a single line with the trainable count, the total count and the
    trainable percentage. A model without any parameters reports ``0.0``
    percent instead of raising ``ZeroDivisionError``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the division so a parameter-less model does not crash the report.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

#### Obtain LoRA Model

In [7]:
from peft import LoraConfig, get_peft_model

# LoRA settings: adapters are attached only to modules named "query_key_value"
# (the attention projection listed in the model summary printed earlier).
config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["query_key_value"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Wrap the frozen base model with the adapters and report how many parameters are trainable.
# NOTE(review): `model` is rebound to the wrapped model, so re-running this cell wraps it again.
model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 1572864 || all params: 1723981824 || trainable%: 0.09123437254985815


#### Load Sample Dataset

In [21]:
from datasets import load_dataset
# Load the CSV through its relative path (resolved against the current working directory).
# NOTE(review): no cell in this notebook downloads the file; presumably it has to be uploaded
# to the runtime beforehand — confirm.
qa_dataset = load_dataset('csv', data_files='Context-Question-AnswerDatabase.csv')
type(qa_dataset)

Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

datasets.dataset_dict.DatasetDict

```
### CONTEXT
{context}

### QUESTION
{question}

### ANSWER
{answer}</s>
```

In [23]:
def create_prompt(context, question, answer):
    """Build one training prompt from its three sections.

    The CONTEXT, QUESTION and ANSWER sections are separated by a blank line,
    and the answer is followed directly by the literal ``</s>`` marker.
    """
    sections = (
        f"### CONTEXT\n{context}",
        f"### QUESTION\n{question}",
        f"### ANSWER\n{answer}</s>",
    )
    return "\n\n".join(sections)

# Tokenize the prompt built from each record's Context, Question and Answer columns.
# NOTE(review): no truncation / max length is passed to the tokenizer — confirm long contexts are acceptable.
mapped_qa_dataset = qa_dataset.map(lambda samples: tokenizer(create_prompt(samples['Context'], samples['Question'], samples['Answer'])))

Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

In [24]:
mapped_qa_dataset

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'Context', 'Question', 'Answer', 'input_ids', 'token_type_ids', 'attention_mask'],
        num_rows: 1000
    })
})

#### Train LoRA

In [27]:
import transformers

# Fine-tune the adapter-wrapped model on the tokenized "train" split.
trainer = transformers.Trainer(
    model=model,
    train_dataset=mapped_qa_dataset["train"],
    args=transformers.TrainingArguments(
        # Batch size 1 with 4 accumulation steps: 4 samples per optimizer step per device.
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        # NOTE(review): warmup_steps equals max_steps, so warm-up appears to span the
        # entire run — confirm this is intended.
        warmup_steps=100,
        max_steps=100,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir='outputs',
    ),
    # mlm=False selects the non-masked (causal) language-modeling collation.
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
)
model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

Step,Training Loss
1,1.4506
2,1.6466
3,1.42
4,1.4641
5,1.3636
6,1.6186
7,1.3196
8,1.461
9,1.5004
10,1.3864


TrainOutput(global_step=100, training_loss=1.0600507980585099, metrics={'train_runtime': 502.8627, 'train_samples_per_second': 0.795, 'train_steps_per_second': 0.199, 'total_flos': 2768496369647616.0, 'train_loss': 1.0600507980585099, 'epoch': 0.4})

In [28]:
# Hugging Face Hub account name; later cells combine it with the model name to form the repo id.
HUGGING_FACE_USER_NAME = "rawatnikhil857"

In [29]:
from huggingface_hub import notebook_login
# Show the Hub login widget in the notebook; run this before the upload cell below.
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [30]:
model_name = "movieRec-bloom-1b7"

# Upload the trained model to the Hub repo "<user>/<model_name>".
# `token=True` uses the credentials stored by the login step; it replaces the
# deprecated `use_auth_token=True` argument.
model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model_name}", token=True)



adapter_model.bin:   0%|          | 0.00/6.31M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/rawatnikhil857/movieRec-bloom-1b7/commit/ddc2e992c89cdb96cfa2d404541e11971642c822', commit_message='Upload model', commit_description='', oid='ddc2e992c89cdb96cfa2d404541e11971642c822', pr_url=None, pr_revision=None, pr_num=None)

In [31]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

# Reload from the Hub: read the adapter config, load the base model it names, then attach the adapter.
# NOTE(review): `config`, `model` and `tokenizer` are rebound here, replacing the
# training-time objects of the same names.
peft_model_id = f"{HUGGING_FACE_USER_NAME}/{model_name}"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=False, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
qa_model = PeftModel.from_pretrained(model, peft_model_id)

Downloading (…)/adapter_config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/222 [00:00<?, ?B/s]

Downloading tokenizer.json:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

Downloading adapter_model.bin:   0%|          | 0.00/6.31M [00:00<?, ?B/s]

In [57]:
from IPython.display import display, Markdown

def make_inference(context, question):
  """Generate an answer for ``question`` given ``context`` and render it as Markdown.

  The prompt uses the same CONTEXT / QUESTION / ANSWER layout as the training
  prompts, with the answer section left empty for the model to complete.
  Relies on the notebook-level ``tokenizer`` and ``qa_model``.

  Args:
    context: text placed under the ``### CONTEXT`` heading.
    question: text placed under the ``### QUESTION`` heading.

  Returns:
    None. The decoded sequence (prompt plus generated text) is displayed.
  """
  prompt = f"### CONTEXT\n{context}\n\n### QUESTION\n{question}\n\n### ANSWER\n"
  batch = tokenizer(prompt, return_tensors='pt')
  # The tokenizer returns CPU tensors; move them to the device holding the model's
  # first parameter so generate() does not mix CPU and GPU tensors.
  batch = batch.to(next(qa_model.parameters()).device)

  # torch.autocast("cuda") is the non-deprecated spelling of torch.cuda.amp.autocast().
  with torch.autocast("cuda"):
    output_tokens = qa_model.generate(**batch, max_new_tokens=200, repetition_penalty=1.2)

  display(Markdown(tokenizer.decode(output_tokens[0], skip_special_tokens=True)))

In [61]:
# pandas is imported here so that `pd` is defined when the notebook runs on a fresh kernel.
import pandas as pd

# Re-read the raw CSV and use the Context stored at index label 30 as the demo input.
data = pd.read_csv("Context-Question-AnswerDatabase.csv")
context = data["Context"][30]
question = "What will be some more movies this user might like?"
make_inference(context, question)



### CONTEXT
"Given a user's past movie ratings in the format: Title, Genres, Rating.\nRatings range from 1.0 to 5.0\n\nSeven (a.k.a. Se7en) (1995), Mystery|Thriller, Rating 5.0\nDave (1993), Comedy|Romance, Rating 4.0\nMadness of King George, The (1994), Comedy|Drama, Rating 4.0\nRemains of the Day, The (1993), Drama|Romance, Rating 4.0\nUsual Suspects, The (1995), Crime|Mystery|Thriller, Rating 5.0\nIn the Line of Fire (1993), Action|Thriller, Rating 4.0\nHeavy Metal (1981), Action|Adventure|Animation|Horror|Sci-Fi, Rating 5.0\nTerminator 2: Judgment Day (1991), Action|Sci-Fi, Rating 4.0\nAce Ventura: Pet Detective (1994), Comedy, Rating 4.0\nEnglishman Who Went Up a Hill But Came Down a Mountain, The (1995), Comedy|Romance, Rating 4.0\nPhiladelphia (1993), Drama, Rating 5.0\nAdventures of Priscilla, Queen of the Desert, The (1994), Comedy|Drama, Rating 5.0\nFugitive, The (1993), Thriller, Rating 4.0\nAladdin (1992), Adventure|Animation|Children|Comedy|Musical, Rating 4.0\nMuriel's Wedding (1994), Comedy, Rating 4.0\nClear and Present Danger (1994), Action|Crime|Drama|Thriller, Rating 4.0\nForrest Gump (1994), Comedy|Drama|Romance|War, Rating 5.0\nFear (1996), Thriller, Rating 4.0\nOutbreak (1995), Action|Drama|Sci-Fi|Thriller, Rating 4.0\nDesperado (1995), Action|Romance|Western, Rating 5.0\nLeaving Las Vegas (1995), Drama|Romance, Rating 5.0\nLion King, The (1994), Adventure|Animation|Children|Drama|Musical|IMAX, Rating 4.0\nDead Man Walking (1995), Crime|Drama, Rating 5.0\nWhat's Eating Gilbert Grape (1993), Drama, Rating 4.0\nLéon: The Professional (a.k.a. The Professional) (Léon) (1994), Action|Crime|Drama|Thriller, Rating 4.0\nBirdcage, The (1996), Comedy, Rating 5.0\nPulp Fiction (1994), Comedy|Crime|Drama|Thriller, Rating 5.0\nJurassic Park (1993), Action|Adventure|Sci-Fi|Thriller, Rating 4.0\nPyromaniac's Love Story, A (1995), Comedy|Romance, Rating 4.0\nHot Shots! Part Deux (1993), Action|Comedy|War, Rating 4.0\n"

### QUESTION
What will be some more movies this user might like?

### ANSWER
Blood Diamond II (1997), Animation|Children|Comedy|Romance, Rating: 5.0\nGhostbusters! (1984), Action|Comedy|Sci-Fi, Rating: 3.5\nCowboys & Aliens II: Ghost Hunters (1990), Action|Adventure|Sci-Fi, Rating: 2.5\nEternal Sunshine Of The Spotless Mind (2004), Drama|Romance|Winner Is..., Rating: 4.0\nAmerican History X (1998), Documentary|History, Rating: 6.0\nShrek (2001), Adventure|Animation|Children|Comedy|Fantasy|Romance, Rating: 4.0\nStar Wars Episode I: Return of the Jedi (1983), Action|Adventure|Sci-Fi, Rating: 4.0\n