In [1]:
# Import Required Libraries
import torch
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, AutoModel
from peft import LoraConfig
from trl import SFTTrainer
from datasets import load_dataset
import pandas as pd
import datasets

In [2]:
### this is for running in local ###
import os
try:
    os.environ['HTTP_PROXY']='http://185.46.212.90:80'
    os.environ['HTTPS_PROXY']='http://185.46.212.90:80'
    print ("proxy_exported")
except:
    None

proxy_exported


In [3]:
# os.environ["CUDA_VISIBLE_DEVICES"]="2"
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
device

'cuda'

In [5]:
# Install dependencies
!pip install -q -U trl transformers accelerate peft einops datasets bitsandbytes scipy

[0m

In [6]:
# Define Constants
MODEL_NAME = "microsoft/phi-2"
DATASET_NAME = "OpenAssistant/oasst1"
OUTPUT_DIR = "./results"

In [7]:
# Initialize BitsAndBytes Config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

In [8]:
# Load Model and Tokenizer
#model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, quantization_config=bnb_config, trust_remote_code=True)
model = AutoModel.from_pretrained('/home/saurabh/era_saurabh/late_submissions/s27/phi-2')
model.config.use_cache = False
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [9]:
# Print Model for Layer Identification
print(model)

PhiModel(
  (embed_tokens): Embedding(51200, 2560)
  (embed_dropout): Dropout(p=0.0, inplace=False)
  (layers): ModuleList(
    (0-31): 32 x PhiDecoderLayer(
      (self_attn): PhiAttention(
        (q_proj): Linear(in_features=2560, out_features=2560, bias=True)
        (k_proj): Linear(in_features=2560, out_features=2560, bias=True)
        (v_proj): Linear(in_features=2560, out_features=2560, bias=True)
        (dense): Linear(in_features=2560, out_features=2560, bias=True)
        (rotary_emb): PhiRotaryEmbedding()
      )
      (mlp): PhiMLP(
        (activation_fn): NewGELUActivation()
        (fc1): Linear(in_features=2560, out_features=10240, bias=True)
        (fc2): Linear(in_features=10240, out_features=2560, bias=True)
      )
      (input_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
      (resid_dropout): Dropout(p=0.1, inplace=False)
    )
  )
  (final_layernorm): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
)


In [10]:
# LORA Configuration
lora_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["Wqkv", "out_proj", "fc1", "fc2"]
)

In [11]:
# Training Arguments
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    optim="paged_adamw_32bit",
    save_steps=100,
    logging_steps=10,
    learning_rate=2e-4,
    fp16=True,
    max_grad_norm=0.3,
    max_steps=700,
    warmup_ratio=0.05,
    lr_scheduler_type="constant"
)

In [13]:
# Load and Prepare Dataset
dataset = load_dataset(DATASET_NAME, split="train")
# assistant_responses = dataset.query('role == "assistant" and rank == 0.0')
# prompters = dataset.query('role == "prompter"').set_index("message_id")

In [14]:
# Combine Prompts and Responses
assistant_responses['prompt_response'] = assistant_responses.apply(
    lambda row: "### Human: " + prompters.loc[row.parent_id, 'text'] + "### Assistant: " + row['text'], axis=1
)

NameError: name 'assistant_responses' is not defined

In [None]:
# Create HuggingFace Dataset
hf_dataset = datasets.Dataset.from_pandas(assistant_responses)

In [15]:
# Initialize Trainer
trainer = SFTTrainer(
    model=model,
    train_dataset=dataset,
    peft_config=lora_config,
    max_seq_length=32,
    tokenizer=tokenizer,
    args=training_args,
    dataset_text_field="text"
    )

Map:   0%|          | 0/84437 [00:00<?, ? examples/s]

Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.


In [16]:
# Adjust Normalization Layers
for _, module in trainer.model.named_modules():
    if isinstance(module, torch.nn.LayerNorm):
        module.float()

In [17]:
os.environ["CUDA_VISIBLE_DEVICES"]="2"

In [18]:
# Train the Model
trainer.train()

TypeError: Caught TypeError in replica 0 on device 0.
Original Traceback (most recent call last):
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/parallel/parallel_apply.py", line 64, in _worker
    output = module(*input, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1533, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/peft/peft_model.py", line 1073, in forward
    return self.base_model(
  File "/usr/local/lib/python3.8/dist-packages/torch/nn/modules/module.py", line 1533, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.8/dist-packages/peft/tuners/tuners_utils.py", line 103, in forward
    return self.model.forward(*args, **kwargs)
TypeError: forward() got an unexpected keyword argument 'labels'


In [None]:
# Test the Model
def generate_text(prompt):
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=200)
    result = pipe(f"<s>[INST] {prompt} [/INST]")
    return result[0]['generated_text']

In [None]:
# Example Usage
print(generate_text("What is large language model?"))
print(generate_text("What is QLora that stands for Quantization and Low-Rank Adapters"))