In [1]:
import transformers
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

In [7]:
model_id = "microsoft/phi-2"
pipeline = transformers.pipeline(
    task="text-generation",
    model=model_id,
    device_map="auto",
    tokenizer=model_id,
    model_kwargs={
        "torch_dtype": torch.float16,
        "quantization_config": {"load_in_4bit": True},
        "low_cpu_mem_usage": True,
	},
)

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

In [14]:
def get_dataset():
    
    splits = {'test': 'plain_text/test-00000-of-00001.parquet', 'validation': 'plain_text/validation-00000-of-00001.parquet', 'train': 'plain_text/train-00000-of-00001.parquet'}
    dataset = pd.read_parquet("hf://datasets/stanfordnlp/snli/" + splits["test"])
    return dataset

In [15]:
dataset = get_dataset()

In [17]:
# keep only every 100th example
dataset = dataset.iloc[::100]

In [24]:
def get_prompt(df_row):
    return f"""Instruction: You are given premise and hypothesis. You have to predict the relationship between them. You have to give a one word answer from [entailment, contradiction, or neutral] representing the relationship between the hypothesis and premise.\nPremise: {df_row['premise']}\nHypothesis: {df_row['hypothesis']}\nOutput:"""

In [21]:
# get inference 
print(get_prompt(dataset.iloc[0]))

Instruction: You are given premise and hypothesis. You have to predict the relationship between them. The relationship can be one of the following: entailment, contradiction, or neutral.
Premise: This church choir sings to the masses as they sing joyous songs from the book at a church.
Hypothesis: The church has cracks in the ceiling.
Output:


In [31]:
from tqdm.autonotebook import tqdm

# get inferences for all examples use tqdm to show progress bar and batch_size to control the number of examples processed at once
with torch.no_grad():
	batch_size = 8
	results = []
	for i in tqdm(range(0, len(dataset), batch_size)):
		batch = dataset.iloc[i:i+batch_size]
		results.extend(pipeline([get_prompt(row) for _, row in batch.iterrows()], max_new_tokens=5, temperature=0.1))

  0%|          | 0/13 [00:00<?, ?it/s]

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

In [30]:
results[:5]

[[{'generated_text': 'Instruction: You are given premise and hypothesis. You have to predict the relationship between them. You have to give a one word answer from [entailment, contradiction, or neutral] representing the relationship between the hypothesis and premise.\nPremise: This church choir sings to the masses as they sing joyous songs from the book at a church.\nHypothesis: The church has cracks in the ceiling.\nOutput: The relationship between the premise'}],
 [{'generated_text': 'Instruction: You are given premise and hypothesis. You have to predict the relationship between them. You have to give a one word answer from [entailment, contradiction, or neutral] representing the relationship between the hypothesis and premise.\nPremise: A woman within an orchestra is playing a violin.\nHypothesis: A woman is playing the violin.\nOutput: Entailment.\n'}],
 [{'generated_text': 'Instruction: You are given premise and hypothesis. You have to predict the relationship between them. You 

In [11]:
class Dataset:
    
	def __init__(self):
		self.dataset_id = "hf://datasets/stanfordnlp/snli"
		self.dataset_splits = {
			'test': 'plain_text/test-00000-of-00001.parquet', 
			'validation': 'plain_text/validation-00000-of-00001.parquet', 
			'train': 'plain_text/train-00000-of-00001.parquet'
		}

	def preprocess_dataset(self, sample, test=False):

		INSTRUCTION = "### Instruct: You are given premise and hypothesis. You have to predict the relationship between them. You have to give a one word answer from [entailment, contradiction, or neutral] representing the relationship between the hypothesis and premise."
		PREMISE = f"Premise: {sample['premise']}"
		HYPOTHESIS = f"Hypothesis: {sample['hypothesis']}"
		OUTPUT = f"### Output: {sample['label']}"
		if test:
			OUTPUT = "### Output: "

		return f"{INSTRUCTION}\n{PREMISE}\n{HYPOTHESIS}\n{OUTPUT}"

	def get_train_dataset(self):
		df = pd.read_parquet(f"{self.dataset_id}/{self.dataset_splits['train']}")
		df =  df.iloc[::1000]
		df['prompt'] = df.apply(self.preprocess_dataset, axis=1)
		return df['prompt']
	
	def get_validation_dataset(self):
		df = pd.read_parquet(f"{self.dataset_id}/{self.dataset_splits['validation']}")
		df = df.iloc[::100]
		df['prompt'] = df.apply(self.preprocess_dataset, axis=1)
		return df['prompt']
		
	def get_test_dataset(self):
		df = pd.read_parquet(f"{self.dataset_id}/{self.dataset_splits['test']}")
		df = df.iloc[::100]
		df['prompt'] = df.apply(self.preprocess_dataset, axis=1, test=True)
		return df['prompt']

In [2]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16
)

model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", quantization_config=bnb_config, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
from peft import prepare_model_for_kbit_training

model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)

In [8]:
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        all_param += param.numel()
        if param.requires_grad:
            trainable_params += param.numel()
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {100 * trainable_params / all_param}"
    )

In [9]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "Wqkv",
        "fc1",
        "fc2",
    ],
    bias="none",
    lora_dropout=0.05,
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)
print_trainable_parameters(model)

trainable params: 6553600 || all params: 1527946240 || trainable%: 0.42891561420380864


In [17]:
dataset_train = Dataset().get_train_dataset()
dataset_train = dataset_train.map(lambda samples: tokenizer(samples))

In [18]:
import transformers

tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=dataset_train,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,
        warmup_steps=2,
        max_steps=10,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=1,
        output_dir="outputs",
        optim="paged_adamw_8bit"
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


ValueError: You cannot perform fine-tuning on purely quantized models. Please attach trainable adapters on top of the quantized model to correctly perform fine-tuning. Please see: https://huggingface.co/docs/transformers/peft for more details