In [1]:
!pip install -q -U bitsandbytes evaluate transformers peft accelerate datasets scipy einops evaluate trl

In [6]:
import torch
from transformers import BitsAndBytesConfig, AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset, Dataset, DatasetDict
from peft import get_peft_model, LoraConfig, prepare_model_for_kbit_training
from functools import partial
import time

model_id = "microsoft/phi-2"
huggingface_dataset_name = "snli"
data = load_dataset(huggingface_dataset_name)

# Select samples for training, testing, and validation
train_samples = Dataset.from_dict(data['train'].select(range(0, 550152, 550))[:1000])
test_samples = Dataset.from_dict(data['test'].select(range(0, 10000, 100))[:100])
validation_samples = Dataset.from_dict(data['validation'].select(range(0, 10000, 100))[:100])
train_samples = train_samples.filter(lambda x: x['label'] != -1)
test_samples = test_samples.filter(lambda x: x['label'] != -1)
validation_samples = validation_samples.filter(lambda x: x['label'] != -1)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

# Load the model and tokenizer
model = AutoModelForSequenceClassification.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    num_labels=3,
    device_map='auto'
)
tokenizer = AutoTokenizer.from_pretrained(model_id, padding_side="left", use_fast=False)
tokenizer.pad_token = tokenizer.eos_token

Filter:   0%|          | 0/1000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/100 [00:00<?, ? examples/s]

Filter:   0%|          | 0/100 [00:00<?, ? examples/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some weights of PhiForSequenceClassification were not initialized from the model checkpoint at microsoft/phi-2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [17]:
import torch
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
from peft import LoraConfig, get_peft_model
peft_config = LoraConfig(
    r=16,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none"
)

model = get_peft_model(model, peft_config)

def tokenize_samples(data):
    tokenized_inputs = tokenizer(
        data['premise'],
        data['hypothesis'],
        truncation=True,
        padding='max_length',
        max_length=128
    )

    tokenized_inputs['labels'] = data['label']
    return tokenized_inputs

tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "left"

tokenized_train_samples = tokenize_samples(train_samples)
tokenized_val_samples = tokenize_samples(validation_samples)
tokenized_test_samples = tokenize_samples(test_samples)

In [20]:
tokenized_train_samples['labels'] = torch.tensor(tokenized_train_samples['labels'], dtype=torch.long).to(device)
tokenized_val_samples['labels'] = torch.tensor(tokenized_val_samples['labels'], dtype=torch.long).to(device)
tokenized_test_samples['labels'] = torch.tensor(tokenized_test_samples['labels'], dtype=torch.long).to(device)
tokenized_train_samples['labels'] = tokenized_train_samples['labels'].to(torch.long)
tokenized_val_samples['labels'] = tokenized_val_samples['labels'].to(torch.long)
tokenized_test_samples['labels'] = tokenized_test_samples['labels'].to(torch.long)


train_dataset = torch.utils.data.TensorDataset(
    torch.tensor(tokenized_train_samples['input_ids'], dtype=torch.long).to(device),
    torch.tensor(tokenized_train_samples['attention_mask'], dtype=torch.long).to(device),
    torch.tensor(tokenized_train_samples['labels'], dtype=torch.long).to(device)
)

val_dataset = torch.utils.data.TensorDataset(
    torch.tensor(tokenized_val_samples['input_ids'], dtype=torch.long).to(device),
    torch.tensor(tokenized_val_samples['attention_mask'], dtype=torch.long).to(device),
    torch.tensor(tokenized_val_samples['labels'], dtype=torch.long).to(device)
)

from transformers import Trainer, TrainingArguments

training_args = TrainingArguments(
    output_dir="./backup_results",
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=5,
    learning_rate=0.0001,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="epoch",
    remove_unused_columns=False
)

from datasets import Dataset

train_dataset = Dataset.from_dict(tokenized_train_samples)
val_dataset = Dataset.from_dict(tokenized_val_samples)
print(len(train_dataset))
print(len(val_dataset))

print(f"Pad token: {tokenizer.pad_token}")
print(f"Pad token ID: {tokenizer.pad_token_id}")
model.config.pad_token_id = tokenizer.pad_token_id
model.to(device)
model.config.num_labels = 3
print(f"Model num_labels: {model.config.num_labels}")

  tokenized_train_samples['labels'] = torch.tensor(tokenized_train_samples['labels'], dtype=torch.long).to(device)
  tokenized_val_samples['labels'] = torch.tensor(tokenized_val_samples['labels'], dtype=torch.long).to(device)
  tokenized_test_samples['labels'] = torch.tensor(tokenized_test_samples['labels'], dtype=torch.long).to(device)
  torch.tensor(tokenized_train_samples['labels'], dtype=torch.long).to(device)
  torch.tensor(tokenized_val_samples['labels'], dtype=torch.long).to(device)


1000
99
Pad token: <|endoftext|>
Pad token ID: 50256
Model num_labels: 3


In [21]:
import time

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

start_time = time.time()

trainer.train()

end_time = time.time()
fine_tuning_time = end_time - start_time
print(f"Time taken to fine-tune the model: {fine_tuning_time / 60:.2f} minutes")

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Epoch,Training Loss,Validation Loss
1,1.0006,No log
2,0.5791,No log
3,0.331,No log
4,0.1297,No log
5,0.0343,No log


Time taken to fine-tune the model: 64.92 minutes


In [23]:
!zip -r phi2.zip "/content/backup_results"

  adding: content/backup_results/ (stored 0%)
  adding: content/backup_results/checkpoint-2500/ (stored 0%)
  adding: content/backup_results/checkpoint-2500/optimizer.pt (deflated 9%)
  adding: content/backup_results/checkpoint-2500/README.md (deflated 66%)
  adding: content/backup_results/checkpoint-2500/training_args.bin (deflated 52%)
  adding: content/backup_results/checkpoint-2500/adapter_model.safetensors (deflated 7%)
  adding: content/backup_results/checkpoint-2500/scheduler.pt (deflated 56%)
  adding: content/backup_results/checkpoint-2500/adapter_config.json (deflated 54%)
  adding: content/backup_results/checkpoint-2500/rng_state.pth (deflated 25%)
  adding: content/backup_results/checkpoint-2500/trainer_state.json (deflated 72%)
  adding: content/backup_results/checkpoint-1000/ (stored 0%)
  adding: content/backup_results/checkpoint-1000/optimizer.pt (deflated 9%)
  adding: content/backup_results/checkpoint-1000/README.md (deflated 66%)
  adding: content/backup_results/chec

In [24]:
!curl bashupload.com -T "/content/phi2.zip"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  965M    0     0  100  965M      0  11.6M  0:01:22  0:01:22 --:--:--     0

Uploaded 1 file, 1 012 169 922 bytes

wget http://bashupload.com/eCanN/phi2.zip


100  965M    0   136  100  965M      1  11.6M  0:01:22  0:01:22 --:--:--    31
