# Lightweight Fine Tuning

In [1]:
from Utils import check_versions, enable_cuda
from TokenizerWrapper import TokenizerWrapper
from DatasetWrapper import sms_spam, imdb
from TrainerWrapper import TrainerWrapper

import random
import numpy as np
import torch

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Parameters: the values a reader is expected to tune before a run.

# Checkpoint name handed to TokenizerWrapper. The commented-out assignments
# below are alternative checkpoints; keep exactly one assignment active.
TOKENIZER_NAME = "facebook/opt-350m"
#TOKENIZER_NAME = "distilbert-base-uncased"
#TOKENIZER_NAME = "gpt2"

# The model is loaded from the same checkpoint as the tokenizer.
MODEL_NAME = TOKENIZER_NAME

# Fraction passed to reduce_to_fraction() when the data is loaded;
# 1.0 presumably keeps the whole dataset -- TODO confirm in DatasetWrapper.
FRACTION = 1.0

In [6]:
def reset_random_seed(seed: int = 42) -> None:
    """Re-seed every random number generator this notebook relies on.

    Call this at the start of each stochastic cell (evaluation, training) so
    the cell gives the same result no matter which cells ran before it.

    Args:
        seed: Value used to seed Python's ``random`` module, NumPy's legacy
            global generator and PyTorch. Defaults to 42, the value that was
            previously hard-coded, so existing ``reset_random_seed()`` calls
            behave exactly as before.
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)

In [7]:
# Load the SMS spam dataset (reduced to FRACTION), then create the tokenizer
# and trainer wrappers for the checkpoints chosen in the parameter cell.
data = sms_spam().reduce_to_fraction(FRACTION)
tokenizer_wrapper = TokenizerWrapper(TOKENIZER_NAME)
trainer_wrapper = TrainerWrapper(MODEL_NAME)

# The return value is not captured, so tokenize() presumably updates `data`
# in place -- TODO confirm in TokenizerWrapper.
# NOTE(review): the recorded output of this cell warns that padding and
# truncation to max_length were requested without a maximum length and were
# therefore skipped; consider giving the tokenizer an explicit max_length.
tokenizer_wrapper.tokenize(data)

Downloading readme: 100%|██████████| 4.98k/4.98k [00:00<00:00, 24.2MB/s]
Downloading data: 100%|██████████| 359k/359k [00:00<00:00, 890kB/s]
Generating train split: 100%|██████████| 5574/5574 [00:00<00:00, 160253.42 examples/s]
Map:   0%|          | 0/4459 [00:00<?, ? examples/s]Asking to pad to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no padding.
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
Map: 100%|██████████| 4459/4459 [00:00<00:00, 59648.03 examples/s]
Map: 100%|██████████| 1115/1115 [00:00<00:00, 59581.98 examples/s]


In [8]:
# Baseline: evaluate the initial model through the HF path (lora=False)
# before any fine-tuning has happened.
# NOTE(review): the recorded output reports `score.weight` as newly
# initialized, so these metrics come from a classification head that has not
# been trained yet.
trainer_wrapper.init_trainer(data, tokenizer_wrapper.get_tokenizer(), lora=False)
# Last expression of the cell, so the returned metrics are displayed.
trainer_wrapper.evaluate()

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


{'eval_loss': 0.5389377474784851, 'eval_model_preparation_time': 0.0026, 'eval_accuracy': 0.8717488789237668, 'eval_runtime': 27.951, 'eval_samples_per_second': 39.891, 'eval_steps_per_second': 39.891}


In [9]:
# Evaluate the initial model again, this time with the wrapper's own
# evaluation loop instead of the HF path.
# Re-seed first so any random initialisation done below is repeatable.
reset_random_seed()
# NOTE(review): the recorded output shows `score.weight` being newly
# initialized here as well, so get_model(data) presumably builds a fresh model;
# that would explain why the accuracy differs from the HF evaluation in the
# previous cell -- TODO confirm in TrainerWrapper.
trainer_wrapper.evaluate_with_own_loop(data, trainer_wrapper.get_model(data))

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  labels = torch.tensor(batch['labels']).unsqueeze(0).to(device)


Validation Loss: 1.6337234101188265
Validation Accuracy: 0.2116591928251121


In [10]:
# Fine-tune with LoRA (lora=True) using the wrapper's own training loop.
# Re-seed first so the run starts from the same random state every time.
reset_random_seed()
# NOTE(review): the recorded output prints one loss line per batch
# ("Batch: i of 4459"); a progress bar (e.g. tqdm.auto) inside the loop would
# keep the notebook output readable.
trainer_wrapper.train_with_own_loop(data, lora=True)

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  labels = torch.tensor(batch['labels']).unsqueeze(0).to(device)


Epoch: 0 Batch: 0 of 4459 | Loss: 2.4217312335968018
Epoch: 0 Batch: 1 of 4459 | Loss: 1.2390352487564087
Epoch: 0 Batch: 2 of 4459 | Loss: 2.168335437774658
Epoch: 0 Batch: 3 of 4459 | Loss: 1.0628057718276978
Epoch: 0 Batch: 4 of 4459 | Loss: 3.034998893737793
Epoch: 0 Batch: 5 of 4459 | Loss: 2.367587089538574
Epoch: 0 Batch: 6 of 4459 | Loss: 0.8418644666671753
Epoch: 0 Batch: 7 of 4459 | Loss: 1.1284434795379639
Epoch: 0 Batch: 8 of 4459 | Loss: 1.7055150270462036
Epoch: 0 Batch: 9 of 4459 | Loss: 0.3543455898761749
Epoch: 0 Batch: 10 of 4459 | Loss: 1.3235478401184082
Epoch: 0 Batch: 11 of 4459 | Loss: 2.450345039367676
Epoch: 0 Batch: 12 of 4459 | Loss: 1.0087203979492188
Epoch: 0 Batch: 13 of 4459 | Loss: 0.24961400032043457
Epoch: 0 Batch: 14 of 4459 | Loss: 0.9427287578582764
Epoch: 0 Batch: 15 of 4459 | Loss: 1.5415728092193604
Epoch: 0 Batch: 16 of 4459 | Loss: 1.9028406143188477
Epoch: 0 Batch: 17 of 4459 | Loss: 3.487165927886963
Epoch: 0 Batch: 18 of 4459 | Loss: 0.11526

  labels = torch.tensor(batch['labels']).unsqueeze(0).to(device)


Validation Loss: 0.029268045618083702
Validation Accuracy: 0.9910313901345291
Epoch: 1 Batch: 0 of 4459 | Loss: 0.000918681500479579
Epoch: 1 Batch: 1 of 4459 | Loss: 0.002676716074347496
Epoch: 1 Batch: 2 of 4459 | Loss: 0.006111504975706339
Epoch: 1 Batch: 3 of 4459 | Loss: 0.0018816161900758743
Epoch: 1 Batch: 4 of 4459 | Loss: 0.0015816095983609557
Epoch: 1 Batch: 5 of 4459 | Loss: 0.006154987495392561
Epoch: 1 Batch: 6 of 4459 | Loss: 0.0032170468475669622
Epoch: 1 Batch: 7 of 4459 | Loss: 0.0004727914638351649
Epoch: 1 Batch: 8 of 4459 | Loss: 0.0008311392739415169
Epoch: 1 Batch: 9 of 4459 | Loss: 0.004749208223074675
Epoch: 1 Batch: 10 of 4459 | Loss: 0.002868230454623699
Epoch: 1 Batch: 11 of 4459 | Loss: 0.0005895545473322272
Epoch: 1 Batch: 12 of 4459 | Loss: 0.16772247850894928
Epoch: 1 Batch: 13 of 4459 | Loss: 0.0017701209289953113
Epoch: 1 Batch: 14 of 4459 | Loss: 0.004486613906919956
Epoch: 1 Batch: 15 of 4459 | Loss: 0.11195673793554306
Epoch: 1 Batch: 16 of 4459 | Lo

In [11]:
# Evaluate the saved LoRA model with the wrapper's own evaluation loop.
# Re-seed first so the evaluation is repeatable.
reset_random_seed()
# Build the adapter path from MODEL_NAME instead of hard-coding the checkpoint,
# so the adapter always matches the base model selected in the parameter cell.
# For the current setting this is still "mwolfram/facebook/opt-350m-lora".
# Assumes adapters are saved as mwolfram/<MODEL_NAME>-lora -- TODO confirm
# against TrainerWrapper.
model_path = f"mwolfram/{MODEL_NAME}-lora"
model = trainer_wrapper.load_peft_model(model_path)
trainer_wrapper.evaluate_with_own_loop(data, model)

Some weights of OPTForSequenceClassification were not initialized from the model checkpoint at facebook/opt-350m and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Validation Loss: 0.024231021698545237
Validation Accuracy: 0.9937219730941704


In [None]:
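# Generate text from the saved model (disabled; uncomment the call below to run it.
# It needs `model_path` from the saved-model evaluation cell).
#trainer_wrapper.generate_from_saved_model(tokenizer_wrapper.get_tokenizer(), model_path)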