## Making Transformers Efficient in Production

### Intent Detection as a Case Study

In [1]:
from transformers import pipeline

# Fine-tuned BERT checkpoint that serves as the baseline model for the benchmark.
bert_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
# No `device` argument is passed, so (per the warning printed below) the model stays on CPU.
pipe = pipeline("text-classification", model=bert_ckpt)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [2]:
# Sample utterance used to sanity-check the pipeline. The line break inside the
# triple-quoted string is part of the text that is sent to the model.
query = """Hey, I'd like to rent a vehicle from Nov 1st to Nov 15th in
Paris and I need a 15 passenger van"""

# Last expression of the cell: displays the predicted label and score.
pipe(query)

[{'label': 'car_rental', 'score': 0.5490034818649292}]

### Creating a Performance Benchmark

In [3]:
class PerformanceBenchmark:
    """Gathers size, latency and accuracy figures for a single pipeline.

    The three ``compute_*`` / ``time_*`` methods start out as placeholders that
    return ``None``; real implementations are attached to the class later.
    ``run_benchmark`` merges their result dicts under the ``optim_type`` key.
    """

    def __init__(self, pipeline, dataset, optim_type="BERT baseline"):
        # pipeline: callable being benchmarked; dataset: examples to score;
        # optim_type: label under which the collected metrics are stored.
        self.pipeline, self.dataset, self.optim_type = pipeline, dataset, optim_type

    def compute_accuracy(self):
        """Placeholder - we'll define this later."""
        return None

    def compute_size(self):
        """Placeholder - we'll define this later."""
        return None

    def time_pipeline(self):
        """Placeholder - we'll define this later."""
        return None

    def run_benchmark(self):
        """Run all three measurements and return ``{optim_type: merged_metrics}``."""
        # The size dict is the accumulator; the other results are merged into it.
        collected = self.compute_size()
        collected.update(self.time_pipeline())
        collected.update(self.compute_accuracy())
        return {self.optim_type: collected}

In [4]:
from datasets import load_dataset

# Load the "plus" configuration of the clinc_oos dataset; splits are accessed by name below.
clinc = load_dataset("clinc_oos", "plus")

In [5]:
# Look at a single test example to see the record layout (raw text plus an integer intent ID).
sample = clinc["test"][42]
sample

{'text': 'transfer $100 from my checking to saving account', 'intent': 133}

In [6]:
# Feature object for the `intent` column; it converts between integer IDs and label names.
# `intents` is also read as a global by later cells (compute_accuracy, num_labels).
intents = clinc["test"].features["intent"]
intents.int2str(sample["intent"])

'transfer'

In [7]:
from datasets import load_metric

# NOTE(review): this call emits a warning in the recorded output below; `load_metric`
# appears to be deprecated in recent `datasets` releases - confirm before upgrading.
accuracy_score = load_metric("accuracy")

  accuracy_score = load_metric("accuracy")


In [8]:
def compute_accuracy(self):
    """This overrides the PerformanceBenchmark.compute_accuracy() method

    Classifies every example in ``self.dataset`` one at a time, converts the
    predicted label name to its integer ID with the global ``intents`` feature,
    and scores the predictions against the ``intent`` column using the global
    ``accuracy_score`` metric. Prints the accuracy and returns the metric's
    result dict.
    """
    # One (predicted_id, true_id) pair per example, gathered in a single pass.
    scored = [
        (intents.str2int(self.pipeline(row["text"])[0]["label"]), row["intent"])
        for row in self.dataset
    ]
    predicted_ids = [predicted for predicted, _ in scored]
    true_ids = [expected for _, expected in scored]
    result = accuracy_score.compute(predictions=predicted_ids,
                                    references=true_ids)
    print(f"Accuracy on test set - {result['accuracy']:.3f}")
    return result

In [9]:
# Attach the implementation above to the class, replacing the placeholder method.
PerformanceBenchmark.compute_accuracy = compute_accuracy

In [10]:
# Peek at one (parameter name, tensor) entry of the model's state dict.
list(pipe.model.state_dict().items())[42]

('bert.encoder.layer.2.attention.self.value.bias',
 tensor([-2.7834e-02,  4.9434e-02,  8.3551e-02,  4.1092e-02,  6.0157e-01,
          1.1774e-01, -5.2112e-02, -6.5143e-02, -2.9358e-02, -4.2250e-02,
          7.9177e-02,  8.0409e-02,  2.9921e-03,  1.7816e-01, -5.0480e-02,
         -1.5634e-01, -2.1707e-02,  1.4381e-02,  2.5132e-02, -2.4110e-02,
         -1.9183e-01, -7.8657e-02,  5.0709e-02,  3.3632e-02, -3.1946e-02,
          1.1616e-01,  9.2720e-02, -1.1787e-01,  2.3233e-01, -1.2678e-02,
         -1.3138e-01, -4.0024e-02,  7.4823e-02, -5.4148e-02, -1.5184e-01,
         -7.4407e-02,  1.1559e-01,  8.2729e-02, -1.3787e-01,  8.3528e-02,
          1.2154e-01,  1.6880e-02, -5.6629e-02, -3.9295e-02,  5.3725e-02,
          6.8602e-02, -1.1294e-01,  4.4001e-02, -2.5884e-01,  1.6767e-01,
          1.8316e-01,  5.6272e-02, -3.6874e-02, -2.7938e-02, -9.3204e-02,
         -7.5239e-03,  4.1141e-02, -1.1542e-02, -9.9749e-02, -3.0910e-02,
          4.1398e-02, -4.4389e-02, -2.6279e-02,  7.2100e-02, 

In [11]:
import torch 

# Serialize only the weights (the state dict) to `model.pt` in the working directory.
torch.save(pipe.model.state_dict(), "model.pt")

In [12]:
import torch
from pathlib import Path

def compute_size(self):
    """This overrides the PerformanceBenchmark.compute_size() method

    Serializes the pipeline model's state dict to a temporary file and reports
    the size of that file.

    The file lives in a private temporary directory rather than at a fixed
    ``model.pt`` path in the working directory, so an existing ``model.pt``
    is never overwritten or deleted, and the temporary file is removed even
    if saving or measuring raises.

    Returns:
        dict: ``{"size_mb": <file size in MiB (bytes / 1024**2)>}``.
    """
    import tempfile  # local import: only needed by this method

    state_dict = self.pipeline.model.state_dict()
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_path = Path(tmp_dir) / "model.pt"
        torch.save(state_dict, tmp_path)
        # Calculate size in megabytes
        size_mb = tmp_path.stat().st_size / (1024 * 1024)
    # Leaving the `with` block deletes the temporary directory and file
    print(f"Model size (MB) - {size_mb:.2f}")
    return {"size_mb": size_mb}

# Attach the implementation above to the class, replacing the placeholder method.
PerformanceBenchmark.compute_size = compute_size

In [13]:
from time import perf_counter
# Quick probe: time three individual pipeline calls on the sample query and print
# each wall-clock latency in milliseconds. There is no warm-up phase here.
for _ in range(3):
    start_time = perf_counter()
    _ = pipe(query)
    latency = perf_counter() - start_time
    print(f"Latency (ms) - {1000 * latency:.3f}")

Latency (ms) - 50.613
Latency (ms) - 51.883
Latency (ms) - 47.774


In [14]:
import numpy as np

def time_pipeline(self, query="What is the pin number for my account?"):
    """This overrides the PerformanceBenchmark.time_pipeline() method

    Calls ``self.pipeline(query)`` 10 times as an untimed warm-up, then 100
    more times while measuring each call with ``perf_counter``.

    Args:
        query: Text passed to the pipeline on every call.

    Returns:
        dict: ``time_avg_ms`` and ``time_std_ms``, the mean and standard
        deviation of the 100 timed calls, in milliseconds.
    """
    latencies = []
    # Warmup
    for _ in range(10):
        _ = self.pipeline(query)
    # Timed run
    for _ in range(100):
        start_time = perf_counter()
        _ = self.pipeline(query)
        latency = perf_counter() - start_time
        latencies.append(latency)
    # Compute run statistics
    time_avg_ms = 1000 * np.mean(latencies)
    time_std_ms = 1000 * np.std(latencies)
    # The backslash is escaped: a bare "\-" is an invalid escape sequence and
    # triggers a warning at compile time. The printed text is unchanged.
    print(f"Average latency (ms) - {time_avg_ms:.2f} +\\- {time_std_ms:.2f}")
    return {"time_avg_ms": time_avg_ms, "time_std_ms": time_std_ms}

  print(f"Average latency (ms) - {time_avg_ms:.2f} +\- {time_std_ms:.2f}")


In [15]:
# Attach the implementation above to the class, replacing the placeholder method.
PerformanceBenchmark.time_pipeline = time_pipeline

In [16]:
# Benchmark the baseline pipeline on the test split (default optim_type "BERT baseline").
# `perf_metrics` is extended with further entries later in the notebook.
pb = PerformanceBenchmark(pipe, clinc["test"])
perf_metrics = pb.run_benchmark()

Model size (MB) - 418.15
Average latency (ms) - 37.57 +\- 3.24
Accuracy on test set - 0.867



### Making Models Smaller via Knowledge Distillation

#### Creating a Knowledge Distillation Trainer

In [17]:
from transformers import TrainingArguments

class DistillationTrainingArguments(TrainingArguments):
    """TrainingArguments extended with two knowledge-distillation settings.

    Args:
        alpha: Weight of the student's cross-entropy loss in the combined loss;
            the distillation term is weighted by ``1 - alpha`` (see
            ``DistillationTrainer.compute_loss``).
        temperature: Value the student and teacher logits are divided by
            before the softmax in the distillation term.

    All other positional and keyword arguments are forwarded unchanged to
    ``TrainingArguments``.
    """
    def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
        super().__init__(*args, **kwargs)
        # Stored as plain attributes; the trainer reads them via `self.args`.
        self.alpha = alpha
        self.temperature = temperature

In [18]:
import torch.nn as nn
import torch.nn.functional as F
from transformers import Trainer

class DistillationTrainer(Trainer):
    """Trainer whose loss mixes the student's own loss with a distillation term.

    The combined loss is ``alpha * loss_ce + (1 - alpha) * loss_kd`` where
    ``loss_ce`` is the loss reported by the student model and ``loss_kd`` is
    the KL divergence between the temperature-softened student and teacher
    distributions. ``alpha`` and ``temperature`` are read from ``self.args``
    (see ``DistillationTrainingArguments``).

    Args:
        teacher_model: Model queried (without gradients) for target logits.
            It receives the same ``inputs`` as the student.
        *args, **kwargs: Forwarded unchanged to ``Trainer``.
    """

    def __init__(self, *args, teacher_model=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.teacher_model = teacher_model
        if self.teacher_model is not None:
            # The teacher only provides targets, so keep it in inference mode
            # (dropout disabled) regardless of how it was constructed.
            self.teacher_model.eval()

    def compute_loss(self, model, inputs, return_outputs=False,
                     num_items_in_batch=None):
        """Return the weighted sum of the student loss and distillation loss.

        Args:
            model: The student model being trained.
            inputs: Batch passed as keyword arguments to both student and teacher.
            return_outputs: When true, return ``(loss, student_outputs)``.
            num_items_in_batch: Accepted for compatibility with newer
                ``Trainer`` versions that pass it as a keyword; not used here.
        """
        outputs_stu = model(**inputs)
        # Extract cross-entropy loss and logits from student
        loss_ce = outputs_stu.loss
        logits_stu = outputs_stu.logits
        # Extract logits from teacher
        with torch.no_grad():
            outputs_tea = self.teacher_model(**inputs)
            logits_tea = outputs_tea.logits
        # Soften probabilities and compute distillation loss.
        # KLDivLoss expects log-probabilities as input and probabilities as target.
        loss_fct = nn.KLDivLoss(reduction="batchmean")
        # The T**2 factor is the conventional distillation rescaling of the
        # soft-target term.
        loss_kd = self.args.temperature ** 2 * loss_fct(
            F.log_softmax(logits_stu / self.args.temperature, dim=-1),
            F.softmax(logits_tea / self.args.temperature, dim=-1))
        # Return weighted student loss
        loss = self.args.alpha * loss_ce + (1. - self.args.alpha) * loss_kd
        return (loss, outputs_stu) if return_outputs else loss

#### Choosing a Good Student Initialization

In [19]:
from transformers import AutoTokenizer

# Checkpoint the student is initialized from, and its matching tokenizer.
student_ckpt = "distilbert-base-uncased"
student_tokenizer = AutoTokenizer.from_pretrained(student_ckpt)

def tokenize_text(batch):
    """Tokenize the `text` column of a batch with the student tokenizer, truncating long inputs."""
    return student_tokenizer(batch["text"], truncation=True)

# Tokenize every split in batches and drop the raw text column afterwards.
clinc_enc = clinc.map(tokenize_text, batched=True, remove_columns=["text"])
# Presumably renamed because the model takes the targets as a `labels` argument
# (compute_loss reads `outputs_stu.loss`) - confirm against the model's forward signature.
clinc_enc = clinc_enc.rename_column("intent", "labels")

Map:   0%|          | 0/5500 [00:00<?, ? examples/s]

In [20]:
def compute_metrics(pred):
    """Score a ``(predictions, labels)`` pair with the global accuracy metric.

    The predictions are reduced to class IDs by taking the argmax along
    axis 1 before being compared with the labels.
    """
    scores, references = pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy_score.compute(predictions=predicted_ids,
                                  references=references)

In [21]:
# Used for both the per-device train and eval batch sizes below.
batch_size = 48

# `finetuned_ckpt` doubles as the local output directory for this run.
finetuned_ckpt = "distilbert-base-uncased-finetuned-clinc"
student_training_args = DistillationTrainingArguments(
    output_dir=finetuned_ckpt, evaluation_strategy = "epoch",
    num_train_epochs=5, learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    # With alpha=1 the (1 - alpha) weight on the distillation term in
    # DistillationTrainer.compute_loss is zero, so this run optimizes the
    # student's cross-entropy loss only.
    per_device_eval_batch_size=batch_size, alpha=1, weight_decay=0.01,
    push_to_hub=True)



In [22]:
# Take the label mappings from the current `pipe` model's config; they are passed
# to the student's config in the next cell.
id2label = pipe.model.config.id2label
label2id = pipe.model.config.label2id

In [23]:
from transformers import AutoConfig

# Number of classes comes from the dataset's intent feature.
num_labels = intents.num_classes
# Student config: the base checkpoint's config plus the classification head size
# and the label mappings collected above.
student_config = (AutoConfig
                 .from_pretrained(student_ckpt, num_labels=num_labels,
                 id2label=id2label, label2id=label2id))

In [24]:
import torch
from transformers import AutoModelForSequenceClassification

# Prefer CUDA when it is available; otherwise run on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def student_init():
    """Build a student classifier from ``student_ckpt`` and move it to ``device``.

    The model is created with ``student_config``; a new instance is returned
    on every call.
    """
    student = AutoModelForSequenceClassification.from_pretrained(
        student_ckpt, config=student_config)
    return student.to(device)

In [25]:
# The teacher is the same fine-tuned BERT checkpoint used for the baseline pipeline.
teacher_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
teacher_model = (AutoModelForSequenceClassification
                 .from_pretrained(teacher_ckpt, num_labels=num_labels)
                 .to(device))

# The student is supplied through `model_init` rather than as a model instance;
# training runs on the train split and evaluates on the validation split.
distilbert_trainer = DistillationTrainer(model_init=student_init,
    teacher_model=teacher_model, args=student_training_args,
    train_dataset=clinc_enc['train'], eval_dataset=clinc_enc['validation'],
    compute_metrics=compute_metrics, tokenizer=student_tokenizer)

distilbert_trainer.train()

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  attn_output = torch.nn.functional.scaled_dot_product_attention(


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,3.29281,0.726774
2,3.798800,1.886249,0.851613
3,3.798800,1.174241,0.887742
4,1.713400,0.879092,0.907097
5,0.919400,0.79214,0.914516


TrainOutput(global_step=1590, training_loss=2.0666040432528128, metrics={'train_runtime': 238.5104, 'train_samples_per_second': 319.693, 'train_steps_per_second': 6.666, 'total_flos': 414689637990180.0, 'train_loss': 2.0666040432528128, 'epoch': 5.0})

In [26]:
# Upload the trained student with this commit message; the CommitInfo shown below
# identifies the target repository.
distilbert_trainer.push_to_hub("Training completed!")

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/SkillRipper/distilbert-base-uncased-finetuned-clinc/commit/5befb28d26af986884856ef80fdb9ebfea181a6c', commit_message='Training completed!', commit_description='', oid='5befb28d26af986884856ef80fdb9ebfea181a6c', pr_url=None, pr_revision=None, pr_num=None)

In [27]:
# Load the student from the Hub repo that the push above reported.
# The commented-out line is the alternative `transformersbook` checkpoint.
# finetuned_ckpt = "transformersbook/distilbert-base-uncased-finetuned-clinc"
finetuned_ckpt = "SkillRipper/distilbert-base-uncased-finetuned-clinc"

# NOTE: this rebinds `pipe`, which until now referred to the BERT baseline pipeline.
pipe = pipeline("text-classification", model=finetuned_ckpt)

config.json:   0%|          | 0.00/8.54k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/1.25k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [28]:
# Benchmark the distilled student and add its metrics to `perf_metrics`
# under the "DistilBERT" key, alongside the baseline entry.
optim_type = "DistilBERT"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type)
perf_metrics.update(pb.run_benchmark())

Model size (MB) - 255.88
Average latency (ms) - 14.57 +\- 0.57
Accuracy on test set - 0.854


In [29]:
import pandas as pd

def plot_metrics(perf_metrics, current_optim_type):
    """Scatter-plot accuracy (%) against average latency (ms) for each entry.

    Args:
        perf_metrics: Mapping of optimization name to a dict holding
            ``time_avg_ms``, ``accuracy`` and ``size_mb``. The marker area is
            taken from ``size_mb``.
        current_optim_type: Name of the entry drawn with a dashed-circle marker.

    NOTE(review): `plt` is expected to be `matplotlib.pyplot`, imported in a
    cell that is not visible here - confirm.
    """
    df = pd.DataFrame.from_dict(perf_metrics, orient='index')

    for idx in df.index:
        df_opt = df.loc[idx]
        # Add a dashed circle around the current optimization type
        if idx == current_optim_type:
            plt.scatter(df_opt["time_avg_ms"], df_opt["accuracy"] * 100,
                        alpha=0.5, s=df_opt["size_mb"], label=idx,
                        marker='$\u25CC$')
        else:
            plt.scatter(df_opt["time_avg_ms"], df_opt["accuracy"] * 100,
                        s=df_opt["size_mb"], label=idx, alpha=0.5)

    legend = plt.legend(bbox_to_anchor=(1,1))
    # Matplotlib 3.7 renamed `Legend.legendHandles` to `legend_handles` and
    # later removed the old name; support both spellings.
    handles = getattr(legend, "legend_handles", None)
    if handles is None:
        handles = legend.legendHandles
    # Give every legend marker the same size, independent of model size
    for handle in handles:
        handle.set_sizes([20])

    plt.ylim(80,90)
    # Use the baseline to define the x-axis range; when no baseline entry is
    # present, fall back to the slowest model instead of raising KeyError.
    if "BERT baseline" in perf_metrics:
        slowest_ms = perf_metrics["BERT baseline"]["time_avg_ms"]
    else:
        slowest_ms = df["time_avg_ms"].max()
    xlim = int(slowest_ms + 3)
    plt.xlim(1, xlim)
    plt.ylabel("Accuracy (%)")
    plt.xlabel("Average latency (ms)")
    plt.show()

### Finding Good Hyperparameters with Optuna

In [31]:
def objective(trial):
    """Rosenbrock function of two trial parameters, each sampled from [-2, 2].

    Asks ``trial`` for floats ``x`` and ``y`` and returns
    ``(1 - x)**2 + 100 * (y - x**2)**2``, which is 0 at ``x = y = 1``.
    """
    lower, upper = -2, 2
    x = trial.suggest_float("x", lower, upper)
    y = trial.suggest_float("y", lower, upper)
    offset_term = (1 - x) ** 2
    valley_term = 100 * (y - x ** 2) ** 2
    return offset_term + valley_term

In [36]:
import optuna

# Create a study with default settings (the log below reports it is held in memory)
# and run 1000 trials of `objective`; the logged best value decreases over time.
# NOTE(review): no sampler seed is passed, so trial values presumably differ between runs.
study = optuna.create_study()
study.optimize(objective, n_trials=1000)

[I 2024-09-19 15:26:46,944] A new study created in memory with name: no-name-36b7188a-ca9d-419d-8a77-1d675a2578e1
[I 2024-09-19 15:26:46,948] Trial 0 finished with value: 407.59484307150524 and parameters: {'x': 1.93971281656703, 'y': 1.7457761566547214}. Best is trial 0 with value: 407.59484307150524.
[I 2024-09-19 15:26:46,949] Trial 1 finished with value: 685.9564209179578 and parameters: {'x': -1.5499477880735268, 'y': -0.20429605795784234}. Best is trial 0 with value: 407.59484307150524.
[I 2024-09-19 15:26:46,950] Trial 2 finished with value: 762.1737838095153 and parameters: {'x': -1.0088363378990381, 'y': -1.7356804852162147}. Best is trial 0 with value: 407.59484307150524.
[I 2024-09-19 15:26:46,951] Trial 3 finished with value: 2.8260157475521894 and parameters: {'x': -0.5843346063667703, 'y': 0.28524198556311786}. Best is trial 3 with value: 2.8260157475521894.
[I 2024-09-19 15:26:46,952] Trial 4 finished with value: 1690.499113313176 and parameters: {'x': 1.467530949111099,

[I 2024-09-19 15:26:47,237] Trial 42 finished with value: 14.319564273798427 and parameters: {'x': -0.06675624258800544, 'y': -0.358608273145629}. Best is trial 33 with value: 1.9438996505856836.
[I 2024-09-19 15:26:47,246] Trial 43 finished with value: 789.714340764756 and parameters: {'x': 1.6647518498138032, 'y': -0.03800058398552414}. Best is trial 33 with value: 1.9438996505856836.
[I 2024-09-19 15:26:47,254] Trial 44 finished with value: 53.851093188888754 and parameters: {'x': 0.17469927613356684, 'y': 0.7596972633920499}. Best is trial 33 with value: 1.9438996505856836.
[I 2024-09-19 15:26:47,262] Trial 45 finished with value: 2.3803794816839083 and parameters: {'x': 0.5835053988331442, 'y': 0.1919217691047851}. Best is trial 33 with value: 1.9438996505856836.
[I 2024-09-19 15:26:47,271] Trial 46 finished with value: 260.24886349855416 and parameters: {'x': 1.020258042879982, 'y': -0.5722953120247168}. Best is trial 33 with value: 1.9438996505856836.
[I 2024-09-19 15:26:47,279]

[I 2024-09-19 15:26:47,620] Trial 84 finished with value: 132.53293384616595 and parameters: {'x': 0.5607598267227767, 'y': 1.4648428293922116}. Best is trial 77 with value: 0.47494546485194655.
[I 2024-09-19 15:26:47,629] Trial 85 finished with value: 74.20156204341941 and parameters: {'x': 0.899231428964042, 'y': 1.6699615059336579}. Best is trial 77 with value: 0.47494546485194655.
[I 2024-09-19 15:26:47,638] Trial 86 finished with value: 1.8640274933522898 and parameters: {'x': 1.0854094070986737, 'y': 1.3143755612817318}. Best is trial 77 with value: 0.47494546485194655.
[I 2024-09-19 15:26:47,647] Trial 87 finished with value: 197.69579979317413 and parameters: {'x': 1.6266139648125233, 'y': 1.241226576893423}. Best is trial 77 with value: 0.47494546485194655.
[I 2024-09-19 15:26:47,656] Trial 88 finished with value: 98.8025315710884 and parameters: {'x': 1.03342192819534, 'y': 0.07397187496612723}. Best is trial 77 with value: 0.47494546485194655.
[I 2024-09-19 15:26:47,665] Tri

[I 2024-09-19 15:26:48,008] Trial 126 finished with value: 0.12460369501121005 and parameters: {'x': 0.6495794753782169, 'y': 0.4177000833371404}. Best is trial 126 with value: 0.12460369501121005.
[I 2024-09-19 15:26:48,016] Trial 127 finished with value: 1.5743103395290463 and parameters: {'x': 0.7018264531919487, 'y': 0.6144374755310591}. Best is trial 126 with value: 0.12460369501121005.
[I 2024-09-19 15:26:48,025] Trial 128 finished with value: 4.2322631566831355 and parameters: {'x': 0.6627508852349364, 'y': 0.6421802592620031}. Best is trial 126 with value: 0.12460369501121005.
[I 2024-09-19 15:26:48,033] Trial 129 finished with value: 11.375958787703508 and parameters: {'x': 0.4336143985439375, 'y': 0.5205145463750781}. Best is trial 126 with value: 0.12460369501121005.
[I 2024-09-19 15:26:48,043] Trial 130 finished with value: 0.2950703670213121 and parameters: {'x': 0.6377823778502187, 'y': 0.4472470681632175}. Best is trial 126 with value: 0.12460369501121005.
[I 2024-09-19 

[I 2024-09-19 15:26:48,357] Trial 168 finished with value: 13.846601575857092 and parameters: {'x': 0.7091796992255832, 'y': 0.8739078815122124}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,366] Trial 169 finished with value: 9.857943358518742 and parameters: {'x': 1.0045268855911742, 'y': 0.6951009709694737}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,374] Trial 170 finished with value: 1.8570185519330678 and parameters: {'x': 0.8251191819792943, 'y': 0.8159673359295898}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,383] Trial 171 finished with value: 0.36276855415906 and parameters: {'x': 0.9228896241026721, 'y': 0.9114598834185812}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,392] Trial 172 finished with value: 6.611468010365749 and parameters: {'x': 0.9375100216952176, 'y': 1.1359768451487717}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-0

[I 2024-09-19 15:26:48,741] Trial 210 finished with value: 0.19601769280998393 and parameters: {'x': 0.6839566991831225, 'y': 0.43679123059065217}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,750] Trial 211 finished with value: 0.715752024198779 and parameters: {'x': 0.7327892361370097, 'y': 0.45670862310911636}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,759] Trial 212 finished with value: 0.18850183063184853 and parameters: {'x': 0.6092536062009778, 'y': 0.3522640256838936}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,768] Trial 213 finished with value: 0.2013934796361042 and parameters: {'x': 0.6285081953665359, 'y': 0.4201993899524008}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:48,776] Trial 214 finished with value: 0.20667795818166057 and parameters: {'x': 0.6264818609196277, 'y': 0.4183951848851018}. Best is trial 149 with value: 0.0026389185181298733.
[

[I 2024-09-19 15:26:49,128] Trial 252 finished with value: 6.84275961967242 and parameters: {'x': 0.7672369653812124, 'y': 0.32810350636884344}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,137] Trial 253 finished with value: 30.2159336060276 and parameters: {'x': 0.8872905472836998, 'y': 0.2377098599036192}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,148] Trial 254 finished with value: 35.259656104446265 and parameters: {'x': 1.0069497267741983, 'y': 0.42014974109074316}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,157] Trial 255 finished with value: 6.2466867671510515 and parameters: {'x': 0.5262259927825599, 'y': 0.5223160154625366}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,168] Trial 256 finished with value: 1.9533524906909236 and parameters: {'x': 0.6959591911773035, 'y': 0.6207744323694309}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024

[I 2024-09-19 15:26:49,549] Trial 294 finished with value: 23.511412445036267 and parameters: {'x': 1.0853320457539894, 'y': 0.6931350588249109}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,561] Trial 295 finished with value: 1.0875126875609875 and parameters: {'x': 0.8699740788020325, 'y': 0.8603249875418086}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,574] Trial 296 finished with value: 8.862101852117386 and parameters: {'x': 0.8210457193504743, 'y': 0.9712705317117745}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,586] Trial 297 finished with value: 2.612008303148239 and parameters: {'x': 0.9226586316641162, 'y': 0.6898670263594988}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:49,597] Trial 298 finished with value: 0.04856326900440547 and parameters: {'x': 0.7796414748415081, 'y': 0.6076086786010383}. Best is trial 149 with value: 0.0026389185181298733.
[I 202

[I 2024-09-19 15:26:50,023] Trial 336 finished with value: 1.9057999650356097 and parameters: {'x': 0.684161088989445, 'y': 0.6024655971629103}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,035] Trial 337 finished with value: 4.156541001338989 and parameters: {'x': 0.8403202506201373, 'y': 0.909387806971711}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,046] Trial 338 finished with value: 3.847607341838333 and parameters: {'x': 0.9917091129351926, 'y': 0.7873355282060156}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,059] Trial 339 finished with value: 0.23069258014847177 and parameters: {'x': 0.7854143456355815, 'y': 0.6598461000868597}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,070] Trial 340 finished with value: 10.895391247832249 and parameters: {'x': 0.7000069285160836, 'y': 0.8187253126951666}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-

[I 2024-09-19 15:26:50,461] Trial 378 finished with value: 0.2970574282377114 and parameters: {'x': 0.7710393083138831, 'y': 0.5450410822243562}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,473] Trial 379 finished with value: 0.5343434576413234 and parameters: {'x': 0.8708838902403195, 'y': 0.686489289695488}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,485] Trial 380 finished with value: 0.3608903366695263 and parameters: {'x': 0.7330419552766506, 'y': 0.5911672099738888}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,496] Trial 381 finished with value: 0.16198120400107519 and parameters: {'x': 0.9539487517331832, 'y': 0.8700356639197526}. Best is trial 149 with value: 0.0026389185181298733.
[I 2024-09-19 15:26:50,508] Trial 382 finished with value: 0.4807146059573271 and parameters: {'x': 0.9962297507244767, 'y': 0.9231411559716487}. Best is trial 149 with value: 0.0026389185181298733.
[I 20

[I 2024-09-19 15:26:50,931] Trial 420 finished with value: 368.7913011294028 and parameters: {'x': 0.8702598422335212, 'y': -1.162997953768055}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:50,942] Trial 421 finished with value: 4.198254513626927 and parameters: {'x': 0.7367796649704663, 'y': 0.7460429353605103}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:50,953] Trial 422 finished with value: 0.09330276456096523 and parameters: {'x': 0.7940269011170116, 'y': 0.6530348379578377}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:50,965] Trial 423 finished with value: 8.277822938155829 and parameters: {'x': 0.700097959148926, 'y': 0.7762819008107689}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:50,976] Trial 424 finished with value: 3.840608227358014 and parameters: {'x': 0.8699376698488077, 'y': 0.5612489193477702}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 

[I 2024-09-19 15:26:51,426] Trial 462 finished with value: 1.3394673411261477 and parameters: {'x': 0.8924603991484105, 'y': 0.6812509088712}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:51,439] Trial 463 finished with value: 1.5263396938882436 and parameters: {'x': 0.7501316027794184, 'y': 0.44170545985630644}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:51,451] Trial 464 finished with value: 6.113561786302894 and parameters: {'x': 0.9956256669612887, 'y': 0.7440146770405435}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:51,463] Trial 465 finished with value: 1.7467151794950553 and parameters: {'x': 0.6497485369450849, 'y': 0.549610953620862}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 15:26:51,476] Trial 466 finished with value: 34.133785585923526 and parameters: {'x': 1.1081326871598862, 'y': 0.6438168630161607}. Best is trial 392 with value: 0.002096877095971208.
[I 2024-09-19 

[I 2024-09-19 15:26:51,956] Trial 504 finished with value: 0.006373034447019985 and parameters: {'x': 0.9242053428316855, 'y': 0.8516491151286285}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:51,967] Trial 505 finished with value: 7.229980985364172 and parameters: {'x': 1.1520067640382985, 'y': 1.0586633509169958}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:51,980] Trial 506 finished with value: 3.3495756082715626 and parameters: {'x': 1.0608380133180348, 'y': 0.9424599773668039}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:51,992] Trial 507 finished with value: 0.08080267057250595 and parameters: {'x': 0.9428687745638554, 'y': 0.8611557555154404}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:52,003] Trial 508 finished with value: 0.031144519407054615 and parameters: {'x': 1.0075684804333471, 'y': 1.0328258160082193}. Best is trial 493 with value: 0.000442502037418168.
[I 202

[I 2024-09-19 15:26:52,452] Trial 546 finished with value: 0.0823245798307235 and parameters: {'x': 0.9125101040242086, 'y': 0.8600005198390436}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:52,465] Trial 547 finished with value: 3.0544751076410503 and parameters: {'x': 0.9677529081327824, 'y': 1.1112865059139128}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:52,478] Trial 548 finished with value: 0.16733502570706588 and parameters: {'x': 0.8642325867751904, 'y': 0.7854858164349232}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:52,490] Trial 549 finished with value: 3.04986318941873 and parameters: {'x': 1.0404479438673544, 'y': 0.9079401957645508}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:52,502] Trial 550 finished with value: 5.994658939895713 and parameters: {'x': 0.8811212397478185, 'y': 1.0209257949336088}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-1

[I 2024-09-19 15:26:52,978] Trial 588 finished with value: 6.1634100626081425 and parameters: {'x': 0.8532042058840912, 'y': 0.9757852010432738}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:52,990] Trial 589 finished with value: 2.0671585997711652 and parameters: {'x': 0.9525680224222476, 'y': 1.051083742937659}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,003] Trial 590 finished with value: 13.698410649270262 and parameters: {'x': 1.1413697793008506, 'y': 0.9328814219236063}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,016] Trial 591 finished with value: 0.6257000248359935 and parameters: {'x': 1.0238374397248193, 'y': 1.1273083798934025}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,027] Trial 592 finished with value: 53.620709827556915 and parameters: {'x': 0.8946222729557624, 'y': 1.5325348166635853}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-

[I 2024-09-19 15:26:53,510] Trial 630 finished with value: 3.150709027315092 and parameters: {'x': 0.8136661936673342, 'y': 0.8385743090833226}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,522] Trial 631 finished with value: 124.3539383380199 and parameters: {'x': 1.4776803320640717, 'y': 1.0694217559596508}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,534] Trial 632 finished with value: 257.30632977133115 and parameters: {'x': 1.0778915293129039, 'y': -0.4422080254200481}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,547] Trial 633 finished with value: 0.30981964396085604 and parameters: {'x': 0.9330696612752472, 'y': 0.9258765690799997}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:53,559] Trial 634 finished with value: 0.84442324650054 and parameters: {'x': 0.9574720347536279, 'y': 1.0085467395532821}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-1

[I 2024-09-19 15:26:54,053] Trial 672 finished with value: 0.03898293999715833 and parameters: {'x': 1.0156126843775246, 'y': 1.0117868524207128}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,068] Trial 673 finished with value: 7.400004183360896 and parameters: {'x': 1.1459602857949327, 'y': 1.0415873544397356}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,083] Trial 674 finished with value: 1.4920304271526337 and parameters: {'x': 1.0586131563040853, 'y': 1.2426698031705543}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,098] Trial 675 finished with value: 71.95158388313845 and parameters: {'x': -0.411162699271856, 'y': -0.6673673991834588}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,111] Trial 676 finished with value: 0.8151483032698853 and parameters: {'x': 1.044176523657617, 'y': 1.000127190880913}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-1

[I 2024-09-19 15:26:54,646] Trial 714 finished with value: 23.143743476256816 and parameters: {'x': 1.301142940051117, 'y': 1.212836965478999}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,661] Trial 715 finished with value: 5.081911699071389 and parameters: {'x': 1.2169584429758347, 'y': 1.2566033441518256}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,675] Trial 716 finished with value: 1.4990097087909482 and parameters: {'x': 1.1414423103962514, 'y': 1.4245048473530886}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,689] Trial 717 finished with value: 1.8803506607538616 and parameters: {'x': 1.149824438220306, 'y': 1.1857913122415256}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:54,705] Trial 718 finished with value: 0.9267321882186934 and parameters: {'x': 1.0958681161741795, 'y': 1.296715314217907}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 1

[I 2024-09-19 15:26:55,211] Trial 756 finished with value: 7.634846435722613 and parameters: {'x': 0.9088627515767894, 'y': 1.1021934181595734}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,224] Trial 757 finished with value: 1.1687330403064728 and parameters: {'x': 1.0893402868389284, 'y': 1.2944004317350506}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,239] Trial 758 finished with value: 8.280397206939657 and parameters: {'x': 1.20737283796029, 'y': 1.170740568701114}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,252] Trial 759 finished with value: 0.2173293459902658 and parameters: {'x': 0.9308846335516541, 'y': 0.9126496078101818}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,265] Trial 760 finished with value: 0.31085813326618866 and parameters: {'x': 0.9892492205325639, 'y': 1.0343583073864615}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 

[I 2024-09-19 15:26:55,766] Trial 798 finished with value: 6.648043989271654 and parameters: {'x': 1.2021506665638544, 'y': 1.188121888530306}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,779] Trial 799 finished with value: 0.07590981134300731 and parameters: {'x': 1.1004578079960043, 'y': 1.1853523603511553}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,792] Trial 800 finished with value: 4.675062509155056 and parameters: {'x': 1.04137446534016, 'y': 1.3006401161024057}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,805] Trial 801 finished with value: 205.70175355260506 and parameters: {'x': -0.08666018134831105, 'y': 1.4376181060309912}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:55,818] Trial 802 finished with value: 3.3637779133820542 and parameters: {'x': 1.1648282333087885, 'y': 1.1741609259629568}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-1

[I 2024-09-19 15:26:56,351] Trial 840 finished with value: 296.8452990311518 and parameters: {'x': 0.9162493747556801, 'y': -0.8833866301398019}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:56,369] Trial 841 finished with value: 2.0010226952986163 and parameters: {'x': 0.9997507399731527, 'y': 1.1409590492360149}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:56,386] Trial 842 finished with value: 17.245066576391938 and parameters: {'x': 1.1167450951802989, 'y': 0.8320119449584278}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:56,403] Trial 843 finished with value: 13.170274015567257 and parameters: {'x': 0.9131899800426029, 'y': 1.1967208188346308}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:56,418] Trial 844 finished with value: 1.6424766296352338 and parameters: {'x': 1.023357281050844, 'y': 0.919122266610541}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-1

[I 2024-09-19 15:26:56,964] Trial 882 finished with value: 507.80150716486776 and parameters: {'x': 1.855156782236607, 'y': 1.1897847264812231}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:56,978] Trial 883 finished with value: 1.9525426819861174 and parameters: {'x': 1.0617183128630538, 'y': 0.9876487296305537}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:56,991] Trial 884 finished with value: 680.449828340582 and parameters: {'x': 1.1357477127399902, 'y': -1.318585137400477}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:57,005] Trial 885 finished with value: 226.27909731545319 and parameters: {'x': 1.6276611635245162, 'y': 1.1463332966979793}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:57,019] Trial 886 finished with value: 2.5232557190909572 and parameters: {'x': 0.9481186720494091, 'y': 1.0576918597660752}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19

[I 2024-09-19 15:26:57,582] Trial 924 finished with value: 0.511918464335044 and parameters: {'x': 0.8375509765127463, 'y': 0.771171533428378}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:57,597] Trial 925 finished with value: 0.7784503638894703 and parameters: {'x': 0.880702474833817, 'y': 0.8630564387667109}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:57,611] Trial 926 finished with value: 7.953236786791593 and parameters: {'x': 0.7771977033244779, 'y': 0.8851696178326416}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:57,627] Trial 927 finished with value: 0.9695180537011531 and parameters: {'x': 0.9402301586054976, 'y': 0.7857502186360839}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:57,642] Trial 928 finished with value: 0.20375947862015883 and parameters: {'x': 0.9460837054707598, 'y': 0.8502578058441623}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19

[I 2024-09-19 15:26:58,206] Trial 966 finished with value: 1.4591442294018488 and parameters: {'x': 0.916734727369325, 'y': 0.9609102830956796}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:58,220] Trial 967 finished with value: 10.947703501905972 and parameters: {'x': 0.8440845788557969, 'y': 1.0429843569012156}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:58,235] Trial 968 finished with value: 17.806810085444152 and parameters: {'x': 1.1053057541719304, 'y': 0.799851064637535}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:58,249] Trial 969 finished with value: 3.0353000516265682 and parameters: {'x': 1.0396102676831889, 'y': 0.9066134173646745}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-19 15:26:58,264] Trial 970 finished with value: 1.0261252075884328 and parameters: {'x': 0.9479918813079404, 'y': 0.9998528479006221}. Best is trial 493 with value: 0.000442502037418168.
[I 2024-09-1

In [None]:
# Show the parameter values recorded for the study's best trial.
study.best_params

In [33]:
def hp_space(trial):
    """Draw one candidate set of distillation hyperparameters from ``trial``.

    Args:
        trial: Object exposing ``suggest_int(name, low, high)`` and
            ``suggest_float(name, low, high)`` (an Optuna-style trial,
            judging by the study logs in this notebook).

    Returns:
        dict with the keys ``"num_train_epochs"`` (int drawn from 5..10),
        ``"alpha"`` (float drawn from 0..1) and ``"temperature"`` (int drawn
        from 2..20), in that order.
    """
    # Sampling order matches the key order of the returned dict.
    epochs = trial.suggest_int("num_train_epochs", 5, 10)
    alpha = trial.suggest_float("alpha", 0, 1)
    temperature = trial.suggest_int("temperature", 2, 20)
    return {
        "num_train_epochs": epochs,
        "alpha": alpha,
        "temperature": temperature,
    }

In [None]:
# Run 20 trials over the space defined by hp_space, maximizing the trainer's
# objective; the outcome is kept in best_run for the cells that follow.
best_run = distilbert_trainer.hyperparameter_search(
    n_trials=20, direction="maximize", hp_space=hp_space)

[I 2024-09-19 15:26:58,698] A new study created in memory with name: no-name-950748a5-06af-4420-a33e-12546db91619
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.410439,0.672581
2,0.644900,0.142742,0.842258
3,0.644900,0.074758,0.894839
4,0.166900,0.053175,0.910323
5,0.084500,0.044639,0.923226
6,0.084500,0.040241,0.922581
7,0.065000,0.037843,0.925806
8,0.057700,0.037051,0.927742


[I 2024-09-19 15:32:45,149] Trial 0 finished with value: 0.927741935483871 and parameters: {'num_train_epochs': 8, 'alpha': 0.3135032537062875, 'temperature': 2}. Best is trial 0 with value: 0.927741935483871.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.197174,0.569677
2,0.312600,0.100628,0.811935
3,0.312600,0.070571,0.871613
4,0.115900,0.057442,0.887097
5,0.079600,0.050582,0.892581
6,0.079600,0.048641,0.895161


[I 2024-09-19 15:37:10,449] Trial 1 finished with value: 0.8951612903225806 and parameters: {'num_train_epochs': 6, 'alpha': 0.979315817401237, 'temperature': 17}. Best is trial 0 with value: 0.927741935483871.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.202199,0.595484
2,0.324400,0.098591,0.828387
3,0.324400,0.065904,0.87871
4,0.114000,0.05115,0.893871
5,0.073400,0.042686,0.905161
6,0.073400,0.037792,0.909355
7,0.059000,0.035605,0.915484
8,0.052900,0.03473,0.915806


[I 2024-09-19 15:43:31,029] Trial 2 finished with value: 0.9158064516129032 and parameters: {'num_train_epochs': 8, 'alpha': 0.9664558038786579, 'temperature': 10}. Best is trial 0 with value: 0.927741935483871.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.192412,0.57871
2,0.308800,0.095389,0.822258
3,0.308800,0.064737,0.877419
4,0.110000,0.050768,0.891613
5,0.071800,0.042599,0.904194
6,0.071800,0.037887,0.908065
7,0.058000,0.035747,0.911613
8,0.052100,0.034889,0.913871


[I 2024-09-19 15:49:53,296] Trial 3 finished with value: 0.9138709677419354 and parameters: {'num_train_epochs': 8, 'alpha': 0.7609341909546662, 'temperature': 18}. Best is trial 0 with value: 0.927741935483871.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.201887,0.588065
2,0.322300,0.100059,0.822258
3,0.322300,0.068092,0.876774
4,0.115600,0.053779,0.889355
5,0.076300,0.045709,0.900323
6,0.076300,0.041688,0.904839
7,0.063000,0.04054,0.903871


[I 2024-09-19 15:55:28,991] Trial 4 finished with value: 0.9038709677419355 and parameters: {'num_train_epochs': 7, 'alpha': 0.8418102774144052, 'temperature': 11}. Best is trial 0 with value: 0.927741935483871.
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.421135,0.667419


In [None]:
# Display the outcome of the hyperparameter search.
print(best_run)

In [None]:
# Copy every tuned hyperparameter onto the student's training arguments
for k, v in best_run.hyperparameters.items():
    setattr(student_training_args, k, v)

# Give the distilled model its own output directory / repository name
distilled_ckpt = "distilbert-base-uncased-distilled-clinc"
student_training_args.output_dir = distilled_ckpt

# Build a fresh trainer so it uses the updated training arguments
distil_trainer = DistillationTrainer(
    model_init=student_init,
    teacher_model=teacher_model,
    args=student_training_args,
    train_dataset=clinc_enc["train"],
    eval_dataset=clinc_enc["validation"],
    compute_metrics=compute_metrics,
    tokenizer=student_tokenizer,
)

# The trailing semicolon keeps the notebook from echoing train()'s return value
distil_trainer.train();

In [None]:
# distil_trainer.push_to_hub("Training complete")

### Benchmarking Our Distilled Model

In [None]:
# Load the transformersbook distilled checkpoint into a text-classification
# pipeline, benchmark it on the CLINC test split, and merge the results into
# perf_metrics under the "Distillation" key (run_benchmark keys by optim_type).
distilled_ckpt = "transformersbook/distilbert-base-uncased-distilled-clinc"
pipe = pipeline("text-classification", model=distilled_ckpt)
optim_type = "Distillation"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type)
perf_metrics.update(pb.run_benchmark())

In [None]:
# Plot the metrics collected so far; plot_metrics is defined outside this
# section and receives the current optim_type as its second argument.
plot_metrics(perf_metrics, optim_type)

### Making Models Faster with Quantization

In [None]:
import matplotlib.pyplot as plt

# Pull one weight tensor (layer 0 attention out_lin) from the pipeline's model
# and plot a 250-bin histogram of its values restricted to [-0.3, 0.3].
state_dict = pipe.model.state_dict()
weights = state_dict["distilbert.transformer.layer.0.attention.out_lin.weight"]
plt.hist(weights.flatten().numpy(), bins=250, range=(-0.3,0.3), edgecolor="C0")
plt.show()

In [None]:
# Quantization parameters for an 8-bit signed target: the zero point is fixed
# at 0 and the scale is the float range of the weights divided by the width of
# the integer range [-128, 127].
zero_point = 0
scale = (weights.max() - weights.min()) / (127 - (-128))

In [None]:
# Quantize by hand: rescale, shift by the zero point, clamp to [-128, 127],
# round, then cast with .char().
(weights / scale + zero_point).clamp(-128, 127).round().char()

In [None]:
from torch import quantize_per_tensor

# Same quantization performed by PyTorch with the scale and zero point computed
# above; int_repr() shows the integer values stored in the quantized tensor.
dtype = torch.qint8
quantized_weights = quantize_per_tensor(weights, scale, zero_point, dtype)
quantized_weights.int_repr()

In [None]:
%%timeit
# Element-wise product, so this timing measures the same operation as the
# quantized q_fn.mul(...) cell it is compared against; a matrix product (@)
# here would benchmark a different operation.
weights * weights

In [None]:
from torch.nn.quantized import QFunctional

# Wrapper object whose methods (mul is used in the next cell) operate on
# quantized tensors.
q_fn = QFunctional()

In [None]:
%%timeit
# Time the quantized multiplication of the tensor with itself.
q_fn.mul(quantized_weights, quantized_weights)

In [None]:
import sys

# Ratio of the sizes reported by sys.getsizeof for the original tensor's
# storage versus the quantized tensor's storage.
sys.getsizeof(weights.storage()) / sys.getsizeof(quantized_weights.storage())

In [None]:
from torch.quantization import quantize_dynamic

# Reload the distilled checkpoint (tokenizer + model) with the model placed on CPU.
model_ckpt = "transformersbook/distilbert-base-uncased-distilled-clinc"
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = (AutoModelForSequenceClassification
         .from_pretrained(model_ckpt).to("cpu"))

# Dynamically quantize the model's nn.Linear modules to qint8.
model_quantized = quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)

In [None]:
# Wrap the quantized model in a pipeline, benchmark it on the CLINC test split,
# and merge the results into perf_metrics under "Distillation + quantization".
pipe = pipeline("text-classification", model=model_quantized,
                 tokenizer=tokenizer)
optim_type = "Distillation + quantization"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type=optim_type)
perf_metrics.update(pb.run_benchmark())

### Optimizing Inference with ONNX and the ONNX Runtime

In [None]:
import os
from psutil import cpu_count

# OpenMP settings via environment variables: thread count taken from psutil's
# cpu_count(), wait policy set to ACTIVE.
os.environ["OMP_NUM_THREADS"] = f"{cpu_count()}"
os.environ["OMP_WAIT_POLICY"] = "ACTIVE"

In [None]:
from transformers.convert_graph_to_onnx import convert

# Export the distilled checkpoint as a text-classification ONNX graph
# (opset 12) to onnx/model.onnx using the transformers convert helper.
model_ckpt = "transformersbook/distilbert-base-uncased-distilled-clinc"
onnx_model_path = Path("onnx/model.onnx")
convert(framework="pt", model=model_ckpt, tokenizer=tokenizer,
     output=onnx_model_path, opset=12, pipeline_name="text-classification")

In [None]:
from onnxruntime import (GraphOptimizationLevel, InferenceSession,
                         SessionOptions)

def create_model_for_provider(model_path, provider="CPUExecutionProvider"):
    """Open an ONNX Runtime ``InferenceSession`` for the model at ``model_path``.

    Args:
        model_path: Location of the ONNX file; it is converted with ``str()``
            before being handed to ONNX Runtime.
        provider: Name of the single execution provider the session may use.

    Returns:
        The configured ``InferenceSession`` with ``disable_fallback()`` applied.
    """
    session_options = SessionOptions()
    # Enable every graph optimization level and keep intra-op work on one thread.
    session_options.graph_optimization_level = GraphOptimizationLevel.ORT_ENABLE_ALL
    session_options.intra_op_num_threads = 1
    ort_session = InferenceSession(
        str(model_path), session_options, providers=[provider]
    )
    ort_session.disable_fallback()
    return ort_session

# Open an inference session on the exported ONNX file (default provider).
onnx_model = create_model_for_provider(onnx_model_path)

In [None]:
# Sanity check on the first encoded test example: drop the "labels" entry
# before feeding the rest to the session, then keep the first returned output
# (presumably the logits, per the variable name) and inspect its shape.
inputs = clinc_enc["test"][:1]
del inputs["labels"]
logits_onnx = onnx_model.run(None, inputs)[0]
logits_onnx.shape

In [None]:
# Index of the largest value in the ONNX output.
np.argmax(logits_onnx)

In [None]:
# Label stored for the first test example, for comparison with the index above.
clinc_enc["test"][0]["labels"]

In [None]:
from scipy.special import softmax

class OnnxPipeline:
    """Minimal text-classification callable backed by an ONNX Runtime session.

    Calling an instance returns a one-element list holding a dict with the keys
    ``"label"`` and ``"score"``. Label names are looked up through the
    notebook-level ``intents`` object.
    """

    def __init__(self, model, tokenizer):
        """Store the inference session (``model``) and the ``tokenizer``."""
        self.model = model
        self.tokenizer = tokenizer

    def __call__(self, query):
        """Classify ``query`` and return ``[{"label": ..., "score": ...}]``."""
        encoded = self.tokenizer(query, return_tensors="pt")
        # The session is fed NumPy arrays, so convert each tokenizer tensor.
        ort_inputs = {}
        for name, tensor in encoded.items():
            ort_inputs[name] = tensor.cpu().detach().numpy()
        first_output = self.model.run(None, ort_inputs)[0]
        # Keep the row belonging to the single query.
        logits = first_output[0, :]
        probs = softmax(logits)
        pred_idx = np.argmax(probs).item()
        label = intents.int2str(pred_idx)
        return [{"label": label, "score": probs[pred_idx]}]

In [None]:
# Wrap the ONNX session in the custom pipeline and try it on the sample query.
pipe = OnnxPipeline(onnx_model, tokenizer)
pipe(query)

In [None]:
class OnnxPerformanceBenchmark(PerformanceBenchmark):
    """PerformanceBenchmark variant whose size metric is read from a model file."""

    def __init__(self, *args, model_path, **kwargs):
        """Forward positional/keyword arguments to the base class and keep
        ``model_path`` (keyword-only) for ``compute_size``."""
        super().__init__(*args, **kwargs)
        self.model_path = model_path

    def compute_size(self):
        """Print and return the size of the file at ``self.model_path``.

        Returns:
            dict with one key, ``"size_mb"``: the file size in bytes divided
            by 1024 * 1024.
        """
        bytes_on_disk = Path(self.model_path).stat().st_size
        size_mb = bytes_on_disk / (1024 * 1024)
        print(f"Model size (MB) - {size_mb:.2f}")
        return {"size_mb": size_mb}

In [None]:
# Benchmark the ONNX-backed pipeline; model_path tells the benchmark which
# file to measure for the size metric. Results go into perf_metrics under
# "Distillation + ORT".
optim_type = "Distillation + ORT"
pb = OnnxPerformanceBenchmark(pipe, clinc["test"], optim_type,
                              model_path="onnx/model.onnx")
perf_metrics.update(pb.run_benchmark())

In [None]:
# Plot the metrics collected so far, passing the current optim_type.
plot_metrics(perf_metrics, optim_type)

In [None]:
from onnxruntime.quantization import quantize_dynamic, QuantType

# Quantize the exported ONNX model's weights to QInt8 and write the result to a
# second file.
# NOTE(review): the import just above rebinds the name quantize_dynamic, which
# an earlier cell imported from torch.quantization; re-running that earlier
# quantization cell after this one would call the ONNX Runtime function.
model_input = "onnx/model.onnx"
model_output = "onnx/model.quant.onnx"
quantize_dynamic(model_input, model_output, weight_type=QuantType.QInt8)

In [None]:
# Open a session on the quantized ONNX file, wrap it in the custom pipeline,
# and benchmark it; the size metric is measured on the quantized file. Results
# go into perf_metrics under "Distillation + ORT (quantized)".
onnx_quantized_model = create_model_for_provider(model_output)
pipe = OnnxPipeline(onnx_quantized_model, tokenizer)
optim_type = "Distillation + ORT (quantized)"
pb = OnnxPerformanceBenchmark(pipe, clinc["test"], optim_type,
                             model_path=model_output)
perf_metrics.update(pb.run_benchmark())