In [None]:
import matplotlib.pyplot as plt
from transformers import pipeline
import torch
from torch import quantize_per_tensor

# Load the fine-tuned DistilBERT classifier and grab its parameters.
bert_ckpt = "transformersbook/distilbert-base-uncased-finetuned-clinc"
pipe = pipeline("text-classification", model=bert_ckpt)
state_dict = pipe.model.state_dict()
# Not the last expression of the cell, so the key listing is not displayed.
state_dict.keys()

# Quantize one weight tensor to qint8: the observed value range is divided
# into 255 steps, with the zero point fixed at 0.
weights = state_dict["distilbert.transformer.layer.0.attention.out_lin.weight"]
scale = (weights.max() - weights.min()) / 255
zero_point = 0
dtype = torch.qint8
quantized_weights = quantize_per_tensor(weights, scale, zero_point, dtype)

# Side-by-side histograms of the original and the dequantized values.
# NOTE(review): the next two cells repeat the quantization and plotting steps.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
ax1.set_title('before quantization')
ax2.set_title('after quantization')

ax1.hist(weights.flatten().numpy(), bins=250, range=(-0.3, 0.3))
ax2.hist(quantized_weights.flatten().dequantize().numpy(), bins=250, range=(-0.3, 0.3))
plt.show()

In [None]:
# Quantize one attention output-projection weight tensor to qint8.
# The scale spreads the observed value range over 255 steps; zero point is 0.
weights = state_dict["distilbert.transformer.layer.0.attention.out_lin.weight"]
scale = (weights.max() - weights.min()) / 255
zero_point = 0
dtype = torch.qint8
quantized_weights = quantize_per_tensor(weights, scale, zero_point, dtype)

In [None]:
# Compare the value distribution before and after quantization.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 10))
ax1.set_title('before quantization')
ax2.set_title('after quantization')

# Both histograms use the same bins and range so they can be compared directly.
ax1.hist(weights.flatten().numpy(), bins=250, range=(-0.3, 0.3))
ax2.hist(quantized_weights.flatten().dequantize().numpy(), bins=250, range=(-0.3, 0.3))
plt.show()

In [None]:
%%timeit -r 10 -n 1000
# Baseline timing: `@` (matrix multiplication) on the float weight tensor.
weights @ weights

In [None]:
from torch.nn.quantized import QFunctional

# Wrapper object whose `mul` method is timed on quantized tensors in the next cell.
q_fn = QFunctional()

In [None]:
%%timeit -r 10 -n 1000
# Timing on the qint8 tensors.
# NOTE(review): `mul` looks like an element-wise product, whereas the float
# baseline cell uses `@` (matmul) - confirm the two timings are comparable.
q_fn.mul(quantized_weights, quantized_weights)

In [None]:
from torch.quantization import quantize_dynamic
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Load the fine-tuned BERT classifier on CPU, then build a dynamically
# quantized copy in which the nn.Linear modules use qint8.
model_ckpt = 'transformersbook/bert-base-uncased-finetuned-clinc'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt).to('cpu')
model_quantized = quantize_dynamic(model, {nn.Linear}, dtype=torch.qint8)

In [None]:
from datasets import load_metric
from pathlib import Path
from time import perf_counter
import numpy as np
from datasets import load_dataset

# Notebook-level globals used by PerformanceBenchmark.compute_accuracy:
# `intents` converts label strings to ids, `accuracy_score` computes the metric.
clinc = load_dataset("clinc_oos", "plus")
intents = clinc["test"].features["intent"]
accuracy_score = load_metric("accuracy")

class PerformanceBenchmark:
    """Measure the size, latency and accuracy of a text-classification pipeline.

    The class relies on two notebook-level globals: ``intents`` (maps a
    predicted label string to its integer id via ``str2int``) and
    ``accuracy_score`` (metric object whose ``compute`` is called).

    Args:
        pipeline: Callable taking a text and returning a list whose first item
            is a dict with a ``"label"`` key. It must also expose
            ``.model.state_dict()`` for :meth:`compute_size`.
        dataset: Iterable of examples with ``"text"`` and ``"intent"`` fields.
        optim_type: Key under which :meth:`run_benchmark` stores the metrics.
    """

    def __init__(self, pipeline, dataset, optim_type="BERT baseline"):
        self.pipeline = pipeline
        self.dataset = dataset
        self.optim_type = optim_type

    def compute_accuracy(self):
        """Run the pipeline on every example and return the accuracy dict.

        Returns:
            Whatever ``accuracy_score.compute`` returns; its ``"accuracy"``
            entry is also printed.
        """
        preds, labels = [], []
        for example in self.dataset:
            pred = self.pipeline(example["text"])[0]["label"]
            # Predictions are label strings; references are integer ids.
            preds.append(intents.str2int(pred))
            labels.append(example["intent"])
        accuracy = accuracy_score.compute(predictions=preds, references=labels)
        print(f"valid accuracy : {accuracy['accuracy']:.4f}")
        return accuracy

    def compute_size(self):
        """Serialize the model's state dict to disk and report its size.

        Returns:
            ``{"size_mb": <file size in MiB>}``.
        """
        state_dict = self.pipeline.model.state_dict()
        tmp_path = Path("model.pt")
        try:
            torch.save(state_dict, tmp_path)
            size_mb = tmp_path.stat().st_size / (1024 * 1024)
        finally:
            # Always remove the temporary file, even if saving or stat failed.
            if tmp_path.exists():
                tmp_path.unlink()
        print(f"model size : {size_mb:.4f} MB")
        return {"size_mb": size_mb}

    def time_pipeline(self, query="What is the pin number for my account?"):
        """Time repeated single-query calls of the pipeline.

        Args:
            query: Text sent to the pipeline on every call.

        Returns:
            ``{"time_avg_ms": mean, "time_std_ms": std}`` over 100 timed calls,
            both in milliseconds.
        """
        # Untimed warm-up calls; their results are discarded.
        for _ in range(10):
            _ = self.pipeline(query)
        latencies = []
        for _ in range(100):
            start_time = perf_counter()
            _ = self.pipeline(query)
            latencies.append(perf_counter() - start_time)
        time_avg_ms = 1000 * np.mean(latencies)
        time_std_ms = 1000 * np.std(latencies)
        # "+/-" replaces the former "+\-", which was an invalid escape sequence.
        print(f"time avg : {time_avg_ms:.4f} ms +/- {time_std_ms:.4f} ms")
        return {"time_avg_ms": time_avg_ms, "time_std_ms": time_std_ms}

    def run_benchmark(self):
        """Collect size, latency and accuracy under the ``optim_type`` key.

        Returns:
            ``{optim_type: {...merged metric dicts...}}``.
        """
        metrics = {}
        metrics[self.optim_type] = self.compute_size()
        metrics[self.optim_type].update(self.time_pipeline())
        metrics[self.optim_type].update(self.compute_accuracy())
        return metrics

In [None]:
# Benchmark the unquantized model on the test split; this cell creates
# `perf_metrics`, which the quantized run below extends.
pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
optim_type = "normal"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type)
perf_metrics = pb.run_benchmark()

In [None]:
# Benchmark the dynamically quantized model on the same split and merge its
# metrics into `perf_metrics` under the "quantization" key.
pipe = pipeline("text-classification", model=model_quantized, tokenizer=tokenizer)
optim_type = "quantization"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type)
perf_metrics.update(pb.run_benchmark())

In [None]:
# Display the collected metrics, keyed by optimisation type.
perf_metrics

In [1]:
from transformers import TrainingArguments

class DistilTrainingArguments(TrainingArguments):
    """TrainingArguments extended with two distillation hyper-parameters.

    Args:
        alpha: Stored as ``self.alpha`` (default 0.5).
        temperature: Stored as ``self.temperature`` (default 2.0).

    All other positional and keyword arguments are forwarded unchanged to
    ``TrainingArguments``.
    """

    def __init__(self, *args, alpha=0.5, temperature=2.0, **kwargs):
        # Let the base class consume the standard arguments first.
        super().__init__(*args, **kwargs)
        self.temperature = temperature
        self.alpha = alpha
        
import torch.nn as nn
import torch.nn.functional as F
from transformers import Trainer

class DistilTrainer(Trainer):
    """Trainer whose loss mixes the student's own loss with a distillation term.

    The combined loss is
    ``alpha * loss_ce + (1 - alpha) * T**2 * KL(teacher || student)``, where
    both distributions are softened with temperature ``T``. ``alpha`` and
    ``temperature`` are read from ``self.args``.

    Args:
        teacher: Model called with the same inputs as the student to produce
            the target logits. It is switched to eval mode on construction.
    """

    def __init__(self, *args, teacher=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.teacher = teacher
        if self.teacher is not None:
            # The teacher only provides targets; keep it in inference mode so
            # train-time layers such as dropout do not perturb its logits.
            self.teacher.eval()

    def compute_loss(self, model, inputs, return_outputs=False):
        """Return the weighted sum of the student loss and the distillation loss.

        Args:
            model: The student model being trained.
            inputs: Keyword arguments forwarded to both student and teacher.
            return_outputs: When true, also return the student's outputs.

        Returns:
            ``loss`` or ``(loss, student_outputs)``.
        """
        # Student forward pass: its own loss and logits.
        student_outputs = model(**inputs)
        loss_ce = student_outputs.loss
        student_logits = student_outputs.logits

        # Teacher forward pass; no gradients are tracked for the teacher.
        with torch.no_grad():
            teacher_logits = self.teacher(**inputs).logits

        # Distillation term: KL divergence between the temperature-softened
        # distributions, rescaled by T**2.
        temperature = self.args.temperature
        kl_div = nn.KLDivLoss(reduction='batchmean')
        loss_kd = temperature**2 * kl_div(
            F.log_softmax(student_logits / temperature, dim=-1),
            F.softmax(teacher_logits / temperature, dim=-1),
        )

        # Weighted combination of the two terms.
        loss = self.args.alpha * loss_ce + (1 - self.args.alpha) * loss_kd

        return (loss, student_outputs) if return_outputs else loss

In [2]:
from datasets import load_dataset, load_metric
from transformers import AutoModelForSequenceClassification

# Use the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

clinc = load_dataset("clinc_oos", "plus")
intents = clinc["test"].features["intent"]
num_labels = intents.num_classes

accuracy_score = load_metric("accuracy")

# Base (not CLINC-fine-tuned) checkpoints; in this cell the two models are
# only used to compare parameter counts. `teacher` and `teacher_ckpt` are
# reassigned to a fine-tuned checkpoint in a later cell.
student_ckpt = "distilbert-base-uncased"
teacher_ckpt = "bert-base-uncased"

student = (AutoModelForSequenceClassification.from_pretrained(student_ckpt, num_labels=num_labels).to(device))
teacher = (AutoModelForSequenceClassification.from_pretrained(teacher_ckpt, num_labels=num_labels).to(device))

# Prints the student's parameter count as a percentage of the teacher's.
print(f"teacher 대비 student의 파라미터 비율: {student.num_parameters() / teacher.num_parameters() * 100:.4f}%")

In [3]:
from transformers import AutoTokenizer
from datasets import load_dataset, load_metric
import numpy as np

# Reload the dataset, label feature and metric (same calls as the previous cell).
clinc = load_dataset("clinc_oos", "plus")
intents = clinc["test"].features["intent"]
accuracy_score = load_metric("accuracy")

# The tokenizer comes from the student checkpoint.
student_ckpt = "distilbert-base-uncased"
student_tokenizer = AutoTokenizer.from_pretrained(student_ckpt)

def tokenize_text(batch):
    """Tokenize the ``"text"`` field of ``batch`` with the student tokenizer.

    Truncation is enabled; the tokenizer's result is returned unchanged.
    """
    texts = batch["text"]
    return student_tokenizer(texts, truncation=True)

# Tokenize every split in batches and drop the raw text column.
clinc_enc = clinc.map(tokenize_text, batched=True, remove_columns=['text'])
# Presumably renamed so the trainer/model receives the targets as `labels` - verify.
clinc_enc = clinc_enc.rename_column('intent', 'labels')

def compute_metrics(pred):
    """Compute accuracy from a ``(scores, labels)`` pair.

    The predicted class is the arg-max of the scores along axis 1; the result
    of ``accuracy_score.compute`` is returned as-is.
    """
    scores, references = pred
    predicted_ids = np.argmax(scores, axis=1)
    return accuracy_score.compute(predictions=predicted_ids, references=references)

Found cached dataset clinc_oos (/root/.cache/huggingface/datasets/clinc_oos/plus/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1)


  0%|          | 0/3 [00:00<?, ?it/s]

  accuracy_score = load_metric("accuracy")
Loading cached processed dataset at /root/.cache/huggingface/datasets/clinc_oos/plus/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1/cache-fea0c49e7fa93460.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/clinc_oos/plus/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1/cache-7fecf478efcd7d09.arrow
Loading cached processed dataset at /root/.cache/huggingface/datasets/clinc_oos/plus/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1/cache-5bc73521e2f625e6.arrow


In [14]:
batch_size = 128

# Training configuration for the student. With alpha=1 the (1 - alpha) weight
# on the distillation term in DistilTrainer.compute_loss is zero, so only the
# student's own loss contributes.
finetuned_ckpt = "distilbert-base-uncased-finetuned-clinc"
student_training_args = DistilTrainingArguments(output_dir=finetuned_ckpt, 
                                                 evaluation_strategy='epoch', 
                                                 num_train_epochs=100,
                                                 logging_steps=100,
                                                 learning_rate=2e-5, 
                                                 per_device_train_batch_size=batch_size, 
                                                 per_device_eval_batch_size=batch_size, 
                                                 alpha=1, 
                                                 temperature=2,
                                                 weight_decay=0.01, 
                                                 push_to_hub=False)

In [15]:
import torch
from transformers import pipeline, AutoConfig, AutoModelForSequenceClassification

# In this cell the fine-tuned pipeline is loaded only to read the label
# mappings from its model config.
bert_ckpt = "transformersbook/distilbert-base-uncased-finetuned-clinc"
pipe = pipeline("text-classification", model=bert_ckpt)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

id2label = pipe.model.config.id2label
label2id = pipe.model.config.label2id

# Student config carrying the number of classes and the label mappings.
num_labels = intents.num_classes
student_config = (AutoConfig.from_pretrained(student_ckpt, num_labels=num_labels, id2label=id2label, label2id=label2id))

def student_init():
    """Build a new student model from ``student_ckpt`` and ``student_config``.

    The model is moved to ``device`` and the result of ``.to(device)`` is
    returned.
    """
    student_model = AutoModelForSequenceClassification.from_pretrained(
        student_ckpt, config=student_config
    )
    return student_model.to(device)

In [17]:
# Replace the teacher with the CLINC-fine-tuned BERT checkpoint.
teacher_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
teacher = (AutoModelForSequenceClassification.from_pretrained(teacher_ckpt, num_labels=num_labels).to(device))

# The student is created through `student_init` (passed as `model_init`);
# training uses the train split and evaluates on the validation split.
distilbert_trainer = DistilTrainer(model_init=student_init, 
                                    teacher=teacher, 
                                    args=student_training_args, 
                                    train_dataset=clinc_enc['train'],
                                    eval_dataset=clinc_enc['validation'],
                                    compute_metrics=compute_metrics,
                                    tokenizer=student_tokenizer)

distilbert_trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,4.994573,0.025484
2,No log,4.952716,0.047742
3,No log,4.894671,0.066129
4,No log,4.823017,0.108387
5,No log,4.741663,0.168065
6,No log,4.650447,0.244194
7,No log,4.553691,0.341935
8,No log,4.450787,0.444194
9,No log,4.342663,0.529032
10,No log,4.234004,0.585806


TrainOutput(global_step=500, training_loss=1.813368507385254, metrics={'train_runtime': 993.422, 'train_samples_per_second': 1535.098, 'train_steps_per_second': 0.503, 'total_flos': 1.191182670761034e+16, 'train_loss': 1.813368507385254, 'epoch': 100.0})

In [20]:
# Distillation run with alpha=0.7, temperature=2.
# NOTE(review): every training cell uses the same `output_dir`, so checkpoints
# from different runs presumably land in the same directory - confirm which
# run a later `checkpoint-500` load refers to.
batch_size = 512

finetuned_ckpt = "distilbert-base-uncased-finetuned-clinc"
student_training_args = DistilTrainingArguments(output_dir=finetuned_ckpt, 
                                                 evaluation_strategy='epoch', 
                                                 num_train_epochs=100,
                                                 logging_steps=100,
                                                 learning_rate=2e-5, 
                                                 per_device_train_batch_size=batch_size, 
                                                 per_device_eval_batch_size=batch_size, 
                                                 alpha=0.7, 
                                                 temperature=2,
                                                 weight_decay=0.01, 
                                                 push_to_hub=False)

# Reload the fine-tuned teacher for this run.
teacher_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
teacher = (AutoModelForSequenceClassification.from_pretrained(teacher_ckpt, num_labels=num_labels).to(device))

distilbert_trainer = DistilTrainer(model_init=student_init, 
                                    teacher=teacher, 
                                    args=student_training_args, 
                                    train_dataset=clinc_enc['train'],
                                    eval_dataset=clinc_enc['validation'],
                                    compute_metrics=compute_metrics,
                                    tokenizer=student_tokenizer)

distilbert_trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,3.824503,0.025806
2,No log,3.793311,0.047097
3,No log,3.748479,0.068065
4,No log,3.692678,0.109677
5,No log,3.628998,0.174516
6,No log,3.557847,0.256129
7,No log,3.482363,0.353871
8,No log,3.402644,0.452258
9,No log,3.319339,0.529677
10,No log,3.235571,0.584194


TrainOutput(global_step=500, training_loss=1.4507065353393556, metrics={'train_runtime': 999.5099, 'train_samples_per_second': 1525.748, 'train_steps_per_second': 0.5, 'total_flos': 1.191182670761034e+16, 'train_loss': 1.4507065353393556, 'epoch': 100.0})

In [19]:
# Distillation run with alpha=0.7, temperature=10 (only the temperature
# differs from the alpha=0.7 / temperature=2 cell).
# NOTE(review): same `output_dir` as the other training cells.
batch_size = 512

finetuned_ckpt = "distilbert-base-uncased-finetuned-clinc"
student_training_args = DistilTrainingArguments(output_dir=finetuned_ckpt, 
                                                 evaluation_strategy='epoch', 
                                                 num_train_epochs=100,
                                                 logging_steps=100,
                                                 learning_rate=2e-5, 
                                                 per_device_train_batch_size=batch_size, 
                                                 per_device_eval_batch_size=batch_size, 
                                                 alpha=0.7, 
                                                 temperature=10,
                                                 weight_decay=0.01, 
                                                 push_to_hub=False)

# Reload the fine-tuned teacher for this run.
teacher_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
teacher = (AutoModelForSequenceClassification.from_pretrained(teacher_ckpt, num_labels=num_labels).to(device))

distilbert_trainer = DistilTrainer(model_init=student_init, 
                                    teacher=teacher, 
                                    args=student_training_args, 
                                    train_dataset=clinc_enc['train'],
                                    eval_dataset=clinc_enc['validation'],
                                    compute_metrics=compute_metrics,
                                    tokenizer=student_tokenizer)

distilbert_trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,3.678409,0.026774
2,No log,3.649515,0.047742
3,No log,3.607407,0.067097
4,No log,3.55503,0.108065
5,No log,3.495088,0.174839
6,No log,3.428209,0.253548
7,No log,3.357166,0.356774
8,No log,3.282156,0.46
9,No log,3.203684,0.533226
10,No log,3.124844,0.58871


TrainOutput(global_step=500, training_loss=1.4141137008666993, metrics={'train_runtime': 1001.5923, 'train_samples_per_second': 1522.576, 'train_steps_per_second': 0.499, 'total_flos': 1.191182670761034e+16, 'train_loss': 1.4141137008666993, 'epoch': 100.0})

In [21]:
# Distillation run with alpha=0.5, temperature=5 and 200 epochs.
# NOTE(review): same `output_dir` as the other training cells.
batch_size = 512

finetuned_ckpt = "distilbert-base-uncased-finetuned-clinc"
student_training_args = DistilTrainingArguments(output_dir=finetuned_ckpt, 
                                                 evaluation_strategy='epoch', 
                                                 num_train_epochs=200,
                                                 logging_steps=100,
                                                 learning_rate=2e-5, 
                                                 per_device_train_batch_size=batch_size, 
                                                 per_device_eval_batch_size=batch_size, 
                                                 alpha=0.5, 
                                                 temperature=5,
                                                 weight_decay=0.01, 
                                                 push_to_hub=False)

# Reload the fine-tuned teacher for this run.
teacher_ckpt = "transformersbook/bert-base-uncased-finetuned-clinc"
teacher = (AutoModelForSequenceClassification.from_pretrained(teacher_ckpt, num_labels=num_labels).to(device))

distilbert_trainer = DistilTrainer(model_init=student_init, 
                                    teacher=teacher, 
                                    args=student_training_args, 
                                    train_dataset=clinc_enc['train'],
                                    eval_dataset=clinc_enc['validation'],
                                    compute_metrics=compute_metrics,
                                    tokenizer=student_tokenizer)

distilbert_trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,2.837262,0.025806
2,No log,2.816137,0.046129
3,No log,2.783457,0.068387
4,No log,2.741973,0.112581
5,No log,2.69393,0.185806
6,No log,2.640179,0.274194
7,No log,2.582905,0.373226
8,No log,2.522292,0.464516
9,No log,2.45923,0.535806
10,No log,2.395404,0.586774


TrainOutput(global_step=1000, training_loss=0.573679928779602, metrics={'train_runtime': 1991.9831, 'train_samples_per_second': 1531.137, 'train_steps_per_second': 0.502, 'total_flos': 2.381483026455019e+16, 'train_loss': 0.573679928779602, 'epoch': 200.0})

In [22]:
from datasets import load_metric
from pathlib import Path
from time import perf_counter
import numpy as np
from datasets import load_dataset

# Notebook-level globals used by PerformanceBenchmark.compute_accuracy:
# `intents` converts label strings to ids, `accuracy_score` computes the metric.
clinc = load_dataset("clinc_oos", "plus")
intents = clinc["test"].features["intent"]
accuracy_score = load_metric("accuracy")

class PerformanceBenchmark:
    """Measure the size, latency and accuracy of a text-classification pipeline.

    The class relies on two notebook-level globals: ``intents`` (maps a
    predicted label string to its integer id via ``str2int``) and
    ``accuracy_score`` (metric object whose ``compute`` is called).

    Args:
        pipeline: Callable taking a text and returning a list whose first item
            is a dict with a ``"label"`` key. It must also expose
            ``.model.state_dict()`` for :meth:`compute_size`.
        dataset: Iterable of examples with ``"text"`` and ``"intent"`` fields.
        optim_type: Key under which :meth:`run_benchmark` stores the metrics.
    """

    def __init__(self, pipeline, dataset, optim_type="BERT baseline"):
        self.pipeline = pipeline
        self.dataset = dataset
        self.optim_type = optim_type

    def compute_accuracy(self):
        """Run the pipeline on every example and return the accuracy dict.

        Returns:
            Whatever ``accuracy_score.compute`` returns; its ``"accuracy"``
            entry is also printed.
        """
        preds, labels = [], []
        for example in self.dataset:
            pred = self.pipeline(example["text"])[0]["label"]
            # Predictions are label strings; references are integer ids.
            preds.append(intents.str2int(pred))
            labels.append(example["intent"])
        accuracy = accuracy_score.compute(predictions=preds, references=labels)
        print(f"valid accuracy : {accuracy['accuracy']:.4f}")
        return accuracy

    def compute_size(self):
        """Serialize the model's state dict to disk and report its size.

        Returns:
            ``{"size_mb": <file size in MiB>}``.
        """
        state_dict = self.pipeline.model.state_dict()
        tmp_path = Path("model.pt")
        try:
            torch.save(state_dict, tmp_path)
            size_mb = tmp_path.stat().st_size / (1024 * 1024)
        finally:
            # Always remove the temporary file, even if saving or stat failed.
            if tmp_path.exists():
                tmp_path.unlink()
        print(f"model size : {size_mb:.4f} MB")
        return {"size_mb": size_mb}

    def time_pipeline(self, query="What is the pin number for my account?"):
        """Time repeated single-query calls of the pipeline.

        Args:
            query: Text sent to the pipeline on every call.

        Returns:
            ``{"time_avg_ms": mean, "time_std_ms": std}`` over 100 timed calls,
            both in milliseconds.
        """
        # Untimed warm-up calls; their results are discarded.
        for _ in range(10):
            _ = self.pipeline(query)
        latencies = []
        for _ in range(100):
            start_time = perf_counter()
            _ = self.pipeline(query)
            latencies.append(perf_counter() - start_time)
        time_avg_ms = 1000 * np.mean(latencies)
        time_std_ms = 1000 * np.std(latencies)
        # "+/-" replaces the former "+\-", which was an invalid escape sequence.
        print(f"time avg : {time_avg_ms:.4f} ms +/- {time_std_ms:.4f} ms")
        return {"time_avg_ms": time_avg_ms, "time_std_ms": time_std_ms}

    def run_benchmark(self):
        """Collect size, latency and accuracy under the ``optim_type`` key.

        Returns:
            ``{optim_type: {...merged metric dicts...}}``.
        """
        metrics = {}
        metrics[self.optim_type] = self.compute_size()
        metrics[self.optim_type].update(self.time_pipeline())
        metrics[self.optim_type].update(self.compute_accuracy())
        return metrics

Found cached dataset clinc_oos (/root/.cache/huggingface/datasets/clinc_oos/plus/1.0.0/abcc41d382f8137f039adc747af44714941e8196e845dfbdd8ae7a7e020e6ba1)


  0%|          | 0/3 [00:00<?, ?it/s]

In [23]:
# Original fine-tuned BERT, benchmarked on the *validation* split
# (the other benchmark cells use the test split).
model_ckpt = 'transformersbook/bert-base-uncased-finetuned-clinc'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt).to('cpu')

pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
optim_type = "original-BERT"
pb = PerformanceBenchmark(pipe, clinc["validation"], optim_type)
# Rebinds `perf_metrics`; any earlier contents are no longer referenced.
perf_metrics = pb.run_benchmark()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--transformersbook--bert-base-uncased-finetuned-clinc/snapshots/795b076da71dc236dde692338e21560cbbffa6e4/config.json
Model config BertConfig {
  "_name_or_path": "transformersbook/bert-base-uncased-finetuned-clinc",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "restaurant_reviews",
    "1": "nutrition_info",
    "2": "account_blocked",
    "3": "oil_change_how",
    "4": "time",
    "5": "weather",
    "6": "redeem_rewards",
    "7": "interest_rate",
    "8": "gas_type",
    "9": "accept_reservations",
    "10": "smart_home",
    "11": "user_name",
    "12": "report_lost_card",
    "13": "repeat",
    "14": "whisper_mode",
    "15": "what_are_your_hobbies",
    "16": "order",
    "

model size : 418.1508 MB
time avg : 21.8375 ms +\- 2.5211 ms
valid accuracy : 0.9429


In [24]:
# Original fine-tuned BERT, benchmarked on the test split.

model_ckpt = 'transformersbook/bert-base-uncased-finetuned-clinc'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt).to('cpu')

pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
optim_type = "normal"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type)
# Rebinds `perf_metrics`, replacing the result of the previous benchmark cell.
perf_metrics = pb.run_benchmark()

loading configuration file config.json from cache at /root/.cache/huggingface/hub/models--transformersbook--bert-base-uncased-finetuned-clinc/snapshots/795b076da71dc236dde692338e21560cbbffa6e4/config.json
Model config BertConfig {
  "_name_or_path": "transformersbook/bert-base-uncased-finetuned-clinc",
  "architectures": [
    "BertForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "restaurant_reviews",
    "1": "nutrition_info",
    "2": "account_blocked",
    "3": "oil_change_how",
    "4": "time",
    "5": "weather",
    "6": "redeem_rewards",
    "7": "interest_rate",
    "8": "gas_type",
    "9": "accept_reservations",
    "10": "smart_home",
    "11": "user_name",
    "12": "report_lost_card",
    "13": "repeat",
    "14": "whisper_mode",
    "15": "what_are_your_hobbies",
    "16": "order",
    "

model size : 418.1508 MB
time avg : 18.7100 ms +\- 0.7789 ms
valid accuracy : 0.8673


In [26]:
# Locally trained DistilBERT checkpoint (not the original BERT), benchmarked
# on the test split.
# NOTE(review): the "normal" label is the same one used for the BERT run above.

model_ckpt = './distilbert-base-uncased-finetuned-clinc/checkpoint-500'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt).to('cpu')

pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
optim_type = "normal"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type)
# Rebinds `perf_metrics`, replacing the result of the previous benchmark cell.
perf_metrics = pb.run_benchmark()

loading file vocab.txt
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json
loading configuration file ./distilbert-base-uncased-finetuned-clinc/checkpoint-500/config.json
Model config DistilBertConfig {
  "_name_or_path": "./distilbert-base-uncased-finetuned-clinc/checkpoint-500",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "restaurant_reviews",
    "1": "nutrition_info",
    "2": "account_blocked",
    "3": "oil_change_how",
    "4": "time",
    "5": "weather",
    "6": "redeem_rewards",
    "7": "interest_rate",
    "8": "gas_type",
    "9": "accept_reservations",
    "10": "smart_home",
    "11": "user_name",
    "12": "report_lost_card",
    "13": "repeat",
    "14": "whisper_mode",
    "15": "what_are_your_hobbies",
    "16": "order",
    "17": "jump_

model size : 255.8814 MB
time avg : 9.1705 ms +\- 0.9124 ms
valid accuracy : 0.8705


In [28]:
# Distillation model: the locally trained DistilBERT checkpoint, benchmarked
# on the test split under the "distilBert" label.

model_ckpt = './distilbert-base-uncased-finetuned-clinc/checkpoint-500'
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
model = AutoModelForSequenceClassification.from_pretrained(model_ckpt).to('cpu')

pipe = pipeline("text-classification", model=model, tokenizer=tokenizer)
optim_type = "distilBert"
pb = PerformanceBenchmark(pipe, clinc["test"], optim_type)
# Rebinds `perf_metrics`, replacing the result of the previous benchmark cell.
perf_metrics = pb.run_benchmark()

loading configuration file ./distilbert-base-uncased-finetuned-clinc/checkpoint-500/config.json
Model config DistilBertConfig {
  "_name_or_path": "./distilbert-base-uncased-finetuned-clinc/checkpoint-500",
  "activation": "gelu",
  "architectures": [
    "DistilBertForSequenceClassification"
  ],
  "attention_dropout": 0.1,
  "dim": 768,
  "dropout": 0.1,
  "hidden_dim": 3072,
  "id2label": {
    "0": "restaurant_reviews",
    "1": "nutrition_info",
    "2": "account_blocked",
    "3": "oil_change_how",
    "4": "time",
    "5": "weather",
    "6": "redeem_rewards",
    "7": "interest_rate",
    "8": "gas_type",
    "9": "accept_reservations",
    "10": "smart_home",
    "11": "user_name",
    "12": "report_lost_card",
    "13": "repeat",
    "14": "whisper_mode",
    "15": "what_are_your_hobbies",
    "16": "order",
    "17": "jump_start",
    "18": "schedule_meeting",
    "19": "meeting_schedule",
    "20": "freeze_account",
    "21": "what_song",
    "22": "meaning_of_life",
    "2

model size : 255.8814 MB
time avg : 9.7953 ms +\- 0.3365 ms
valid accuracy : 0.8705
