In [1]:
import torch
import torch.nn as nn
from transformers import (
    BertForSequenceClassification,
    AutoTokenizer,
    AutoModel,
    BertPreTrainedModel,
)
from transformers import pipeline
from transformers import BertTokenizer, BertForSequenceClassification
from transformers.modeling_outputs import SequenceClassifierOutput
import torch.nn as nn
from transformers.models.auto.modeling_auto import (
    MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES,
)

In [2]:
from typing import Union


class BertWithCustomHead(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)

        # Define the custom classification head
        self.custom_dropout = nn.Dropout(p=0.3)
        self.classifier = nn.Sequential(
            nn.Linear(self.config.hidden_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, self.config.num_labels),
        )

    def forward(
        self,
        input_ids=None,
        attention_mask=None,
        token_type_ids=None,
        labels=None,
        return_dict=True,
    ) -> Union[tuple[torch.Tensor], SequenceClassifierOutput]:
        # Get outputs from the base BERT model
        outputs = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            return_dict=return_dict,
        )

        # Extract the [CLS] pooled output
        pooled_output = outputs.pooler_output

        # Pass pooled output through the custom classification head
        pooled_output = self.custom_dropout(pooled_output)
        logits = self.classifier(pooled_output)

        # Return logits and loss if labels are provided
        loss = None
        if labels is not None:
            loss_fn = nn.CrossEntropyLoss()
            loss = loss_fn(logits, labels)

        if not return_dict:
            return (loss, logits) if loss is not None else logits

        return SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )


MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES["bert_with_custom_head"] = (
    "BertWithCustomHead"
)
# del MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES["bert_with_custom_head"]

In [4]:
# A bunch of hacky tactics to make everything work
import transformers
transformers.BertWithCustomHead = BertWithCustomHead
from optimum.exporters.tasks import TasksManager
TasksManager._CUSTOM_CLASSES[("pt", "bert", "text-classification")] = ("transformers", "BertWithCustomHead")
def dummy(*args, **kwargs):
    return "transformers"
TasksManager.infer_library_from_model = dummy
from optimum.onnxruntime import ORTModelForSequenceClassification

from transformers import AutoTokenizer

model_checkpoint = "./roman-classifier"
save_directory = "onnx/"

# Load a model from transformers and export it to ONNX
ort_model = ORTModelForSequenceClassification.from_pretrained(model_checkpoint, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)

# Save the onnx model and tokenizer
ort_model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

*****************here...
I have loaded the model Sequential(
  (0): Linear(in_features=128, out_features=128, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.3, inplace=False)
  (3): Linear(in_features=128, out_features=64, bias=True)
  (4): ReLU()
  (5): Linear(in_features=64, out_features=3, bias=True)
)


  if not return_dict:
  if not return_dict:
  if not return_dict:


('onnx/tokenizer_config.json',
 'onnx/special_tokens_map.json',
 'onnx/vocab.txt',
 'onnx/added_tokens.json',
 'onnx/tokenizer.json')

In [13]:
onnx_inference_pipeline = pipeline(
    "text-classification",
    model=ort_model,
    tokenizer=tokenizer,
)

Device set to use mps:0


In [14]:
model_path = "./roman-classifier"
model = BertWithCustomHead.from_pretrained(model_path)

In [15]:
inference_pipeline = pipeline(
    "text-classification",
    model=model,
    tokenizer=AutoTokenizer.from_pretrained(model_path),
)

Device set to use mps:0


In [16]:
inference_pipeline(["hi how you doing", "tero baje", "arigato gozaimasu"])

[{'label': 'LABEL_0', 'score': 0.7705088257789612},
 {'label': 'LABEL_2', 'score': 0.826322615146637},
 {'label': 'LABEL_1', 'score': 0.5561591982841492}]

In [17]:
import time
import pandas as pd

df = pd.read_csv("datasets/dataset.csv")

In [18]:
model.eval()
for i in range(10):
    start = time.time()
    df["preds"] = df.sentences.apply(lambda x: inference_pipeline(x)[0])
    end = time.time()
    print("Total time taken: ", end - start)

Total time taken:  6.128638982772827
Total time taken:  4.91447901725769
Total time taken:  4.506620168685913
Total time taken:  4.699854850769043
Total time taken:  4.491797924041748
Total time taken:  4.353509902954102
Total time taken:  4.253670692443848
Total time taken:  4.248823165893555
Total time taken:  4.246496200561523
Total time taken:  4.304681777954102


In [19]:
model.eval()
for i in range(10):
    start = time.time()
    df["preds"] = df.sentences.apply(lambda x: onnx_inference_pipeline(x)[0])
    end = time.time()
    print("Total time taken: ", end - start)

Total time taken:  1.264888048171997
Total time taken:  1.3372550010681152
Total time taken:  1.3093030452728271
Total time taken:  1.496689796447754
Total time taken:  1.380295991897583
Total time taken:  1.290701150894165
Total time taken:  1.3331658840179443
Total time taken:  1.6963269710540771
Total time taken:  2.06199312210083
Total time taken:  1.2847800254821777


In [22]:
from optimum.onnxruntime import ORTModelForSequenceClassification

In [110]:
tokenizer = AutoTokenizer.from_pretrained(model_path)
dummy_model_input = tokenizer("Hi there", return_tensors="pt")

In [111]:
dummy_model_input

{'input_ids': tensor([[ 101, 7632, 2045,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1]])}

*****************here...
********************model task text-classification model_type bert model_class_name None


ValueError: The library name could not be automatically inferred. If using the command-line, please provide the argument --library {transformers,diffusers,timm,sentence_transformers}. Example: `--library diffusers`.

In [113]:
import onnx
import onnxruntime as ort
import numpy as np

# Load the ONNX model
onnx_model_path = "onnx_model/torch-model.onnx"
onn_model = onnx.load(onnx_model_path)
ort_session = ort.InferenceSession(onnx_model_path)


# Run inference
outputs = ort_session.run(
    ["output"],  # Output names (None returns all outputs)
    tokenizer(["Yo"], return_tensors="pt"),  # Input dictionary
)

TypeError: run(): incompatible function arguments. The following argument types are supported:
    1. (self: onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession, arg0: list[str], arg1: dict[str, object], arg2: onnxruntime.capi.onnxruntime_pybind11_state.RunOptions) -> list

Invoked with: <onnxruntime.capi.onnxruntime_pybind11_state.InferenceSession object at 0x14ba59070>, ['output'], {'input_ids': tensor([[  101, 10930,   102]]), 'token_type_ids': tensor([[0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1]])}, None

In [121]:
model.config.architectures

['CustomModelForSequenceClassification']