In [2]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import numpy as np



In [7]:
model_name = "syke9p3/bert-multilabel-tagalog-hate-speech-classifier"

tokenizer = AutoTokenizer.from_pretrained("syke9p3/bert-multilabel-tagalog-hate-speech-classifier")
model = AutoModelForSequenceClassification.from_pretrained("syke9p3/bert-multilabel-tagalog-hate-speech-classifier")

In [3]:
LABELS = ['Age', 'Gender', 'Physical', 'Race', 'Religion', 'Others']
id2label = {idx:label for idx, label in enumerate(LABELS)}
label2id = {label:idx for idx, label in enumerate(LABELS)}

text = "Putanginang bata to feeling amerikano"

encoding = tokenizer(text, return_tensors="pt")
encoding = {k: v.to(model.device) for k,v in encoding.items()}

In [4]:
# Get the shape of the input tensors
input_shape = encoding["input_ids"].shape
print(f"Input shape: {input_shape}")

Input shape: torch.Size([1, 8])


In [5]:
# Verify shapes
for key, tensor in encoding.items():
    print(f"{key}: {tensor.shape}")

input_ids: torch.Size([1, 8])
token_type_ids: torch.Size([1, 8])
attention_mask: torch.Size([1, 8])


In [6]:
dummy_input = {k: torch.zeros_like(v) for k, v in encoding.items()}


In [7]:
outputs = model(**encoding)
output = outputs.logits

# apply sigmoid + threshold
sigmoid = torch.nn.Sigmoid()
probs = sigmoid(output.squeeze().cpu())
predictions = np.zeros(probs.shape)
predictions[np.where(probs >= 0.5)] = 1
# turn predicted id's into actual label names
predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
print(predicted_labels)

['Age', 'Race']


In [10]:
%pip install onnx
%pip install onnxscript


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Defaulting to user installation because normal site-packages is not writeable
Collecting onnxscript
  Downloading onnxscript-0.1.0.dev20240717-py3-none-any.whl (644 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m644.3/644.3 KB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting ml-dtypes
  Downloading ml_dtypes-0.4.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0mm
Installing collected packages: ml-dtypes, onnxscript
Successfully installed ml-dtypes-0.4.0 onnxscript-0.1.0.dev20240717
Note: you may need to restart the kernel to use updated packages.


In [8]:
# Get the input shape for the ONNX conversion
input_shape = encoding['input_ids'].shape
print(f"Input shape: {input_shape}")

Input shape: torch.Size([1, 8])


In [9]:
# Export the model to ONNX format
dummy_input = (torch.ones(input_shape).long().to(model.device),) # assuming only input_ids is used
onnx_model_path = "bert_multilabel_tagalog_hate_speech_classifier.onnx"


In [10]:
torch.onnx.export(
    model,
    dummy_input,  # model input (or a tuple for multiple inputs)
    onnx_model_path,  # where to save the model (can be a file or file-like object)
    export_params=True,  # store the trained parameter weights inside the model file
    opset_version=14,  # the ONNX version to export the model to
    input_names=['input_ids'],  # the model's input names
    output_names=['logits'],  # the model's output names
    dynamic_axes={'input_ids': {0: 'batch_size', 1: 'sequence_length'}, 'logits': {0: 'batch_size'}}  # variable length axes
)


In [12]:
import onnxruntime

# Load the ONNX model
onnx_model_path = "bert_multilabel_tagalog_hate_speech_classifier.onnx"
ort_session = onnxruntime.InferenceSession(onnx_model_path)


In [13]:
# Tokenize the example text
encoding = tokenizer(text, return_tensors="np")

# Prepare the input data for ONNX runtime
input_ids = encoding['input_ids']
attention_mask = encoding['attention_mask']


In [15]:
# Run inference
ort_inputs = {ort_session.get_inputs()[0].name: input_ids}
ort_outs = ort_session.run(None, ort_inputs)

# Get the output logits
output_logits = ort_outs[0]

# Apply sigmoid + threshold
sigmoid = lambda x: 1 / (1 + np.exp(-x))
probs = sigmoid(output_logits.squeeze())
predictions = np.zeros(probs.shape)
predictions[np.where(probs >= 0.5)] = 1

print(predictions)

# Turn predicted ids into actual label names
predicted_labels = [id2label[idx] for idx, label in enumerate(predictions) if label == 1.0]
print(predicted_labels)

[1. 0. 0. 1. 0. 0.]
['Age', 'Race']


In [None]:
python -m scripts.convert --quantize --model_id syke9p3/bert-multilabel-tagalog-hate-speech-classifier

In [1]:
import onnx
onnx_model = onnx.load("bert_multilabel_tagalog_hate_speech_classifier.onnx")
onnx.checker.check_model(onnx_model)

In [4]:
from optimum.onnxruntime import ORTModelForSequenceClassification


onnx_model = ORTModelForSequenceClassification.from_pretrained("syke9p3/bert-multilabel-tagalog-hate-speech-classifier",from_transformers=True)


The argument `from_transformers` is deprecated, and will be removed in optimum 2.0.  Use `export` instead
Framework not specified. Using pt to export the model.
Using the export variant default. Available variants are:
    - default: The default ONNX variant.

***** Exporting submodel 1/1: BertForSequenceClassification *****
Using framework PyTorch: 2.3.1+cu121
Overriding 1 configuration item(s)
	- use_cache -> False


In [8]:
onnx_path = "onnx"

onnx_model.save_pretrained("onnx")
tokenizer.save_pretrained("onnx")

('onnx/tokenizer_config.json',
 'onnx/special_tokens_map.json',
 'onnx/vocab.txt',
 'onnx/added_tokens.json',
 'onnx/tokenizer.json')

In [13]:
from optimum.onnxruntime import ORTOptimizer
from optimum.onnxruntime.configuration import OptimizationConfig

optimizer = ORTOptimizer.from_pretrained("syke9p3/bert-multilabel-tagalog-hate-speech-classifier")
optimization_config = OptimizationConfig(optimization_level=99)

optimizer.export(
    onnx_model_path=onnx_path / "model.onnx",
    onnx_optimized_model_output_path=onnx_path / "model-optimized.onnx",
    optimization_config=optimization_config,
)

ValueError: Unable to load the model from syke9p3/bert-multilabel-tagalog-hate-speech-classifier.