<a href="https://colab.research.google.com/github/palnavneet/dl-projects/blob/master/DistilbertTextClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [15]:
!pip install transformers torch onnx onnxruntime



In [16]:
from transformers import DistilBertForSequenceClassification, DistilBertTokenizer
import torch
from onnxruntime.quantization import quantize_dynamic, QuantType

In [17]:
# Hub checkpoint to convert; per its name, a DistilBERT fine-tuned on SST-2 (English).
model_name = "distilbert/distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer and classification weights are both pulled from that one checkpoint.
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
model = DistilBertForSequenceClassification.from_pretrained(model_name)

# Switch to inference mode before export; as the cell's last expression this
# also displays the module structure.
model.eval()

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [18]:
# Build a sample input for tracing the model during export.
text = "Once upon a time"
inputs = tokenizer(text, return_tensors="pt")

# The export call takes positional model arguments as a tuple:
# (input_ids, attention_mask), in that order.
dummy_input = tuple(inputs[key] for key in ('input_ids', 'attention_mask'))

In [19]:
# Destination of the full-precision ONNX graph.
onnx_file_path = "distilbert_classification.onnx"

# Axis 0 (batch) and axis 1 (sequence) of both inputs are left symbolic so the
# exported graph is not tied to the shape of the dummy input.
token_axes = {0: 'batch_size', 1: 'sequence_length'}
export_options = dict(
    input_names=['input_ids', 'attention_mask'],
    output_names=['logits'],
    dynamic_axes={
        'input_ids': dict(token_axes),
        'attention_mask': dict(token_axes),
        'logits': {0: 'batch_size'},
    },
    opset_version=14,
)

# Trace the model with the dummy input and write the graph to disk.
torch.onnx.export(model, dummy_input, onnx_file_path, **export_options)

In [20]:
# Dynamically quantize the exported graph, storing weights as unsigned 8-bit
# integers, and write the result next to the original file.
quantized_model_path = "distilbert_classification_quantized.onnx"
quantize_dynamic(
    model_input=onnx_file_path,
    model_output=quantized_model_path,
    weight_type=QuantType.QUInt8,
)

print(f"The quantized classification model has been saved as '{quantized_model_path}'!")



The quantized classification model has been saved as 'distilbert_classification_quantized.onnx'!


In [27]:
# Test the output: run one sentence through the quantized ONNX model.
import onnxruntime as ort
import numpy as np

# Load the tokenizer from the checkpoint that was actually exported
# (model_name), so it is guaranteed to match the model behind the ONNX graph.
tokenizer = DistilBertTokenizer.from_pretrained(model_name)
inputs = tokenizer("I love you", return_tensors="np")  # Use NumPy arrays

# Load the quantized model written by the quantization step.
session = ort.InferenceSession(quantized_model_path)

# Run inference. The feed keys and the requested output name are the
# input_names / output_names that were passed to torch.onnx.export.
outputs = session.run(['logits'], {
    'input_ids': inputs['input_ids'],
    'attention_mask': inputs['attention_mask']
})

# session.run returns a list with one entry per requested output;
# only 'logits' was requested, so it is at index 0.
logits = outputs[0]
print("Logits:", logits)

Logits: [[-4.251244  4.613521]]


In [28]:
import numpy as np

# Turn the logits from the inference cell into per-class probabilities.
# Numerically stable softmax: subtracting the row-wise maximum does not change
# the result mathematically, but keeps np.exp from overflowing to inf (which
# would make the division produce NaN) when logits are large.
shifted_logits = logits - np.max(logits, axis=1, keepdims=True)
exp_logits = np.exp(shifted_logits)
probs = exp_logits / np.sum(exp_logits, axis=1, keepdims=True)

# Get the predicted class for the first (only) row of the batch
predicted_class = np.argmax(probs, axis=1)[0]

# Human-readable class names, indexed by class id.
# NOTE(review): order assumed to match the checkpoint's id2label mapping — confirm.
labels = ["Negative", "Positive"]
print("Predicted:", labels[predicted_class])
print("Confidence:", probs[0][predicted_class])

Predicted: Positive
Confidence: 0.99985874


In [31]:
!pip install huggingface_hub



In [34]:
# Interactive login to the Hugging Face Hub. The command prompts for an access
# token (generated at https://huggingface.co/settings/tokens) and asks whether
# to also store it as a git credential. Never paste the token into a cell.
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: write

In [35]:
import os
import shutil
from transformers import AutoTokenizer, AutoConfig

# Local folder that will be uploaded as the model repository.
model_dir = "distilbert-onnx"
os.makedirs(model_dir, exist_ok=True)

# Save tokenizer and config files into the folder.
# Both are taken from the checkpoint that was exported (model_name), not from
# the base "distilbert-base-uncased" checkpoint: the base config describes the
# base model rather than the fine-tuned classifier (e.g. its label mapping),
# so publishing it next to the ONNX file would mislabel the model.
tokenizer = AutoTokenizer.from_pretrained(model_name)
config = AutoConfig.from_pretrained(model_name)
tokenizer.save_pretrained(model_dir)
config.save_pretrained(model_dir)

# Move your quantized ONNX model into that folder.
# Note: after this the file no longer exists in the working directory, so the
# inference cell above must be run before this one.
shutil.move("distilbert_classification_quantized.onnx", f"{model_dir}/distilbert_classification_quantized.onnx")


'distilbert-onnx/distilbert_classification_quantized.onnx'

In [36]:
from huggingface_hub import create_repo, upload_folder

# Target repository on the Hugging Face Hub ("<user>/<repo>").
repo_id = "navneetpal07/distilbert-onnx"

# Create the public model repo on Hugging Face.
# exist_ok=True makes the cell re-runnable: without it, a second run fails
# because the repository already exists.
create_repo(repo_id, repo_type="model", private=False, exist_ok=True)

# Upload the entire contents of model_dir as a single commit.
# As the last expression of the cell, the returned commit info is displayed.
upload_folder(
    repo_id=repo_id,
    folder_path=model_dir,
    repo_type="model"
)

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

distilbert_classification.onnx:   0%|          | 0.00/268M [00:00<?, ?B/s]

distilbert_classification_quantized.onnx:   0%|          | 0.00/67.4M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/navneetpal07/distilbert-onnx/commit/a671de7389f1692ee8c6a058e953cd73f786b29e', commit_message='Upload folder using huggingface_hub', commit_description='', oid='a671de7389f1692ee8c6a058e953cd73f786b29e', pr_url=None, repo_url=RepoUrl('https://huggingface.co/navneetpal07/distilbert-onnx', endpoint='https://huggingface.co', repo_type='model', repo_id='navneetpal07/distilbert-onnx'), pr_revision=None, pr_num=None)