In [2]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Hugging Face Hub checkpoint id; per its name, a DistilBERT model fine-tuned on SST-2.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer and classifier are both loaded from the same checkpoint id.
tokenizer = AutoTokenizer.from_pretrained(model_name)

# .eval() switches the module to inference mode and returns the module itself,
# so it can be chained directly onto the load.
model = AutoModelForSequenceClassification.from_pretrained(model_name).eval()

# Last expression of the cell: show the module repr, exactly as the bare
# model.eval() call did.
model


  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [4]:
import os

# Destination file for the exported ONNX graph (relative to the working directory).
output_path = "bert_sentiment.onnx"

# One tokenized sentence serves as the example input passed to the exporter.
dummy_input = tokenizer("This is a great product!", return_tensors="pt")

torch.onnx.export(
    model,
    # Positional model inputs, in the same order as input_names below.
    (dummy_input["input_ids"], dummy_input["attention_mask"]),
    output_path,
    input_names=["input_ids", "attention_mask"],
    output_names=["output"],
    # Both inputs get named dynamic batch and sequence axes; the output only
    # gets a named dynamic batch axis.
    dynamic_axes={
        **{
            name: {0: "batch_size", 1: "sequence"}
            for name in ("input_ids", "attention_mask")
        },
        "output": {0: "batch_size"},
    },
    opset_version=14,
)


In [8]:
import onnxruntime as ort
import numpy as np

# Load the exported ONNX model into an onnxruntime inference session.
# The path is the same literal file name that the export cell assigns to
# `output_path`; keep the two in sync if either is changed.
session = ort.InferenceSession("bert_sentiment.onnx")

def predict_sentiment(text, max_length=512):
    """Classify the sentiment of ``text`` with the exported ONNX model.

    Args:
        text: Input string to classify.
        max_length: Maximum number of tokens fed to the model; longer inputs
            are truncated. Defaults to 512, the size of the model's position
            embedding table, beyond which inference would fail.

    Returns:
        "Positive" if class index 1 has the highest logit, otherwise "Negative".
    """
    # Truncate so that long texts (e.g. arbitrary API payloads) cannot exceed
    # the model's maximum sequence length.
    inputs = tokenizer(
        text,
        return_tensors="np",
        truncation=True,
        max_length=max_length,
    )

    # Cast to int64 before feeding the session (kept from the original;
    # presumably the exported graph expects int64 while the tokenizer's NumPy
    # output may be int32 on some platforms — confirm).
    feeds = {
        name: inputs[name].astype("int64")
        for name in ("input_ids", "attention_mask")
    }

    # output_names=None asks the session for all outputs; the first one is
    # used as the logits.
    logits = session.run(None, feeds)[0]
    predicted_class = int(np.argmax(logits, axis=1)[0])
    return "Positive" if predicted_class == 1 else "Negative"

# Smoke test: run one example sentence through the ONNX pipeline and print its label.
print(predict_sentiment("I love this!"))  # Example


Positive


In [11]:
import nest_asyncio
import uvicorn

# NOTE(review): presumably applied so an asyncio event loop can be started while
# the notebook's own loop is already running. The server is later started in a
# separate thread, so confirm this patch is still needed.
nest_asyncio.apply()

In [12]:
from fastapi import FastAPI
from pydantic import BaseModel

# Application object; endpoints are registered on it through its route decorators.
app = FastAPI()

# Request body schema for POST /predict: a JSON object with one string field.
class TextRequest(BaseModel):
    text: str  # raw text to classify

# POST /predict: classify the request's text and reply with {"sentiment": <label>}.
# Documented with comments rather than a docstring because FastAPI publishes
# endpoint docstrings as the operation description in the OpenAPI schema.
@app.post("/predict")
def predict(req: TextRequest):
    return {"sentiment": predict_sentiment(req.text)}


In [13]:
import threading

def run_api():
    """Serve ``app`` with uvicorn on 0.0.0.0:8000; blocks while the server runs."""
    # NOTE(review): 0.0.0.0 listens on all network interfaces; use "127.0.0.1"
    # if the API should only be reachable from this machine.
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Run the blocking server in a background thread so the notebook stays usable.
# daemon=True lets the Python process (the notebook kernel) exit even though
# uvicorn.run() does not return on its own; a non-daemon thread would keep the
# process alive and block a clean shutdown.
thread = threading.Thread(target=run_api, daemon=True)
thread.start()


INFO:     Started server process [15908]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:52610 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:52610 - "GET /openapi.json HTTP/1.1" 200 OK
INFO:     127.0.0.1:52611 - "POST /predict HTTP/1.1" 200 OK
INFO:     127.0.0.1:52611 - "POST /predict HTTP/1.1" 200 OK
INFO:     127.0.0.1:52631 - "POST /predict HTTP/1.1" 200 OK
