# Sentiment analysis model training

Let's train a simple binary classifier using Scikit-Learn, and convert the pipeline to ONNX format.

In [None]:
from pathlib import Path

import nltk.corpus
import onnxruntime as ort
import pandas as pd
import skl2onnx
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import TfidfVectorizer

Download and prepare the dataset from NLTK movie reviews:

In [None]:
# Fetch the movie-reviews corpus through NLTK's downloader.
nltk.download("movie_reviews")
reviews = nltk.corpus.movie_reviews
dataset_classes = reviews.categories()

# One record per corpus file: the raw review text plus a label taken from the
# first path component of the file id (the part before the first "/").
records = []
for fileid in reviews.fileids():
    records.append(
        {
            "text": reviews.raw(fileid),
            "sentiment": fileid.split("/")[0],
        }
    )

dataset = pd.DataFrame(records)
dataset

Split the dataset into train and test sets:

In [None]:
# Fixed seed for the shuffle: without it every kernel restart produces a
# different train/test partition, so the score and the exported model below
# would not be comparable between runs.
SPLIT_SEED = 42

X = dataset["text"]
y = dataset["sentiment"]

# Hold out 25% of the reviews for testing; stratify on the label so the train
# and test subsets keep the same class proportions as the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=SPLIT_SEED,
    stratify=y,
)

Train a Scikit-Learn pipeline, including a vectorization / normalization step and a classification model:

In [None]:
# Two-stage pipeline: raw text -> TF-IDF features -> logistic regression.
# The step names are the keys under which the stages are registered.
vectorizer = TfidfVectorizer()
classifier = LogisticRegression()
pipeline = Pipeline(
    steps=[
        ("tf-idf", vectorizer),
        ("classifier", classifier),
    ]
)

# Fit both stages on the training split; the fitted pipeline is the cell output.
pipeline.fit(X_train, y_train)

Score the model (accuracy) on the test set:

In [None]:
# Evaluate the fitted pipeline on the held-out split and display the score.
test_accuracy = pipeline.score(X_test, y_test)
test_accuracy

Try to predict the sentiment of sample sentences:

In [None]:
# The pipeline takes an iterable of raw strings; pass a one-element batch.
sample_positive = ["a nice and good take"]
pipeline.predict(sample_positive)

In [None]:
# Same check with a differently worded one-element batch.
sample_negative = ["it hurts so bad"]
pipeline.predict(sample_negative)

Export the model to ONNX format using `skl2onnx`:

In [None]:
# Converter options are keyed by the id() of the object they apply to, here
# the whole pipeline. NOTE(review): per the skl2onnx converter-options docs,
# zipmap=False should yield plain tensor outputs and output_class_labels=True
# should expose the class labels as an additional output — confirm against the
# installed skl2onnx version.
pipeline_options = {"zipmap": False, "output_class_labels": True}
onnx_options = {id(pipeline): pipeline_options}

# A single training row is passed as the sample input for the conversion.
sample_input = X_train[:1].values
onnx_model = skl2onnx.to_onnx(pipeline, sample_input, options=onnx_options)

# Serialize next to the notebook; the byte count written is the cell output.
onnx_model_path = Path("model.onnx")
onnx_model_path.write_bytes(onnx_model.SerializeToString())

Load the ONNX model and run inference on a sample sentence:

In [None]:
# Open the exported model with every execution provider this onnxruntime
# build reports as available.
session = ort.InferenceSession(
    onnx_model_path,
    providers=ort.get_available_providers(),
)

# Feed one raw sentence under the input name "X"; passing None as the first
# argument requests all model outputs.
feeds = {"X": ["it hurts so bad"]}
session.run(None, feeds)