In [1]:
import numpy as np
import onnxruntime as rt
from catboost import CatBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from sneakers_ml.features.features import get_train_val_test
from sneakers_ml.models.onnx import save_sklearn_onnx

In [3]:
X_train, X_val, X_test, y_train, y_val, y_test = get_train_val_test("data/features/hog", "brands-classification")

In [5]:
param_grid = {"C": [0.1, 1, 10], "gamma": ["scale", "auto"], "kernel": ["linear"]}

svc = SVC()
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, scoring="accuracy", verbose=1)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits


In [7]:
pred = grid_search.best_estimator_.predict(X_test)
accuracy_score(y_test, pred)

0.7362534948741846

In [10]:
save_sklearn_onnx(grid_search.best_estimator_, X_train, "data/models/HOG-SVM-73acc.onnx")

In [11]:
param_grid = {"loss": ["log_loss", "hinge"], "alpha": [0.0001, 0.001]}

sgd = SGDClassifier()
grid_search = GridSearchCV(estimator=sgd, param_grid=param_grid, cv=5, scoring="accuracy", verbose=1)
grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 4 candidates, totalling 20 fits


In [14]:
pred = grid_search.best_estimator_.predict(X_test)
accuracy_score(y_test, pred)

0.7110904007455732

In [15]:
save_sklearn_onnx(grid_search.best_estimator_, X_train, "data/models/HOG-SGD-71acc.onnx")

In [2]:
model = CatBoostClassifier(verbose=True, iterations=200)

model.fit(X_train, y_train, eval_set=(X_val, y_val))

NameError: name 'X_train' is not defined

In [5]:
pred = model.predict(X_test)
accuracy_score(y_test, pred)

0.6160298229263746

In [6]:
model.save_model(
    "data/models/HOG-CatBoost-61acc.onnx",
    format="onnx",
    export_parameters={
        "onnx_domain": "ai.catboost",
        "onnx_model_version": 1,
        "onnx_doc_string": "iterations=100 default model",
        "onnx_graph_name": "CatBoostModel_for_MultiClassification",
    },
)

In [9]:
sess = rt.InferenceSession("data/models/HOG-CatBoost-61acc.onnx")

label, probabilities = sess.run(["label", "probabilities"], {"features": X_test.astype(np.float32)})

In [10]:
accuracy_score(y_test, label)

0.6160298229263746