In [2]:
from pathlib import Path

import numpy as np
from catboost import CatBoostClassifier
from skl2onnx import to_onnx
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from sneakers_ml.features.features import get_train_val_test
from sneakers_ml.models.onnx import save_sklearn_onnx

In [3]:
x_train, x_val, x_test, y_train, y_val, y_test = get_train_val_test(
    "data/features/brands-classification-splits", "sift"
)

In [None]:
x_train = [arr[:128] for arr in x_train]
x_val = [arr[:128] for arr in x_val]
x_test = [arr[:128] for arr in x_test]

In [30]:
param_grid = {"C": [0.1, 1], "kernel": ["linear"]}
svc = SVC()
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=3, scoring="accuracy", verbose=1, n_jobs=-1)
grid_search.fit(x_train, y_train)
pred = grid_search.best_estimator_.predict(x_test)
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Fitting 3 folds for each of 6 candidates, totalling 18 fits


KeyboardInterrupt: 

In [None]:
save_sklearn_onnx(grid_search.best_estimator_, x_train, "data/models/brands-classification/sift-svc.onnx")

In [31]:
param_grid = {"loss": ["log_loss", "hinge"], "alpha": [0.0001, 0.001]}
sgd = SGDClassifier()
grid_search = GridSearchCV(estimator=sgd, param_grid=param_grid, cv=5, scoring="accuracy", verbose=1, n_jobs=-1)
grid_search.fit(x_train, y_train)
pred = grid_search.best_estimator_.predict(x_test)
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Acc: 0.17148182665424044
F1-weighted: 0.08466977247699556
F1-macro: 0.055014817534028225


In [32]:
grid_search.best_params_

{'alpha': 0.0001, 'loss': 'log_loss'}

In [37]:
onx = to_onnx(grid_search.best_estimator_, x_train[0].astype(np.float32))
with Path("data/models/brands-classification/sift-sgd.onnx").open("wb") as file:
    file.write(onx.SerializeToString())

In [39]:
model = CatBoostClassifier(verbose=True, iterations=500)
model.fit(x_train, y_train, eval_set=(x_val, y_val))
pred = model.predict(x_test)

Learning rate set to 0.144002
0:	learn: 2.4462049	test: 2.4496936	best: 2.4496936 (0)	total: 96.8ms	remaining: 48.3s
1:	learn: 2.3736972	test: 2.3782601	best: 2.3782601 (1)	total: 141ms	remaining: 35.1s
2:	learn: 2.3161040	test: 2.3261772	best: 2.3261772 (2)	total: 182ms	remaining: 30.2s
3:	learn: 2.2758664	test: 2.2866884	best: 2.2866884 (3)	total: 225ms	remaining: 27.8s
4:	learn: 2.2483453	test: 2.2607747	best: 2.2607747 (4)	total: 269ms	remaining: 26.6s
5:	learn: 2.2189307	test: 2.2371443	best: 2.2371443 (5)	total: 319ms	remaining: 26.2s
6:	learn: 2.1966348	test: 2.2200565	best: 2.2200565 (6)	total: 367ms	remaining: 25.8s
7:	learn: 2.1756628	test: 2.2049717	best: 2.2049717 (7)	total: 416ms	remaining: 25.6s
8:	learn: 2.1569282	test: 2.1926738	best: 2.1926738 (8)	total: 461ms	remaining: 25.1s
9:	learn: 2.1436150	test: 2.1828715	best: 2.1828715 (9)	total: 516ms	remaining: 25.3s
10:	learn: 2.1280278	test: 2.1723605	best: 2.1723605 (10)	total: 578ms	remaining: 25.7s
11:	learn: 2.1076529	

In [40]:
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Acc: 0.37092264678471576
F1-weighted: 0.3064320072199292
F1-macro: 0.18959655855524382


In [41]:
model.save_model(
    "data/models/brands-classification/sift-catboost.onnx",
    format="onnx",
    export_parameters={
        "onnx_domain": "ai.catboost",
        "onnx_model_version": 1,
        "onnx_doc_string": "iterations=500 default model",
        "onnx_graph_name": "CatBoostModel_for_MultiClassification",
    },
)