In [11]:
from catboost import CatBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from sneakers_ml.features.features import get_train_val_test
from sneakers_ml.models.onnx import save_sklearn_onnx

In [12]:
x_train, x_val, x_test, y_train, y_val, y_test = get_train_val_test(
    "data/features/brands-classification-splits", "resnet"
)

In [13]:
param_grid = {"C": [0.1, 1, 10], "gamma": ["scale", "auto"], "kernel": ["linear"]}
svc = SVC()
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=5, scoring="accuracy", verbose=1, n_jobs=-1)
grid_search.fit(x_train, y_train)
pred = grid_search.best_estimator_.predict(x_test)
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Fitting 5 folds for each of 6 candidates, totalling 30 fits


Acc: 0.7027027027027027
F1-weighted: 0.6991629247827992
F1-macro: 0.6787583310454817


In [14]:
save_sklearn_onnx(grid_search.best_estimator_, x_train, "data/models/brands-classification/resnet-svc.onnx")

In [15]:
param_grid = {"loss": ["log_loss", "hinge"], "alpha": [0.0001, 0.001]}
sgd = SGDClassifier()
grid_search = GridSearchCV(estimator=sgd, param_grid=param_grid, cv=5, scoring="accuracy", verbose=1, n_jobs=-1)
grid_search.fit(x_train, y_train)
pred = grid_search.best_estimator_.predict(x_test)
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Fitting 5 folds for each of 4 candidates, totalling 20 fits
Acc: 0.7129543336439889
F1-weighted: 0.7114958865432788
F1-macro: 0.6910893282450099


In [16]:
grid_search.best_params_

{'alpha': 0.0001, 'loss': 'log_loss'}

In [17]:
save_sklearn_onnx(grid_search.best_estimator_, x_train, "data/models/brands-classification/resnet-sgd.onnx")

In [18]:
model = CatBoostClassifier(verbose=True, iterations=500, task_type="GPU")
model.fit(x_train, y_train, eval_set=(x_val, y_val))
pred = model.predict(x_test)



Learning rate set to 0.160002
0:	learn: 2.3596870	test: 2.3876313	best: 2.3876313 (0)	total: 240ms	remaining: 1m 59s
1:	learn: 2.2135173	test: 2.2611091	best: 2.2611091 (1)	total: 411ms	remaining: 1m 42s
2:	learn: 2.1136338	test: 2.1751617	best: 2.1751617 (2)	total: 570ms	remaining: 1m 34s
3:	learn: 2.0277172	test: 2.0950377	best: 2.0950377 (3)	total: 725ms	remaining: 1m 29s
4:	learn: 1.9651785	test: 2.0431752	best: 2.0431752 (4)	total: 868ms	remaining: 1m 25s
5:	learn: 1.9129020	test: 2.0022276	best: 2.0022276 (5)	total: 868ms	remaining: 1m 25s
6:	learn: 1.8644193	test: 1.9680372	best: 1.9680372 (6)	total: 3.11s	remaining: 4m 15s
7:	learn: 1.8108242	test: 1.9276837	best: 1.9276837 (7)	total: 3.26s	remaining: 3m 49s
8:	learn: 1.7733745	test: 1.9022018	best: 1.9022018 (8)	total: 3.42s	remaining: 3m 29s
9:	learn: 1.7389288	test: 1.8804485	best: 1.8804485 (9)	total: 3.57s	remaining: 3m 14s
10:	learn: 1.7115859	test: 1.8607403	best: 1.8607403 (10)	total: 3.71s	remaining: 3m 1s
11:	learn: 1

In [19]:
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Acc: 0.6728797763280522
F1-weighted: 0.6548250406045764
F1-macro: 0.6137248140743309


In [20]:
model.save_model(
    "data/models/brands-classification/resnet-catboost.onnx",
    format="onnx",
    export_parameters={
        "onnx_domain": "ai.catboost",
        "onnx_model_version": 1,
        "onnx_doc_string": "iterations=500 default model",
        "onnx_graph_name": "CatBoostModel_for_MultiClassification",
    },
)