In [1]:
import numpy as np
from catboost import CatBoostClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

from sneakers_ml.features.sift import get_sift_features
from sneakers_ml.models.onnx import save_sklearn_onnx

In [2]:
x_train, y_train, class_to_idx, kmeans, vector_for_kmeans_save = get_sift_features(
    "data/training/brands-classification-splits/train", 2000, kmeans=None
)
x_val, y_val, class_to_idx, kmeans, vector_for_kmeans_save = get_sift_features(
    "data/training/brands-classification-splits/val", 2000, kmeans=kmeans
)
x_test, y_test, class_to_idx, kmeans, vector_for_kmeans_save = get_sift_features(
    "data/training/brands-classification-splits/test", 2000, kmeans=kmeans
)
x_train_val = np.concatenate((x_train, x_val), axis=0)
y_train_val = np.concatenate((y_train, y_val))

  0%|          | 0/3180 [00:00<?, ?it/s]

  0%|          | 0/3180 [00:00<?, ?it/s]

  0%|          | 0/3180 [00:00<?, ?it/s]

  0%|          | 0/1056 [00:00<?, ?it/s]

  0%|          | 0/1056 [00:00<?, ?it/s]

  0%|          | 0/1056 [00:00<?, ?it/s]

  0%|          | 0/1073 [00:00<?, ?it/s]

  0%|          | 0/1073 [00:00<?, ?it/s]

  0%|          | 0/1073 [00:00<?, ?it/s]

In [3]:
save_sklearn_onnx(kmeans, vector_for_kmeans_save, "data/models/brands-classification/sift-kmeans.onnx")

In [4]:
y_train = y_train[:, 1]
y_val = y_val[:, 1]
y_test = y_test[:, 1]
y_train_val = np.concatenate((y_train, y_val))

In [5]:
# x_train, x_val, x_test, y_train, y_val, y_test =
# get_train_val_test("data/features/brands-classification-splits", "hog")
# x_train_val = np.concatenate((x_train, x_val), axis=0)
# y_train_val = np.concatenate((y_train, y_val))

In [6]:
param_grid = {"C": [0.1, 1, 10, 100], "gamma": ["scale", "auto"], "kernel": ["linear", "rbf"]}
svc = SVC()
grid_search = GridSearchCV(estimator=svc, param_grid=param_grid, cv=3, scoring="f1_macro", verbose=10, n_jobs=-1)
grid_search.fit(x_train_val, y_train_val)
pred = grid_search.best_estimator_.predict(x_test)
print(f"F1-weighted: {f1_score(y_train_val, grid_search.best_estimator_.predict(x_train_val), average='weighted')}")
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test, pred, average='weighted')}")
print(f"F1-macro: {f1_score(y_test, pred, average='macro')}")

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV 3/3; 2/16] START C=0.1, gamma=scale, kernel=rbf.............................
[CV 1/3; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 3/3; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 2/3; 2/16] START C=0.1, gamma=scale, kernel=rbf.............................
[CV 1/3; 4/16] START C=0.1, gamma=auto, kernel=rbf..............................
[CV 1/3; 2/16] START C=0.1, gamma=scale, kernel=rbf.............................
[CV 3/3; 4/16] START C=0.1, gamma=auto, kernel=rbf..............................
[CV 2/3; 3/16] START C=0.1, gamma=auto, kernel=linear...........................
[CV 2/3; 4/16] START C=0.1, gamma=auto, kernel=rbf..............................
[CV 2/3; 1/16] START C=0.1, gamma=scale, kernel=linear..........................
[CV 1/3; 3/16] START C=0.1, gamma=auto, kernel=linear...........................
[CV 3/3; 3/16] START C=0.1, gamma=auto, kernel=l

In [7]:
grid_search.best_params_

{'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}

In [8]:
save_sklearn_onnx(grid_search.best_estimator_, x_train, "data/models/brands-classification/sift-svc.onnx")

In [9]:
param_grid = {"loss": ["log_loss", "hinge"], "alpha": [0.0001, 0.001, 0.00001], "penalty": ["l2", "elasticnet"]}
sgd = SGDClassifier()
grid_search = GridSearchCV(estimator=sgd, param_grid=param_grid, cv=3, scoring="f1_macro", verbose=1, n_jobs=-1)
grid_search.fit(x_train_val, y_train_val)
pred = grid_search.best_estimator_.predict(x_test)
print(f"F1-weighted: {f1_score(y_train_val, grid_search.best_estimator_.predict(x_train_val), average='weighted')}")
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Fitting 3 folds for each of 12 candidates, totalling 36 fits
F1-weighted: 0.9631031875972167
Acc: 0.47996272134203166
F1-weighted: 0.46698655944020334
F1-macro: 0.4032191175025905


In [10]:
grid_search.best_params_

{'alpha': 0.001, 'loss': 'log_loss', 'penalty': 'l2'}

In [11]:
save_sklearn_onnx(grid_search.best_estimator_, x_train, "data/models/brands-classification/sift-sgd.onnx")

In [12]:
model = CatBoostClassifier(verbose=True, iterations=1000, task_type="GPU")
model.fit(x_train_val, y_train_val)

Learning rate set to 0.087133
0:	learn: 2.4624455	total: 44.8ms	remaining: 44.7s
1:	learn: 2.3893801	total: 75.7ms	remaining: 37.8s
2:	learn: 2.3326657	total: 101ms	remaining: 33.7s
3:	learn: 2.2891711	total: 123ms	remaining: 30.7s
4:	learn: 2.2546534	total: 157ms	remaining: 31.2s
5:	learn: 2.2250324	total: 188ms	remaining: 31.1s
6:	learn: 2.1990509	total: 220ms	remaining: 31.1s
7:	learn: 2.1767889	total: 250ms	remaining: 31s
8:	learn: 2.1566735	total: 281ms	remaining: 30.9s
9:	learn: 2.1401955	total: 299ms	remaining: 29.6s
10:	learn: 2.1246579	total: 311ms	remaining: 28s
11:	learn: 2.1086126	total: 323ms	remaining: 26.6s
12:	learn: 2.0959848	total: 336ms	remaining: 25.5s
13:	learn: 2.0849222	total: 357ms	remaining: 25.1s
14:	learn: 2.0743386	total: 373ms	remaining: 24.5s
15:	learn: 2.0634164	total: 383ms	remaining: 23.6s
16:	learn: 2.0536787	total: 402ms	remaining: 23.2s
17:	learn: 2.0458562	total: 416ms	remaining: 22.7s
18:	learn: 2.0383660	total: 430ms	remaining: 22.2s
19:	learn: 2.

<catboost.core.CatBoostClassifier at 0x7fc22e4cb280>

In [13]:
pred = model.predict(x_test)
print(f"Acc: {accuracy_score(y_test, pred)}")
print(f"F1-weighted: {f1_score(y_test,pred,average='weighted')}")
print(f"F1-macro: {f1_score(y_test,pred,average='macro')}")

Acc: 0.49487418452935694
F1-weighted: 0.4493902504659323
F1-macro: 0.35629033908769914


In [14]:
model.save_model(
    "data/models/brands-classification/sift-catboost.onnx",
    format="onnx",
    export_parameters={
        "onnx_domain": "ai.catboost",
        "onnx_model_version": 1,
        "onnx_doc_string": "default model",
        "onnx_graph_name": "CatBoostModel_for_MultiClassification",
    },
)