In [8]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, StratifiedKFold, cross_validate
from sklearn.svm import SVC

from preprocessing import TrainingPreProcessor

# Seed reused by the SVC estimators and the StratifiedKFold splitter below.
RANDOM_STATE = 0

# Location of the training data and the columns handed to the preprocessor
# as `ignore_columns`.
TRAIN_CSV_PATH = "input/train.csv"
IGNORED_COLUMNS = ["CLIENTNUM"]

train_set = pd.read_csv(TRAIN_CSV_PATH)

# NOTE(review): the preprocessor is fitted on the complete training set and the
# transformed data is later cross-validated. If TrainingPreProcessor learns any
# statistics from the data, they are shared across folds -- confirm this is intended.
training_preprocessor = TrainingPreProcessor()
training_preprocessor.fit(train_set, ignore_columns=IGNORED_COLUMNS)

# Model inputs/targets plus the class weighting exposed by the fitted preprocessor.
X, y = training_preprocessor.transform(train_set)
class_weight = training_preprocessor.class_weight

In [4]:
# Base estimator handed to the grid search; only the seed and the class
# weighting are fixed here, everything else is left at the SVC defaults.
svc_model = SVC(
    class_weight=class_weight,
    random_state=RANDOM_STATE,
)

## Grid search for SVC

In [9]:
# 5-fold stratified splitter with seeded shuffling.
kfold = StratifiedKFold(
    n_splits=5,
    shuffle=True,
    random_state=RANDOM_STATE,
)
# Hyperparameter grid: 7 C values x 4 kernels x 2 max_iter x 3 tol = 168 candidates.
c_values = np.logspace(start=-3, stop=3, num=7)  # 1e-3, 1e-2, ..., 1e3
kernel_choices = ["linear", "poly", "rbf", "sigmoid"]
max_iter_choices = [1000, 10000]
tol_choices = [1e-3, 1e-4, 1e-5]

parameters = dict(
    C=c_values,
    kernel=kernel_choices,
    max_iter=max_iter_choices,
    tol=tol_choices,
)

In [4]:
# Exhaustive search over `parameters`, ranking candidates by balanced accuracy
# on the `kfold` splits and using every available core (n_jobs=-1).
grid_search = GridSearchCV(
    estimator=svc_model,
    param_grid=parameters,
    scoring="balanced_accuracy",
    cv=kfold,
    n_jobs=-1,
    verbose=3,
)

In [5]:
# Run the search, then report the winning parameter set and its score.
grid_search.fit(X, y)
print(grid_search.best_params_, grid_search.best_score_, sep="\n")

# Bare last expression so the notebook displays the best estimator.
grid_search.best_estimator_

Fitting 5 folds for each of 168 candidates, totalling 840 fits
{'C': 1.0, 'kernel': 'rbf', 'max_iter': 10000, 'tol': 0.001}
0.9028567836731707


In [10]:
# Standalone model using the hyperparameters printed as `best_params_` by the
# grid-search run recorded in this notebook, so the evaluation below does not
# need the search to be re-executed.
best_model = SVC(
    C=1.0,
    kernel="rbf",
    max_iter=10000,
    tol=0.001,
    class_weight=class_weight,
    random_state=RANDOM_STATE,
)

In [11]:
# Scorers to evaluate. This single list drives both the cross-validation call
# and the summary printout, so adding or removing a metric is a one-line change.
scoring_names = ["accuracy", "balanced_accuracy", "f1", "roc_auc"]

metrics = cross_validate(
    best_model,
    X,
    y,
    cv=kfold,
    scoring=scoring_names,
)

# Per-fold results for each scorer are read from the "test_<name>" entry;
# report them as mean +- standard deviation across the folds.
for scoring_name in scoring_names:
    fold_scores = metrics[f"test_{scoring_name}"]
    print(f"{scoring_name}: {fold_scores.mean():.4f} +- {fold_scores.std():.4f}")

accuracy: 0.9121 +- 0.0064
balanced_accuracy: 0.9029 +- 0.0126
f1: 0.9460 +- 0.0040
roc_auc: 0.9643 +- 0.0052
