In [None]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC

from umap import UMAP
import warnings
warnings.filterwarnings("ignore")
# Synthetic binary classification problem: 1000 samples with 300 features,
# 250 of them informative and none redundant or repeated.
X, y = make_classification(
    n_classes=2,
    n_samples=1000,
    n_features=300,
    n_informative=250,
    n_repeated=0,
    n_redundant=0,
    random_state=1212,
)

# Hold out 20% of the samples as the final test set; the remaining 80% is
# what the grid searches below are fitted on.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# Baseline: a linear SVM fitted directly on the raw features.
# `verbose=4` makes the underlying solver print its per-iteration trace.
svc = LinearSVC(random_state=123, dual=False, verbose=4)

# Candidate regularisation strengths: powers of ten from 10**-3 to 10**3.
params_grid = dict(C=[10**exponent for exponent in range(-3, 4)])

# Grid search over C using GridSearchCV's default cross-validation settings,
# then report the accuracy of the selected model on the held-out test set.
clf = GridSearchCV(estimator=svc, param_grid=params_grid)
clf.fit(X_train, y_train)
print(f"Accuracy on the test set with raw data: {clf.score(X_test, y_test):.3f}")

# Second experiment: embed the features with UMAP first, then classify the
# embedding with the same linear SVM configuration as above.
umap = UMAP(random_state=42)
pipeline = Pipeline(steps=[("umap", umap), ("svc", svc)], verbose=True)

# Joint grid over the UMAP hyper-parameters and the SVM regularisation
# strength. Keys use the "<step name>__<parameter>" form so that each value
# is routed to the matching pipeline step.
params_grid_pipeline = dict(
    umap__n_neighbors=[5, 10, 15, 20],
    umap__n_components=[2, 4, 5, 10, 50, 100],
    svc__C=[10**exponent for exponent in range(-3, 2)],  # 10**-3 .. 10**1
)

# Search the whole grid (also recording training scores), then report the
# accuracy of the selected pipeline on the held-out test set.
clf_pipeline = GridSearchCV(
    estimator=pipeline,
    param_grid=params_grid_pipeline,
    return_train_score=True,
    verbose=4,
)
clf_pipeline.fit(X_train, y_train)

umap_accuracy_message = (
    f"Accuracy on the test set with UMAP transformation: "
    f"{clf_pipeline.score(X_test, y_test):.3f}"
)
print(umap_accuracy_message)

[LibLinear]iter  1 act 4.498e-01 pre 4.316e-01 delta 9.952e-02 f 6.400e-01 |g| 1.218e+01 CG   5
cg reaches trust region boundary
iter  2 act 6.009e-02 pre 5.232e-02 delta 1.173e-01 f 1.902e-01 |g| 2.064e+00 CG  13
iter  3 act 2.752e-02 pre 2.582e-02 delta 1.173e-01 f 1.301e-01 |g| 9.359e-01 CG  16
iter  4 act 2.291e-03 pre 5.600e-03 delta 3.687e-02 f 1.026e-01 |g| 3.977e-01 CG  17
iter  5 act 3.725e-03 pre 3.862e-03 delta 3.687e-02 f 1.003e-01 |g| 6.212e-01 CG  11
iter  6 act 2.693e-04 pre 3.271e-04 delta 3.687e-02 f 9.657e-02 |g| 1.292e-01 CG  17
iter  7 act 5.328e-05 pre 5.328e-05 delta 3.687e-02 f 9.630e-02 |g| 7.530e-02 CG   9
iter  8 act 1.980e-06 pre 1.980e-06 delta 3.687e-02 f 9.624e-02 |g| 6.853e-03 CG  20
[LibLinear]iter  1 act 4.393e-01 pre 4.200e-01 delta 9.702e-02 f 6.400e-01 |g| 1.234e+01 CG   5
cg reaches trust region boundary
iter  2 act 7.039e-02 pre 6.073e-02 delta 1.154e-01 f 2.007e-01 |g| 2.268e+00 CG  13
iter  3 act 2.661e-02 pre 2.530e-02 delta 1.154e-01 f 1.303e-0