Open
Description
Describe the bug
I am training a Gaussian Process classifier on a 200-point dataset, but after 1.5 hours it still has not produced a result. The same code runs without problems on an Intel-CPU Mac; the problem only appears when the same code is run on an M-chip (Apple Silicon) Mac.
Steps/Code to Reproduce
import numpy as np
import pandas as pd
from scipy.special import logsumexp
from sklearn.datasets import make_moons, make_circles, make_classification
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from copy import deepcopy
def generate_data(n, seed, shape='circular', noise=0.5):
    """Generate a noisy two-feature, two-class toy dataset.

    Parameters
    ----------
    n : int
        Number of samples to draw; must be even.
    seed : int
        Seed passed to ``np.random.seed``. Note that this reseeds NumPy's
        global legacy generator on every call.
    shape : {'circular', 'binormal'}
        Layout of the two classes.
        ``'circular'``: class 0 sits at the origin and class 1 on the unit
        circle (the binary label doubles as the radius), plus Gaussian noise.
        ``'binormal'``: two Gaussian clusters centred at (0.5, -0.5) for
        class 0 and (-0.5, 0.5) for class 1.
    noise : float
        Standard deviation of the Gaussian noise (``'circular'``) or of the
        clusters themselves (``'binormal'``).

    Returns
    -------
    xs : numpy.ndarray of shape (n, 2)
        The sampled points.
    ys : numpy.ndarray of shape (n,)
        Binary class labels (0 or 1).

    Raises
    ------
    AssertionError
        If ``n`` is odd.
    ValueError
        If ``shape`` is not one of the supported layouts. This includes
        ``'moon'``, which was only ever a placeholder here and previously
        failed with an ``UnboundLocalError``.
    """
    np.random.seed(seed)
    var = noise
    assert n % 2 == 0
    if shape == 'circular':
        # sample polar coordinates
        angles = np.random.uniform(low=0, high=2*np.pi, size=n)
        # the binary label is reused as the radius: 0 -> origin, 1 -> unit circle
        radii = ys = np.random.binomial(n=1, p=0.5, size=n)
        # transform to cartesian coordinates and add noise
        x1 = np.sin(angles)*radii + np.random.normal(scale=var, size=n)
        x2 = np.cos(angles)*radii + np.random.normal(scale=var, size=n)
    elif shape == 'binormal':
        ys = np.random.binomial(n=1, p=0.5, size=n)
        # cluster means are mirrored across the line x1 == x2
        mu_1 = 0.5 - ys
        mu_2 = ys - 0.5
        x1 = np.random.normal(loc=mu_1, scale=var, size=n)
        x2 = np.random.normal(loc=mu_2, scale=var, size=n)
    else:
        # Fail loudly instead of falling through to undefined x1/x2.
        raise ValueError(
            f"unsupported shape {shape!r}; expected 'circular' or 'binormal'"
        )
    xs = np.array([x1, x2]).T
    return xs, ys
def get_datasets(seed, n_samples=100, n_test_samples=200):
    """Build (train, test) pairs for the moon, circular and binormal problems.

    Each split is whatever ``make_moons`` / ``generate_data`` returns for the
    requested size. Training splits are generated with ``seed`` and test
    splits with ``seed + 1000``.

    Returns a 3-tuple ``(moon_set, circular_set, binormal_set)``, where each
    element is a ``(train, test)`` tuple.
    """
    test_seed = seed + 1000

    # Two interleaving half-moons from scikit-learn.
    moon_train = make_moons(n_samples=n_samples, noise=0.3, random_state=seed)
    moon_test = make_moons(n_samples=n_test_samples, noise=0.3, random_state=test_seed)

    # Circular layout from generate_data.
    circular_train = generate_data(n=n_samples, seed=seed, shape='circular', noise=0.3)
    circular_test = generate_data(n=n_test_samples, seed=test_seed, shape='circular', noise=0.3)

    # Two Gaussian clusters, with a wider spread than the other problems.
    binormal_train = generate_data(n=n_samples, seed=seed, shape='binormal', noise=0.6)
    binormal_test = generate_data(n=n_test_samples, seed=test_seed, shape='binormal', noise=0.6)

    return (
        (moon_train, moon_test),
        (circular_train, circular_test),
        (binormal_train, binormal_test),
    )
def get_models(clf, reps, n_samples=100):
    """Fit a fresh copy of ``clf`` on every dataset for every seed.

    For each seed in ``range(reps)`` the three datasets from ``get_datasets``
    are generated with ``n_samples`` training points, and a deep copy of
    ``clf`` is fitted on each training split. Test splits are not used.

    Returns a list of three lists, one per dataset in the order returned by
    ``get_datasets``, each holding ``reps`` fitted estimators.
    """
    fitted = [[], [], []]
    for seed in range(reps):
        datasets = get_datasets(seed, n_samples=n_samples)
        for idx, (train_split, test_split) in enumerate(datasets):
            features, labels = train_split
            _test_features, _test_labels = test_split  # unpacked but unused
            # Copy so that the template estimator is never fitted itself.
            model = deepcopy(clf)
            model.fit(features, labels)
            fitted[idx].append(model)
    return fitted
# Reproduction driver: fits 64 seeds x 3 datasets = 192 GP classifiers, each
# on a 200-point training set. This is the call reported as never finishing
# on the M-chip Mac.
get_models(GaussianProcessClassifier(1.0 * RBF(1.0)), reps=64, n_samples=200)
Expected Results
The call returns the trained models.
Actual Results
The run takes too long and never produces a result, so I stopped it with a KeyboardInterrupt.
Versions
System:
python: 3.10.4 (main, Mar 27 2024, 14:28:43) [Clang 15.0.0 (clang-1500.3.9.4)]
executable: /Users/hb70ur/.pyenv/versions/3.10.4/envs/UE/bin/python
machine: macOS-14.4-arm64-arm-64bit
Python dependencies:
sklearn: 1.1.2
pip: 24.0
setuptools: 58.1.0
numpy: 1.26.4
scipy: 1.12.0
Cython: None
pandas: 2.2.1
matplotlib: 3.8.3
joblib: 1.3.2
threadpoolctl: 3.4.0
Built with OpenMP: True
threadpoolctl info:
user_api: openmp
internal_api: openmp
num_threads: 12
prefix: libomp
filepath: /Users/hb70ur/.pyenv/versions/3.10.4/envs/UE/lib/python3.10/site-packages/sklearn/.dylibs/libomp.dylib
version: None
user_api: blas
internal_api: openblas
num_threads: 12
prefix: libopenblas
filepath: /Users/hb70ur/.pyenv/versions/3.10.4/envs/UE/lib/python3.10/site-packages/numpy/.dylibs/libopenblas64_.0.dylib
version: 0.3.23.dev
threading_layer: pthreads
architecture: armv8
user_api: blas
internal_api: openblas
num_threads: 12
prefix: libopenblas
filepath: /Users/hb70ur/.pyenv/versions/3.10.4/envs/UE/lib/python3.10/site-packages/scipy/.dylibs/libopenblas.0.dylib
version: 0.3.21.dev
threading_layer: pthreads
architecture: armv8