In [1]:
import numpy as np
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import make_blobs
from sklearn.utils import shuffle
from sklearn.utils._testing import create_memmap_backed_data, set_random_state
from sklearn.utils import check_array

In [2]:
# Build a small standardized blob dataset, plus a variant with 5 uniform
# noise points appended.
X, y = make_blobs(n_samples=50, random_state=1)
X, y = shuffle(X, y, random_state=7)
X = StandardScaler().fit_transform(X)
rng = np.random.RandomState(7)
X_noise = np.concatenate(
    [X, rng.uniform(low=-3, high=3, size=(5, 2))]
)

# Parameters calculated via the agglomerative initialization part of
# GaussianMixtureIC.
params = dict(
    n_components=3,
    covariance_type='tied',
    init_params='kmeans',
    n_init=1,
    max_iter=5,
    reg_covar=0,
    random_state=0,
    weights_init=np.array([0.32, 0.34, 0.34]),
    means_init=np.array([
        [-0.28980479, -1.11348895],
        [1.29539205, 1.27690036],
        [-1.0226346, -0.22891076],
    ]),
    precisions_init=np.array([
        [22.6024477, 6.76433097],
        [6.76433097, 34.21063503],
    ]),
)

gm = GaussianMixture(**params)
set_random_state(gm)


def _fit_and_report_bic(label, train_data, eval_data):
    """Refit the shared ``gm`` on ``train_data`` and print its BIC on ``eval_data``.

    Parameters
    ----------
    label : str
        Text printed in front of the BIC value.
    train_data : array-like
        Data handed to ``gm.fit``.
    eval_data : array-like
        Data handed to ``gm.bic``.
    """
    gm.fit(train_data)
    print(label, gm.bic(eval_data))


# Baseline: plain in-memory array (no memmap).
_fit_and_report_bic('without memmap:', X, X)

# Rebind the data to memmap-backed versions; from here on, BIC is always
# evaluated on the memmap-backed X, whatever container was used for fitting.
X, y, X_noise = create_memmap_backed_data([X, y, X_noise])
_fit_and_report_bic('using memmap:', X, X)

# Fit on nested lists (as in "sklearn/utils/estimator_checks.py::check_clustering").
_fit_and_report_bic('with lists:', X.tolist(), X)

# Fit on a copy of the memmap-backed array.
_fit_and_report_bic('using a copy:', X.copy(), X)

without memmap: 105.20017905516684
using memmap: 105.20017905516679
with lists: 105.20017905516684
using a copy: 105.20017905516684
