In [1]:
import numpy as np
from sklearn.cluster import MeanShift, estimate_bandwidth
from sklearn.datasets import make_blobs


In [9]:
# #############################################################################
# Generate sample data
# Three 2-D blob centres; 10000 points are drawn around them with a standard
# deviation of 0.6.
centers = [[1, 1], [-1, -1], [1, -1]]
# A fixed seed makes the samples (and therefore the estimated bandwidth and
# the clusters found further down) identical on every Restart & Run All.
X, _ = make_blobs(n_samples=10000, centers=centers, cluster_std=0.6,
                  random_state=0)


In [10]:
# Inspect the generated sample array (one row per point, two coordinates each).
X

array([[-0.65207096, -0.37073488],
       [ 0.22335351, -0.77303389],
       [ 1.98714369,  1.5418393 ],
       ...,
       [-1.27684009, -1.60643971],
       [ 0.74644675, -0.23451677],
       [ 1.18465668, -0.25175197]])

In [5]:
# #############################################################################
# Compute clustering with MeanShift

# Rather than hand-picking the bandwidth, estimate it from the data with
# estimate_bandwidth (quantile 0.2, using 500 samples).
bandwidth = estimate_bandwidth(
    X,
    n_samples=500,
    quantile=0.2,
)

In [6]:
# Show the bandwidth value returned by estimate_bandwidth.
bandwidth

1.0225502744029276

In [None]:



# Fit MeanShift with the estimated bandwidth and pull out the per-sample
# labels and the cluster centres.
ms = MeanShift(bandwidth=bandwidth, bin_seeding=True)
ms.fit(X)
labels, cluster_centers = ms.labels_, ms.cluster_centers_

# The number of clusters is the number of distinct labels.
labels_unique = np.unique(labels)
n_clusters_ = labels_unique.size

print("number of estimated clusters : %d" % n_clusters_)

# #############################################################################
# Plot result
from itertools import cycle
import matplotlib.pyplot as plt

# Reuse figure 1 and wipe whatever was drawn on it before.
plt.figure(1)
plt.clf()

# One colour code per cluster; cycle() restarts the sequence when exhausted.
colors = cycle('bgrcmykbgrcmykbgrcmykbgrcmyk')
for k, col in zip(range(n_clusters_), colors):
    # Samples assigned to cluster k, drawn as dots in the cluster's colour.
    members = X[labels == k]
    plt.plot(members[:, 0], members[:, 1], col + '.')
    # The cluster centre, drawn as a large black-edged circle in that colour.
    center = cluster_centers[k]
    plt.plot(
        center[0],
        center[1],
        'o',
        markerfacecolor=col,
        markeredgecolor='k',
        markersize=14,
    )
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()

In [11]:
import numpy as np
from sklearn.cluster import DBSCAN

# Six 2-D points used as a toy input. Note that this rebinds X.
X = np.array([
    [1, 2], [2, 2], [2, 3],
    [8, 7], [8, 8],
    [25, 80],
])
# Build the estimator (eps=3, min_samples=2), fit it on X, then display the
# label assigned to each point as the cell's output.
clustering = DBSCAN(min_samples=2, eps=3)
clustering.fit(X)
clustering.labels_

array([ 0,  0,  0,  1,  1, -1], dtype=int64)

In [12]:
# Show the six-point array that was passed to DBSCAN in the previous cell.
X

array([[ 1,  2],
       [ 2,  2],
       [ 2,  3],
       [ 8,  7],
       [ 8,  8],
       [25, 80]])

In [18]:
from sklearn.mixture import GaussianMixture

In [19]:
# Fit a two-component mixture: the recorded `gm.covariances_` output contains
# two covariance matrices, whereas the default n_components=1 yields only one.
# random_state pins the initialisation so the fit is reproducible.
gm = GaussianMixture(n_components=2, random_state=0).fit(X)

In [17]:
# Display the covariance matrices of the fitted mixture.
# NOTE(review): this cell's execution count (17) is lower than that of the
# cell that fits `gm` (19), so the recorded output may come from an earlier
# fit. Re-run after fitting to confirm.
gm.covariances_

array([[[9.7600010e+00, 7.9200000e+00],
        [7.9200000e+00, 6.6400010e+00]],

       [[1.0000000e-06, 9.6935228e-27],
        [9.6935228e-27, 1.0000000e-06]]])

In [1]:
from sklearn.datasets import load_iris
from sklearn.gaussian_process import GaussianProcessClassifier

# Load the iris features and targets as plain arrays. Note that this rebinds X.
X, y = load_iris(return_X_y=True)

# No kernel is passed, so the estimator falls back to its default kernel.
# To set one explicitly, import RBF from sklearn.gaussian_process.kernels and
# pass kernel=1.0 * RBF(1.0) (presumably equivalent to the default - confirm
# against the installed scikit-learn version).
gpc = GaussianProcessClassifier().fit(X, y)

# Only the last expression of a cell is displayed, so the score must be
# printed explicitly; as a bare mid-cell expression it was silently discarded.
# It is computed on the same data the model was fitted on, so it is a
# training score rather than a held-out estimate.
print("training score: %.3f" % gpc.score(X, y))

# Predicted class probabilities for the first two samples (cell output).
gpc.predict_proba(X[:2, :])


array([[0.88823135, 0.0560192 , 0.05574944],
       [0.86374261, 0.06999318, 0.06626421]])

In [2]:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

# Synthetic classification data: 1000 samples with 4 features, 2 of them
# informative and none redundant; seeded and left unshuffled.
# Note that this rebinds X and y.
X, y = make_classification(n_samples=1000, n_features=4,
                           n_informative=2, n_redundant=0,
                           random_state=0, shuffle=False)

# Seed the forest as well: the data above is deterministic, but an unseeded
# forest can still produce a different model (and prediction) on every run.
clf = RandomForestClassifier(random_state=0)
clf.fit(X, y)

# Predicted class for a single all-zero sample.
print(clf.predict([[0, 0, 0, 0]]))

[1]


In [3]:
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis