In [1]:
from backend.offline.offline_algorithms import Birch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import string

def add_data_frame(birch, df):
    """Feed every row of ``df`` into ``birch`` one data point at a time.

    For each row, ``birch._add_data_point`` is called with the row's index
    label and the row's values, in the order the rows appear in ``df``.

    Args:
        birch: object exposing ``_add_data_point(label, values)``.
        df: pandas DataFrame whose index supplies the labels and whose rows
            supply the value vectors.
    """
    rows = df.values
    for position, label in enumerate(df.index.values):
        birch._add_data_point(label, rows[position])

In [2]:
def plot_clustering(centers, labels, unique_labels, X):
    """Plot cluster centers, an enclosing circle per center, and the labelled points.

    The centers are drawn as 'x' markers. Center ``i`` is paired with label
    ``i`` (for ``i`` in ``range(max(labels) + 1)``) and gets a red, unfilled
    circle whose radius is the largest Euclidean distance from that center to
    any point carrying that label. Finally the points of every entry in
    ``unique_labels`` are drawn as 'o' markers filled with a colour taken from
    the Spectral colormap, and the figure is shown.

    Args:
        centers: 2-D array; columns 0 and 1 are used as plot coordinates.
        labels: per-point labels compared with ``==`` to build row masks for
            ``X`` -- assumes a NumPy array aligned with the rows of ``X``.
        unique_labels: the labels to colour; its length sets the palette size.
        X: 2-D array of points; columns 0 and 1 are used as plot coordinates.
    """
    def members_of(label):
        # Rows of X whose entry in `labels` equals `label`.
        return X[labels == label]

    plt.plot(centers[:, 0], centers[:, 1], 'x')
    palette = plt.cm.Spectral(np.linspace(0, 1, len(unique_labels)))

    for label, center in zip(range(max(labels) + 1), centers):
        distances = [np.linalg.norm(point - center) for point in members_of(label)]
        # Starting the list with 0 keeps the radius at 0 when no point has this label.
        radius = max([0] + distances)
        outline = plt.Circle(center, radius, color='r', fill=False)
        plt.gca().add_artist(outline)

    for label, shade in zip(unique_labels, palette):
        points = members_of(label)
        plt.plot(points[:, 0], points[:, 1], 'o', markerfacecolor=shade)
    plt.show()
    
def plot_cluster_list(centers, clusters, df):
    """Plot cluster centers and the member points of each cluster.

    The centers are drawn as 'x' markers. Each entry of ``clusters`` is used
    as a list of index labels to look up rows of ``df`` via ``df.loc``; those
    rows are drawn as 'o' markers filled with one colour per cluster from the
    Spectral colormap. The figure is then shown.

    Args:
        centers: 2-D array; columns 0 and 1 are used as plot coordinates.
        clusters: sequence of index-label collections, one per cluster.
        df: pandas DataFrame holding the points; its first two columns are
            used as plot coordinates.
    """
    center_x, center_y = centers[:, 0], centers[:, 1]
    plt.plot(center_x, center_y, 'x')

    palette = plt.cm.Spectral(np.linspace(0, 1, len(clusters)))
    for position, member_ids in enumerate(clusters):
        points = df.loc[member_ids].values
        plt.plot(points[:, 0], points[:, 1], 'o', markerfacecolor=palette[position])
    plt.show()
    
def test(birch, df):
    #print birch.count
    #print birch.is_fitted(mode='local')
    #print brc.is_fitted(mode='global')
    #print('')
    #print birch.get_number_of_clusters(mode='local')
    #print birch.get_number_of_clusters(mode='global')
    local_centers, local_clusters = birch.get_cluster_list(mode='local')
    global_centers, global_clusters = birch.get_cluster_list(mode='global')
    #print(local_centers)
    #print(local_clusters)
    print(global_centers)
    print(global_clusters)
    #print(len(local_centers))
    #print(len(global_centers))
    plot_cluster_list(local_centers, local_clusters, df)
    plot_cluster_list(global_centers, global_clusters, df)

In [3]:
# Seed NumPy's legacy global generator so that a fresh "Restart & Run All"
# draws the same sample every time. Outputs saved from earlier, unseeded runs
# will not match until the notebook is re-executed.
SEED = 0
np.random.seed(SEED)

# Means of five 2-D Gaussian blobs placed along the diagonal.
mean1 = [10, 10]
mean2 = [20, 20]
mean3 = [30, 30]
mean4 = [40, 40]
mean5 = [50, 50]

# Two diagonal covariances: cov1 is used for blobs 1-3, cov2 for blobs 4-5.
cov1 = [[2.5, 0], [0, 2.5]]
cov2 = [[1, 0], [0, 1]]

n = 10  # number of points drawn per blob
X1 = np.random.multivariate_normal(mean1, cov1, n)
X2 = np.random.multivariate_normal(mean2, cov1, n)
X3 = np.random.multivariate_normal(mean3, cov1, n)
X4 = np.random.multivariate_normal(mean4, cov2, n)
X5 = np.random.multivariate_normal(mean5, cov2, n)

# Shuffle the first four blobs together. The fifth blob is appended
# unshuffled so that it only reaches the model in the incremental step below.
X1_4 = np.vstack((X1, X2, X3, X4))
order = np.arange(len(X1_4))
np.random.shuffle(order)
X1_4 = X1_4[order]
X = np.vstack((X1_4, X5))

# Rows are labelled with hex strings ('0x0', '0x1', ...) instead of integers.
df = pd.DataFrame(X, index=[hex(i) for i in range(len(X))])

threshold = 2
# NOTE(review): the meaning of the remaining positional arguments
# ('d1', 'r', 4) is defined by Birch and not visible here -- confirm.
brc = Birch(threshold, 'd1', 'r', 4)

# Initial batch: exactly the rows of the first four blobs. The split point is
# derived from the data instead of a hard-coded 40, so it stays correct if
# `n` is changed.
n_initial = len(X1_4)
add_data_frame(brc, df.iloc[:n_initial])
test(brc, df)

# Incremental adding: feed the remaining rows (the fifth blob) into the same
# model and inspect the clustering again.
add_data_frame(brc, df.iloc[n_initial:])
test(brc, df)


[[ 20.23484904  20.19346678]
 [ 40.39890738  39.54367337]
 [ 30.12107314  29.559135  ]
 [ 10.2774402   10.60103375]]
[['0x0', '0x4', '0x7', '0xa', '0xc', '0xe', '0x14', '0x19', '0x1f', '0x22'], ['0x1', '0x3', '0xd', '0x15', '0x16', '0x1a', '0x1d', '0x21', '0x24', '0x27'], ['0x2', '0x9', '0xb', '0x11', '0x13', '0x18', '0x1c', '0x23', '0x25', '0x26'], ['0x5', '0x1e', '0x6', '0x8', '0xf', '0x10', '0x12', '0x17', '0x1b', '0x20']]
[[ 25.17796109  24.87630089]
 [ 40.39890738  39.54367337]
 [ 10.2774402   10.60103375]
 [ 50.18282742  49.93117015]]
[['0x0', '0x4', '0x7', '0xa', '0xc', '0xe', '0x14', '0x19', '0x1f', '0x22', '0x2', '0x9', '0xb', '0x11', '0x13', '0x18', '0x1c', '0x23', '0x25', '0x26'], ['0x1', '0x3', '0xd', '0x15', '0x16', '0x1a', '0x1d', '0x21', '0x24', '0x27'], ['0x5', '0x1e', '0x6', '0x8', '0xf', '0x10', '0x12', '0x17', '0x1b', '0x20'], ['0x28', '0x29', '0x2a', '0x2b', '0x2c', '0x2d', '0x2e', '0x2f', '0x30', '0x31']]


In [8]:
# Echo the `unique_global_labels` attribute of the `brc` instance created in an earlier cell.
brc.unique_global_labels

['0.0', '2.0', '1.0', '3.0']