In [1]:
import time
import numpy as np
from scipy.spatial.distance import cdist
from sklearn.cluster import KMeans


def fcm(data, n_clusters=1, n_init=30, m=2, max_iter=1, tol=1e-16):

    min_cost = np.inf
    for iter_init in range(n_init):

        # Randomly initialize centers
#         centers = data[np.random.choice(
#             data.shape[0], size=n_clusters, replace=False
#             ), :]
        centers=[[1,2], [8,7],[14, 5]]

        # Compute initial distances
        # Zeros are replaced by eps to avoid division issues
        dist = np.fmax(
            cdist(centers, data, metric='sqeuclidean'),
            np.finfo(np.float64).eps
        )

        for iter1 in range(max_iter):

            # Compute memberships       
            u = (1 / dist) ** (1 / (m-1))
            um = (u / u.sum(axis=0))**m

            # Recompute centers
            prev_centers = centers
            centers = um.dot(data) / um.sum(axis=1)[:, None]

            dist = cdist(centers, data, metric='sqeuclidean')

            if np.linalg.norm(centers - prev_centers) < tol:
                break

        # Compute cost
        cost = np.sum(um * dist)
        if cost < min_cost:
            min_cost = cost
            min_centers = centers
            mem = um.argmax(axis=0)

    return min_centers, mem, um




In [2]:
points = {'A':(2,3),
          'B':(7,9),
          'C':(11,4),
          'D':(3,3),
          'E':(14,12),
          'F':(4,5),
          'G':(12,5),
          'H':(9,7),
          'J':(6,4),
          'K':(2,9),
          'L':(4,7),
          'M':(6,8)}

XY = np.array(list(points.values()))
X_labels = np.array(list(points.keys())) # global variable used for functions CompiteDistance

N=3

In [3]:
if __name__ == '__main__':
    data = XY
    k = 3

    repeats = 1

    # Time This
    fcm_time = 0
    for iter1 in range(repeats):
        fcm_start = time.time()
        centers, mem, um = fcm(
            data, n_clusters=k, n_init=30, m=2, max_iter=300, tol=1e-16
        )
        fcm_time += (time.time() - fcm_start)
    print('Average FCM time =', fcm_time/repeats)

    # Time This (as well)
    km_time = 0
    for iter1 in range(repeats):
        km_start = time.time()
        km1 = KMeans(
            n_clusters=k, n_init=30, max_iter=300, tol=1e-16
        ).fit(data)
        km_time += (time.time() - km_start)
    print('Average kMeans time =', km_time/repeats)

    print('Ratio of time =', fcm_time / km_time)

Average FCM time = 0.20055508613586426
Average kMeans time = 0.03120732307434082
Ratio of time = 6.426539234336443


In [4]:
mem, centers

(array([0, 1, 2, 0, 2, 0, 2, 2, 0, 1, 1, 1], dtype=int64),
 array([[ 3.51278138,  4.17955506],
        [ 6.18455139,  8.32568971],
        [11.47993327,  5.78004154]]))

In [44]:
um.argmax(axis=0), um.max()

(array([0, 1, 2, 0, 2, 0, 2, 2, 0, 1, 1, 1], dtype=int64), 0.9788386769450795)

In [37]:
km1.labels_

array([0, 2, 1, 0, 1, 0, 1, 1, 0, 2, 2, 2])

In [8]:
import pandas as pd
pd.DataFrame(um.T)

Unnamed: 0,0,1,2
0,0.80016,0.005149,0.001138
1,0.000876,0.876031,0.001185
2,0.002817,0.005048,0.767158
3,0.883755,0.001631,0.000381
4,0.019868,0.104615,0.286872
5,0.867578,0.002869,0.000225
6,0.000137,0.00036,0.939541
7,0.010174,0.157189,0.252668
8,0.43337,0.04769,0.015205
9,0.139702,0.282018,0.009059


In [29]:
centers = data[np.random.choice(data.shape[0], size=k, replace=False)]

In [30]:
data[np.random.choice(data.shape[0], size=2, replace=False)]

array([[ 6,  4],
       [14, 12]])

In [32]:
centers=[[1,2], [8,7],[14, 5]]

In [9]:
min_cost = np.inf
for iter_init in range(n_init):

    # Randomly initialize centers
#         centers = data[np.random.choice(
#             data.shape[0], size=n_clusters, replace=False
#             ), :]
    centers=[[1,2], [8,7],[14, 5]]

    # Compute initial distances
    # Zeros are replaced by eps to avoid division issues
    dist = np.fmax(
        cdist(centers, data, metric='sqeuclidean'),
        np.finfo(np.float64).eps
    )

    for iter1 in range(max_iter):

        # Compute memberships       
        u = (1 / dist) ** (1 / (m-1))
        um = (u / u.sum(axis=0))**m

        # Recompute centers
        prev_centers = centers
        centers = um.dot(data) / um.sum(axis=1)[:, None]

        dist = cdist(centers, data, metric='sqeuclidean')

        if np.linalg.norm(centers - prev_centers) < tol:
            break

    # Compute cost
    cost = np.sum(um * dist)
    if cost < min_cost:
        min_cost = cost
        min_centers = centers
        mem = um.argmax(axis=0)

NameError: name 'n_init' is not defined

In [12]:
points = {'A':(2,3),
          'B':(7,9),
          'C':(11,4),
          'D':(3,3),
          'E':(14,12),
          'F':(4,5),
          'G':(12,5),
          'H':(9,7),
          'J':(6,4),
          'K':(2,9),
          'L':(4,7),
          'M':(6,8)}

XY = np.array(list(points.values()))
X_labels = np.array(list(points.keys())) # global variable used for functions CompiteDistance

N=3
centers=[[1,2], [8,7],[14, 5]]

In [11]:
from sklearn.metrics import pairwise_distances

In [13]:
s = pairwise_distances(XY,centers )

In [16]:
pd.DataFrame(s).to_csv('fuzzy_distances.csv')