# Multivariate Fuzzy C-means method: Implementation

## Equations

### $J= \sum_{i=1}^{c} \sum_{k=1}^{n} \sum_{j=1}^{p} \left(u_{ijk} \right)^{m} d_{ijk}$ - Objective function to minimize.

### $d_{ijk} = \left(x_{jk} - y_{ij} \right)^{2}$ - euclidian distance squared.

### $ y_{ij} = \frac{\sum_{k=1}^{n} \left(u_{ijk} \right)^{m} x_{jk}} {\sum_{k=1}^{n} \left(u_{ijk}\right)^{m}}$ - prototype coordinate of a given cluster in feature j.

### $ u_{ijk} =  \left[\sum_{h=1}^{c}\sum_{l=1}^{p} \left(\frac{d_{ijk}}{d_{hlk}}\right)^{(1/(m-1))}  \right]^{-1} $ - membership degree of pattern k in cluster $C_{i}$ on the feature j.

### $\delta_{ik} = \sum_{j=1}^{p} u_{ijk}$ - represents an aggregation measure for all the p features.

## Constraints:

### - $u_{ijk} \in [0, 1]$ for all i, j and k;
### - $0 < \sum_{j=1}^{p} \sum_{k=1}^{n} u_{ijk} < n$ for all i and
### - $\sum_{i=1}^{c}\sum_{j=1}^{p}u_{ijk} = 1$ for all k.

In [90]:
import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_rand_score
import seaborn as sns

In [91]:
df = sns.load_dataset('iris')
df["species"].replace({"setosa": 0, "versicolor": 1, "virginica": 2}, inplace=True)
df.columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm", "Class"]
labels = df["Class"].values
df.drop("Class", axis=1, inplace=True)
dados = df.to_numpy()

In [92]:
class MFCM():
    def __init__(self, c, X, m):
        self.c = c
        self.n = X.shape[0]
        self.p = X.shape[1]
        self.m = m
    
    def initialize_u(self):
        return np.random.dirichlet(alpha=np.ones(self.c * self.p),
                                   size=self.n).reshape(self.n, self.c, self.p)
    
    def find_centroides(self, X, U):
        return np.sum((U ** self.m) * X[:, np.newaxis, :], axis=0) / np.sum(U ** self.m, axis=0)
    
    def get_distances(self, X, V): # as vezes tem umas distâncias muito pequenas
        return (X[:, np.newaxis, :] - V[np.newaxis, :, :]) ** 2
    
    def update_u(self, D):
        ratio = (D[:, np.newaxis, np.newaxis, :, :] / D[:, :, :, np.newaxis, np.newaxis]) ** (1/(self.m-1))
        return 1 / (np.sum(ratio, axis=(3, 4)))
    
    def get_objective_function(self, U, D):
        return np.sum((U ** self.m)*D)

In [93]:
def mfcm_run(dados, num_clusters, m=2, max_iter=10**3, epsilon=1e-5):
    mfcm = MFCM(c=num_clusters, X=dados, m=m) # create the MFCM object
    
    U = mfcm.initialize_u() # initialize the membership matrix
    
    for _ in range(max_iter):
        #print(_)
        centroids = mfcm.find_centroides(dados, U)
        D = mfcm.get_distances(dados, centroids)
        new_U = mfcm.update_u(D)
        if np.linalg.norm(U - new_U) < epsilon:
            break
        U = new_U
    
    #print(f"Membership matrix:\n{U}")
    Delta = np.sum(U, axis=2)  # summing over the second axis (variables `j`)
    #print(f"Delta matrix:\n{Delta}")
    
    return centroids, U, Delta

In [94]:
#dados = np.array([[1, 1], [1, 2], [2, 1], [2, 2], [4, 4], [4, 5], [5, 4], [5, 5]])
#centroids, U, Delta = mfcm_run(dados, 3, m=2)

In [95]:
def monte_carlo_simulation(dados, labels, num_clusters, num_trials):
    results = []
    for _ in range(num_trials):
        centroids, U, Delta = mfcm_run(dados, num_clusters)
        #print(f"Centroids:\n{centroids}")
        if np.array_equal(centroids[0], centroids[1]) or np.array_equal(centroids[0], centroids[2]) or np.array_equal(centroids[1], centroids[2]):
            print("Centroids are equal")
            exemplo_matriz = U
        predicted_labels = np.argmax(Delta, axis=1)
        #print(predicted_labels)
        ari = adjusted_rand_score(labels, predicted_labels)
        results.append(ari)
    mean_rand_index = np.mean(results)
    std_rand_index = np.std(results)
    return mean_rand_index, std_rand_index, exemplo_matriz

In [96]:
num_clusters = 3
num_trials = 100
mean_rand_index, std_rand_index, U_exemplo = monte_carlo_simulation(dados, labels, num_clusters, num_trials)

print(f"Mean ARI: {mean_rand_index}")
print(f"Std ARI: {std_rand_index}")

Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Centroids are equal
Mean ARI: 0.5212346670417246
Std ARI: 0.09169499694768228


In [97]:
mfcm = MFCM(c=num_clusters, X=dados, m=2)
centroids = mfcm.find_centroides(dados, U_exemplo)
centroids

array([[4.97521453, 3.81245839, 5.09127594, 0.28939754],
       [4.97521453, 3.81245839, 5.09127594, 0.28939754],
       [4.97521453, 3.81245839, 1.50896196, 1.93417016]])