# Multivariate Fuzzy C-means method: Implementation

## Equations

### $J= \sum_{i=1}^{c} \sum_{k=1}^{n} \sum_{j=1}^{p} \left(u_{ijk} \right)^{m} d_{ijk}$ - Objective function to minimize.

### $d_{ijk} = \left(x_{jk} - y_{ij} \right)^{2}$ - squared Euclidean distance between pattern k and the prototype of cluster i on feature j.

### $ y_{ij} = \frac{\sum_{k=1}^{n} \left(u_{ijk} \right)^{m} x_{jk}} {\sum_{k=1}^{n} \left(u_{ijk}\right)^{m}}$ - prototype coordinate of a given cluster in feature j.

### $ u_{ijk} =  \left[\sum_{h=1}^{c}\sum_{l=1}^{p} \left(\frac{d_{ijk}}{d_{hlk}}\right)^{(1/(m-1))}  \right]^{-1} $ - membership degree of pattern k in cluster $C_{i}$ on the feature j.

### $\delta_{ik} = \sum_{j=1}^{p} u_{ijk}$ - represents an aggregation measure for all the p features.

## Constraints:

### - $u_{ijk} \in [0, 1]$ for all i, j and k;
### - $0 < \sum_{j=1}^{p} \sum_{k=1}^{n} u_{ijk} < n$ for all i and
### - $\sum_{i=1}^{c}\sum_{j=1}^{p}u_{ijk} = 1$ for all k.

## Importing libraries

In [11]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import adjusted_rand_score
from scipy.special import comb

class MFCM:
    """Multivariate Fuzzy C-Means (MFCM) clustering.

    Every sample k carries one membership degree u[k, i, j] per
    (cluster i, feature j) pair.  For each sample the degrees sum to 1
    over all clusters and features.

    Parameters
    ----------
    c : int
        Number of clusters.
    X : array-like of shape (n, p)
        Data matrix with one sample per row and one feature per column.
    m : float, default 2
        Fuzzifier exponent; must be strictly greater than 1 because the
        membership update uses the exponent 1 / (m - 1).
    max_iter : int, default 100
        Maximum number of centroid/membership update rounds in ``fit``.
    tol : float, default 1e-5
        ``fit`` stops early once the 2-norm of the change in the flattened
        membership tensor falls below this value.

    Raises
    ------
    ValueError
        If ``X`` is not two-dimensional or ``m <= 1``.
    """

    def __init__(self, c, X, m=2, max_iter=100, tol=1e-5):
        X = np.asarray(X)
        if X.ndim != 2:
            raise ValueError(
                f"X must be a 2-D array of shape (n, p); got ndim={X.ndim}"
            )
        if m <= 1:
            raise ValueError(f"fuzzifier m must be greater than 1; got {m}")

        self.c = c  # number of clusters
        self.n = X.shape[0]  # number of samples
        self.p = X.shape[1]  # number of features
        self.m = m  # fuzzifier parameter
        self.max_iter = max_iter
        self.tol = tol
        self.X = X

    def initialize_u(self):
        """Draw a random membership tensor of shape (n, c, p).

        Each sample's c * p degrees come from a flat Dirichlet distribution,
        so they are non-negative and sum to 1.  Uses NumPy's global random
        state, hence ``np.random.seed`` controls reproducibility.
        """
        flat = np.random.dirichlet(alpha=np.ones(self.c * self.p), size=self.n)
        return flat.reshape(self.n, self.c, self.p)

    def find_centroids(self, U):
        """Return the (c, p) prototypes: per-feature means weighted by U**m."""
        weights = U ** self.m
        # X is broadcast over the cluster axis: (n, 1, p) against (n, c, p).
        numerator = np.sum(weights * self.X[:, np.newaxis, :], axis=0)
        denominator = np.sum(weights, axis=0)
        return numerator / denominator

    def get_distances(self, V):
        """Return the (n, c, p) per-feature squared differences to prototypes V."""
        return (self.X[:, np.newaxis, :] - V[np.newaxis, :, :]) ** 2

    def update_u(self, D):
        """Compute memberships from the (n, c, p) distance tensor ``D``.

        Implements
            u_ijk = [ sum_h sum_l (d_ijk / d_hlk) ** (1 / (m - 1)) ] ** -1
        in the algebraically equivalent form
            u_ijk = d_ijk ** (-1/(m-1)) / sum_h sum_l d_hlk ** (-1/(m-1)),
        which needs O(n*c*p) memory instead of materialising every pairwise
        ratio in an (n, c, p, c, p) tensor.
        """
        D = np.clip(D, 1e-10, np.inf)  # avoid log(0) / division by zero

        # Work in the log domain and shift by the per-sample maximum so the
        # exponentials stay in [0, 1] even when m is close to 1.
        log_weights = -np.log(D) / (self.m - 1)
        log_weights -= log_weights.max(axis=(1, 2), keepdims=True)
        weights = np.exp(log_weights)

        return weights / weights.sum(axis=(1, 2), keepdims=True)

    def fit(self):
        """Alternate centroid and membership updates until convergence.

        Sets ``self.U`` (memberships, shape (n, c, p)), ``self.V``
        (prototypes, shape (c, p)) and ``self.delta`` (memberships summed
        over features, shape (n, c)).  Returns ``self``.
        """
        self.U = self.initialize_u()

        for _ in range(self.max_iter):
            # update_u returns a fresh array, so keeping a reference to the
            # previous tensor is enough; no copy is needed.
            U_old = self.U

            self.V = self.find_centroids(U_old)
            D = self.get_distances(self.V)
            self.U = self.update_u(D)

            if np.linalg.norm(self.U - U_old) < self.tol:
                break

        self.delta = np.sum(self.U, axis=2)  # Sum over features
        return self

    def predict(self):
        """Return, per sample, the cluster index with the largest ``delta``.

        Requires ``fit`` to have been called, since it reads ``self.delta``.
        """
        return np.argmax(self.delta, axis=1)

    def get_objective(self):
        """Return J = sum of U**m * D for the fitted ``U`` and ``V``.

        Requires ``fit`` to have been called.
        """
        D = self.get_distances(self.V)
        return np.sum((self.U ** self.m) * D)

def fuzzy_rand_index(true_labels, U):
    """Fuzzy Rand index between crisp reference labels and a fuzzy partition.

    The memberships are first summed over features, giving one vector
    ``delta[k]`` of cluster degrees per sample.  For a pair of samples the
    degree to which the fuzzy partition puts them in the same cluster is
    ``s = sum(min(delta[i], delta[j]))``.  The pair contributes ``s`` when
    the reference labels agree and ``1 - s`` when they differ; the index is
    the mean contribution over all unordered pairs, so a crisp partition
    identical to the reference scores exactly 1.

    Parameters
    ----------
    true_labels : array-like of length n
        Crisp reference label of every sample.
    U : ndarray of shape (n, c, p)
        Membership tensor.  Assumed to sum to 1 over clusters and features
        for every sample (the MFCM constraint); otherwise ``1 - s`` is not
        a valid degree and the result is only kept in range by clipping.

    Returns
    -------
    float
        Index in [0, 1]; NaN when there are fewer than two samples.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of samples in U.
    """
    delta = np.sum(U, axis=2)  # aggregate over features -> shape (n, c)
    n, c = delta.shape

    labels = np.asarray(true_labels).ravel()
    if labels.size != n:
        raise ValueError(
            f"true_labels has {labels.size} entries but U describes {n} samples"
        )
    if n < 2:
        return np.float64(np.nan)  # no pairs to compare

    # Same-cluster degree for every pair, accumulated cluster by cluster so
    # that only an (n, n) matrix is ever held in memory.
    same_degree = np.zeros((n, n))
    for k in range(c):
        column = delta[:, k]
        same_degree += np.minimum(column[:, np.newaxis], column[np.newaxis, :])

    # Keep each unordered pair once (strict upper triangle).
    rows, cols = np.triu_indices(n, k=1)
    pair_degree = same_degree[rows, cols]
    same_in_reference = labels[rows] == labels[cols]

    agreement = np.where(same_in_reference, pair_degree, 1.0 - pair_degree)

    return np.clip(agreement.mean(), 0, 1)  # guard against rounding drift

# Fetch the Iris data set and z-score every feature column
# (subtract the column mean, divide by the column standard deviation).
iris = load_iris()
true_labels = iris.target
X = (iris.data - iris.data.mean(axis=0)) / iris.data.std(axis=0)

# Seed NumPy's global generator first: the membership initialisation draws
# from it, so this makes the run repeatable.
np.random.seed(42)
mfcm = MFCM(c=3, X=X, m=2, max_iter=200).fit()  # fit() returns the model
print(mfcm.V)  # cluster prototypes, one row per cluster
print(mfcm.U)  # membership tensor

# Compare the result with the reference species labels:
# hard assignments for the ARI, the full membership tensor for the FRI.
ari = adjusted_rand_score(true_labels, mfcm.predict())
fri = fuzzy_rand_index(true_labels, mfcm.U)

print(f"Fuzzy Rand Index: {fri:.3f}")
print(f"Adjusted Rand Index: {ari:.3f}")
print(f"Objective function: {mfcm.get_objective():.3f}")

[[-0.29782313  0.09815961  1.11064667  0.1419341 ]
 [-0.5387631  -0.13472459  0.56957599 -0.2504185 ]
 [-0.0502545   0.33170296 -1.30363754  1.18645221]]
[[[3.59799352e-03 1.54211822e-03 2.17695175e-04 6.15666937e-04]
  [9.98320402e-03 9.82387696e-04 3.58520145e-04 1.15284318e-03]
  [1.80807625e-03 2.76819094e-03 9.76764397e-01 2.08906430e-04]]

 [[1.04874511e-05 1.41449604e-04 1.24721363e-06 3.52726328e-06]
  [2.05184114e-05 9.94175742e-01 2.05402445e-06 6.60483970e-06]
  [6.27380527e-06 3.48450614e-05 5.59605361e-03 1.19686138e-06]]

 [[9.13168632e-06 2.03712215e-04 1.71742687e-06 5.08497049e-06]
  [1.50690988e-05 5.03513428e-05 2.79243698e-06 9.52166377e-06]
  [6.05908020e-06 9.98457481e-01 1.23735346e-03 1.72541835e-06]]

 ...

 [[6.29677384e-06 1.42157127e-04 8.88814883e-05 9.05229883e-06]
  [4.22820917e-06 9.99148548e-01 1.20448381e-04 4.42546389e-06]
  [1.05217321e-05 3.50193542e-05 1.67014441e-06 4.28750876e-04]]

 [[2.66974572e-02 2.98255525e-02 4.52180339e-01 8.32950022e-03]
