# Weighted Multivariate Fuzzy C-means method: Implementation

## Equations

### $d_{ijk} = \left(x_{jk} - y_{ij} \right)^{2}$ - euclidian distance squared.

### $ y_{ij} = \frac{\sum_{k=1}^n(u_{ijk})^mx_{jk}}{\sum_{k=1}^n(u_{ijk})^m}$ - prototype coordinate of a given cluster $i$ in feature $j$.

### $ u_{ijk} =  \left[\sum_{a=1}^c\sum_{b=1}^p\left(\frac{\lambda_{ij}d_{ijk}}{\lambda_{ab}d_{abk}}\right)^\frac{1}{m-1}\right]^{-1} $ - membership degree of pattern $k$ in cluster $C_{i}$ on the feature $j$.

### $\gamma_{ij} = \frac{\left\{\prod_{h=1}^p\left[\sum_{k=1}^n(u_{ihk})^md_{ihk}\right]\right\}^\frac{1}{p}}{\sum_{k=1}^n(u_{ijk})^md_{ijk}}$ - weight of the membership $u_{ijk}$ regarding the cluster $C_i$ and variable $j$

### $\delta_{ik} = \sum_{j=1}^{p} u_{ijk}$ - represents an aggregation measure for all the $p$ features.

## Constraints:

### - $u_{ijk} \in [0, 1]$ for all i, j and k;
### - $0 < \sum_{j=1}^{p} \sum_{k=1}^{n} u_{ijk} < n$ for all i and
### - $\sum_{i=1}^{c}\sum_{j=1}^{p}u_{ijk} = 1$ for all k.

## Importando bibliotecas

In [6]:
import numpy as np
import pandas as pd
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score
import seaborn as sns

## Tratamento dos dados

In [7]:
df = sns.load_dataset('iris')
df["species"].replace({"setosa": 0, "versicolor": 1, "virginica": 2}, inplace=True)
df.columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm", "Class"]
labels = df["Class"].values
df.drop("Class", axis=1, inplace=True)
dados = df.to_numpy()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["species"].replace({"setosa": 0, "versicolor": 1, "virginica": 2}, inplace=True)
  df["species"].replace({"setosa": 0, "versicolor": 1, "virginica": 2}, inplace=True)


## Método de agrupamento

In [8]:
class MFCM():
    def __init__(self, c, X, m):
        self.c = c
        self.n = X.shape[0]
        self.p = X.shape[1]
        self.m = m
    
    def initialize_u(self):
        return np.random.dirichlet(alpha=np.ones(self.c * self.p),
                                   size=self.n).reshape(self.n, self.c, self.p)
    
    def initialize_lambda(self):
        return np.ones((self.c, self.p))
    
    def find_centroides(self, X, U):
        u_m = U ** self.m # (u_ijk)^m
        
        num = u_m * X[:, np.newaxis, :] # (u_ijk)^m * x_jk
        numerador = np.sum(num, axis=0) # sum_k [ (u_ijk)^m * x_jk ]
        
        denominador = np.sum(u_m, axis=0) # sum_k [ (u_ijk)^m ]
        denominador = np.fmax(denominador, np.finfo(np.float64).eps) # evita por zero
        
        centroides = numerador / denominador
        return centroides
    
    def get_distances(self, X, V):
        return (X[:, np.newaxis, :] - V[np.newaxis, :, :]) ** 2

    def update_u(self, D, Lambda):
        # u_ijk = (lambda_ij * d_ijk)^(-P) / Sum_ab [ (lambda_ab * d_abk)^(-P) ]
        # onde P = 1 / (m - 1)

        P = 1.0 / (self.m - 1) # 1 / (m - 1)
        
        D_safe = np.fmax(D, np.finfo(np.float64).eps)
        dist_ponderada = Lambda * D_safe # lambda_ij * d_ijk
        dist_ponderada = np.fmax(dist_ponderada, np.finfo(np.float64).eps)
        numerador = (1.0 / dist_ponderada) ** P # (lambda * d)^(-P) = 1 / (lambda * d)^P
        
        denominador = np.sum(numerador, axis=(1, 2), keepdims=True) # Sum_ab [ (lambda_ab * d_abk)^(-P) ]
        denominador = np.fmax(denominador, np.finfo(np.float64).eps)
        
        u = numerador / denominador
        return u
    
    def update_lambda(self, D, U):
        n, c, p = D.shape
        
        D_safe = np.fmax(D, np.finfo(np.float64).eps)
        
        termo_ponderado = (U ** self.m) * D_safe # (u_ijk)^m * d_ijk
        denominador = np.sum(termo_ponderado, axis=0) # Sum_k [ (u_ijk)^m * d_ijk ]
        denominador = np.fmax(denominador, np.finfo(np.float64).eps)
        
        # exp( sum(log(S)) / p )
        log_S = np.log(denominador) # Logaritmo de S_ij
        soma_log_S = np.sum(log_S, axis=1) # Soma sobre p: Sum_k [ log(S_ij) ]
        numerador = np.exp(soma_log_S / p) # exp( sum(log(S)) / p )
        
        lambdaa = numerador[:, np.newaxis] / denominador
        
        return lambdaa

## Clustering

In [9]:
def mfcm_run(dados, num_clusters, m=2, max_iter=10**3, epsilon=1e-5):
    mfcm = MFCM(c=num_clusters, X=dados, m=m) # create the MFCM object
    
    U = mfcm.initialize_u() # initialize the membership matrix
    Lambda = mfcm.initialize_lambda() # initialize the weight matrix
    
    for _ in range(max_iter):
        centroids = mfcm.find_centroides(dados, U)
        D = mfcm.get_distances(dados, centroids)
        new_U = mfcm.update_u(D, Lambda)
        Lambda = mfcm.update_lambda(D, U)
        if np.linalg.norm(U - new_U) < epsilon:
            break
        U = new_U

    Delta = np.sum(U, axis=2) 
    
    return centroids, U, Delta

## Simulação de Monte Carlo

In [10]:
def monte_carlo_simulation(dados, labels, num_clusters, num_trials):
    results_ari = []
    results_ami = []
    for _ in range(num_trials):
        centroids, U, Delta = mfcm_run(dados, num_clusters)
        predicted_labels = np.argmax(Delta, axis=1)
        ari = adjusted_rand_score(labels, predicted_labels)
        ami = adjusted_mutual_info_score(labels, predicted_labels)
        results_ari.append(ari)
        results_ami.append(ami)
    mean_ari = np.mean(results_ari)
    std_ari = np.std(results_ari)
    mean_ami = np.mean(results_ami)
    std_ami = np.std(results_ami)
    return mean_ari, std_ari, mean_ami, std_ami

In [None]:
num_clusters = 3
num_trials = 100
mean_ari, std_ari, mean_ami, std_ami = monte_carlo_simulation(dados, labels, num_clusters, num_trials)

print(f"Mean ARI: {mean_ari}")
print(f"Std ARI: {std_ari}")
print("")
print(f"Mean AMI: {mean_ami}")
print(f"Std AMI: {std_ami}")