# Multivariate Fuzzy C-Means (MFCM)

In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics import adjusted_rand_score
import warnings
warnings.filterwarnings('ignore')

### 0. Tratamento de dados

In [4]:
df = pd.read_csv('/Users/thomazaraujo/Documents/CIn - UFPE/PIBIC/datasets/iris.csv')
df.head()
df.drop("Id", axis=1, inplace=True)
df["Species"].replace({"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}, inplace=True)
df.columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm", "Class"]
labels = df["Class"].values
df.drop("Class", axis=1, inplace=True)
dados = df.to_numpy()

### 1. Inicialização da matriz de pertinência

In [5]:
def inicializacao_matriz_pertinencia(n, c, p):
    matriz_pertinencia = np.random.rand(n, c, p) # gera uma matriz inicial aleatória com valores entre 0 e 1
    matriz_pertinencia = matriz_pertinencia / matriz_pertinencia.sum(axis=(1, 2), keepdims=True) # normalização da matriz pra garantir que a soma dos graus dê um
    return matriz_pertinencia

### 2. Atualização dos protótipos

In [12]:
def atualizacao_prototipos(x, u, m):
    n = u.shape[0]
    c = u.shape[1]
    p = u.shape[2]
    u = u ** m
    y = np.zeros((c, p))
    for i in range(c):
        for j in range(p):
                soma_denominador = 0
                soma_numerador = 0
                for k in range(n):
                    soma_numerador += u[k, i, j] * x[k, j]
                    soma_denominador += u[k, i, j]
                y[i, j] = soma_numerador / soma_denominador
    return y

### 3. Cálculo das distâncias

In [6]:
def get_distancia(x, y):
    n = x.shape[0]
    c = y.shape[0]
    p = x.shape[1]
    d = np.zeros((n, c, p))
    for k in range(n):
        for i in range(c):
            for j in range(p):
                d[k, i, j] = (x[k, j] - y[i, j]) ** 2
    return d

### 4. Atualização da matriz de pertinência

In [7]:
def atualizacao_matriz_pertinencia(D, m=2):
    eps = 1e-10 # if (a, b) == (i, j) then the division is zero; eps is there to avoid it
    n = D.shape[0]
    c = D.shape[1]
    p = D.shape[2]
    U = np.zeros((n, c, p))
    for k in range(n):
        for i in range(c):
            for j in range(p):
                dijk = max(D[k, i, j], eps)
                soma = 0
                for a in range(c):
                    for b in range(p):
                        dabk = max(D[k, a, b], eps)
                        soma += (dijk / dabk) ** (1 / (m - 1))
                U[k, i, j] = 1 / soma
        soma_total = np.sum(U[k]) 
        U[k] /= soma_total # 3rd restriction
    return U

### 5. Cálculo da função de perda

In [8]:
def loss_function(U, D, m=2):
    n = D.shape[0]
    c = D.shape[1]
    p = D.shape[2]
    U = U ** m
    J = 0
    for k in range(n):
        for i in range(c):
            for j in range(p):
                J += U[k, i, j] * D[k, i, j]
    return J

### 6. Algoritmo

In [16]:
def mfcm(x, c, m=2, max_iter = 10**6, eps=1e-6):
    n, p = x.shape
    U = inicializacao_matriz_pertinencia(n, c, p)
    loss = 10**6
    for trial in range(max_iter):
        prototipo = atualizacao_prototipos(x, U, m)
        D = get_distancia(x, prototipo)
        U = atualizacao_matriz_pertinencia(D, m)
        new_loss = loss_function(U, D, m)
        if (loss - new_loss < eps):
            break
        loss = new_loss
    Delta = np.sum(U, axis=2)
    print(U)
    return Delta

### 7. Simulação de Monte Carlo

In [17]:
def simulacao_monte_carlo(dados, labels, c, trials):
    indices_rand = []
    for trial in range(trials):
        Delta = mfcm(dados, c)
        predicted_labels = np.argmax(Delta, axis=1)
        idx_rand = adjusted_rand_score(labels, predicted_labels)
        indices_rand.append(idx_rand)
    mean_idx_rand = np.mean(indices_rand)
    std_idx_rand = np.std(indices_rand)
    return mean_idx_rand, std_idx_rand

### 8. Chamada

In [18]:
c = 3
trials = 1
mean_idx_rand, std_idx_rand = simulacao_monte_carlo(dados, labels, c, trials)

print(f"Mean Adjusted Rand Index: {mean_idx_rand:.4f}")
print(f"Standard Deviation of Adjusted Rand Index: {std_idx_rand:.4f}")

[[[1.78586390e-04 2.52626687e-04 1.16491025e-05 2.27398084e-05]
  [9.86633206e-01 1.13579512e-02 9.91990480e-06 4.04037959e-05]
  [7.03023789e-04 6.74844918e-04 1.07370597e-05 1.04310915e-04]]

 [[3.66468256e-04 9.82904762e-01 4.20353250e-05 8.20556979e-05]
  [6.59391041e-03 1.35438429e-03 3.57955836e-05 1.45795497e-04]
  [9.35252159e-04 7.12439472e-03 3.87442547e-05 3.76401806e-04]]

 [[2.31812207e-04 5.11615943e-03 3.80400370e-05 8.05693957e-05]
  [1.55114477e-03 4.78312051e-03 3.25942235e-05 1.43154655e-04]
  [4.73370180e-04 9.87145277e-01 3.51740094e-05 3.69583914e-04]]

 ...

 [[3.80172195e-04 9.53755373e-01 1.20294991e-04 3.57577073e-02]
  [1.18623062e-04 1.31421816e-03 1.64045624e-04 9.02781842e-04]
  [1.97693534e-04 6.91311103e-03 1.39520623e-04 2.36458256e-04]]

 [[2.19909263e-03 3.01358881e-03 2.03829521e-04 3.58551882e-03]
  [4.24011882e-04 9.76929570e-01 2.66648597e-04 7.92824474e-04]
  [8.32638902e-04 1.12109877e-02 2.31918313e-04 3.09370656e-04]]

 [[6.82157365e-03 9.6336