In [14]:
import pandas as pd
import numpy as np

### 0. Tratamento de dados

In [15]:
df = pd.read_csv('/Users/thomazaraujo/Documents/CIn - UFPE/PIBIC/datasets/iris.csv')
df.head()
df.drop("Id", axis=1, inplace=True)
df["Species"].replace({"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}, inplace=True)
df.columns = ["SepalLengthCm", "SepalWidthCm", "PetalLengthCm", "PetalWidthCm", "Class"]
labels = df["Class"].values
df.drop("Class", axis=1, inplace=True)
dados = df.to_numpy()
dados

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["Species"].replace({"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}, inplace=True)
  df["Species"].replace({"Iris-setosa": 0, "Iris-versicolor": 1, "Iris-virginica": 2}, inplace=True)


array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
       [4.9, 3

### 1. Inicialização da matriz de pertinência

In [16]:
def inicializacao_matriz_pertinencia(n, c, p):
    matriz_pertinencia = np.random.rand(n, c, p) # gera uma matriz inicial aleatória com valores entre 0 e 1
    matriz_pertinencia = matriz_pertinencia / matriz_pertinencia.sum(axis=(1, 2), keepdims=True) # normalização da matriz pra garantir que a soma dos graus dê um
    return matriz_pertinencia

### 2. Atualização dos protótipos

In [17]:
def atualizacao_prototipos(x, u, m):
    n = u.shape[0]
    c = u.shape[1]
    p = u.shape[2]
    u = u ** m
    y = np.zeros((c, p))
    for i in range(c):
        for j in range(p):
                soma_denominador = 0
                soma_numerador = 0
                for k in range(n):
                    soma_numerador += u[k, i, j] * x[k, j]
                    soma_denominador += u[k, i, j]
                y[i, j] = soma_numerador / soma_denominador
    return y

### 3. Cálculo das distâncias

In [18]:
def get_distancia(x, y):
    n = x.shape[0]
    c = y.shape[0]
    p = x.shape[1]
    d = np.zeros((n, c, p))
    for k in range(n):
        for i in range(c):
            for j in range(p):
                d[k, i, j] = (x[k, j] - y[i, j]) ** 2
    return d

### 4. Atualização da matriz de pertinência

In [None]:
def atualizacao_matriz_pertinencia(D, m=2):
    eps = 1e-10 # if (a, b) == (i, j) then the division is zero; eps is there to avoid it
    n = D.shape[0]
    c = D.shape[1]
    p = D.shape[2]
    U = np.zeros((n, c, p))
    for k in range(n):
        for i in range(c):
            for j in range(p):
                dijk = max(D[k, i, j], eps)
                soma = 0
                for a in range(c):
                    for b in range(p):
                        dabk = max(D[k, a, b], eps)
                        soma += (dijk / dabk) ** (1 / (m - 1))
                U[k, i, j] = 1 / soma
        soma_total = np.sum(U[k]) 
        U[k] /= soma_total # 3rd restriction
    return U

### 5. Cálculo da função de perda

In [21]:
def loss_function(U, D, m=2):
    n = D.shape[0]
    c = D.shape[1]
    p = D.shape[2]
    U = U ** m
    J = 0
    for k in range(n):
        for i in range(c):
            for j in range(p):
                J += U[k, i, j] * D[k, i, j]
    return J

### 6. Algoritmo

In [None]:
def mfcm(x, c, m=2, max_iter = 10**6, eps=1e-6):
    n, p = x.shape
    U = inicializacao_matriz_pertinencia(n, c, p)
    loss = 10**6
    for trial in range(max_iter):
        prototipo = atualizacao_prototipos(x, U, m)
        D = get_distancia(x, prototipo)
        U = atualizacao_matriz_pertinencia(D, m)
        new_loss = loss_function(U, D, m)
        if (loss - new_loss < eps):
            break
        loss = new_loss
    Delta = np.sum(U, axis=2)
    return Delta