In [1]:
from ucimlrepo import fetch_ucirepo
import pandas as pd
import numpy as np
from sklearn.metrics import adjusted_mutual_info_score

In [2]:
# Seed NumPy's legacy global RNG so the np.random.* draws made later in the
# notebook (random memberships and random medoids) are repeatable.
np.random.seed(42)

In [3]:
# Fetch the UCI ML Repository dataset with ID 45.
# NOTE(review): the previewed columns (age, sex, cp, ..., num) look like the
# Heart Disease dataset -- confirm that this is the intended ID.
dataset = fetch_ucirepo(id=45)

# Convert the features into a DataFrame
df = pd.DataFrame(dataset.data.features)

# Append the target column(s) when the dataset provides them
if dataset.data.targets is not None:
    # targets may be a Series or a DataFrame depending on the dataset
    targets = pd.DataFrame(dataset.data.targets)
    df = pd.concat([df, targets], axis=1)

# Preview the data
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,num
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0,0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0,2
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0,1
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0,0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0,0


In [4]:
# Drop rows with missing values, split the "num" column off as the label
# vector, and keep the remaining columns as the feature matrix.
# NOTE: this cell is not re-runnable on its own -- after one run `df` no
# longer has a "num" column.
df = df.dropna()
labels = df.pop("num").values  # pop() removes the column from df in place
dados = df.to_numpy()

In [None]:
class MFCMedoids:
    """Building blocks for multivariate fuzzy c-medoids clustering.

    Memberships are kept per observation, cluster and variable in an array of
    shape (n, c, p). For each observation they sum to 1 over all
    (cluster, variable) pairs. Each cluster prototype (medoid) is a row of the
    data matrix.

    Parameters
    ----------
    c : int
        Number of clusters.
    X : numpy.ndarray
        Data matrix of shape (n, p); only its shape is read by the constructor.
    m : float
        Fuzzifier. Must be greater than 1, because ``update_u`` raises
        distance ratios to the power ``1 / (m - 1)``.
    """

    def __init__(self, c, X, m):
        self.c = c
        self.n = X.shape[0]
        self.p = X.shape[1]
        self.m = m
        self.epsilon = 1e-10  # Floor applied to distances to prevent division by zero

    def initialize_u(self):
        """Draw random initial memberships of shape (n, c, p).

        One Dirichlet sample of length ``c * p`` is drawn per observation, so
        every observation's memberships are positive and sum to 1.
        """
        return np.random.dirichlet(alpha=np.ones(self.c * self.p),
                                   size=self.n).reshape(self.n, self.c, self.p)

    def initialize_medoids(self, X):
        """Pick ``c`` rows of ``X`` at random as the starting medoids.

        Rows are drawn without replacement so that two clusters never start
        from the same observation: identical medoids give identical distances
        and therefore identical memberships for those clusters. Sampling falls
        back to drawing with replacement only when ``c`` exceeds the number of
        rows.

        Returns
        -------
        numpy.ndarray
            Float array of shape (c, p).
        """
        n_rows = X.shape[0]
        chosen = np.random.choice(n_rows, size=self.c, replace=self.c > n_rows)
        return np.asarray(X[chosen], dtype=float)

    def get_distances(self, X, medoids):
        """Return per-variable squared differences to every medoid.

        The result has shape (n, c, p) with
        ``D[k, i, j] = (X[k, j] - medoids[i, j]) ** 2``.
        """
        return (X[:, np.newaxis, :] - medoids[np.newaxis, :, :]) ** 2

    def find_medoids(self, X, U):
        """Select, for each cluster, the row of ``X`` with the lowest weighted cost.

        The cost of candidate row ``q`` for cluster ``i`` is
        ``sum_k sum_j U[k, i, j] ** m * (X[k, j] - X[q, j]) ** 2``.
        Ties are resolved by ``np.argmin`` (lowest index wins).

        Returns
        -------
        numpy.ndarray
            Array of shape (c, p) holding the chosen rows.
        """
        medoids = np.zeros((self.c, self.p))
        U_m = U ** self.m  # (n, c, p)

        # Pairwise per-variable squared differences between all observations:
        # distances_squared[k, q, j] = (X[k, j] - X[q, j]) ** 2, shape (n, n, p)
        distances_squared = (X[:, np.newaxis, :] - X[np.newaxis, :, :]) ** 2

        for i in range(self.c):
            # Weights of cluster i, expanded to (n, 1, p) so they broadcast
            # against the (n, n, p) distance tensor.
            u_m_expanded = U_m[:, i, :][:, np.newaxis, :]

            # Total cost of each candidate q: sum over observations k and variables j.
            cost_per_q = np.sum(u_m_expanded * distances_squared, axis=(0, 2))  # shape (n,)

            best_q = np.argmin(cost_per_q)
            medoids[i] = X[best_q]

        return medoids

    def update_u(self, D):
        """Compute memberships from distances with the fuzzy c-means rule.

        ``u[k, i, j] = 1 / sum_{h, l} (D[k, i, j] / D[k, h, l]) ** (1 / (m - 1))``

        so a smaller distance yields a larger membership, and each
        observation's memberships sum to 1 over all (cluster, variable) pairs.

        Parameters
        ----------
        D : numpy.ndarray
            Distances of shape (n, c, p); values are floored at
            ``self.epsilon`` before dividing.

        Returns
        -------
        numpy.ndarray
            Memberships of shape (n, c, p).
        """
        D = np.maximum(D, self.epsilon)  # Avoid division by zero
        exponent = 1 / (self.m - 1)
        # ratio[k, i, j, h, l] = (D[k, i, j] / D[k, h, l]) ** exponent.
        # The entry's own distance must be the numerator; with the operands
        # swapped, membership would grow with distance instead of shrinking.
        ratio = (D[:, :, :, np.newaxis, np.newaxis] / D[:, np.newaxis, np.newaxis, :, :]) ** exponent
        return 1 / np.sum(ratio, axis=(3, 4))

    def get_objective_function(self, U, D):
        """Return the clustering criterion ``sum(U ** m * D)``."""
        return np.sum((U ** self.m) * D)
    
def mfcm_run(dados, num_clusters, m=2, max_iter=1000, epsilon=1e-5):
    """Run one multivariate fuzzy c-medoids fit from a random start.

    Every iteration is a single alternating step: distances to the current
    medoids -> new memberships from those distances -> new medoids from the
    new memberships. Memberships and medoids therefore always belong to the
    same optimisation sequence, and the convergence test compares two
    consecutive membership arrays of that sequence.

    Parameters
    ----------
    dados : numpy.ndarray
        Data matrix of shape (n, p).
    num_clusters : int
        Number of clusters.
    m : float, default 2
        Fuzzifier passed to ``MFCMedoids``.
    max_iter : int, default 1000
        Maximum number of iterations.
    epsilon : float, default 1e-5
        Stop when the norm of the change in memberships falls below this value.

    Returns
    -------
    medoids : numpy.ndarray
        Cluster prototypes, shape (num_clusters, p), selected from the
        returned memberships.
    U : numpy.ndarray
        Memberships of shape (n, num_clusters, p).
    Delta : numpy.ndarray
        Memberships summed over the variables, shape (n, num_clusters).
    """
    mfcm = MFCMedoids(c=num_clusters, X=dados, m=m)

    # The random memberships only serve as the reference for the first
    # convergence check; the random medoids drive the first distance step.
    U = mfcm.initialize_u()
    medoids = mfcm.initialize_medoids(dados)

    for _ in range(max_iter):
        D = mfcm.get_distances(dados, medoids)
        new_U = mfcm.update_u(D)
        medoids = mfcm.find_medoids(dados, new_U)

        # Commit the new memberships before leaving the loop so that the
        # returned U is the one the returned medoids were selected from.
        converged = np.linalg.norm(U - new_U) < epsilon
        U = new_U
        if converged:
            break

    # Aggregate memberships over the variable axis (axis 2) -> (n, num_clusters)
    Delta = np.sum(U, axis=2)

    return medoids, U, Delta

def monte_carlo_simulation(dados, labels, num_clusters, num_trials, min_ami=0.1):
    """Repeat the clustering from random starts and summarise the AMI scores.

    Each trial runs ``mfcm_run``, assigns every observation to the cluster with
    the largest aggregated membership, and scores that partition against
    ``labels`` with the adjusted mutual information (AMI).

    Parameters
    ----------
    dados : numpy.ndarray
        Data matrix of shape (n, p).
    labels : array-like
        Reference labels, one per observation.
    num_clusters : int
        Number of clusters requested in each trial.
    num_trials : int
        Number of independent runs.
    min_ami : float or None, default 0.1
        Only trials whose AMI is strictly greater than this value enter the
        statistics. Pass ``None`` to keep every trial.
        NOTE: discarding low-scoring trials biases the reported mean upwards.

    Returns
    -------
    (mean_ami, std_ami)
        Mean and standard deviation of the kept AMI scores; both are NaN when
        no trial is kept.
    """
    kept_scores = []
    for _ in range(num_trials):
        _, _, Delta = mfcm_run(dados, num_clusters)
        predicted_labels = np.argmax(Delta, axis=1)
        ami = adjusted_mutual_info_score(labels, predicted_labels)
        if min_ami is None or ami > min_ami:
            kept_scores.append(ami)

    if not kept_scores:
        # np.mean / np.std of an empty list would also give NaN, but only after
        # emitting a RuntimeWarning; return the NaNs explicitly instead.
        return np.float64("nan"), np.float64("nan")

    return np.mean(kept_scores), np.std(kept_scores)

# Experiment settings: number of clusters and number of random restarts.
num_clusters = 5
num_trials = 100

mean_ami, std_ami = monte_carlo_simulation(
    dados=dados,
    labels=labels,
    num_clusters=num_clusters,
    num_trials=num_trials,
)

# Report both summary statistics, one per line.
print(f"Mean AMI: {mean_ami}", f"Std AMI: {std_ami}", sep="\n")