In [64]:
pip install numpy -q

In [65]:
pip install matplotlib -q

In [66]:
pip install scikit-learn -q

In [67]:
pip install scikit-learn-extra -q

In [62]:
# Import necessary libraries
import numpy as np
from sklearn_extra.cluster import KMedoids
from sklearn.metrics import adjusted_rand_score

# Custom data points
X = np.array([
    [1.0, 2.0], [1.5, 2.5], [1.2, 1.8],  # Cluster 0
    [2.0, 3.0], [2.5, 2.7], [2.3, 3.2],  # Cluster 0 (overlapping with cluster 1)
    [3.5, 3.5], [4.0, 4.0], [3.8, 3.8],  # Cluster 1
    [5.0, 5.0], [5.5, 5.2], [4.9, 5.1],   # Cluster 1 (but overlapping with cluster 2)
    [7.0, 8.0], [7.5, 7.8], [6.8, 8.1],  # Cluster 2
])

#true_labels = np.array([0, 0, 0, 1, 1, 1])
true_labels = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2])

# Monte Carlo Experiment Function
def monte_carlo_experiment(data, true_labels, n_clusters, num_trials):
    rand_indices = []
    final_medoids_list = []

    for trial in range(num_trials):
        # Initialize and fit K-Medoids
        kmedoids = KMedoids(n_clusters=n_clusters, random_state=trial)
        kmedoids.fit(data)

        # Get the predicted labels and calculate the Adjusted Rand Index
        predicted_labels = kmedoids.labels_
        rand_idx = adjusted_rand_score(true_labels, predicted_labels)
        rand_indices.append(rand_idx)

        # Store the final medoids for this trial
        final_medoids_list.append(kmedoids.cluster_centers_)

        # Print predicted labels for this trial
        #print(f"Trial {trial + 1} - Predicted labels: {predicted_labels}")

    # Calculate mean and standard deviation of the Rand Index
    mean_rand_index = np.mean(rand_indices)
    std_rand_index = np.std(rand_indices)

    return mean_rand_index, std_rand_index, final_medoids_list

# Parameters
num_trials = 1
n_clusters = 3

# Run Monte Carlo Experiment
mean_rand_idx, std_rand_idx, medoids_list = monte_carlo_experiment(X, true_labels, n_clusters, num_trials)

print(f"\nMonte Carlo K-Medoids Clustering Results ({num_trials} trials)")
print(f"Mean Rand Index: {mean_rand_idx:.4f}")
print(f"Standard Deviation of Rand Index: {std_rand_idx:.4f}")
#print("\nFinal medoids for each trial:")
for i, medoids in enumerate(medoids_list):
    print(f"Trial {i + 1} medoids:\n{medoids}")


Monte Carlo K-Medoids Clustering Results (1 trials)
Mean Rand Index: 0.6023
Standard Deviation of Rand Index: 0.0000
Trial 1 medoids:
[[3.8 3.8]
 [5.5 5.2]
 [1.5 2.5]]


In [77]:
import numpy as np
from sklearn.metrics import pairwise_distances
from sklearn.metrics import adjusted_rand_score

# Sample data
X = np.array([
    [1.0, 2.0], [1.5, 2.5], [1.2, 1.8],  # Cluster 0
    [2.0, 3.0], [2.5, 2.7], [2.3, 3.2],  # Cluster 0 (overlapping with cluster 1)
    [3.5, 3.5], [4.0, 4.0], [3.8, 3.8],  # Cluster 1
    [5.0, 5.0], [5.5, 5.2], [4.9, 5.1],   # Cluster 1 (but overlapping with cluster 2)
    [7.0, 8.0], [7.5, 7.8], [6.8, 8.1],  # Cluster 2
])

true_labels = np.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2])

def pam(X, k):
    # Calcular todas as distâncias de um ponto a outro
    # Os k pontos que tiverem a menor distância somada a todos os outros pontos são selecionados como os medoides iniciais
    # É analisada a distância de cada um dos pontos a cada cada um dos medoides, a menor distância desse ponto pra um medoide específico, o ponto será selecionado para o cluster representado por esse medoide
    # Após isso todos os pontos que não forem medoides já devem ter um cluster
    # Dentro de cada cluster, para cada ponto, o medoide será trocado por esse ponto e será calculada a distância somada desse novo medoide para todos os outros pontos desse mesmo cluster
    # Se essa distância for menor do que a do medoide original, substitua ele definitivamente. Faça isso com todos os pontos do cluster, para todos os clusters
    # Depois de uma troca, cada ponto checa sua distâmncia para todos os medoides novamente e se realoca ao medoide mais próximo
    # Após isso os clusters já devem estar definidos
    # Crie uma label para cada um desses clusters (0 até k-1)
    # Retorne essas labels

    # Step 1: Greedy Initialization - choose k points with smallest summed distances to other points
    m, n = X.shape
    total_distances = np.sum(pairwise_distances(X), axis=1)
    medoid_indices = np.argsort(total_distances)[:k]  # Select k points with smallest summed distances
    medoids = X[medoid_indices]
    print(medoids)

    # Alocar os demais pontos aos medoides
    clusters = [[] for _ in range(k)]

    distances = pairwise_distances(X, metric='euclidean')
    idx = 0
    for point in X:
      if point not in medoids:
        mdd = 0
        l_mdd = 0
        l_dist = 0
        l_medoid_idx = 0
        for medoid_idx in medoid_indices:
          distance = distances[0][medoid_idx]
          if distance > l_dist:
            l_dist = distance
            l_medoid_idx = idx
            l_mdd = mdd
          mdd += 1
        clusters[l_mdd].append(point)
        idx += 1
    print(clusters)

    # Testar mudanças
    summed_distances = np.sum(distances, axis=1)
    summed_distances_medoids = []
    for medoid_idx in medoid_indices:
      summed_distances_medoids.append(summed_distances[medoid_idx])

    for cluster in range(k):
      if len(clusters[cluster]) > 0:
        idx_point = 0
        for point in clusters[cluster]:
          medoid = medoids[cluster]
          new_medoid = point
          new_medoid_idx = idx_point
          clusters[cluster].append(new_medoid)
          clusters[cluster].remove(point)

          ## TESTAR AS DIFERENÇAS
          ## FAZER AS TROCAS NECESSÁRIAS
          ## COLOCAR O MEDOIDE DENTRO DO CLUSTER
          ## DAR UMA LABEL A CADA MEDOIDE



# Apply PAM to find clusters
k = 3  # Number of clusters
pam(X, k)
#predicted_labels, medoids = pam(X, k)
#print("Predicted labels:", predicted_labels)
#print("Medoids:", medoids)

# Compute the Adjusted Rand Index (ARI)
#ari = adjusted_rand_score(true_labels, predicted_labels)
#print(f"Adjusted Rand Index: {ari:.4f}")

[[3.8 3.8]
 [4.  4. ]
 [3.5 3.5]]
[[], [array([1., 2.]), array([1.5, 2.5]), array([1.2, 1.8]), array([2., 3.]), array([2.5, 2.7]), array([2.3, 3.2]), array([5., 5.]), array([5.5, 5.2]), array([4.9, 5.1]), array([7., 8.]), array([7.5, 7.8]), array([6.8, 8.1])], []]
[[3.8 3.8]
 [4.  4. ]
 [3.5 3.5]]


In [58]:
X = np.array([
    [1.0, 2.0], [1.5, 2.5], [1.2, 1.8],
    [2.0, 3.0], [2.5, 2.7], [2.3, 3.2],
    [3.5, 3.5], [4.0, 4.0], [3.8, 3.8],
    [5.0, 5.0], [5.5, 5.2], [4.9, 5.1],
    [7.0, 8.0], [7.5, 7.8], [6.8, 8.1],
])

# Calculate pairwise distances
distances = pairwise_distances(X, metric='euclidean')

# Sum the distances for each point to every other point
summed_distances = np.sum(distances, axis=1)

print("Pairwise distances:\n", distances)
print("Summed distances for each point:\n", summed_distances)

Pairwise distances:
 [[0.         0.70710678 0.28284271 1.41421356 1.65529454 1.7691806
  2.91547595 3.60555128 3.3286634  5.         5.52177508 4.98196748
  8.48528137 8.71148667 8.4172442 ]
 [0.70710678 0.         0.76157731 0.70710678 1.0198039  1.06301458
  2.23606798 2.91547595 2.64196896 4.30116263 4.8259714  4.28018691
  7.77817459 8.00562302 7.71038261]
 [0.28284271 0.76157731 0.         1.44222051 1.58113883 1.78044938
  2.86006993 3.56089876 3.28024389 4.96789694 5.48178803 4.9578221
  8.48999411 8.7        8.42911621]
 [1.41421356 0.70710678 1.44222051 0.         0.58309519 0.36055513
  1.58113883 2.23606798 1.96977156 3.60555128 4.13400532 3.58050276
  7.07106781 7.3        7.00357052]
 [1.65529454 1.0198039  1.58113883 0.58309519 0.         0.53851648
  1.28062485 1.98494332 1.70293864 3.39705755 3.90512484 3.39411255
  6.95269732 7.14212853 6.90289794]
 [1.7691806  1.06301458 1.78044938 0.36055513 0.53851648 0.
  1.23693169 1.87882942 1.61554944 3.24499615 3.77359245 3.22