In [38]:
import numpy as np 
import cv2
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import pairwise_distances
from sklearn.metrics import davies_bouldin_score
from minisom import MiniSom
from collections import defaultdict
from sklearn.metrics import accuracy_score

In [61]:
# Define the path to the root folder of your dataset
root_folder = "MNIST Dataset/"

# Define lists to store the image data and labels
data = []
labels = []

# Loop over the folders in the root folder
for i in range(10):
    folder_path = root_folder + str(i) + "/"

    # Get the label corresponding to the folder name
    label = i

    # Loop over the images in the folder
    for filename in os.listdir(folder_path):
        if filename.endswith('.jpg'):
            image_path = os.path.join(folder_path, filename)

            # Read the image using OpenCV and convert it to grayscale
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)

            # Add the image and label to the dataset lists
            data.append(image)
            labels.append(label)

# Convert the data and labels lists to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Split the data into training and testing sets, with 90% of each class for training
train_data = []
train_labels = []
test_data = []
test_labels = []

for i in range(10):
    class_data = data[labels == i]
    class_labels = labels[labels == i]

    train_class_data, test_class_data, train_class_labels, test_class_labels = train_test_split(class_data, class_labels, test_size=0.1)

    train_data.append(train_class_data)
    train_labels.append(train_class_labels)
    test_data.append(test_class_data)
    test_labels.append(test_class_labels)

# Concatenate the training and testing data and labels for each class
train_data = np.concatenate(train_data)
train_labels = np.concatenate(train_labels)
test_data = np.concatenate(test_data)
test_labels = np.concatenate(test_labels)

# Create a pandas dataframe to store the data and labels
train_df = pd.DataFrame(train_data.reshape(train_data.shape[0], -1))
train_df['label'] = train_labels

test_df = pd.DataFrame(test_data.reshape(test_data.shape[0], -1))
test_df['label'] = test_labels

#shuffle data frame:
train_df = train_df.sample(frac = 1)
test_df = test_df.sample(frac = 1)


In [52]:
# prepare Train and Test  Data and labels
TrainLabels = train_df['label']
TrainData = train_df.drop('label',axis=1)
TestLabels = test_df['label']
TestData = test_df.drop('label',axis=1)
TrainData = np.array(TrainData)
TrainLabels = np.array(TrainLabels)
TestLabels = np.array(TestLabels)
TestData = np.array(TestData)

In [53]:
TestData.shape

(2000, 784)

In [63]:
test_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,775,776,777,778,779,780,781,782,783,label
551,0,0,0,0,0,0,0,0,0,7,...,0,0,0,0,0,0,0,0,0,2
1807,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,9
609,0,0,0,0,0,0,0,0,1,5,...,0,0,0,0,0,0,0,0,0,3
671,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,3
1052,0,0,0,0,0,0,0,0,0,3,...,0,0,0,0,0,0,0,0,0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
111,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1530,0,0,0,0,0,0,0,0,0,0,...,4,7,0,8,0,0,0,0,0,7
1176,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,5
1084,0,0,0,0,0,0,0,0,0,5,...,0,0,0,0,0,0,0,0,0,5


In [112]:
#winner takes all approach , many clusters 
# Load the training and test data
training_data = TrainData
test_data = TestData
training_labels = TrainLabels
test_labels = TestLabels

# Flatten the training and test data and normalize it:
training_data = training_data.reshape(training_data.shape[0], -1) / 255.
test_data = test_data.reshape(test_data.shape[0], -1) / 255.

# Define the parameters for the SOM:
input_len = 784  # number of features
classes = 10     # number of classes
som_size = 30    # size of the SOM
sigma = 1.0      # neighborhood radius
learning_rate = 0.5

# Implement the Winner-Takes-All approach:
# Create a SOM with one neuron for each class
som = MiniSom(som_size, som_size, input_len, sigma=sigma, learning_rate=learning_rate, 
              neighborhood_function='gaussian', random_seed=0)
som.pca_weights_init(training_data)
som.train_batch(training_data, 100000, verbose=True)

# Determine the number of images of each class that are placed in each cluster
labels_map = som.labels_map(training_data, training_labels)
class_counts = np.zeros((som_size, som_size, classes))
for index, label in np.ndenumerate(training_labels):
    i, j = som.winner(training_data[index])
    class_counts[i, j, label] += 1

# Determine the label for each cluster
class_labels = np.zeros((som_size, som_size))
for i in range(som_size):
    for j in range(som_size):
        class_labels[i, j] = np.argmax(class_counts[i, j])

# Compute the DBI evaluation criteria on the clustering result
labels_map = som.labels_map(training_data, training_labels)
# dbi_wta = davies_bouldin_score(training_data, np.array([class_labels[x] for x in labels_map]))
dbi_wta = davies_bouldin_score(training_data, training_labels)

# Compute the train data accuracy
train_labels_pred = [np.argmax(class_counts[som.winner(x)]) for x in training_data]
accuracy_wta_train = np.mean(np.equal(training_labels, train_labels_pred))
print('Train Accuracy for Winner-Takes-All approach:', accuracy_wta_train)

# Compute the test data accuracy
test_labels_pred = [np.argmax(class_counts[som.winner(x)]) for x in test_data]
accuracy_wta_test = np.mean(np.equal(test_labels, test_labels_pred))
print('Test Accuracy for Winner-Takes-All approach:', accuracy_wta_test)

print("Cluster Labels:",class_labels)
print()


 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 4.356806900798432
Train Accuracy for Winner-Takes-All approach: 0.9242222222222222
Test Accuracy for Winner-Takes-All approach: 0.911
Cluster Labels: [[5. 5. 4. 7. 8. 0. 5. 6. 5. 3. 3. 1. 7. 1. 1. 1. 1. 2. 1. 1. 7. 9. 5. 9.
  9. 9. 7. 7. 7. 7.]
 [5. 5. 8. 8. 2. 0. 6. 6. 6. 0. 6. 1. 1. 1. 1. 1. 1. 1. 1. 5. 7. 9. 5. 8.
  9. 7. 7. 7. 4. 9.]
 [5. 0. 4. 2. 4. 5. 5. 5. 5. 5. 6. 6. 8. 1. 1. 1. 1. 4. 3. 7. 7. 7. 5. 5.
  5. 7. 9. 9. 9. 9.]
 [2. 2. 2. 2. 6. 5. 5. 5. 5. 3. 6. 6. 6. 3. 3. 8. 4. 8. 3. 9. 7. 5. 5. 5.
  5. 9. 9. 9. 4. 9.]
 [2. 2. 2. 2. 3. 5. 5. 5. 5. 6. 6. 6. 6. 2. 2. 2. 2. 3. 3. 3. 5. 8. 5. 5.
  8. 9. 9. 9. 4. 4.]
 [2. 2. 2. 2. 3. 3. 5. 5. 1. 6. 6. 6. 2. 2. 2. 2. 2. 5. 3. 3. 8. 8. 8. 7.
  7. 7. 7. 7. 9. 9.]
 [2. 2. 2. 2. 2. 2. 0. 0. 5. 6. 6. 6. 2. 2. 2. 2. 2. 8. 8. 8. 8. 8. 8. 8.
  7. 7. 7. 7. 7. 7.]
 [2. 2. 2. 2. 2. 0. 0. 0. 0. 0. 5. 3. 3. 3. 0. 0. 0. 0. 0. 9. 5. 8. 8. 8.
  7. 7. 7. 7. 7. 7.]
 [3. 8. 2. 2. 0. 0. 0. 0. 0

In [114]:
#winner takes all approach 10 clusters 
# Load the training and test data
training_data = TrainData
test_data = TestData
training_labels = TrainLabels
test_labels = TestLabels

# Flatten the training and test data and normalize it:
training_data = training_data.reshape(training_data.shape[0], -1) / 255.
test_data = test_data.reshape(test_data.shape[0], -1) / 255.

# Define the parameters for the SOM:
input_len = 784  # number of features
classes = 10     # number of classes
som_size = 10    # size of the SOM
sigma = 1.0      # neighborhood radius
learning_rate = 0.2

# Implement the Winner-Takes-All approach:
# Create a SOM with one neuron for each class
som = MiniSom(som_size, som_size, input_len, sigma=sigma, learning_rate=learning_rate, 
              neighborhood_function='gaussian', random_seed=0)
som.pca_weights_init(training_data)
som.train_batch(training_data, 100000, verbose=True)

# Determine the number of images of each class that are placed in each cluster
labels_map = som.labels_map(training_data, training_labels)
class_counts = np.zeros((som_size, som_size, classes))
for index, label in np.ndenumerate(training_labels):
    i, j = som.winner(training_data[index])
    class_counts[i, j, label] += 1

# Determine the label for each cluster
class_labels = np.zeros((som_size, som_size))
for i in range(som_size):
    for j in range(som_size):
        class_labels[i, j] = np.argmax(class_counts[i, j])
# Determine the final class labels
class_labels_final = np.zeros((classes,))
for i in range(som_size):
    for j in range(som_size):
        class_labels_final[int(class_labels[i, j])] = np.argmax(class_counts[i, j])
# Compute the train accuracy
train_labels_pred = np.zeros(len(training_labels))
for i in range(len(training_data)):
    closest_neuron_dist, closest_neuron = som.winner(training_data[i])
    train_labels_pred[i] = class_labels_final[closest_neuron]
train_accuracy = accuracy_score(training_labels, train_labels_pred)
print('Train accuracy:', train_accuracy)


# Compute the test accuracy
test_labels_pred = np.zeros(len(test_labels))
for i in range(len(test_data)):
    closest_neuron_dist, closest_neuron = som.winner(test_data[i])
    test_labels_pred[i] = class_labels_final[closest_neuron]
test_accuracy = accuracy_score(test_labels, test_labels_pred)
print('Test accuracy:', test_accuracy)

# Compute the Euclidean distances between each training sample and each neuron in the SOM
weights = som.get_weights()
distances = np.zeros((len(training_data), som_size * som_size))
for i in range(len(training_data)):
    for j in range(som_size * som_size):
        distances[i, j] = np.linalg.norm(training_data[i] - weights[j // som_size, j % som_size])

        
# Assign each training sample to the neuron it is closest to
assignments = np.argmin(distances, axis=1)


# Compute the DBI for the SOM
dbi_wta = davies_bouldin_score(training_data, assignments)
print('DBI for Winner-Takes-All approach:', dbi_wta)

print("Cluster Labels:",class_labels)



 [ 100000 / 100000 ] 100% - 0:00:00 left 
 quantization error: 5.4095059518250626
Train accuracy: 0.13627777777777778
Test accuracy: 0.137
DBI for Winner-Takes-All approach: 2.901465773817106
Cluster Labels: [[0. 0. 8. 3. 3. 5. 1. 7. 4. 4.]
 [0. 0. 0. 6. 0. 6. 1. 7. 4. 4.]
 [0. 0. 0. 5. 3. 2. 1. 5. 5. 5.]
 [0. 3. 3. 5. 5. 3. 1. 8. 9. 7.]
 [5. 3. 3. 3. 3. 2. 1. 5. 4. 7.]
 [5. 3. 5. 5. 6. 2. 1. 3. 4. 7.]
 [8. 8. 8. 3. 6. 6. 6. 2. 7. 4.]
 [2. 9. 4. 2. 2. 6. 4. 4. 4. 9.]
 [9. 4. 4. 6. 2. 2. 5. 9. 4. 9.]
 [8. 8. 6. 0. 2. 2. 2. 4. 4. 4.]]


In [110]:
#one center on srround approach 
# Load the training and test data
training_data = TrainData
test_data = TestData
training_labels = TrainLabels
test_labels = TestLabels

# Reshape and normalize the data
# Flatten the training and test data and normalize it:
training_data = training_data.reshape(training_data.shape[0], -1) / 255.
test_data = test_data.reshape(test_data.shape[0], -1) / 255.


# Define the SOM
som_shape = (20, 20)
som = MiniSom(som_shape[0], som_shape[1], 784, sigma=1.0, learning_rate=0.5)

# Train the SOM on the training data
som.train_random(data=training_data, num_iteration=10000, verbose=True)

# Assign each data point in the training set to its closest neuron in the SOM
train_clusters = np.zeros((20, 20, 10), dtype=int)
for i in range(len(training_data)):
    x = training_data[i]
    winner = som.winner(x)
    label = training_labels[i]
    if label < 10:
        train_clusters[winner[0], winner[1], label] += 1


# Specify the label for each cluster
cluster_labels = np.zeros((20, 20), dtype=int)
for i in range(20):
    for j in range(20):
        cluster_labels[i, j] = np.argmax(np.sum(train_clusters[i, j], axis=0))

# Compute the Davies-Bouldin Index
# train_predictions = [cluster_labels[som.winner(x)] for x in training_data]
# train_dbi = davies_bouldin_score(training_data, train_predictions)

# Get the train data accuracy
train_predictions = [cluster_labels[som.winner(x)] for x in training_data]
train_accuracy = np.mean(train_predictions == training_labels)

# Get the test data accuracy
test_predictions = [cluster_labels[som.winner(x)] for x in test_data]
test_accuracy = np.mean(test_predictions == test_labels)

# Print out the results
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
print("Cluster Labels:")
print(cluster_labels)


 [ 10000 / 10000 ] 100% - 0:00:00 left 
 quantization error: 5.339722446266484
Train Accuracy: 0.1
Test Accuracy: 0.1
Cluster Labels:
[[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]


In [116]:
# Import necessary libraries
import numpy as np
from minisom import MiniSom
from sklearn.metrics import davies_bouldin_score

# Load the training and test data
training_data = TrainData
test_data = TestData
training_labels = TrainLabels
test_labels = TestLabels

# Reshape and normalize the data
training_data = training_data.reshape(training_data.shape[0], -1) / 255.
test_data = test_data.reshape(test_data.shape[0], -1) / 255.

# Define the SOM
som_shape = (20, 20)
som = MiniSom(som_shape[0], som_shape[1], 784, sigma=1.0, learning_rate=0.5)

# Train the SOM on the training data
som.train_random(data=training_data, num_iteration=10000, verbose=True)

# Assign each data point in the training set to its closest neuron in the SOM
train_clusters = np.zeros((20, 20, 10), dtype=int)
for i in range(len(training_data)):
    x = training_data[i]
    winner = som.winner(x)
    label = training_labels[i]
    if label < 10:
        train_clusters[winner[0], winner[1], label] += 1

# Specify the label for each cluster
cluster_labels = np.zeros((20, 20), dtype=int)
for i in range(20):
    for j in range(20):
        cluster_labels[i, j] = np.argmax(np.sum(train_clusters[i, j], axis=0))

# Compute the Davies-Bouldin Index
labels_map = som.labels_map(training_data, training_labels)
dbi_ocos = davies_bouldin_score(training_data, np.array([cluster_labels[x] for x in labels_map]))

# Get the train data accuracy
train_predictions = [cluster_labels[som.winner(x)] for x in training_data]
train_accuracy = np.mean(train_predictions == training_labels)

# Get the test data accuracy
test_predictions = [cluster_labels[som.winner(x)] for x in test_data]
test_accuracy = np.mean(test_predictions == test_labels)

# Print out the results
print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
print("DBI Score:", dbi_ocos)
print("Cluster Labels:")
print(cluster_labels)


 [ 10000 / 10000 ] 100% - 0:00:00 left 
 quantization error: 5.318760437623651


ValueError: Found input variables with inconsistent numbers of samples: [18000, 288]

In [42]:
# def compute_dbi(data, labels, neuron_labels):
#     n_clusters = len(np.unique(neuron_labels))
#     cluster_centers = np.array([np.mean(data[labels==i], axis=0) for i in range(n_clusters)])
#     s = np.zeros(n_clusters)
#     for i in range(n_clusters):
#         s[i] = np.mean([np.linalg.norm(data[j]-cluster_centers[i]) for j in np.where(neuron_labels==i)[0]])
#     dbi = 0.0
#     for i in range(n_clusters):
#         max_val = 0.0
#         for j in range(n_clusters):
#             if i != j:
#                 val = (s[i] + s[j]) / np.linalg.norm(cluster_centers[i]-cluster_centers[j])
#                 if val > max_val:
#                     max_val = val
#         dbi += max_val
#     dbi /= n_clusters
#     return dbi


In [50]:
# training_data = TrainData
# test_data = TestData
# training_labels = TrainLabels
# test_labels = TestLabels

# # Reshape the data to 1D array
# training_data = np.reshape(train_data, (train_data.shape[0], -1))
# test_data = np.reshape(test_data, (test_data.shape[0], -1))

# # Normalize the data to [0, 1]
# training_data = train_data / 255.
# test_data = test_data / 255.
# test_labels.shape

In [44]:
print()




In [45]:
# # Load the data
# training_data = TrainData
# test_data = TestData
# training_labels = TrainLabels
# test_labels = TestLabels
# print(type(test_data))
# # Reshape the training and test data to have 28x28 dimensions
# training_data_reshaped = training_data.reshape(-1, 784)
# test_data_reshaped = test_data.reshape(-1, 784)

# # Initialize the SOM
# som = MiniSom(x=30, y=30, input_len=784, sigma=1.0, learning_rate=0.5, neighborhood_function='gaussian', random_seed=42)

# # Train the SOM
# som.train(data=training_data_reshaped, num_iteration=100000, verbose=True)

# # Get the winning neuron for each data point in both training and test data
# winning_neurons_train = np.zeros((training_data_reshaped.shape[0], 2))
# for idx, data_point in enumerate(training_data_reshaped):
#     winning_neurons_train[idx] = som.winner(data_point)
    
# winning_neurons_test = np.zeros((test_data_reshaped.shape[0], 2))
# for idx, data_point in enumerate(test_data_reshaped):
#     winning_neurons_test[idx] = som.winner(data_point)

# # Create a dictionary to hold the counts of each class in each cluster
# cluster_counts = {}

# # Iterate over each data point and its corresponding winning neuron in the training data
# for idx, winner in enumerate(winning_neurons_train):
#     # Get the class label for this data point
#     label = training_labels[idx]

#     # Convert the winning neuron indices to a tuple
#     winner_tuple = tuple(winner.astype(int))

#     # Add the class label to the appropriate cluster in the dictionary
#     if winner_tuple not in cluster_counts:
#         cluster_counts[winner_tuple] = {}
#     if label not in cluster_counts[winner_tuple]:
#         cluster_counts[winner_tuple][label] = 0
#     cluster_counts[winner_tuple][label] += 1

# # Print the counts for each cluster in the training data
# for cluster, counts in cluster_counts.items():
#     print('Training Cluster {}: {}'.format(cluster, counts))

# # Get the predicted labels for the test data
# predicted_labels = []
# for winner in winning_neurons_test:
#     winner_tuple = tuple(winner.astype(int))
#     if winner_tuple in cluster_counts:
#         cluster_count = cluster_counts[winner_tuple]
#         predicted_label = max(cluster_count, key=cluster_count.get)
#     else:
#         predicted_label = np.random.choice(training_labels)
#     predicted_labels.append(predicted_label)

# # Calculate the test accuracy
# test_accuracy = accuracy_score(test_labels, predicted_labels)
# print('Test accuracy:', test_accuracy)

# # Calculate the DBI score
# dbi_score = davies_bouldin_score(test_data_reshaped, predicted_labels)
# print('DBI score:', dbi_score)


In [None]:
# training_data = TrainData
# test_data = TestData
# training_labels = TrainLabels
# test_labels = TestLabels

In [115]:
# # Load the training and test data
# training_data = TrainData
# test_data = TestData
# training_labels = TrainLabels
# test_labels = TestLabels

# # Flatten the training and test data and normalize it:
# training_data = training_data.reshape(training_data.shape[0], -1) / 255.
# test_data = test_data.reshape(test_data.shape[0], -1) / 255.

# # Define the parameters for the SOM:
# input_len = 784  # number of features
# classes = 10     # number of classes
# som_size = 30    # size of the SOM
# sigma = 1.0      # neighborhood radius
# learning_rate = 0.5

# # Implement the Winner-Takes-All approach:
# # Create a SOM with one neuron for each class
# som = MiniSom(som_size, som_size, input_len, sigma=sigma, learning_rate=learning_rate, 
#               neighborhood_function='gaussian', random_seed=0)
# som.pca_weights_init(training_data)
# som.train_batch(training_data, 10000, verbose=True)

# # Determine the number of images of each class that are placed in each cluster
# labels_map = som.labels_map(training_data, training_labels)
# class_counts = np.zeros((som_size, som_size, classes))
# for index, label in np.ndenumerate(training_labels):
#     i, j = som.winner(training_data[index])
#     class_counts[i, j, label] += 1

# # Determine the label for each cluster
# class_labels = np.zeros((som_size, som_size))
# for i in range(som_size):
#     for j in range(som_size):
#         class_labels[i, j] = np.argmax(class_counts[i, j])

# # Compute the DBI evaluation criteria on the clustering result
# labels_map = som.labels_map(training_data, training_labels)
# # dbi_wta = davies_bouldin_score(training_data, np.array([class_labels[x] for x in labels_map]))
# dbi_wta = davies_bouldin_score(training_data, training_labels)

# print('DBI for Winner-Takes-All approach:', dbi_wta)

# # Compute the test data accuracy
# test_labels_pred = [np.argmax(class_counts[som.winner(x)]) for x in test_data]
# accuracy_wta = np.mean(np.equal(test_labels, test_labels_pred))
# print('Accuracy for Winner-Takes-All approach:', accuracy_wta)

# print("Cluster Labels:",cluster_labels)
# print()

In [39]:
# # Load the training and test data
# training_data = TrainData
# test_data = TestData
# training_labels = TrainLabels
# test_labels = TestLabels

# # Flatten the training and test data and normalize it:
# training_data = training_data.reshape(training_data.shape[0], -1) / 255.
# test_data = test_data.reshape(test_data.shape[0], -1) / 255.

# # Define the parameters for the SOM:
# input_len = 784  # number of features
# classes = 10     # number of classes
# sigma = 1.0      # neighborhood radius
# learning_rate = 0.5

# # Implement the On-Center, Off-Surround approach
# som_rect1 = MiniSom(20, 20, input_len, sigma=sigma, learning_rate=learning_rate, 
#                     neighborhood_function='gaussian', random_seed=0)
# som_rect1.pca_weights_init(training_data)
# som_rect1.train_batch(training_data, 10000, verbose=True)

# som_rect2 = MiniSom(30, 30, input_len, sigma=sigma, learning_rate=learning_rate, 
#                     neighborhood_function='gaussian', random_seed=0)
# som_rect2.pca_weights_init(training_data)
# som_rect2.train_batch(training_data, 10000, verbose=True)

# # Determine the number of images of each class that are placed in each cluster for both SOMs
# labels_map_rect1 = som_rect1.labels_map(training_data, training_labels)
# class_counts_rect1 = np.zeros((20, 20, classes))
# for index, label in np.ndenumerate(training_labels):
#     i, j = som_rect1.winner(training_data[index])
#     class_counts_rect1[i, j, label] += 1

# labels_map_rect2 = som_rect2.labels_map(training_data, training_labels)
# class_counts_rect2 = np.zeros((30, 30, classes))
# for index, label in np.ndenumerate(training_labels):
#     i, j = som_rect2.winner(training_data[index])
#     class_counts_rect2[i, j, label] += 1

# # Determine the label for each cluster for both SOMs
# class_labels_rect1 = np.zeros((20, 20))
# for i in range(20):
#     for j in range(20):
#         class_labels_rect1[i, j] = np.argmax(class_counts_rect1[i, j])

# class_labels_rect2 = np.zeros((30, 30))
# for i in range(30):
#     for j in range(30):
#         class_labels_rect2[i, j] = np.argmax(class_counts_rect2[i, j])

# # Compute the DBI evaluation criteria on the clustering result for both SOMs
# # DBI for SOM with 20x20 neurons
# dbi_rect1 = davies_bouldin_score(training_data, np.array([class_labels_rect1[x] for x in labels_map_rect1]))

# # DBI for SOM with 30x30 neurons
# dbi_rect2 = davies_bouldin_score(training_data, np.array([class_labels_rect2[x] for x in labels_map_rect2]))

# print('DBI for On-Center, Off-Surround approach (20x20 neurons):', dbi_rect1)
# print('DBI for On-Center, Off-Surround approach (30x30 neurons):', dbi_rect2)


# # Compute the test data accuracy for both SOMs
# test_labels_pred_rect1 = [np.argmax(class_counts_rect1[som_rect1.winner(x)]) for x in test_data]
# accuracy_rect1 = np.mean(np.equal(test_labels, test_labels_pred_rect1))
# test_labels_pred_rect2 = [np.argmax(class_counts_rect2[som_rect2.winner(x)]) for x in test_data]
# accuracy_rect2 = np.mean(np.equal(test_labels, test_labels_pred_rect2))

# print('Accuracy for On-Center, Off-Surround approach (20x20 neurons):', accuracy_rect1)
# print('Accuracy for On-Center, Off-Surround approach (30x30 neurons):', accuracy_rect2)



In [111]:
# from sklearn.metrics import davies_bouldin_score

# # Get the SOM predictions for the training data
# train_predictions = [cluster_labels[som.winner(x)] for x in training_data]

# # Check if SOM has learned more than one cluster
# unique_labels = np.unique(train_predictions)
# if len(unique_labels) > 1:
#     # Compute the Davies-Bouldin Index
#     train_dbi = davies_bouldin_score(training_data, train_predictions)
# else:
#     train_dbi = float('nan')
    
# # Get the test data accuracy
# test_predictions = [cluster_labels[som.winner(x)] for x in test_data]
# test_accuracy = np.mean(test_predictions == test_labels)

# # Print out the results
# print("Train DBI:", train_dbi)
# print("Test Accuracy:", test_accuracy)
# print("Cluster Labels:")
# print(cluster_labels)
