In [1]:
# http://mnemstudio.org/neural-networks-kohonen-self-organizing-maps.htm
# https://www.saedsayad.com/clustering_som.htm
# Packages and libraries used in this program
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [2]:
# Letter classes listed for this dataset
alphabets = np.array(['C', 'I', 'O', 'P', 'S', 'U', 'X', 'Z'])

# Input patterns (cast to float) and the label array stored alongside them
raw_data = np.load('Alphabets.npy').astype(float)
labels = np.load('Alphabet_labels.npy')

In [3]:
# --- Problem size -----------------------------------------------------------
# P = total number of input patterns (rows of raw_data)
# N = dimension of each input pattern (columns of raw_data)
P, N = raw_data.shape[:2]

# Number of neurons, arranged along a single (1-dimensional) axis
M = 625

# --- Training hyperparameters -----------------------------------------------
# Initial learning rate and the factor it is multiplied by after each iteration
learning_rate = 0.6
DECAY_FACTOR = 0.96

# Neighborhood radius (in neuron indices) around the winning neuron, and the
# iteration step that governs when the radius is reduced during training
R = 0
RADIUS_REDUCTION_STEP = 20

# Stopping criteria: iteration budget, and the maximum absolute weight change
# below which training is considered converged
MAX_ITERATIONS = 100
MAX_WEIGHT_DIFF = 0.0001

In [4]:
# Rescale every input feature to the [0, 1] range.
# The fitted scaler is kept in `sc` so the transformation can be inverted later.
sc = MinMaxScaler(feature_range=(0, 1)).fit(raw_data)
data = sc.transform(raw_data)

In [5]:
# Train the 1-dimensional self-organizing map.
#
# weight has shape (N, M): column j holds the weight vector of neuron j.
# NOTE: learning_rate and R are rebound by this cell, so re-run the
# configuration cell before re-running training from scratch.

# Step 1: Initialization of each node's weights with a random number between 0 and 1
weight = np.random.rand(N, M)
print("Initial weights:")
print(weight)

# Snapshots must be real copies: weight is updated in place below, so a plain
# assignment would alias it and the convergence test would always see a
# difference of exactly 0 (stopping training after the first iteration).
initial_weight = weight.copy()
last_weight = weight.copy()

for iteration in range(MAX_ITERATIONS):
    print("\r\nIteration:", iteration)

    # Step 2: Choosing a random input patterns ordering
    random_ordering = np.arange(P)
    np.random.shuffle(random_ordering)
    print("Random input patterns ordering:", random_ordering)

    # For each input pattern do the steps 3-5
    for p in random_ordering:

        print("Pattern index:", p)

        # Step 3: Calculating the Best Matching Unit (BMU)
        # Squared Euclidean distance between pattern p and every neuron at once:
        # the (N, 1) pattern column broadcasts against the (N, M) weight matrix.
        pattern = data[p].reshape(N, 1)
        distance_vector = np.sum((weight - pattern) ** 2, axis=0)

        # Step 4: find index j such that distance_vector[j] is a minimum
        min_distance_index = np.argmin(distance_vector)

        # Step 5: Update weights for all units j within a specified neighborhood
        # of min_distance_index and for all i.
        # The neighborhood is [winner - R, winner + R] inclusive, clipped to the
        # valid neuron range; the "+ 1" makes the exclusive slice end include
        # winner + R (and the winner itself when R == 0).
        begin_j = max(0, min_distance_index - R)
        end_j = min(M, min_distance_index + R + 1)

        weight[:, begin_j:end_j] += learning_rate * (pattern - weight[:, begin_j:end_j])

    print("Updated weights:")
    print(weight)

    # Step 6: Update learning rate
    learning_rate = DECAY_FACTOR * learning_rate
    print("Learning rate:", learning_rate)

    # Step 7: Reduce radius of topological neighborhood at specified times,
    # i.e. once after every RADIUS_REDUCTION_STEP completed iterations
    if (iteration + 1) % RADIUS_REDUCTION_STEP == 0:
        if R > 0:
            R = R - 1
        print("Neighborhood radius:", R)

    # Step 8: Test stopping condition -- largest absolute change of any weight
    # since the end of the previous iteration
    weight_diff = np.amax(np.abs(weight - last_weight))

    print("weight_diff:", weight_diff)

    if weight_diff < MAX_WEIGHT_DIFF:
        break

    # Keep an independent snapshot for the next iteration's comparison
    last_weight = weight.copy()
    # end of for loop

Initial weights:
[[0.00426465 0.87654383 0.72063779 ... 0.08479562 0.98137279 0.58256539]
 [0.2310008  0.16707614 0.02830018 ... 0.68509223 0.84196684 0.6015581 ]
 [0.15090281 0.39393003 0.16255975 ... 0.50041238 0.45643967 0.46651945]
 ...
 [0.63670362 0.91570931 0.6866171  ... 0.13792174 0.70198117 0.82011448]
 [0.06034284 0.86855464 0.03897398 ... 0.55758048 0.64184294 0.42296552]
 [0.51782651 0.08693629 0.92598476 ... 0.86402027 0.71851123 0.95541586]]

Iteration: 0
Random input patterns ordering: [284 408 260 480 358 137  22  98 465 285 272 350 254 419 131 140  81 440
  27 332 314 387  14  43 159 472 401 212 414 240 484 177 232 172 195  25
 277 360 499  36 239 470 481  59 266 439 382 287 153 348  70 402 245 392
 116 371 251  41 182 474 217 222  88 453 346 400 313  56 393 433 286 167
 323 416 193 201 452 133 215 411 331 238  92 276   0 450 369 365 318 492
  84 298 469  44 213  87 340 164 444 275 351  55   6   5 119 432 250 320
 125 165 214  31  18 363  86 375 173 227  61 136 122 36

KeyboardInterrupt: 

In [None]:
# Sanity check: weight is created with np.random.rand(N, M), so this prints (N, M)
print(weight.shape)

In [None]:
# Assign every input pattern to its best matching unit (BMU).
#
# Each row p of clustered_patterns stores the de-normalized pattern in
# columns 0..N-1 and the index of its closest neuron in column N.
clustered_patterns = np.zeros((P, N + 1))
original_data = sc.inverse_transform(data)
clustered_patterns[:, 0:N] = original_data

for p in range(P):

    # Squared Euclidean distance between pattern p and every neuron at once:
    # the (N, 1) pattern column broadcasts against the (N, M) weight matrix.
    distance_vector = np.sum((weight - data[p].reshape(N, 1)) ** 2, axis=0)

    # find index j such that distance_vector[j] is a minimum
    min_distance_index = np.argmin(distance_vector)

    clustered_patterns[p, N] = min_distance_index

In [None]:
# Show each stored pattern as a 28x28 image, preceded by the value held in
# the last column (index N) of its row.
for p in range(P):
    row = clustered_patterns[p]
    pixels = row[0:N]
    plt.imshow(pixels.reshape(28, 28))
    print(row[N])
    plt.show()