In [11]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import eig
import pandas as pd
from minisom import MiniSom  
import math
import ipynb
from sklearn.metrics import classification_report
%matplotlib inline

In [12]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

### Define functions

This function flattens and reshapes our data so that we can feed it to our SOM for processing

In [13]:
def flatten_and_reshape(data):
    res = np.reshape(data, (data.shape[0], data.shape[1]*data.shape[2]))
    return(res)

This function extracts the egine vector values from the original data

In [14]:
def extract_egine_vectors(data):
    extracted_vals = []
    for each_image in data:
        w,v=eig(each_image)
        extracted_vals.append(
            v
        )
    extracted_vals = np.array(extracted_vals)
    return extracted_vals

This function evaluates our SOM against our test and train data

In [15]:
def classify(som, x_test, x_train, y_train):
    winmap = som.labels_map(x_train, y_train)
    default_class = np.sum(list(winmap.values())).most_common()[0][0]
    result = []
    for d in x_test:
        win_position = som.winner(d)
        if win_position in winmap:
            result.append(winmap[win_position].most_common()[0][0])
        else:
            result.append(default_class)
    return result

### Create our features

First were going to look at egine values

In [8]:
x_test_eig = extract_egine_vectors(x_test)
x_train_eig = extract_egine_vectors(x_train)
x_test_eig = flatten_and_reshape(x_test_eig)
x_train_eig = flatten_and_reshape(x_train_eig)

In [81]:
np.set_printoptions(formatter={'float': '{: 0.100f}'.format})
#print(x_test_eig.real)

Now were going to look at basic pixle values

In [16]:
x_test = flatten_and_reshape(x_test)
x_train = flatten_and_reshape(x_train)

**Now we run a SOM with each of these datasets to get an idea of how well it performs**

Create SOM workflow, I'm just going to do a function

OK so I'm thinking of using the basic rule of X to determine the dimensions of the SOM. it uses the N_featues to create it. 

According to the MIN SOM package "A rule of thumb to set the size of the grid for a dimensionality reduction task is that it should contain 5*sqrt(N) neurons where N is the number of samples in the dataset to analyze"

In [23]:
def create_train_som(x_train, n_features):
    
    # Create SOM dimensions
    som_nurons = int((math.sqrt(5*math.sqrt(n_features))))
    print(som_nurons)
    x = som_nurons
    y = som_nurons
    
    #Create and train SOM
    som = MiniSom(x, y, n_features, sigma=0.3, learning_rate=0.5) # initialization of 6x6 SOM
    som.random_weights_init(x_train)
#     print("Training...")
    som.train_random(x_train,100, verbose=True) # training with 100 iterations
#     print("...ready!")
    return som

In [114]:
som_nurons = int((math.sqrt(5*math.sqrt(10))))

In [19]:
my_first_som = create_train_som(x_train[0:1000], n_features = 784)

11
Training...
 [   0 / 100 ]   0% - ? it/s [   0 / 100 ]   0% - ? it/s [   1 / 100 ]   1% - 0:00:00 left  [   2 / 100 ]   2% - 0:00:00 left  [   3 / 100 ]   3% - 0:00:00 left  [   4 / 100 ]   4% - 0:00:00 left  [   5 / 100 ]   5% - 0:00:00 left  [   6 / 100 ]   6% - 0:00:00 left  [   7 / 100 ]   7% - 0:00:00 left  [   8 / 100 ]   8% - 0:00:00 left  [   9 / 100 ]   9% - 0:00:00 left  [  10 / 100 ]  10% - 0:00:00 left  [  11 / 100 ]  11% - 0:00:00 left  [  12 / 100 ]  12% - 0:00:00 left  [  13 / 100 ]  13% - 0:00:00 left  [  14 / 100 ]  14% - 0:00:00 left  [  15 / 100 ]  15% - 0:00:00 left  [  16 / 100 ]  16% - 0:00:00 left  [  17 / 100 ]  17% - 0:00:00 left  [  18 / 100 ]  18% - 0:00:00 left  [  19 / 100 ]  19% - 0:00:00 left  [  20 / 100 ]  20% - 0:00:00 left  [  21 / 100 ]  21% - 0:00:00 left  [  22 / 100 ]  22% - 0:00:00 left  [  23 / 100 ]  23% - 0:00:00 left  [  24 / 100 ]  24% - 0:00:00 left  [  25 / 100 ]  25% - 0:00:00 left  [  26 / 100 ]  26% - 0:00

In [22]:
winmap = my_first_som.labels_map(x_train, y_train)
winmap

defaultdict(list,
            {(4, 9): Counter({5: 473, 3: 36, 0: 6, 6: 7, 2: 6, 9: 2, 8: 3}),
             (1, 5): Counter({0: 1214, 2: 4, 5: 2, 6: 5, 3: 4, 9: 1}),
             (4,
              2): Counter({4: 710,
                      9: 451,
                      7: 153,
                      5: 35,
                      8: 7,
                      3: 7,
                      2: 15,
                      6: 20,
                      0: 1}),
             (5,
              0): Counter({1: 591,
                      7: 6,
                      3: 1,
                      4: 7,
                      8: 13,
                      9: 2,
                      2: 2,
                      6: 7,
                      5: 3}),
             (10,
              2): Counter({9: 1258,
                      4: 269,
                      3: 15,
                      7: 163,
                      5: 104,
                      8: 31,
                      1: 1}),
             (8,
              7): Cou

In [12]:
my_second_som = create_train_som(x_test_eig.real)

Training...
 [ 100 / 100 ] 100% - 0:00:00 left 
 quantization error: 4.099520833291314
...ready!


How will it work:

1st step is to train your som on your training data. 
Each SOM should be trained on this same training data if we are to combine them

the number of training examples essentially needs to be the same so when I loop and create the sampling layer we're all G

so I'll pass the trained soms and their training data then loop through, grab the training example index and create a df that has the winning som INDEX at that point. 


what do we do if the training data is different? Like it has a different number of observations?




In [77]:
def create_sampling_layer(trained_somz, traing_dataz):
    # create empty output list
    int_output = []
    
    for n, som in enumerate(trained_somz):
        som_output = []
        training_data = traing_dataz[n]
        # find each SOM value 
        winning_pos = [som.winner(d) for d in training_data[0:10]]
        winning_int = 
        int_output.append(winning_pos)
        
    # Combine the winning SOM position for the given example.
    final_output = pd.DataFrame(int_output).transpose().to_numpy()
    return(final_output)

The below function takes the coordinants of the SOM and the size of the SOM to convert the coordinants to a single numerical value so that we can pass it to the next SOM.

In [124]:
def convert_coordinants(coordinants, som_y_size):
    x = coordinants[0]
    y = coordinants[1]
    num = x * som_y_size + y
    return(num)

Ok now the code below takes the results from our create sampling layer and converts them from coordinants to integer values using the function convert_coordiantnts above.

In [130]:
array1 = []
for obs in x :
    array = []
    for coordinant in obs:
        numeric_value = convert_coordinants(coordinant, 25)
        array.append(numeric_value)
    array1.append(array)

In [131]:
array1

[[618, 97],
 [140, 27],
 [555, 420],
 [150, 149],
 [249, 539],
 [345, 494],
 [605, 97],
 [618, 97],
 [350, 97],
 [271, 97]]

In [78]:
somz = [my_first_som, my_second_som]
dataz = [x_train, x_test_eig.real]

x = create_sampling_layer(somz, dataz)

In [98]:
#my_first_som.win_map(x_train[1:1000])

In [132]:
next_level_som = create_train_som(array1, n_features = 2)

2
Training...
 [   0 / 100 ]   0% - ? it/s [   0 / 100 ]   0% - ? it/s [   1 / 100 ]   1% - 0:00:00 left  [   2 / 100 ]   2% - 0:00:00 left  [   3 / 100 ]   3% - 0:00:00 left  [   4 / 100 ]   4% - 0:00:00 left  [   5 / 100 ]   5% - 0:00:00 left  [   6 / 100 ]   6% - 0:00:00 left  [   7 / 100 ]   7% - 0:00:00 left  [   8 / 100 ]   8% - 0:00:00 left  [   9 / 100 ]   9% - 0:00:00 left  [  10 / 100 ]  10% - 0:00:00 left  [  11 / 100 ]  11% - 0:00:00 left  [  12 / 100 ]  12% - 0:00:00 left  [  13 / 100 ]  13% - 0:00:00 left  [  14 / 100 ]  14% - 0:00:00 left  [  15 / 100 ]  15% - 0:00:00 left  [  16 / 100 ]  16% - 0:00:00 left  [  17 / 100 ]  17% - 0:00:00 left  [  18 / 100 ]  18% - 0:00:00 left  [  19 / 100 ]  19% - 0:00:00 left  [  20 / 100 ]  20% - 0:00:00 left  [  21 / 100 ]  21% - 0:00:00 left  [  22 / 100 ]  22% - 0:00:00 left  [  23 / 100 ]  23% - 0:00:00 left  [  24 / 100 ]  24% - 0:00:00 left  [  25 / 100 ]  25% - 0:00:00 left  [  26 / 100 ]  26% - 0:00:

ok so now I need to figure out how I'm going to convert the coordinants into a single value so I can pass it to the SOM. sigh there is quite a bit of effort here, I mean the other thing is I'm going to have to normalise this shit when I do pass it through, but that is something i can deal with later. I think I should just know how big the SOM is, so maybe when I get deep into development I will fork the repo but for now I will just manually enter it. So it'll all be part of a big ol function that I pass.

(10, 2)

(1, 2)

In [87]:
my_second_som = create_train_som(x_train_eig.real)

TypeError: create_train_som() missing 1 required positional argument: 'n_features'

In [None]:
# winning_pos = my_first_som.winner([result for result in x_train[1:10]])
x = [my_first_som.winner(d) for d in x_train]
# my_first_som.winner(x_train[1])

In [133]:
next_level_som

<minisom.MiniSom at 0x165e66198>

In [None]:
len(x_train)

In [None]:
print(classification_report(y_test, classify(my_second_som,  x_test_eig.real, x_train_eig.real, y_train)))

Finally we'll look at constructing a SOM using the output from the two above SOMs to see if it performs better

In [None]:
# We can use this to feed into the above SOM!!!
flatten_and_reshape(np.array(my_second_som.distance_map()))

In [62]:
extracted_vals

NameError: name 'extracted_vals' is not defined

In [40]:
x_train.shape

(60000, 784)

In [10]:
x_test[1]

array([  0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0, 116, 125, 171, 255, 255, 150,  93,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0, 169, 253, 253, 253, 253, 253, 253, 218,  30,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0, 169, 253, 253, 253, 213, 142, 176, 253,
       253, 122,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  52, 250, 253, 210,  32,  1

In [None]:
def create_train_som(x_train):
    som = MiniSom(25, 25, 9, sigma=0.3, learning_rate=0.5) # initialization of 6x6 SOM
    som.random_weights_init(x_train)
    print("Training...")
    som.train_random(x_train,100, verbose=True) # training with 100 iterations
    print("...ready!")
    return som
