In [1]:
import numpy as np
from sklearn.metrics import euclidean_distances
import pandas as pd

import sys
sys.path.insert(0, '../SOM-LVQ')
import minisom
import LVQ

In [2]:
data = pd.read_csv(r"../data/SD-2X_rocktype.csv")
x = data.iloc[:, 0:6].values
y = data.iloc[:, 6].values
y[y==4] = 0
from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.1, random_state=44)

# use Minmaxscaler because we use euclidean distance
from sklearn.preprocessing import MinMaxScaler
minmax = MinMaxScaler()
x_train = minmax.fit_transform(x_train)
x_test = minmax.transform(x_test)



In [3]:
# Training the SOM
from minisom import MiniSom
som = MiniSom(x = 15, y = 15, input_len = 6, sigma = 1.0, learning_rate = 0.5)
som.random_weights_init(x_train)
som.train_random(data = x_train, num_iteration = 100)

In [4]:
labels = np.zeros((4, 15, 15))
propa = np.zeros((4, 15, 15))

In [5]:
# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['v', 's', 'o', '4']
colors = ['r', 'g', 'b', 'y']
for i, x in enumerate(x_train):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y_train[i]],
         markeredgecolor = colors[y_train[i]],
         markerfacecolor = 'None',
         markersize = 10,
         markeredgewidth = 2)
    if y_train[i] == 0:
        labels[0, w[0], w[1]] += 1
    elif y_train[i] == 1:
        labels[1, w[0], w[1]] += 1
    elif y_train[i] == 2:
        labels[2, w[0], w[1]] += 1
    elif y_train[i] == 3:
        labels[3, w[0], w[1]] += 1
show()

<Figure size 640x480 with 2 Axes>

In [6]:
for i in range(4):
    for j in range(15):
        for k in range(15):
            total = labels[0, j, k] + labels[1, j, k] + labels[2, j, k] + labels[3, j, k]
            if total == 0. :
                continue
            else:
                propa[i, j, k] = labels[i, j, k] / total
                propa[i, j, k] = round(propa[i, j, k], 2)

In [7]:
propa

array([[[1.  , 1.  , 1.  , 0.  , 0.67, 0.  , 1.  , 0.  , 0.  , 0.  ,
         0.  , 0.  , 0.  , 0.  , 1.  ],
        [1.  , 1.  , 0.95, 0.  , 0.  , 0.  , 1.  , 0.67, 0.  , 0.  ,
         0.  , 0.  , 0.  , 0.  , 0.  ],
        [1.  , 0.  , 0.  , 0.  , 0.  , 1.  , 1.  , 1.  , 0.  , 0.  ,
         0.  , 0.  , 0.  , 0.  , 0.02],
        [0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.  , 0.  , 0.  , 0.  ,
         0.  , 0.  , 0.  , 0.  , 0.16],
        [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.75, 0.12, 0.  ,
         0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.  , 0.  , 0.  , 0.19, 1.  , 0.  , 0.  , 1.  ,
         0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.22, 0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.8 , 0.67, 1.  ,
         0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
         0.  , 0.  , 0.  , 0.  , 0.  ],
        [0.33, 0.  , 0.8 , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ,
         1.  , 0.  , 0.  , 0.  , 0.  ],
        [0.  , 1.  

In [8]:
p_vectors = som.weights

In [9]:
taggings = np.zeros((15, 15))
for i in range(15):
    for j in range(15):
        tmp = np.array([labels[0][i][j], labels[1][i][j], labels[2][i][j], labels[3][i][j]])
        sort = tmp.argsort()
        if (tmp[sort[3]] <= tmp[sort[2]] * 5): # must be improve
            taggings[i][j] = np.random.choice([sort[3], sort[2]])
        else:
            taggings[i][j] = sort[3]

In [10]:
def find_closest(in_vector, proto_vectors):
    """
    Find the closest prototype vector for a given vector

    Parameters
    -------
    in_vector: the given vector
    proto_vectors: the set of prototype vectors
    """
    closest_distance = 99999
    for i in range(15):
        for j in range(15):
#             if (labels[0, i, j] == 0 and labels[1, i, j] == 0 and labels[2, i, j] == 0 and labels[3, i, j] == 0):
#                 continue
#             else:
            distance = np.linalg.norm(in_vector - proto_vectors[i][j])
            if distance < closest_distance:
                closest_distance = distance
                closest = (i, j)         
    return closest

In [11]:
def predict(test_vector, p, taggings):
    position = find_closest(test_vector, p)
    return taggings[position[0], position[1]]

In [12]:
predicted_y1 = [predict(instance, p_vectors, taggings) for instance in x_test]

from sklearn.metrics import classification_report

print (classification_report(y_test, predicted_y1, target_names=['0','1', '2', '3']))

             precision    recall  f1-score   support

          0       0.92      0.91      0.92        54
          1       0.94      0.70      0.80        23
          2       0.31      0.62      0.42         8
          3       0.83      0.81      0.82        47

avg / total       0.86      0.82      0.83       132



In [13]:
data_new = pd.read_csv(r"../data/SD-3X_rocktype.csv")
x_new = data_new.iloc[:, 0:6].values
y_new = data_new.iloc[:, 6].values
y_new[y_new == 4] = 0
x_new = minmax.fit_transform(x_new)
predicted_y_new = [predict(instance, p_vectors, taggings) for instance in x_new]

from sklearn.metrics import classification_report

print (classification_report(y_new, predicted_y_new, target_names=['0', '1', '2', '3']))

             precision    recall  f1-score   support

          0       0.83      0.95      0.88       331
          1       0.93      0.27      0.42        95
          2       0.09      0.35      0.14        26
          3       0.68      0.48      0.56       205

avg / total       0.77      0.68      0.69       657



In [14]:
mappings = som.win_map(x_train)

In [15]:
mappings[(0,4)]

[array([0.40058981, 0.5017321 , 0.53805576, 0.64099659, 0.3959    ,
        0.30937774]),
 array([0.42148027, 0.55985373, 0.58477769, 0.66268837, 0.438     ,
        0.23984809]),
 array([0.39018151, 0.54445727, 0.57874906, 0.71453379, 0.3759    ,
        0.26877008])]