In [1]:
import numpy as np
from sklearn.metrics import euclidean_distances
import pandas as pd
import minisom
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
import LVQ

In [2]:
data = pd.read_csv(r"../data/SVNE_Hieu.csv")
x = data.iloc[:, 0:7].values # try to retain the labels of x
y = data.iloc[:, 6].values
from sklearn.cross_validation import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=20)

# use Minmaxscaler because we use euclidean distance
from sklearn.preprocessing import MinMaxScaler
minmax = MinMaxScaler()
x_train[:, 0:6] = minmax.fit_transform(x_train[:, 0:6] )
x_test[:, 0:6]  = minmax.transform(x_test[:, 0:6] )



In [3]:
# Training the SOM
from minisom import MiniSom
som = MiniSom(x = 10, y = 10, input_len = 6, sigma = 1.0, learning_rate = 0.5)
som.random_weights_init(x_train[:, 0:6])
som.train_random(data = x_train[:, 0:6], num_iteration = 100)

In [4]:
np.unique(y)

array([0, 1], dtype=int64)

In [5]:
labels = np.zeros((2, 10, 10))
labels

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]

In [6]:
# Visualizing the results
from pylab import bone, pcolor, colorbar, plot, show
bone()
pcolor(som.distance_map().T)
colorbar()
markers = ['1', '2']
colors = ['r', 'g']
for i, x in enumerate(x_train[:, 0:6]):
    w = som.winner(x)
    plot(w[0] + 0.5,
         w[1] + 0.5,
         markers[y_train[i]],
         markeredgecolor = colors[y_train[i]],
         markerfacecolor = 'None',
         markersize = 10,
         markeredgewidth = 2)
    if y_train[i] == 0:
        labels[0, w[0], w[1]] += 1
    elif y_train[i] == 1:
        labels[1, w[0], w[1]] += 1
show()

<Figure size 640x480 with 2 Axes>

In [7]:
labels

array([[[30., 85., 37., 24., 43.,  0., 22.,  7.,  3.,  3.],
        [10., 17.,  8., 21., 17.,  3.,  2.,  0.,  9.,  7.],
        [18., 23.,  0.,  0.,  3.,  0.,  6.,  1.,  1.,  6.],
        [14.,  2.,  1.,  0.,  0.,  0.,  3.,  0.,  0.,  0.],
        [ 2.,  1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.],
        [ 3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  2.,  4.],
        [ 5.,  0.,  2.,  0.,  2.,  0.,  3.,  0.,  2.,  0.],
        [ 0.,  0., 23.,  9.,  0.,  1.,  0.,  3.,  1.,  2.],
        [ 0.,  3.,  0.,  2.,  1.,  0.,  2.,  0.,  0.,  6.],
        [ 1.,  5.,  8.,  0.,  0.,  0.,  7.,  2.,  2., 16.]],

       [[ 0.,  0.,  1.,  2.,  2.,  0.,  6.,  8.,  7., 51.],
        [ 0.,  0.,  1.,  0.,  0.,  5.,  2.,  0.,  9., 37.],
        [ 1.,  1.,  0.,  0.,  0.,  0.,  5.,  2.,  2.,  9.],
        [ 0., 13., 15.,  3.,  1.,  3.,  1.,  0., 57.,  1.],
        [ 0.,  8., 40., 39., 12.,  1.,  3., 15.,  5.,  1.],
        [ 0.,  2.,  1., 11.,  0.,  2.,  3., 38.,  1.,  0.],
        [16.,  0.,  6.,  1.,  1.,  2.,

In [8]:
# parameter for LVQ
R = 50 # R is the # of initial prototype for each class
n_classes = 2
epsilon = 0.9
epsilon_dec_factor = 0.001

In [9]:
from random import randint
p_vectors = []
count_death = 0
for i in range(10):
    for j in range(10):
        if(labels[0, i, i] == 0 and labels[1, i, j] == 0):
#             p = LVQ.prototype(randint(0,1), som.weights[(i,j)], epsilon)
#             p_vectors.append(p)
            count_death = count_death + 1
        elif(labels[0, i, j] > 4 * labels[1, i, j]):
            p = LVQ.prototype(0, som.weights[(i,j)], epsilon)
            p_vectors.append(p)
        elif(4 * labels[0, i, j] < labels[1, i, j]):
            p = LVQ.prototype(1, som.weights[(i,j)], epsilon)
            p_vectors.append(p)
        else:
#             p = LVQ.prototype(randint(0,1), som.weights[(i,j)], epsilon)
#             p_vectors.append(p)
            count_death = count_death + 1
print("class id \t Initial prototype vector \n")
for p_v in p_vectors:
    print(p_v.class_id, '\t', p_v.p_vector)

class id 	 Initial prototype vector 

0 	 [0.20408381 0.22960001 0.45734529 0.25819779 0.19641066 0.76891201]
0 	 [0.21651889 0.20100191 0.43186353 0.26455732 0.14835851 0.79637358]
0 	 [0.28660437 0.1917542  0.38525171 0.38870594 0.19717371 0.73668148]
0 	 [0.32426019 0.22485942 0.38666373 0.46088653 0.29834715 0.62718204]
0 	 [0.37758395 0.25524921 0.3682054  0.51600858 0.41094458 0.47070446]
1 	 [0.25147858 0.33365082 0.40452325 0.5534917  0.59526976 0.03312754]
0 	 [0.28587851 0.2542753  0.45013259 0.33144308 0.29835302 0.6724055 ]
0 	 [0.24695997 0.25521794 0.46230403 0.28364618 0.25663166 0.71682257]
0 	 [0.28137983 0.23773819 0.42066249 0.34845339 0.25535046 0.70761953]
0 	 [0.338549   0.25513648 0.39989904 0.45005439 0.3364522  0.58704486]
0 	 [0.35247583 0.27795896 0.40208334 0.47348503 0.39111529 0.50957762]
1 	 [0.24574359 0.31681843 0.39728567 0.58502882 0.57964727 0.05613012]
0 	 [0.33494317 0.29033186 0.42975382 0.37988524 0.42870284 0.53919683]
0 	 [0.29771903 0.30551432

In [10]:
count_death

44

In [11]:
lvq = LVQ.LVQ(x_train[:, 0:6], y_train, n_classes=2, n_neurons=R, p_vectors=p_vectors, epsilon=0.9, epsilon_dec_factor=0.001)
neurons = lvq.fit(x_train[:, 0:6], y_train)

In [12]:
predicted_y = [lvq.predict(instance) for instance in x_test[:, 0:6]]

from sklearn.metrics import classification_report

print (classification_report(y_test, predicted_y, target_names=['0', '1']))

             precision    recall  f1-score   support

          0       0.96      0.72      0.82        60
          1       0.81      0.97      0.88        73

avg / total       0.87      0.86      0.85       133



In [13]:
A = np.array(predicted_y)-y_test
len(A[A==0])/len(A)

0.8571428571428571