In [93]:
import numpy as np
from sklearn.metrics import euclidean_distances
import pandas as pd
import minisom
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from minisom import MiniSom
from random import randint

In [94]:
# Load the dataset. The path is relative, so it is resolved against the
# current working directory of the kernel.
data = pd.read_csv(r"SVNE_Hieu.csv")
# Columns 0-5 become the feature matrix `x` (one row per sample).
x = data.iloc[:, 0:6].values
# Column 6 becomes the target vector `y`; the cross-validation cell treats
# the values 0 and 1 as the two classes.
y = data.iloc[:, 6].values

In [95]:
# Running total of the per-fold accuracies; the cross-validation cell adds to
# it once per fold and divides it by the number of folds at the end.
result = 0

In [96]:
# Cross-validated SOM + LVQ classifier.
# For every fold: train a SOM on the training split, turn each SOM node into a
# labelled prototype by a signed vote of the training samples it wins, refine
# the prototypes with LVQ updates, then measure accuracy on the held-out split.
# NOTE: this cell calls `prototype`, `find_closest` and `find_class_id`, which
# are defined in the cells further down; on a fresh kernel those cells have to
# be executed before this one.
n_splits = 20
som_rows, som_cols = 10, 10

# parameter for LVQ
R = 50 # R is the # of initial prototype for each class (not used in this cell)
n_classes = 2 # not used in this cell
epsilon_start = 0.9
epsilon_min = 0.01
epsilon_dec_factor = 0.001

# Reset the accumulator here so that re-running this cell starts from zero
# instead of adding to the total left over from a previous run.
result = 0

kfolds = KFold(n_splits=n_splits, shuffle=True, random_state=40)
for train_index, test_index in kfolds.split(x):
    x_train, x_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Training the SOM
    som = MiniSom(x = som_rows, y = som_cols, input_len = x_train.shape[1], sigma = 1.0, learning_rate = 0.5)
    som.random_weights_init(x_train)
    som.train_random(data = x_train, num_iteration = 100)

    # Signed vote per SOM node: -1 for every class-0 sample that maps to the
    # node, +1 for every class-1 sample. Other target values are ignored.
    labels = np.zeros((som_rows, som_cols))
    for i, x_ in enumerate(x_train):
        w = som.winner(x_)
        if y_train[i] == 0:
            labels[w[0], w[1]] -= 1
        elif y_train[i] == 1:
            labels[w[0], w[1]] += 1

    # One prototype per SOM node, stored row-major so that the node (i, j)
    # sits at position i * som_cols + j of `p_vectors`.
    # NOTE(review): `som.weights` is read directly; confirm that the installed
    # MiniSom version exposes the weight array under this attribute name.
    epsilon = epsilon_start
    p_vectors = []
    for i in range(som_rows):
        for j in range(som_cols):
            vote = labels[i][j]
            if vote > 0:
                class_id = 1
            elif vote < 0:
                class_id = 0
            else:
                # Tie or empty node: pick a class at random.
                class_id = randint(0, 1)
            p_vectors.append(prototype(class_id, som.weights[(i,j)], epsilon))

    # LVQ refinement: draw random training samples until the learning rate
    # has decayed below `epsilon_min`.
    n_train = len(x_train)
    while epsilon >= epsilon_min:
        # Sample over the actual size of this fold's training split
        # (np.random.randint excludes the upper bound).
        rnd_i = np.random.randint(0, n_train)
        rnd_s = x_train[rnd_i]
        target_y = y_train[rnd_i]

        epsilon = epsilon - epsilon_dec_factor

        index, closest_pvector = find_closest(rnd_s, p_vectors)
        # LVQ neighbor version: update the winner together with its left,
        # right, lower and upper grid neighbours. Working in (row, col)
        # coordinates keeps the neighbourhood from wrapping across row ends
        # and lets border nodes update the neighbours they do have.
        row, col = divmod(index, som_cols)
        update_p_vectors = [closest_pvector]
        for d_row, d_col in ((0, -1), (0, 1), (1, 0), (-1, 0)):
            n_row, n_col = row + d_row, col + d_col
            if 0 <= n_row < som_rows and 0 <= n_col < som_cols:
                update_p_vectors.append(p_vectors[n_row * som_cols + n_col])
        for p in update_p_vectors:
            if target_y == p.class_id:
                # Same class: pull the prototype towards the sample.
                p.update(rnd_s)
            else:
                # Different class: push the prototype away from the sample.
                p.update(rnd_s, False)
            # The decayed rate takes effect on the prototype's next update.
            p.epsilon = epsilon

    # Accuracy on the held-out split: fraction of exactly matching labels.
    predicted_y = [find_class_id(instance, p_vectors) for instance in x_test]
    result += float(np.mean(np.array(predicted_y) == y_test))
print (result/n_splits)

0.7831298055178653


In [97]:
# class of prototype vectors
class prototype(object):
    """A labelled prototype (codebook) vector for LVQ.

    Attributes
    ----------
    class_id
        Class label attached to this prototype.
    p_vector
        Current position of the prototype in input space.
    epsilon
        Learning rate applied by the next call to :meth:`update`.
    """

    def __init__(self, class_id, p_vector, epsilon):
        self.class_id, self.p_vector, self.epsilon = class_id, p_vector, epsilon

    def update(self, u_vector, increment = True):
        """Shift the prototype relative to the input vector ``u_vector``.

        The shift is ``epsilon * (u_vector - p_vector)``. It is added when
        ``increment`` is truthy (the prototype moves towards the input) and
        subtracted otherwise (the prototype moves away from it).
        """
        step = self.epsilon * (u_vector - self.p_vector)
        # The attribute is rebound to a new value rather than modified in
        # place, so the object originally passed as p_vector is left untouched.
        self.p_vector = (self.p_vector + step) if increment else (self.p_vector - step)

In [98]:
# function to find the closest prototype vector for a given vector
def find_closest(in_vector, proto_vectors):
    """Find the prototype nearest to ``in_vector``.

    Distance is the Euclidean norm of ``in_vector - p.p_vector``.

    Parameters
    ----------
    in_vector : array-like
        The query vector.
    proto_vectors : sequence
        Objects exposing a ``p_vector`` attribute.

    Returns
    -------
    list
        ``[position, closest]``: the index of the nearest prototype in
        ``proto_vectors`` and the prototype itself. When several prototypes
        are equally close, the first one is returned. The result is
        ``[None, None]`` when ``proto_vectors`` is empty or no distance is
        finite.
    """
    position = None
    closest = None
    # Start from +inf instead of a fixed number so that a nearest prototype is
    # still found when every distance happens to be very large.
    closest_distance = float("inf")
    for i, candidate in enumerate(proto_vectors):
        distance = np.linalg.norm(in_vector - candidate.p_vector)
        # Strict comparison keeps the earliest prototype on ties.
        if distance < closest_distance:
            closest_distance = distance
            closest = candidate
            position = i
    return [position, closest]

In [99]:
def find_class_id(test_vector, p_vectors):
    """Return the class label of the prototype nearest to ``test_vector``.

    The nearest prototype is looked up with ``find_closest``; only its
    ``class_id`` is returned.
    """
    _, nearest = find_closest(test_vector, p_vectors)
    return nearest.class_id