#### Ensemble learning với thuật toán LVQ cơ bản sử dụng tỉ lệ mẫu trong từng neuron

In [1]:
import numpy as np
from sklearn.metrics import euclidean_distances
import pandas as pd

import sys
sys.path.insert(0, '../SOM-LVQ')
import LVQ

In [2]:
# Load the AReM data set, split it into train/test parts and scale the features.
try:
    # `model_selection` is the home of train_test_split since scikit-learn 0.18.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Fallback for very old scikit-learn releases that only ship cross_validation
    # (that module was removed in scikit-learn 0.20).
    from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler

data = pd.read_csv(r"../data/AReM/Dataset1(combined).csv")

# The first six columns are used as features, the seventh column as the label.
x = data.iloc[:, 0:6].values
# Copy the labels before editing them: `.values` may alias the DataFrame's
# storage (or be read-only under pandas copy-on-write), so writing into it
# directly could either fail or silently modify `data`.
y = data.iloc[:, 6].values.copy()
# Relabel class 7 as class 0 (the reports below use class names '0'..'6').
y[y == 7] = 0

# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=1)

# use MinMaxScaler because we use euclidean distance
# The scaler is fitted on the training split only and then applied to the test split.
minmax = MinMaxScaler()
x_train = minmax.fit_transform(x_train)
x_test = minmax.transform(x_test)



In [3]:
# First ensemble member: build an LVQ model on the scaled training split.
lvq_params = dict(
    n_classes=7,
    n_neurons=90,
    epsilon=0.9,
    p_vectors=[],
    epsilon_dec_factor=0.001,
)
lvq1 = LVQ.LVQ(x=x_train, y=y_train, **lvq_params)

# Train with the LVQ2 routine; the return value is kept in `neurons`
# but is not read again by the cells shown in this notebook.
neurons = lvq1.train_LVQ2(x_train, y_train)

In [4]:
from sklearn.metrics import classification_report

# Classify every held-out sample with the first LVQ model, one sample at a time.
predicted_y = list(map(lvq1.predict, x_test))

# Per-class precision / recall / F1 on the test split, classes named '0'..'6'.
class_names = [str(label) for label in range(7)]
print(classification_report(y_test, predicted_y, target_names=class_names))

             precision    recall  f1-score   support

          0       0.74      0.66      0.70       119
          1       0.99      1.00      1.00       124
          2       0.97      0.98      0.97       129
          3       0.68      0.74      0.71       130
          4       0.99      1.00      1.00       124
          5       0.81      0.81      0.81       107
          6       0.88      0.87      0.87       107

avg / total       0.87      0.87      0.87       840



In [5]:
# Second ensemble member: same data and hyper-parameters as the first model.
lvq_params = dict(
    n_classes=7,
    n_neurons=90,
    epsilon=0.9,
    p_vectors=[],
    epsilon_dec_factor=0.001,
)
lvq2 = LVQ.LVQ(x=x_train, y=y_train, **lvq_params)

# Train with the LVQ2 routine; the return value is kept in `neurons`
# but is not read again by the cells shown in this notebook.
neurons = lvq2.train_LVQ2(x_train, y_train)

In [6]:
from sklearn.metrics import classification_report

# Classify every held-out sample with the second LVQ model, one sample at a time.
predicted_y = list(map(lvq2.predict, x_test))

# Per-class precision / recall / F1 on the test split, classes named '0'..'6'.
class_names = [str(label) for label in range(7)]
print(classification_report(y_test, predicted_y, target_names=class_names))

             precision    recall  f1-score   support

          0       0.71      0.65      0.68       119
          1       1.00      1.00      1.00       124
          2       0.97      0.97      0.97       129
          3       0.68      0.75      0.71       130
          4       0.98      0.99      0.99       124
          5       0.89      0.80      0.84       107
          6       0.88      0.92      0.89       107

avg / total       0.87      0.87      0.87       840



In [7]:
# Third ensemble member: same data and hyper-parameters as the other two models.
lvq_params = dict(
    n_classes=7,
    n_neurons=90,
    epsilon=0.9,
    p_vectors=[],
    epsilon_dec_factor=0.001,
)
lvq3 = LVQ.LVQ(x=x_train, y=y_train, **lvq_params)

# Train with the LVQ2 routine; the return value is kept in `neurons`
# but is not read again by the cells shown in this notebook.
neurons = lvq3.train_LVQ2(x_train, y_train)

In [8]:
from sklearn.metrics import classification_report

# Classify every held-out sample with the third LVQ model, one sample at a time.
predicted_y = list(map(lvq3.predict, x_test))

# Per-class precision / recall / F1 on the test split, classes named '0'..'6'.
class_names = [str(label) for label in range(7)]
print(classification_report(y_test, predicted_y, target_names=class_names))

             precision    recall  f1-score   support

          0       0.67      0.59      0.63       119
          1       0.99      1.00      1.00       124
          2       0.95      0.98      0.96       129
          3       0.68      0.74      0.71       130
          4       1.00      1.00      1.00       124
          5       0.85      0.82      0.83       107
          6       0.88      0.89      0.88       107

avg / total       0.86      0.86      0.86       840



#### Thuật toán LVQ được thêm vào các hàm tính win_map và tỉ lệ các mẫu trong từng neuron

In [9]:
# Compute the win map of each trained model over the training split.
# The result is later indexed by a winning-neuron position and measured with
# len(), so it presumably maps each neuron to the training samples it won
# -- TODO confirm against LVQ.win_map_LVQ.
mapping1, mapping2, mapping3 = (
    model.win_map_LVQ(x_train) for model in (lvq1, lvq2, lvq3)
)

In [10]:
# Per-neuron class proportions of each trained model.
# The result is later indexed as [class, neuron]; presumably it is derived from
# the win map computed in the previous cell -- TODO confirm against
# LVQ.propabilityLVQ.
propa1, propa2, propa3 = (
    model.propabilityLVQ() for model in (lvq1, lvq2, lvq3)
)

In [11]:
# Ensemble prediction: weighted soft vote over the three LVQ models.
#
# For every test sample each model contributes the class proportions of its
# winning neuron, weighted by the number of training samples mapped to that
# neuron. The class with the highest weighted average wins.
N_CLASSES = 7  # must match the n_classes value passed to LVQ.LVQ above
ensemble = [
    (lvq1, mapping1, propa1),
    (lvq2, mapping2, propa2),
    (lvq3, mapping3, propa3),
]

# Collect the votes in a plain list; growing an array with np.append copies
# the whole array on every iteration.
votes = []
for sample in x_test:
    # One (weight, proportions, winning neuron) triple per model.
    members = []
    for model, mapping, propa in ensemble:
        winner = model.find_closest(sample)[0]
        members.append((len(mapping[winner]), propa, winner))

    # NOTE(review): if none of the three winning neurons received a training
    # sample, total_weight is 0 and the division below is undefined -- confirm
    # whether win_map_LVQ can leave a winning neuron empty.
    total_weight = sum(weight for weight, _, _ in members)

    scores = [
        sum(weight * propa[cls, winner] for weight, propa, winner in members) / total_weight
        for cls in range(N_CLASSES)
    ]
    votes.append(np.argmax(np.array(scores)))

# Keep the float dtype that the original np.array([]) / np.append version produced.
result = np.array(votes, dtype=float)

In [12]:
# Score the ensemble's predictions on the same test split, classes named '0'..'6'.
# `classification_report` is the name imported in the earlier report cells.
class_names = [str(label) for label in range(7)]
ensemble_report = classification_report(y_test, result, target_names=class_names)
print(ensemble_report)

             precision    recall  f1-score   support

          0       0.77      0.63      0.69       119
          1       0.99      1.00      1.00       124
          2       0.97      0.98      0.97       129
          3       0.68      0.82      0.75       130
          4       0.99      1.00      1.00       124
          5       0.87      0.80      0.83       107
          6       0.91      0.90      0.90       107

avg / total       0.88      0.88      0.88       840

