# Learning Vector Quantization (LVQ)

A limitation of k-Nearest Neighbors is that you must keep a large database of training examples in order to make predictions.

The Learning Vector Quantization algorithm addresses this by learning a much smaller subset of patterns that best represent the training data.

### Steps

- Euclidean Distance
- Best Matching Unit
- Training Codebook Vectors

#### Step 1: Euclidean Distance

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from random import randrange

In [2]:
def euclidean_distance(row1, row2):
    """Return the Euclidean distance between the feature parts of two rows.

    The last element of ``row1`` is skipped (rows in this notebook store the
    class label there), so only the first ``len(row1) - 1`` positions of both
    rows take part in the computation.

    Args:
        row1: Indexable sequence of numbers; its length decides how many
            positions are compared.
        row2: Indexable sequence with at least ``len(row1) - 1`` numbers.

    Returns:
        The square root (via ``np.sqrt``) of the summed squared differences.
    """
    squared_total = 0.0
    n_compared = len(row1) - 1  # everything except the trailing element
    for idx in range(n_compared):
        diff = row1[idx] - row2[idx]
        squared_total += diff ** 2
    return np.sqrt(squared_total)

In [3]:
# Small hand-made dataset: each row is [feature_1, feature_2, class_label].
dataset = [
    [2.7810836, 2.550537003, 0],
    [1.465489372, 2.362125076, 0],
    [3.396561688, 4.400293529, 0],
    [1.38807019, 1.850220317, 0],
    [3.06407232, 3.005305973, 0],
    [7.627531214, 2.759262235, 1],
    [5.332441248, 2.088626775, 1],
    [6.922596716, 1.77106367, 1],
    [8.675418651, -0.242068655, 1],
    [7.673756466, 3.508563011, 1],
]

In [4]:
# Print the distance from the first record to every record (itself included,
# so the first printed value is 0.0).
row0 = dataset[0]
for row in dataset:
    distance = euclidean_distance(row1=row0, row2=row)
    print(distance)

0.0
1.3290173915275787
1.9494646655653247
1.5591439385540549
0.5356280721938492
4.850940186986411
2.592833759950511
4.214227042632867
6.522409988228337
4.985585382449795


#### Step 2: Best Matching Unit

In [5]:
def get_best_matching_unit(codebooks, test_row):
    """Return the codebook vector closest to ``test_row``.

    Closeness is measured with ``euclidean_distance(codebook, test_row)``.
    The returned value is the very object stored in ``codebooks`` (not a
    copy). When several codebooks are equally close, the one that appears
    first in ``codebooks`` is returned, because the sort is stable.

    Args:
        codebooks: Iterable of codebook vectors.
        test_row: Row to match against.

    Returns:
        The element of ``codebooks`` with the smallest distance.

    Raises:
        IndexError: If ``codebooks`` is empty.
    """
    scored = [
        (candidate, euclidean_distance(candidate, test_row))
        for candidate in codebooks
    ]
    ranked = sorted(scored, key=lambda pair: pair[1])
    return ranked[0][0]

In [6]:
# Sanity check: using the dataset itself as the codebook set, the best match
# for the first record is that record (its distance to itself is 0).
test_row = dataset[0]
bmu = get_best_matching_unit(codebooks=dataset, test_row=test_row)
print(bmu)

[2.7810836, 2.550537003, 0]


#### Step 3: Training Codebook Vectors

In [7]:
# Look up the best matching unit for the first record, using the dataset
# itself as the codebook set.
# Fix: the assignment previously targeted the misspelled name `ctest_row`,
# so the lookup below only worked when `test_row` already existed from an
# earlier cell.
test_row = dataset[0]
bmu = get_best_matching_unit(dataset, test_row)
print(bmu)

[2.7810836, 2.550537003, 0]


In [8]:
def random_codebook(train):
    """Build one codebook vector from randomly chosen training values.

    Each position of the result (including the last one) is copied from the
    same position of an independently drawn random training row, so the
    values of one codebook may come from different rows.

    Args:
        train: Non-empty sequence of rows; the length of ``train[0]`` sets
            the length of the codebook.

    Returns:
        A new list with ``len(train[0])`` entries.
    """
    n_records = len(train)
    n_features = len(train[0])
    codebook = []
    for col in range(n_features):
        # A fresh random row is drawn for every column.
        donor = train[randrange(n_records)]
        codebook.append(donor[col])
    return codebook

In [9]:
def train_codebooks(train, n_codebooks, lrate, epochs):
    """Train a set of codebook vectors on ``train`` and return them.

    ``n_codebooks`` vectors are created with ``random_codebook``. For every
    epoch and every training row, the best matching codebook is looked up
    and each of its feature positions (all but the last element) is moved
    towards the row when their last elements are equal, or away from it
    otherwise. The step size shrinks linearly from ``lrate`` as the epochs
    progress. One progress line with the summed squared error is printed
    per epoch.

    Args:
        train: Sequence of rows.
        n_codebooks: Number of codebook vectors to create.
        lrate: Initial learning rate.
        epochs: Number of passes over ``train``.

    Returns:
        The list of trained codebook vectors.
    """
    codebooks = []
    for _ in range(n_codebooks):
        codebooks.append(random_codebook(train))

    for epoch in range(epochs):
        # Linearly decaying learning rate: lrate at epoch 0, approaching 0.
        decay = 1.0 - (epoch / float(epochs))
        rate = lrate * decay
        sum_error = 0.0
        for row in train:
            bmu = get_best_matching_unit(codebooks, row)
            for idx in range(len(row) - 1):
                error = row[idx] - bmu[idx]
                sum_error += error ** 2
                step = rate * error
                if bmu[-1] != row[-1]:
                    # Different last element: push the codebook away.
                    bmu[idx] = bmu[idx] - step
                else:
                    # Same last element: pull the codebook closer.
                    bmu[idx] = bmu[idx] + step
        print('>epoch=%d, lrate=%.3f, error=%.3f' % (epoch, rate, sum_error))
    return codebooks

In [10]:
# Training settings for the demo run on the small dataset.
learn_rate = 0.3
n_epochs = 10
n_codebooks = 2
codebooks = train_codebooks(
    train=dataset,
    n_codebooks=n_codebooks,
    lrate=learn_rate,
    epochs=n_epochs,
)
print('Codebooks: %s' % codebooks)

>epoch=0, lrate=0.300, error=48.609
>epoch=1, lrate=0.270, error=65.490
>epoch=2, lrate=0.240, error=211.902
>epoch=3, lrate=0.210, error=411.947
>epoch=4, lrate=0.180, error=672.630
>epoch=5, lrate=0.150, error=886.389
>epoch=6, lrate=0.120, error=1106.568
>epoch=7, lrate=0.090, error=1365.875
>epoch=8, lrate=0.060, error=1509.847
>epoch=9, lrate=0.030, error=1528.022
Codebooks: [[16.226803378809347, 0.534610264137006, 1], [4.006090801809536, 18.36796337334561, 1]]
