In [114]:
from scipy.io import arff
import pandas as pd
import numpy as np
from sklearn import preprocessing
import math
from sklearn.model_selection import StratifiedKFold

In [115]:
# Read the ARFF file; element 0 of the loadarff result holds the records.
data = arff.loadarff('./Data/cm1.arff.txt')
df = pd.DataFrame(data[0])
# Drop the first character of each label's string form (the 'b' bytes prefix).
# NOTE(review): if the labels are bytes, the surrounding quote characters stay
# in the result (e.g. "'false'") - confirm this is intended.
df['defects'] = df['defects'].map(lambda label: str(label)[1:])
numberOfColumns = df.shape[1]  # column count, 'defects' column included
justData = df.drop(columns=['defects'])  # feature columns only, to be normalized
justClass = df[['defects']].to_numpy()  # class labels as a 2-D single-column array
valuesOfColums = justData.columns.to_numpy()  # attribute names
justDataValues = justData.to_numpy()  # feature matrix as a numpy array

In [116]:
# Fit a min-max scaler on the feature matrix, then rescale every feature column.
min_max_scaler = preprocessing.MinMaxScaler().fit(justDataValues)
normalized_data = min_max_scaler.transform(justDataValues)
# Rebuild the frame with the original attribute names ...
df = pd.DataFrame(normalized_data, columns=valuesOfColums)
# ... and put the class labels back at position numberOfColumns-1.
df.insert(loc=numberOfColumns-1, column='defects', value=justClass)

In [117]:
def euclidian_distance(x, y):
    """Return the Euclidean distance between two instances, ignoring the last slot.

    Only the first ``len(x) - 1`` positions of ``x`` and ``y`` take part in the
    computation; the final element of ``x`` (the class label in the rest of
    this notebook) is never read.

    Args:
        x: indexable sequence; its length decides how many positions are used.
        y: indexable sequence with at least ``len(x) - 1`` elements.

    Returns:
        Square root of the summed squared differences.
    """
    feature_count = len(x) - 1
    squared_terms = (np.square(x[idx] - y[idx]) for idx in range(feature_count))
    # Start the sum from the integer 0 so that an empty feature range yields sqrt(0).
    return np.sqrt(sum(squared_terms, 0))

def calc_dists(propotypes, testInstance):
    """Rank prototypes by their distance to a test instance.

    Args:
        propotypes: iterable of prototype instances.
        testInstance: the instance every prototype is compared against.

    Returns:
        A list of ``[prototype, distance]`` pairs sorted by ascending distance,
        so element 0 holds the nearest prototype. The prototype objects are the
        same objects that were passed in (not copies).
    """
    # Fix: call the helper under the name it is actually defined with in this
    # file (euclidian_distance); the previous spelling raised NameError.
    d = [[p, euclidian_distance(testInstance, p)] for p in propotypes]
    # Fix: sort the list built above; the previous code sorted an undefined name.
    d.sort(key=lambda dist: dist[1])
    return d

In [118]:
def lvq(trainSet, prototypes, lrate, epochs):
    """Train prototypes with the basic LVQ update rule.

    For every training example the nearest prototype is moved toward the
    example when their labels (last element) match and away from it otherwise.
    The step size decays linearly from ``lrate`` toward 0 over the epochs.

    Args:
        trainSet: iterable of instances; ``instance[:-1]`` are the features and
            ``instance[-1]`` is the class label.
        prototypes: iterable of prototype instances with the same layout.
            The prototypes are updated IN PLACE via slice assignment.
        lrate: initial learning rate.
        epochs: number of passes over ``trainSet``.

    Returns:
        The same ``prototypes`` object that was passed in, after the updates.
    """
    for epoch in range(epochs):
        rate = lrate * (1.0 - (epoch / float(epochs)))
        for example in trainSet:
            # calc_dists returns [prototype, distance] pairs, nearest first.
            neighbor = calc_dists(prototypes, example)[0][0]
            step = np.subtract(example[:-1], neighbor[:-1]) * rate
            if example[-1] == neighbor[-1]:
                # Same class: attract the prototype toward the example.
                neighbor[:-1] = np.add(neighbor[:-1], step)
            else:
                # Different class: repel the prototype from the example.
                neighbor[:-1] = np.subtract(neighbor[:-1], step)
    return prototypes

In [119]:
def window(neighbor1, neighbor2, instance, w):
    """Tell whether an instance lies inside the window between two prototypes.

    The smaller of the two distance ratios ``d1/d2`` and ``d2/d1`` is compared
    with the threshold ``(1 - w) / (1 + w)``. When either distance is zero the
    ratio is taken to be 0.

    Args:
        neighbor1: first prototype.
        neighbor2: second prototype.
        instance: the instance being tested.
        w: window width used to build the threshold.

    Returns:
        True when the ratio is strictly greater than the threshold.
    """
    dist_first = euclidian_distance(neighbor1, instance)
    dist_second = euclidian_distance(neighbor2, instance)
    threshold = (1 - w) / (1 + w)
    if dist_first == 0 or dist_second == 0:
        # A zero distance would make one of the ratios undefined.
        ratio = 0
    else:
        ratio = min(dist_first / dist_second, dist_second / dist_first)
    return ratio > threshold

In [120]:
def lvq2_1(trainSet, prototypes, lrate, epochs, w=0.3):
    """Refine prototypes with the LVQ2.1 rule after an initial ``lvq`` pass.

    For every training example the two nearest prototypes are fetched. They
    are updated only when the example falls inside the window (see ``window``),
    the two prototypes carry different labels, and one of them has the
    example's label: that one is moved toward the example, the other away.

    Args:
        trainSet: iterable of instances; ``instance[:-1]`` are the features and
            ``instance[-1]`` is the class label.
        prototypes: prototype instances with the same layout (at least two are
            needed). They are updated IN PLACE.
        lrate: initial learning rate; decays linearly toward 0 over the epochs.
        epochs: number of passes, used both for the ``lvq`` pre-training and
            for the LVQ2.1 refinement.
        w: window width forwarded to ``window`` (defaults to the previously
            hard-coded 0.3).

    Returns:
        The updated prototypes.
    """
    # Fix: pass the arguments in lvq's declared order
    # (trainSet, prototypes, lrate, epochs); they used to be swapped.
    prototypes = lvq(trainSet, prototypes, lrate, epochs)
    for epoch in range(epochs):
        rate = lrate * (1.0 - (epoch / float(epochs)))
        for example in trainSet:
            neighbors = calc_dists(prototypes, example)
            ei, ej = neighbors[0][0], neighbors[1][0]
            if not window(ei, ej, example, w):
                continue
            if ei[-1] == ej[-1]:
                # Both neighbours share a label: LVQ2.1 performs no update.
                continue
            if ei[-1] == example[-1]:
                correct, wrong = ei, ej
            elif ej[-1] == example[-1]:
                correct, wrong = ej, ei
            else:
                # Neither neighbour has the example's label.
                continue
            # Compute both steps before touching either prototype.
            step_correct = np.subtract(example[:-1], correct[:-1]) * rate
            step_wrong = np.subtract(example[:-1], wrong[:-1]) * rate
            correct[:-1] = np.add(correct[:-1], step_correct)
            wrong[:-1] = np.subtract(wrong[:-1], step_wrong)
    return prototypes

In [121]:
def lvq3(trainSet, prototypes, lrate, epochs, w=0.3, epsilon=0.3):
    """Refine prototypes with the LVQ3 rule after an initial ``lvq`` pass.

    For every training example inside the window of its two nearest prototypes:

    * if the two prototypes have different labels and one of them matches the
      example, that one is moved toward the example and the other away from it;
    * if both prototypes share the example's label, both are moved toward the
      example with the step scaled by ``epsilon``.

    Args:
        trainSet: iterable of instances; ``instance[:-1]`` are the features and
            ``instance[-1]`` is the class label.
        prototypes: prototype instances with the same layout (at least two are
            needed). They are updated IN PLACE.
        lrate: initial learning rate; decays linearly toward 0 over the epochs.
        epochs: number of passes, used both for the ``lvq`` pre-training and
            for the LVQ3 refinement.
        w: window width forwarded to ``window`` (defaults to the previously
            hard-coded 0.3).
        epsilon: factor applied to the rate in the same-class update (defaults
            to the previously hard-coded 0.3).

    Returns:
        The updated prototypes.
    """
    # Fix: pass the arguments in lvq's declared order
    # (trainSet, prototypes, lrate, epochs); they used to be swapped.
    prototypes = lvq(trainSet, prototypes, lrate, epochs)
    for epoch in range(epochs):
        rate = lrate * (1.0 - (epoch / float(epochs)))
        for example in trainSet:
            neighbors = calc_dists(prototypes, example)
            ei, ej = neighbors[0][0], neighbors[1][0]
            if not window(ei, ej, example, w):
                continue
            # Compute both error vectors before touching either prototype.
            error_i = np.subtract(example[:-1], ei[:-1])
            error_j = np.subtract(example[:-1], ej[:-1])
            if ei[-1] != ej[-1]:
                if ei[-1] == example[-1]:
                    ei[:-1] = np.add(ei[:-1], error_i * rate)
                    ej[:-1] = np.subtract(ej[:-1], error_j * rate)
                elif ej[-1] == example[-1]:
                    ej[:-1] = np.add(ej[:-1], error_j * rate)
                    ei[:-1] = np.subtract(ei[:-1], error_i * rate)
            elif (ei[-1] == ej[-1]) and (ei[-1] == example[-1]):
                # Both neighbours already have the right class: damped attraction.
                damped_rate = rate * epsilon
                ei[:-1] = np.add(ei[:-1], error_i * damped_rate)
                ej[:-1] = np.add(ej[:-1], error_j * damped_rate)
    return prototypes

In [None]:
#CONSTANTS