In [117]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import warnings

from numpy import arange, meshgrid, hstack
from sklearn.datasets import make_blobs
from sklearn.preprocessing import label_binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler

from sklearn.pipeline import Pipeline

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_validate
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import GridSearchCV

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import make_scorer

def geradataset(tamanho=20, centros=[[0,0],[1,0],[1,1],[0,1]]):
    X, y = make_blobs(n_samples=tamanho, centers=centros, cluster_std=0.2)
    y = np.array(y%2, dtype=int)
    return X, y

def plotadataset(X, y):
    plt.xlabel('X1')
    plt.ylabel('X2')
    for k in set(y):
        plt.plot(X[:,0][y==k],X[:,1][y==k], "o", alpha=0.5)

def plotahiperplano(vetor, bias=0, xmin=0, xmax=1):
    xs = np.linspace(xmin, xmax, num=2)
    ys = (-vetor[0] / vetor[1]) * xs - bias / vetor[1]
    plt.plot(xs,ys)

In [3]:
class CustoPerceptron():
    @staticmethod
    def erro(y, ypred):
        return y - ypred
    @staticmethod
    def custo(y, ypred):
        return np.sum(CustoPerceptron.erro(y, ypred)**2)
    @staticmethod
    def gradiente(y, ypred, X):
        return np.matmul(X.T, CustoPerceptron.erro(y, ypred))

class Adaline():
    def __init__(self):
        self.preactivated = True
    @staticmethod
    def erro(y, ypred):
        return y - ypred
    @staticmethod
    def custo(y, ypred):
        return np.sum((1 - Adaline.erro(y, ypred))**2)
    @staticmethod
    def gradiente(y, ypred, X):
        return np.matmul(X.T, Adaline.erro(y, ypred))

# Algoritmos

In [4]:
class DescidaGradiente():
    def __init__(self, custo=Adaline(), maxiter=1000, alpha=0.005):
        self.custo = custo
        self.maxiter = maxiter
        self.alpha = alpha
    
    def getW(self, X, y, activation=lambda a: a):
        w = np.random.uniform(-1, -1, size=(X.shape[1], y.shape[1]))
        for _ in range(self.maxiter):
            ypred = activation(np.matmul(X, w))
            custo = self.custo.custo(y, ypred)
            if custo == 0:
                break
            w = w + self.alpha * self.custo.gradiente(y, ypred, X)
        return w

class PseudoInversa():
    def __init__(self):
        pass
    def getW(self, X, y):
        pinv = np.linalg.pinv(X)
        w = np.matmul(pinv, y)
        return w

In [78]:

def tanh(x, derivative=False):

    if derivative:
        y = tanh(x)
        return 1 - y**2
    
    return (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))


def relu(x, derivative=False):

    if derivative:
        return np.where(x <= 0, 0, 1)
    
    return np.maximum(0, x)

In [100]:
from sklearn.base import BaseEstimator, ClassifierMixin
from scipy.special import expit

class ExtremeLearningMachine(BaseEstimator, ClassifierMixin):
    def __init__(self, algoritmo=PseudoInversa(), activation=tanh, hl_division=3):
        self.wih = None
        self.w = None
        self.threshold = 0
        self.activation = activation
        self.algoritmo = algoritmo
        self.hl_division = hl_division
    
    @staticmethod
    def includebias(X):
        bias = np.ones((X.shape[0],1))
        Xb = np.concatenate((bias,X), axis=1)
        return Xb
    
    def fit(self, X, y):
        self.wih = np.random.uniform(-1, 1, size=(X.shape[1],X.shape[0]//self.hl_division))
        Xh = np.matmul(X, self.wih)
        Xho = self.activation(Xh)
        X = ExtremeLearningMachine.includebias(Xho)
        self.labels = list(set(y))
        y = label_binarize(y, classes=self.labels)*2-1
        if len(self.labels) == 2 :
            y = y[:,0:1]
        # treinamento
        if hasattr(self.algoritmo, 'custo') and not (hasattr(self.algoritmo.custo, 'preactivated') and self.algoritmo.custo.preactivated):
            self.w = self.algoritmo.getW(X, y, self.activation)
        else:
            self.w = self.algoritmo.getW(X, y)

    def predict(self, X):
        Xh = np.matmul(X, self.wih)
        Xho = self.activation(Xh)
        Xb = ExtremeLearningMachine.includebias(Xho)
        a = np.matmul(Xb, self.w)
        if self.w.shape[1] > 1:
            idx = np.argmax(a, axis=1) 
        else:
            idx = np.array(self.activation(a) > self.threshold, dtype=int)[:,0]
        ypred = np.array([self.labels[i] for i in idx])
        return ypred



In [118]:
warnings.filterwarnings('ignore')

classifier = { "clf": ExtremeLearningMachine(),
        "parametros": { "clf__activation": [relu, tanh],
                        "clf__hl_division": [3,2] }        
      }

path = '../resources/fish/dataset.csv'

dataset = pd.read_csv(path)

X = dataset.drop('Species', axis=1)
y = dataset['Species']

labelencoder=LabelEncoder()
normalizer = MinMaxScaler()
for column in X.columns:
      X[column] = labelencoder.fit_transform(X[column])
      X[column] = normalizer.fit_transform(X[column].values.reshape(-1,1))

print(X.head(5))

if y.dtype == 'object':
      y = LabelEncoder().fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

def executarAvaliacao(pipeline):  

  scoring = { "accuracy": make_scorer(accuracy_score) }

  cv = RepeatedKFold(n_splits=10, n_repeats=2, random_state=2652124)
  
  pipeline.set_params(clf = classifier["clf"])    
  modelo = GridSearchCV(pipeline, classifier["parametros"])

  scores = cross_validate(modelo, X, y, cv=cv, scoring=scoring, return_train_score=True)

  modelo.fit(X_train, y_train)  
  ypred = modelo.predict(X_train)

  print('\nExtremeLearningMachine\n')
  print(f'best_params {modelo.best_params_}')
  
  for key, values in scores.items():
      print(key, ' mean ', values.mean())
      print(key, ' std ', values.std())

pipeline = Pipeline([('clf', ExtremeLearningMachine())])
executarAvaliacao(pipeline)

   Weight   Length1   Length2   Length3    Height     Width
0    0.46  0.408696  0.489130  0.487805  0.705882  0.430464
1    0.53  0.443478  0.521739  0.528455  0.771242  0.496689
2    0.57  0.434783  0.532609  0.520325  0.745098  0.582781
3    0.59  0.504348  0.597826  0.544715  0.803922  0.529801
4    0.61  0.513043  0.597826  0.552846  0.764706  0.668874

ExtremeLearningMachine

best_params {'clf__activation': <function tanh at 0x0000028578CF58B0>, 'clf__hl_division': 3}
fit_time  mean  0.13624176979064942
fit_time  std  0.013685474982978724
score_time  mean  0.0011631369590759278
score_time  std  0.00048417149894712015
test_accuracy  mean  0.8779166666666667
test_accuracy  std  0.08929799518715101
train_accuracy  mean  0.9675019425019427
train_accuracy  std  0.028431365330546948
