## Extreme Learning Machine for Multiclass Classification

### Libraries

In [1]:
import numpy as np
import pandas as pd
import joblib

### Functions

In [2]:
def split_data(X, y, test_size, random_state=None):
    """Shuffle X and y with the same permutation and split them into train/test parts.

    Parameters
    ----------
    X : array-like
        Samples along the first axis.
    y : array-like
        Targets; must have as many entries along the first axis as X.
    test_size : float
        Fraction of samples (between 0 and 1) placed in the test part.
        The test count is ``int(test_size * n_samples)`` (truncated).
    random_state : int or None
        Seed handed to ``np.random.seed``. Note that this seeds NumPy's
        *global* random generator as a side effect.

    Returns
    -------
    X_train, X_test, y_train, y_test

    Raises
    ------
    ValueError
        If X and y have different lengths or test_size is outside [0, 1].
    """
    X = np.asarray(X)
    y = np.asarray(y)

    n_samples = X.shape[0]
    if y.shape[0] != n_samples:
        raise ValueError('X and y must have the same number of samples')
    if not 0 <= test_size <= 1:
        raise ValueError('test_size must be between 0 and 1')

    np.random.seed(seed=random_state) # set random seed (global NumPy RNG)

    indices = np.arange(n_samples) # one index per sample
    np.random.shuffle(indices) # shuffle the indices in place

    X = X[indices] # apply the same permutation to X ...
    y = y[indices] # ... and to y

    n_test = int(test_size * n_samples) # number of test samples
    # Split on a positive index: slicing with -n_test breaks when n_test == 0
    # (X[:-0] is empty and X[-0:] is the whole array).
    n_train = n_samples - n_test

    return X[:n_train], X[n_train:], y[:n_train], y[n_train:]

In [3]:
def accuracy(y_true, y_pred):
    """Return the fraction of positions where y_pred equals y_true.

    Both inputs are converted with ``np.asarray`` so plain lists are compared
    element by element (a bare ``list == list`` yields a single bool).

    Raises
    ------
    ValueError
        If the two inputs do not have the same shape; otherwise NumPy
        broadcasting could silently compare the wrong elements.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have the same shape')

    return np.mean(y_true == y_pred) # mean of the element-wise matches

In [4]:
def confusion_matrix(y_true, y_pred, n_classes=None):
    """Count (true label, predicted label) pairs.

    Parameters
    ----------
    y_true, y_pred : array-like of int
        Integer class labels used directly as row/column indices.
    n_classes : int or None
        Size of the square matrix. When None it is inferred as the largest
        label seen in either input plus one, so a class that is missing from
        y_true (or only ever predicted) still gets a row and a column.

    Returns
    -------
    cm : ndarray of float, shape (n_classes, n_classes)
        ``cm[t, p]`` is the number of samples with true label t predicted as p.

    Raises
    ------
    ValueError
        If y_true and y_pred do not have the same shape.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have the same shape')

    if n_classes is None:
        if y_true.size == 0:
            n_classes = 0
        else:
            n_classes = int(max(y_true.max(), y_pred.max())) + 1

    cm = np.zeros((n_classes, n_classes)) # initialize the confusion matrix

    if y_true.size:
        # Unbuffered add: repeated (row, column) pairs are each counted.
        np.add.at(cm, (y_true, y_pred), 1)

    return cm

In [5]:
def softmax(x, axis=1):
    """Numerically stable softmax along ``axis``.

    Parameters
    ----------
    x : array-like
        Input scores.
    axis : int
        Axis along which the outputs sum to one. Defaults to 1 (each row of
        a 2-D array); pass ``axis=-1`` or ``axis=0`` for a 1-D vector.

    Returns
    -------
    ndarray with the same shape as x.
    """
    x = np.asarray(x)

    # Subtracting the maximum leaves the result unchanged but keeps exp() from overflowing.
    row_max = np.max(x, axis=axis, keepdims=True)
    e_x = np.exp(x - row_max)
    total = np.sum(e_x, axis=axis, keepdims=True)

    return e_x / total

In [6]:
class FeatureNormalization():
    """Standardize features using per-feature mean and standard deviation.

    ``fit`` stores the statistics in ``self.mu`` and ``self.sigma``;
    ``transform`` applies ``(X - mu) / sigma`` with them.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Compute the column-wise mean and standard deviation of X.

        ``y`` is accepted but not used. Returns ``self``.
        """
        self.mu = np.mean(X, axis=0) # Mean of each feature
        self.sigma = np.std(X, axis=0) # Standard deviation of each feature

        return self

    def transform(self, X):
        """Return X centered by ``self.mu`` and scaled by ``self.sigma``.

        Features whose stored standard deviation is zero (constant columns)
        are only centered: dividing by zero would produce NaN/inf values.
        """
        scale = np.where(self.sigma == 0, 1.0, self.sigma)

        X_transform = (X - self.mu) / scale # Zero mean and unit standard deviation w.r.t. the fitted data

        return X_transform

In [7]:
class ELMClassifier():
    """Extreme Learning Machine classifier with one tanh hidden layer.

    The hidden-layer weights ``w1`` are drawn at random and never trained;
    the output weights ``w2`` are obtained in closed form with the
    pseudo-inverse of the hidden activations against one-hot targets.

    Attributes set by ``fit``
    -------------------------
    w1 : ndarray, shape (L, n_features + 1)   # hidden weights, bias in column 0
    w2 : ndarray, shape (n_classes, L + 1)    # output weights, bias in column 0
    classes_ : ndarray                        # sorted unique labels seen in y
    """

    def __init__(self, L, random_state=None):

        self.L = L # number of hidden neurons
        self.random_state = random_state # random state

    @staticmethod
    def _add_bias(A):
        """Prepend a column of ones to the 2-D array A."""
        ones = np.ones((np.size(A, axis=0), 1))
        return np.concatenate((ones, A), axis=1)

    def _hidden_output(self, X):
        """Return the bias-augmented hidden activations, shape (M, L + 1)."""
        Xa = self._add_bias(X) # Input with bias
        H = np.tanh(Xa.dot(self.w1.T)) # f(x) = tanh(x), dimension M x L
        return self._add_bias(H)

    def fit(self, X, y=None):
        """Draw random hidden weights and solve for the output weights.

        Parameters
        ----------
        X : array-like, shape (M, N)
        y : array-like, shape (M,)
            Class labels. Any set of labels is accepted; they are mapped to
            0..n_classes-1 internally and mapped back in ``predict``.

        Returns ``self``. Seeds NumPy's global RNG with ``random_state``.

        Raises
        ------
        ValueError
            If y is None.
        """
        if y is None:
            raise ValueError('y is required to fit an ELMClassifier')

        N = np.size(X, axis=1) # Number of features

        # Map labels to contiguous indices; indexing np.eye with the raw labels
        # only works when they already are exactly 0..n_classes-1.
        self.classes_, y_idx = np.unique(np.asarray(y).ravel(), return_inverse=True)
        y_idx = np.reshape(y_idx, -1)

        np.random.seed(seed=self.random_state) # set random seed (global NumPy RNG)

        self.w1 = np.random.uniform(low=-1, high=1, size=(self.L, N+1)) # Weights with bias

        Ha = self._hidden_output(X)

        D = np.eye(len(self.classes_))[y_idx] # One-hot encoding of the targets

        self.w2 = (np.linalg.pinv(Ha).dot(D)).T # w2' = pinv(Ha)*D

        return self

    def predict(self, X):
        """Return the predicted class label for each row of X."""
        winners = np.argmax(self.predict_proba(X), axis=1) # index of the largest score in each row

        # Models pickled before `classes_` existed were trained on labels
        # 0..n_classes-1, for which the index is already the label.
        classes = getattr(self, 'classes_', None)
        if classes is None:
            return winners

        return classes[winners]

    def predict_proba(self, X):
        """Return the softmax of the output-layer scores, shape (M, n_classes).

        Column k corresponds to ``classes_[k]``.
        """
        Ha = self._hidden_output(X)

        return softmax(Ha.dot(self.w2.T))

### Iris dataset

In [8]:
# Load the Iris data set from a CSV file (path is relative to the working directory)
df = pd.read_csv('../data/Iris.csv', sep=',')
df.head() # Preview the first rows

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


Adjust the target: encode each species name as an integer label (the one-hot encoding itself is done later, inside `ELMClassifier.fit`)

In [9]:
target_column = 'Species' # Name of the target column
# Unique species names; np.unique returns them sorted, and the position of a
# name in this array is what later cells use as its integer code.
specie_list = np.unique(df[target_column]) # Species list

print('Species:', specie_list)

Species: ['Iris-setosa' 'Iris-versicolor' 'Iris-virginica']


In [10]:
species_range = np.arange(0, len(specie_list)) # Species range
print('Species range:', species_range)

# Species name -> integer code, in the order of specie_list
species_to_code = dict(zip(specie_list, species_range))

# Assign the encoded column back to the frame. Calling
# `df[col].replace(..., inplace=True)` acts on a temporary selection, which
# recent pandas versions warn about and which does not update `df` under
# Copy-on-Write. Values that are not species names (e.g. codes left by a
# previous run of this cell) are kept as they are, so the cell can be re-run.
df[target_column] = df[target_column].map(lambda label: species_to_code.get(label, label))

df.head()

Species range: [0 1 2]


Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,0
1,2,4.9,3.0,1.4,0.2,0
2,3,4.7,3.2,1.3,0.2,0
3,4,4.6,3.1,1.5,0.2,0
4,5,5.0,3.6,1.4,0.2,0


In [11]:
# Target vector taken from the (already encoded) species column
y = df[target_column].values # Targets

### Training

Select the features

In [12]:
# Feature matrix: every column except the row identifier and the target
X = df.drop(columns=['Id', target_column]).values # Features

Train-test split

In [13]:
# Hold out 20% of the shuffled samples for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = split_data(X, y, 0.2, random_state=42) # Split data

print('Train data shape:', X_train.shape)
print('Teste data shape:', X_test.shape)

Train data shape: (120, 4)
Teste data shape: (30, 4)


Normalization

In [14]:
fn = FeatureNormalization() # Feature normalization
fn.fit(X_train) # Mean and standard deviation come from the training split only

# Normalize both splits with the training statistics (the training split ends up
# with zero mean and unit standard deviation; the test split only approximately).
# NOTE: X_train / X_test are overwritten, so re-running this cell alone would
# re-fit `fn` on already-normalized data; re-run from the split cell instead.
X_train = fn.transform(X_train)
X_test = fn.transform(X_test)

# Sanity check of the resulting statistics
print('Train mean:', np.mean(X_train, axis=0))
print('Train std:', np.std(X_train, axis=0))
print('Test mean:', np.mean(X_test, axis=0))
print('Test std:', np.std(X_test, axis=0))

Train mean: [-3.94129174e-16  1.91513472e-15 -1.34614542e-16 -2.26670534e-16]
Train std: [1. 1. 1. 1.]
Test mean: [ 0.24170962 -0.22868525  0.25176633  0.17420725]
Test std: [0.96304592 0.87236788 0.91839727 0.89025751]


Training the model

In [18]:
L = 24 # Number of hidden neurons

print('Number of hidden neuros:', L)

# The seed fixes the random hidden-layer weights drawn inside fit()
elm = ELMClassifier(L=L, random_state=42) # ELM
elm.fit(X_train, y_train) # Train

# Predicted class labels for both splits (used by the accuracy and confusion-matrix cells)
y_train_pred = elm.predict(X_train) # Train predictions
y_test_pred = elm.predict(X_test) # Test predictions

Number of hidden neuros: 24


Save and load the trained model

In [19]:
# Save model
# Serialize the fitted classifier to disk (the target directory must already exist)
joblib.dump(elm, '../models/elm_clf.pkl')

['../models/elm_clf.pkl']

In [20]:
# Load model
# NOTE: joblib.load is pickle-based and can execute arbitrary code while loading;
# only load model files that come from a trusted source.
elm = joblib.load('../models/elm_clf.pkl')

# Confirm that the hyper-parameters survived the round trip
print(f'Number of hidden neurons: {elm.L}')
print(f'Random state: {elm.random_state}')

Number of hidden neurons: 24
Random state: 42


Accuracy

In [21]:
# Fraction of correctly classified samples on each split
train_acc = accuracy(y_train, y_train_pred)
test_acc = accuracy(y_test, y_test_pred)

# Reported as percentages
print('Training accuracy (%):', train_acc*100)
print('Testing accuracy (%):', test_acc*100)

Training accuracy (%): 98.33333333333333
Testing accuracy (%): 96.66666666666667


Confusion matrix

In [22]:
# Rows are the true classes, columns the predicted classes (test split)
confusion_matrix(y_test, y_test_pred)

array([[ 7.,  0.,  0.],
       [ 0., 11.,  0.],
       [ 0.,  1., 11.]])

### Single-example classification

Get the data

In [23]:
# Work on a copy of the frame without the row identifier
data = df.drop(columns=['Id'])
data.head()

Unnamed: 0,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [24]:
# Uses NumPy's global RNG, so the chosen row depends on how that generator was
# last seeded (ELMClassifier.fit calls np.random.seed) and on what ran since.
i = np.random.randint(0, len(data)) # select a random example

x_data = data.drop(labels=[target_column], axis=1).iloc[i].values # Feature values of the chosen row
y_label = data[target_column].iloc[i] # Its encoded class label

print('Single example:', data.iloc[i])

Single example: SepalLengthCm    6.1
SepalWidthCm     3.0
PetalLengthCm    4.6
PetalWidthCm     1.4
Species          1.0
Name: 91, dtype: float64


In [25]:
print('Data:', x_data)
# y_label is used as a position in specie_list to recover the species name
print('Target = %d and class = %s' % (y_label, specie_list[y_label]))

Data: [6.1 3.  4.6 1.4]
Target = 1 and class = Iris-versicolor


Normalization

In [26]:
x_data = x_data.reshape(1, -1) # Reshape to a single-row 2-D array (1, n_features)

x_data = fn.transform(x_data) # Normalize with the statistics stored in `fn` (fitted on the training split)

Prediction

In [27]:
y_data_pred = elm.predict_proba(x_data) # Class probabilities, shape (1, n_classes)
y_class_pred = elm.predict(x_data)[0] # Predicted class label for the single example

print('Class:', y_label)
# The probabilities were previously printed under the label "Predicted class";
# report them as probabilities and show the actual predicted label separately.
print('Class probabilities:', y_data_pred)
print('Predicted class:', y_class_pred)

Class: 1
Predicted class: [[0.21831123 0.54342535 0.23826342]]
