# m-arcsinh: An Efficient and Reliable Function for SVM and MLP in scikit-learn
 

### Source link: https://arxiv.org/pdf/2009.07530

#### Author: 
Luca Parisi
 - Coventry, United Kingdom
 - arXiv:2009.07530v1 [cs.LG], 16 Sep 2020
 - PhD in Machine Learning for Clinical Decision Support Systems
 - MBA Candidate with Artificial Intelligence Specialism

### Abstract
 This paper describes the 'm-arcsinh', a modified ('m-') version of the inverse hyperbolic
 sine function ('arcsinh'). Kernel and activation functions enable Machine Learning (ML)-based
 algorithms, such as Support Vector Machine (SVM) and Multi-Layer Perceptron
 (MLP), to learn from data in a supervised manner. m-arcsinh, implemented in the
 open-source Python library 'scikit-learn', is hereby presented as an efficient and reliable kernel
 and activation function for SVM and MLP respectively. Improvements in reliability and
 speed to convergence in classification tasks on fifteen (N = 15) datasets available from
 scikit-learn and the University of California Irvine (UCI) Machine Learning repository are discussed.
 Experimental results demonstrate the overall competitive classification performance of both
 SVM and MLP, achieved via the proposed function. This function is compared to gold
 standard kernel and activation functions, demonstrating its overall competitive reliability
 regardless of the complexity of the classification tasks involved.

In [105]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn import svm

from sklearn.neural_network._base import ACTIVATIONS, DERIVATIVES

### For the 'Handwritten Digits' dataset, 70% of the data is used for training and the remaining 30% for testing.

In [157]:
# Read the digits table from a CSV file addressed relative to the working directory.
digits = pd.read_csv(filepath_or_buffer='../m_arcsinh/data/digits.csv')
# Bare trailing expression so the notebook renders the frame as the cell output.
digits

Unnamed: 0,pixel_0_0,pixel_0_1,pixel_0_2,pixel_0_3,pixel_0_4,pixel_0_5,pixel_0_6,pixel_0_7,pixel_1_0,pixel_1_1,...,pixel_6_7,pixel_7_0,pixel_7_1,pixel_7_2,pixel_7_3,pixel_7_4,pixel_7_5,pixel_7_6,pixel_7_7,target
0,0.0,0.0,5.0,13.0,9.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,13.0,10.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,12.0,13.0,5.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,11.0,16.0,10.0,0.0,0.0,1
2,0.0,0.0,0.0,4.0,15.0,12.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,3.0,11.0,16.0,9.0,0.0,2
3,0.0,0.0,7.0,15.0,13.0,1.0,0.0,0.0,0.0,8.0,...,0.0,0.0,0.0,7.0,13.0,13.0,9.0,0.0,0.0,3
4,0.0,0.0,0.0,1.0,11.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,2.0,16.0,4.0,0.0,0.0,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1792,0.0,0.0,4.0,10.0,13.0,6.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,2.0,14.0,15.0,9.0,0.0,0.0,9
1793,0.0,0.0,6.0,16.0,13.0,11.0,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,6.0,16.0,14.0,6.0,0.0,0.0,0
1794,0.0,0.0,1.0,11.0,15.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.0,9.0,13.0,6.0,0.0,0.0,8
1795,0.0,0.0,2.0,10.0,7.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,12.0,16.0,12.0,0.0,0.0,9


In [158]:
# Separate the label column from the remaining columns, which are used as features.
y_digits = digits['target']
X_digits = digits.drop(columns=['target'])

In [159]:
# 70/30 split without shuffling, so the split is the same on every run.
(
    X_train_digits,
    X_test_digits,
    y_train_digits,
    y_test_digits,
) = train_test_split(
    X_digits,
    y_digits,
    test_size=0.3,
    shuffle=False,
)

### Standard MLPClassifier

In [160]:
# Baseline: train one MLP per built-in activation and report its accuracy on the test split.
for activation in ['identity', 'logistic', 'tanh', 'relu']:
    # fit() returns the estimator itself, so construction and training can be chained.
    classifier_1 = MLPClassifier(
        max_iter=300,
        random_state=1,
        activation=activation,
    ).fit(X_train_digits, y_train_digits)

    y_pred = classifier_1.predict(X_test_digits)
    acc = accuracy_score(y_true=y_test_digits, y_pred=y_pred)

    print(f"MLP ({activation}) -> Accuracy: {acc:.4f}")

MLP (identity) -> Accuracy: 0.9111
MLP (logistic) -> Accuracy: 0.9352
MLP (tanh) -> Accuracy: 0.9296
MLP (relu) -> Accuracy: 0.9167


### Standard SVM/SVC

In [161]:
# Baseline: train one SVC per built-in kernel and report its accuracy on the test split.
for kernel in ['linear', 'poly', 'rbf', 'sigmoid']:
    # The same gamma / class_weight / random_state settings are used for every kernel.
    classifier_2 = svm.SVC(
        class_weight='balanced',
        random_state=13,
        gamma=0.001,
        kernel=kernel,
    ).fit(X_train_digits, y_train_digits)

    y_pred = classifier_2.predict(X_test_digits)
    acc = accuracy_score(y_true=y_test_digits, y_pred=y_pred)

    print(f"SVM ({kernel}) -> Accuracy: {acc:.4f}")

SVM (linear) -> Accuracy: 0.9333
SVM (poly) -> Accuracy: 0.9481
SVM (rbf) -> Accuracy: 0.9685
SVM (sigmoid) -> Accuracy: 0.6759


### Using custom function - SVC/SVM and MLPClassifier

In [162]:
def m_arcsinh(X, Y):
    """m-arcsinh kernel: dot product of element-wise transformed inputs.

    Each element ``v`` is mapped to ``(1/3 * arcsinh(v)) * (1/4 * sqrt(|v|))``.
    ``X`` is transformed as given, ``Y`` is transformed after transposition,
    and the two results are combined with ``np.dot``.

    Parameters
    ----------
    X, Y : array-like
        Inputs whose shapes make ``np.dot(X, Y.T)`` valid.

    Returns
    -------
    The result of ``np.dot`` on the transformed ``X`` and transformed ``Y.T``.
    """
    def _transform(values):
        # Element-wise m-arcsinh mapping.
        return (1/3 * np.arcsinh(values)) * (1/4 * np.sqrt(np.abs(values)))

    return np.dot(_transform(X), _transform(Y.T))

# SVC with the callable m_arcsinh kernel defined above.
# NOTE(review): m_arcsinh takes only (X, Y), so the gamma passed here does not reach the kernel — confirm it is intentional.
classifier_3 = svm.SVC(
    class_weight='balanced',
    random_state=13,
    gamma=0.001,
    kernel=m_arcsinh,
).fit(X_train_digits, y_train_digits)

y_pred = classifier_3.predict(X_test_digits)
acc = accuracy_score(y_true=y_test_digits, y_pred=y_pred)

print(f"SVM (m_arcsinh) -> Accuracy: {acc:.4f}")

SVM (m_arcsinh) -> Accuracy: 0.9481


In [163]:
import torch
import torch.nn as nn

def m_arcsinh_mlp(X):
    """Apply the m-arcsinh mapping element-wise.

    Each element ``v`` of ``X`` becomes ``(1/3 * arcsinh(v)) * (1/4 * sqrt(|v|))``.

    Parameters
    ----------
    X : array-like
        Numeric input accepted by the NumPy ufuncs used below.

    Returns
    -------
    The transformed values, with the same shape as ``X``.
    """
    arcsinh_term = 1/3 * np.arcsinh(X)
    root_term = 1/4 * np.sqrt(np.abs(X))
    return arcsinh_term * root_term

# Apply the m-arcsinh mapping to the input features of both splits.
X_train_trans, X_test_trans = (
    m_arcsinh_mlp(features) for features in (X_train_digits, X_test_digits)
)


# The network itself uses the built-in 'relu' activation; m-arcsinh only reshapes its inputs.
classifier_4 = MLPClassifier(
    max_iter=300,
    random_state=1,
    activation='relu',
).fit(X_train_trans, y_train_digits)

y_pred = classifier_4.predict(X_test_trans)
acc = accuracy_score(y_true=y_test_digits, y_pred=y_pred)

print(f"MLP (m_arcsinh-preprocessed) -> Accuracy: {acc:.4f}")

MLP (m_arcsinh-preprocessed) -> Accuracy: 0.9333


In [164]:
import time
from sklearn.metrics import precision_score, recall_score, f1_score


# Custom kernel
def m_arcsinh(X, Y):
    """Compute the m-arcsinh kernel between ``X`` and ``Y``.

    Both inputs are mapped element-wise with
    ``(1/3 * arcsinh(v)) * (1/4 * sqrt(|v|))``; the result is the dot product
    of the mapped ``X`` with the transpose of the mapped ``Y``.

    Parameters
    ----------
    X, Y : array-like
        Inputs whose shapes make ``np.dot(X, Y.T)`` valid.

    Returns
    -------
    The result of ``np.dot`` on the mapped ``X`` and the transposed mapped ``Y``.
    """
    lhs, rhs = (
        (1/3 * np.arcsinh(arr)) * (1/4 * np.sqrt(np.abs(arr)))
        for arr in (X, Y)
    )
    return np.dot(lhs, rhs.T)

# Rows of the comparison table; each row is
# [classifier, kernel/activation label, training time, accuracy, precision, recall, F1].
results = []

# === SVM kernels ===
# Display label -> value passed as SVC(kernel=...): a built-in kernel name or a callable.
kernels = {
    "m-arcsinh (this study)": m_arcsinh,
    "RBF": "rbf",
    "Linear": "linear",
    "Poly": "poly",
    "Sigmoid": "sigmoid"
}

for name, kernel in kernels.items():
    clf = svm.SVC(
        kernel=kernel,
        gamma=0.001,
        random_state=13,
        class_weight='balanced'
    )

    # Time only the fit call. perf_counter() is a monotonic, high-resolution clock
    # meant for measuring short durations; time.time() follows the wall clock, which
    # can be adjusted and may be too coarse for fits that take a few milliseconds.
    start = time.perf_counter()
    clf.fit(X_train_digits, y_train_digits)
    train_time = time.perf_counter() - start

    y_pred = clf.predict(X_test_digits)
    acc = accuracy_score(y_test_digits, y_pred)
    # Weighted averaging accounts for class support; zero_division=0 scores a class
    # with no predicted samples as 0 instead of emitting a warning.
    prec = precision_score(y_test_digits, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test_digits, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test_digits, y_pred, average='weighted', zero_division=0)

    results.append(["SVM", name, train_time, acc, prec, rec, f1])

# === MLP m-arcsinh-preprocessed ===
def m_arcsinh_mlp(X):
    """Element-wise m-arcsinh mapping of ``X``.

    Returns ``(1/3 * arcsinh(X)) * (1/4 * sqrt(|X|))`` computed element by
    element, so the output has the same shape as the input.
    """
    scaled_arcsinh, scaled_root = 1/3 * np.arcsinh(X), 1/4 * np.sqrt(np.abs(X))
    return scaled_arcsinh * scaled_root

# Apply the m-arcsinh mapping to the input features of both splits.
X_train_trans = m_arcsinh_mlp(X_train_digits)
X_test_trans = m_arcsinh_mlp(X_test_digits)

# NOTE(review): the network below uses the built-in 'relu' activation; m-arcsinh is applied
# to the inputs only, yet the row is labelled "m-arcsinh (this study)" in the
# "Kernel / Activation" column. Confirm the label reflects what is intended.
clf = MLPClassifier(
    activation='relu',
    random_state=1,
    max_iter=300)

# Time only the fit call. perf_counter() is a monotonic, high-resolution clock meant
# for measuring durations, unlike the adjustable wall clock returned by time.time().
start = time.perf_counter()
clf.fit(X_train_trans, y_train_digits)
train_time = time.perf_counter() - start

y_pred = clf.predict(X_test_trans)
acc = accuracy_score(y_test_digits, y_pred)
# Weighted averaging accounts for class support; zero_division=0 scores a class
# with no predicted samples as 0 instead of emitting a warning.
prec = precision_score(y_test_digits, y_pred, average='weighted', zero_division=0)
rec = recall_score(y_test_digits, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test_digits, y_pred, average='weighted', zero_division=0)

results.append(["MLP", "m-arcsinh (this study)", train_time, acc, prec, rec, f1])


# === MLP activations ===
# Built-in MLP activations benchmarked on the untransformed features.
activations = ['identity', 'logistic', 'tanh', 'relu']
for act in activations:
    clf = MLPClassifier(activation=act, random_state=1, max_iter=300)

    # Time only the fit call. perf_counter() is a monotonic, high-resolution clock meant
    # for measuring durations, unlike the adjustable wall clock returned by time.time().
    start = time.perf_counter()
    clf.fit(X_train_digits, y_train_digits)
    train_time = time.perf_counter() - start

    y_pred = clf.predict(X_test_digits)
    acc = accuracy_score(y_test_digits, y_pred)
    # Weighted averaging accounts for class support; zero_division=0 scores a class
    # with no predicted samples as 0 instead of emitting a warning.
    prec = precision_score(y_test_digits, y_pred, average='weighted', zero_division=0)
    rec = recall_score(y_test_digits, y_pred, average='weighted', zero_division=0)
    f1 = f1_score(y_test_digits, y_pred, average='weighted', zero_division=0)

    results.append(["MLP", act, train_time, acc, prec, rec, f1])


# Column headers, in the same order as the fields of each row appended to `results`.
columns = [
    "Classifier",
    "Kernel / Activation",
    "Training time (s)",
    "Accuracy",
    "Precision",
    "Recall",
    "F1-score",
]
df_results = pd.DataFrame(data=results, columns=columns)

# Plain-text rendering without the index column.
print(df_results.to_string(index=False))

Classifier    Kernel / Activation  Training time (s)  Accuracy  Precision   Recall  F1-score
       SVM m-arcsinh (this study)           0.012395  0.948148   0.951115 0.948148  0.948458
       SVM                    RBF           0.019130  0.968519   0.969366 0.968519  0.968402
       SVM                 Linear           0.009137  0.933333   0.934325 0.933333  0.933499
       SVM                   Poly           0.010504  0.948148   0.948704 0.948148  0.948132
       SVM                Sigmoid           0.054535  0.675926   0.691598 0.675926  0.656278
       MLP m-arcsinh (this study)           0.721417  0.933333   0.934850 0.933333  0.933181
       MLP               identity           0.386909  0.911111   0.913880 0.911111  0.911310
       MLP               logistic           1.212305  0.935185   0.936148 0.935185  0.934969
       MLP                   tanh           0.432888  0.929630   0.931756 0.929630  0.929511
       MLP                   relu           0.323896  0.916667   0.918