# TinyML - Support Vector Machine (Classifier)

In [49]:
#!pip install micromlgen

## 1. Importing libraries

In [50]:
from micromlgen import port
import pandas as pd
import plotly.graph_objects as go
import numpy as np
import plotly.express as px

from sklearn.svm import SVC
from sklearn.calibration import LabelEncoder
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn import metrics

## 2. Load Dataset

The Iris dataset is a classic dataset in the field of machine learning and statistics. It was introduced by Sir Ronald A. Fisher in 1936 as an example of discriminant analysis. The dataset is often used for educational purposes and is a common starting point for the practice of pattern classification.


Attributes:

- Sepal length (in centimeters)

- Sepal width (in centimeters)

- Petal length (in centimeters)


Species:

- 0 - Setosa

- 1 - Versicolor

In [51]:
# Load iris dataset
data = load_iris()

# Create a DataFrame
df_iris = pd.DataFrame(data.data, columns=data.feature_names)


# Add target variable to the DataFrame
df_iris['target'] = data.target

# Remove NaN values
df = df_iris.dropna(axis='rows') #remove NaN

# Display the DataFrame
print(df_iris.head())

   sepal length (cm)  sepal width (cm)  petal length (cm)  petal width (cm)  \
0                5.1               3.5                1.4               0.2   
1                4.9               3.0                1.4               0.2   
2                4.7               3.2                1.3               0.2   
3                4.6               3.1                1.5               0.2   
4                5.0               3.6                1.4               0.2   

   target  
0       0  
1       0  
2       0  
3       0  
4       0  


In [52]:
df=df_iris.iloc[:100,1:4]

In [53]:
X=df.to_numpy()

# Converting string value to int type for labels: Setosa = 0, Versicolor = 1
y=df_iris.iloc[:100,-1]
y = LabelEncoder().fit_transform(y)

In [54]:
print(df.head())

   sepal width (cm)  petal length (cm)  petal width (cm)
0               3.5                1.4               0.2
1               3.0                1.4               0.2
2               3.2                1.3               0.2
3               3.1                1.5               0.2
4               3.6                1.4               0.2


## 3. Dataset Visualization 

In [55]:
fig = go.Figure()


fig.add_trace(go.Scatter3d(x=df['sepal width (cm)'], y= df['petal length (cm)'], z=df['petal width (cm)'], mode='markers', marker=dict(color='blue')))

fig.update_layout(scene=dict(xaxis_title='Sepal Length (cm)', yaxis_title='Sepal Width (cm)', zaxis_title='Petal Width (cm)'),
                  scene_camera=dict(eye=dict(x=1.87, y=0.88, z=-0.64)),
                  width=1000, height=600)
fig.show()

In [56]:
print('Input shape: ', X.shape)
print('Target variable shape: ', y.shape)

Input shape:  (100, 3)
Target variable shape:  (100,)


## 4. Split into training and test data

In [57]:
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [58]:
y_train


array([0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 1, 1, 0, 0, 1, 0, 0], dtype=int64)

In [59]:
n = 7
print(y[n])
print(X[n])

0
[3.4 1.5 0.2]


In [60]:
X_train

array([[3. , 1.1, 0.1],
       [3.2, 1.2, 0.2],
       [2.4, 3.3, 1. ],
       [2.5, 4.9, 1.5],
       [3.3, 1.7, 0.5],
       [3.4, 1.4, 0.3],
       [3.1, 4.7, 1.5],
       [2.8, 4.8, 1.4],
       [2.5, 3. , 1.1],
       [2.7, 3.9, 1.4],
       [3.8, 1.7, 0.3],
       [3. , 4.4, 1.4],
       [3.1, 1.5, 0.2],
       [3.1, 4.9, 1.5],
       [3.8, 1.6, 0.2],
       [3.5, 1.5, 0.2],
       [2.9, 4.7, 1.4],
       [2.6, 3.5, 1. ],
       [2.9, 4.6, 1.3],
       [2.7, 5.1, 1.6],
       [3. , 4.2, 1.5],
       [3.4, 1.4, 0.2],
       [3. , 4.6, 1.4],
       [3. , 4.1, 1.3],
       [2.9, 4.3, 1.3],
       [3. , 4.2, 1.2],
       [3.1, 1.6, 0.2],
       [3.1, 1.5, 0.2],
       [2.9, 3.6, 1.3],
       [2.9, 4.2, 1.3],
       [3.4, 1.5, 0.2],
       [3.2, 4.7, 1.4],
       [4.4, 1.5, 0.4],
       [2.5, 4. , 1.3],
       [3.4, 1.9, 0.2],
       [2.2, 4.5, 1.5],
       [3.9, 1.3, 0.4],
       [3. , 1.4, 0.2],
       [2. , 3.5, 1. ],
       [2.6, 4. , 1.2],
       [2.4, 3.7, 1. ],
       [2.8, 4.6

## 5. Create the classification model

In [61]:
model = SVC(gamma=0.0000001, kernel='linear')

## 6. Train the model

In [62]:
model.fit(X_train, y_train)

## 6. Evaluating the model with the training data

In [66]:
training_predict = model.predict(X_train)

In [67]:
print(metrics.classification_report(y_train, training_predict, digits = 3))

              precision    recall  f1-score   support

           0      1.000     1.000     1.000        36
           1      1.000     1.000     1.000        39

    accuracy                          1.000        75
   macro avg      1.000     1.000     1.000        75
weighted avg      1.000     1.000     1.000        75



In [68]:
print(metrics.confusion_matrix(y_train, training_predict))

[[36  0]
 [ 0 39]]


In [69]:
print(f'Model accuracy: {round(metrics.accuracy_score(y_train, training_predict)*100,2)}%')

Model accuracy: 100.0%


## 7. Hyperlane Train Data Visualization 

In [70]:
x_grid, y_grid = np.meshgrid(np.linspace(X_train[:, 0].min(), X_train[:, 0].max(), 100),
                             np.linspace(X_train[:, 1].min(), X_train[:, 1].max(), 100))
z_grid = np.zeros_like(x_grid)


for i in range(len(x_grid)):
    for j in range(len(y_grid)):
        z_grid[i, j] = model.decision_function([[x_grid[i, j], y_grid[i, j], 0]])



fig = go.Figure()

fig.add_trace(go.Scatter3d(x=X_train[:, 0], y=X_train[:, 1], z=X_train[:, 2], mode='markers',
                           marker=dict(size=5, color=y_train, opacity=0.7), name='Dados de Treinamento'))

fig.add_trace(go.Surface(z=z_grid, x=x_grid, y=y_grid, opacity=0.5, colorscale='Bluered_r'))


fig.update_layout(scene=dict(xaxis_title='Sepal Width (cm)',
                             yaxis_title='Petal Length (cm)',
                             zaxis_title='Petal Width (cm)'))

fig.update_layout(width=1000, height=600)

fig.show()

## 8. Evaluating the model with test data

In [71]:
test_predict = model.predict(X_test)

In [72]:
test_predict

array([0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 1], dtype=int64)

In [73]:
X_test

array([[3.2, 1.3, 0.2],
       [4. , 1.2, 0.2],
       [2.3, 4. , 1.3],
       [2.8, 4. , 1.3],
       [4.2, 1.4, 0.2],
       [3.5, 1.4, 0.3],
       [2.3, 1.3, 0.3],
       [3. , 5. , 1.7],
       [3.5, 1.3, 0.2],
       [3.8, 1.5, 0.3],
       [3.2, 1.4, 0.2],
       [3.3, 4.7, 1.6],
       [3.1, 4.4, 1.4],
       [2.7, 4.2, 1.3],
       [3.8, 1.9, 0.4],
       [3.5, 1.6, 0.6],
       [3.6, 1. , 0.2],
       [2.7, 3.9, 1.2],
       [3. , 4.5, 1.5],
       [3.7, 1.5, 0.2],
       [3.2, 4.8, 1.8],
       [2.5, 3.9, 1.1],
       [3.5, 1.4, 0.2],
       [3.4, 1.7, 0.2],
       [2.2, 4. , 1. ]])

In [74]:
print(metrics.classification_report(y_test, test_predict, digits = 3))

              precision    recall  f1-score   support

           0      1.000     1.000     1.000        14
           1      1.000     1.000     1.000        11

    accuracy                          1.000        25
   macro avg      1.000     1.000     1.000        25
weighted avg      1.000     1.000     1.000        25



In [75]:
print(metrics.confusion_matrix(y_test, test_predict))

[[14  0]
 [ 0 11]]


In [76]:
print(f'Model accuracy: {round(metrics.accuracy_score(y_test, test_predict)*100,2)}%')

Model accuracy: 100.0%


## 9. Hyperplane Test Data Visualization 

In [77]:
x_grid, y_grid = np.meshgrid(np.linspace(X_test[:, 0].min(), X_test[:, 0].max(), 100),
                             np.linspace(X_test[:, 1].min(), X_test[:, 1].max(), 100))
z_grid = np.zeros_like(x_grid)


for i in range(len(x_grid)):
    for j in range(len(y_grid)):
        z_grid[i, j] = model.decision_function([[x_grid[i, j], y_grid[i, j], 0]])


fig = go.Figure()


fig.add_trace(go.Scatter3d(x=X_test[:, 0], y=X_test[:, 1], z=X_test[:, 2], mode='markers',
                           marker=dict(size=5, color=y_test), name='Dados de Treinamento'))


fig.add_trace(go.Surface(z=z_grid, x=x_grid, y=y_grid, opacity=0.5, colorscale='Bluered_r'))

fig.update_layout(scene=dict(xaxis_title='Sepal Width (cm)',
                             yaxis_title='Petal Length (cm)',
                             zaxis_title='Petal Width (cm)'))

fig.update_layout(width=1000, height=600)

fig.show()

## 10. Obtaining the model to be implemented in the microcontroller

In [78]:
print(port(model))

#pragma once
#include <cstdarg>
namespace Eloquent {
    namespace ML {
        namespace Port {
            class SVM {
                public:
                    /**
                    * Predict class for features vector
                    */
                    int predict(float *x) {
                        float kernels[3] = { 0 };
                        float decisions[1] = { 0 };
                        int votes[2] = { 0 };
                        kernels[0] = compute_kernel(x,   3.3  , 1.7  , 0.5 );
                        kernels[1] = compute_kernel(x,   3.4  , 1.9  , 0.2 );
                        kernels[2] = compute_kernel(x,   2.5  , 3.0  , 1.1 );
                        float decision = -0.903345042107;
                        decision = decision - ( + kernels[0] * -0.743493539534  + kernels[1] * -9.21056e-07 );
                        decision = decision - ( + kernels[2] * 0.74349446059 );

                        return decision > 0 ? 0 : 1;
                    }



## 11. Saves the template in a .h file

In [79]:
with open('./SVMClassifier/SVMClassifier.h', 'w') as file:
    file.write(port(model))

## (BONUS) Hyperparameter tuning

RandomizedSearchCV is a function provided by the scikit-learn library in Python, commonly used for hyperparameter tuning in machine learning models through cross-validation. This technique proves beneficial when dealing with an extensive search space for hyperparameters and aims to identify the most effective combination of values.

Step-by-Step Explanation
1. Definition of Parameter Space:
Before utilizing RandomizedSearchCV, one needs to specify a search space for the model's hyperparameters. Rather than providing a specific grid of values, distributions are defined for each hyperparameter.

2. Random Sampling:
Instead of evaluating all conceivable combinations of hyperparameters (as in the case of GridSearchCV), RandomizedSearchCV randomly selects a fixed set of combinations for evaluation. This proves advantageous when dealing with a large search space.

3. Model Training:
For each randomly selected set of hyperparameters, RandomizedSearchCV trains the model using cross-validation. The data is divided into folds, with the model being trained on some folds and evaluated on the remaining folds.

4. Performance Evaluation:
Performance is measured using a specified metric (e.g., accuracy, F1-score). The objective is to find hyperparameters that maximize or minimize this metric, depending on the problem at hand (e.g., maximizing accuracy in a classification problem).

5. Selection of the Best Model:
Upon completion of the random search, RandomizedSearchCV returns the set of hyperparameters that led to the best average performance during cross-validation.

By employing RandomizedSearchCV, computational time can be saved compared to an exhaustive grid search (GridSearchCV), especially when dealing with a large search space. This efficiency stems from exploring a random sample of the hyperparameter space rather than evaluating all possible combinations.

### 1. Importing libraries

In [85]:
from sklearn.metrics import make_scorer, roc_auc_score
from sklearn.model_selection import RandomizedSearchCV

### 2. Set Grid search for Combinations of Parameters

In [86]:
params = {
    "C": np.arange(2, 10, 2),
    "gamma": np.arange(0.1, 1, 0.01),
    "kernel": ['rbf', 'linear', 'poly']}

### 3. Define Performance Measure

In [87]:
auc = make_scorer(roc_auc_score)

### 4. Runs the search for the best model

In [88]:
best_model = RandomizedSearchCV(model, param_distributions=params, random_state=42, n_iter=200, cv=3, verbose=1, n_jobs=1, return_train_score=True, scoring = auc)

In [89]:
best_model.fit(X_train, y_train)

Fitting 3 folds for each of 200 candidates, totalling 600 fits


### 5. Report the best model

In [90]:
def report_best_scores(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                results['mean_test_score'][candidate],
                results['std_test_score'][candidate]))
            best_params = results['params'][candidate]
            print("Best parameters found:")
            for param, value in best_params.items():
                print("  {0}: {1}".format(param, value))
            print("")

In [91]:
report_best_scores(best_model.cv_results_, 1)

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: rbf
  gamma: 0.6099999999999998
  C: 4

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: poly
  gamma: 0.12
  C: 6

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: poly
  gamma: 0.7299999999999996
  C: 6

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: rbf
  gamma: 0.2599999999999999
  C: 6

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: linear
  gamma: 0.8999999999999996
  C: 8

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: linear
  gamma: 0.34999999999999987
  C: 6

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Best parameters found:
  kernel: linear
  gamma: 0.6999999999999996
  C: 8

Model with rank: 1
Mean validation score: 1.000 (std: 0.000)
Bes