In [1]:
%matplotlib inline

import numpy as np
import pandas as pd # Data frames
import matplotlib.pyplot as plt # Visuals
import seaborn as sns 
sns.set()
import csv
import re
from sklearn.model_selection import train_test_split # Create training and test sets
from sklearn.tree import DecisionTreeClassifier # Decision Trees
from sklearn import tree 
from sklearn.ensemble import RandomForestClassifier # Random Forest
from sklearn import svm #SVM
from sklearn.metrics import roc_curve # ROC Curves
from sklearn.model_selection import cross_val_score  #cross validation 
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score
import pywt
from sklearn.metrics import mean_squared_error
import plotly.offline as py
from scipy import signal
import time
from scipy.signal import butter, filtfilt
py.init_notebook_mode(connected=True)
plt.style.use('ggplot')

In [2]:
import os

In [3]:
root = os.getcwd()
root

'/home/bench-user/data/mt/EMG/EMG-Signal-Classification/Src/Classification'

In [4]:
df_approx_dir = root + "/../Feature_extraction/approx_df.csv"
final_df_approx = pd.read_csv(df_approx_dir, index_col = 0)
final_df_approx

Unnamed: 0,data1,data2,data3,data4,data5,data6,data7,data8,class
0,17.261028,0.233085,1.054185,3.289182,0.646551,0.323208,0.362880,0.222239,1
1,18.791972,0.218241,1.004792,3.160344,0.636965,0.303699,0.341973,0.212599,1
2,7.517735,0.359031,1.352812,4.142039,0.830080,0.440532,0.506595,0.299962,1
3,0.037226,0.453570,1.555651,4.728338,1.127243,0.557710,0.619816,0.365921,1
4,0.467514,0.553521,1.653131,4.737151,1.136429,0.603048,0.651832,0.398648,1
...,...,...,...,...,...,...,...,...,...
405,0.153274,0.475463,0.523959,2.569312,0.424167,0.154487,0.396791,0.209755,6
406,0.076447,0.312237,0.342409,2.096912,0.202307,0.025613,-0.019038,0.032626,6
407,0.079854,0.303897,0.343658,2.109003,0.179577,0.026284,0.005412,0.042748,6
408,0.057217,0.282835,0.305431,2.049546,0.174430,0.008542,-0.099689,0.000454,6


In [5]:
train, test = train_test_split(final_df_approx, test_size = 0.20, random_state = 42)

# Create the training test omitting the class
# Store the column names in an array, ommiting only the 'class' column
column_names = final_df_approx.columns[:-1]
print(column_names)
training_set = train.loc[:, column_names]#

# Next we create the class set 
class_set = train.loc[:,train.columns=='class']

# Next we create the test set doing the same process as the training set
test_set = test.loc[:, column_names]

test_class_set = test.loc[:, train.columns=='class']

true_labels = test_class_set['class'] #for evaluating the metrics

Index(['data1', 'data2', 'data3', 'data4', 'data5', 'data6', 'data7', 'data8'], dtype='object')


## Decision Tree

In [6]:
model_dt1 = tree.DecisionTreeClassifier()
model_dt1 = model_dt1.fit(training_set, class_set)

In [7]:
# Get the accuracy of the model with criterion 'gini' (The function to measure the quality of a split)
accuracy_dt1 = model_dt1.score(test_set, test_class_set['class'])
print("Here is our mean accuracy on the test set: ", '%.3f' % (accuracy_dt1 * 100), '%')

Here is our mean accuracy on the test set:  87.805 %


In [8]:
start_time = time.time()
predictions_dt = model_dt1.predict(test_set)
end_time = time.time()

In [9]:
inference_time_dt = end_time - start_time
print(f"Inference time: {inference_time_dt} seconds")

Inference time: 0.000736236572265625 seconds


In [10]:
def metrics(true_labels, prediction):
    conf_matrix = confusion_matrix(true_labels, prediction)

    # Calculate precision
    precision = precision_score(true_labels, prediction, average = 'weighted', zero_division= 0)

    # Calculate recall
    recall = recall_score(true_labels, prediction, average = 'weighted')

    # Calculate F1-Score
    f1 = f1_score(true_labels, prediction, average = 'weighted')

    # Print the confusion matrix and calculated metrics
    print("Confusion Matrix:")
    print(conf_matrix)
    print(f"Precision: {precision:.2f}")
    print(f"Recall: {recall:.2f}")
    print(f"F1-Score: {f1:.2f}")

metrics(true_labels, predictions_dt)

Confusion Matrix:
[[17  0  0  1  0  0]
 [ 0 12  1  1  0  2]
 [ 0  1 10  0  0  0]
 [ 0  0  1 12  0  0]
 [ 0  0  0  0 11  0]
 [ 0  0  0  2  1 10]]
Precision: 0.89
Recall: 0.88
F1-Score: 0.88


## Random Forest

In [11]:
model_rf = RandomForestClassifier(random_state = 42, 
                                  criterion='gini',
                                  n_estimators = 1000,
                                  max_features = 0.25)

In [12]:
model_rf.fit(training_set, class_set['class'])

importances = model_rf.feature_importances_
print(importances) # so we can keep only data3, data4, and data6 since they are 'the most important' ones

[0.17506999 0.10758921 0.17594156 0.15684391 0.08596627 0.09203217
 0.17010135 0.03645554]


In [13]:
accuracy_rf = model_rf.score(test_set, test_class_set['class'])
print(f'{accuracy_rf*100}%')

97.5609756097561%


In [14]:
start_time = time.time()
predictions_rf = model_rf.predict(test_set)
end_time = time.time()

In [15]:
inference_time_rf = end_time - start_time
print(f"Inference time: {inference_time_rf} seconds")

Inference time: 0.033833980560302734 seconds


In [16]:
metrics(true_labels, predictions_rf)

Confusion Matrix:
[[18  0  0  0  0  0]
 [ 0 15  0  0  0  1]
 [ 0  0 10  1  0  0]
 [ 0  0  0 13  0  0]
 [ 0  0  0  0 11  0]
 [ 0  0  0  0  0 13]]
Precision: 0.98
Recall: 0.98
F1-Score: 0.98


## SVM

In [17]:
# Train SVM model, rbf kernel
model_svm_rbf = svm.SVC() #kernel 'rbf'
model_svm_rbf = model_svm_rbf.fit(training_set, class_set['class'])

In [18]:
accuracy_svm_rbf = model_svm_rbf.score(training_set, class_set['class'])
print(accuracy_svm_rbf)

0.823170731707317


In [19]:
start_time = time.time()
predictions_svm_rbf = model_svm_rbf.predict(test_set)
end_time = time.time()

In [20]:
inference_time_rbf = end_time - start_time
print(f"Inference time: {inference_time_rbf} seconds")

Inference time: 0.0018963813781738281 seconds


In [21]:
metrics(true_labels, predictions_svm_rbf)

Confusion Matrix:
[[18  0  0  0  0  0]
 [ 0 14  1  0  1  0]
 [ 0  0 11  0  0  0]
 [ 0  0  6  6  1  0]
 [ 0  0  0  0 10  1]
 [ 0  4  0  0  0  9]]
Precision: 0.87
Recall: 0.83
F1-Score: 0.82


In [22]:
# Train SVM model, linear kernel
model_svm_lin = svm.SVC(kernel = 'linear') 
model_svm_lin = model_svm_lin.fit(training_set, class_set['class'])

In [23]:
start_time = time.time()
predictions_svm_lin = model_svm_lin.predict(test_set)
end_time = time.time()

In [24]:
inference_time_svm_lin = end_time - start_time
print(f"Inference time: {inference_time_svm_lin} seconds")

Inference time: 0.0010564327239990234 seconds


In [25]:
metrics(true_labels, predictions_svm_lin)

Confusion Matrix:
[[18  0  0  0  0  0]
 [ 0 14  1  0  1  0]
 [ 0  0 11  0  0  0]
 [ 0  0  0 13  0  0]
 [ 0  0  0  0 11  0]
 [ 0  0  0  1  0 12]]
Precision: 0.97
Recall: 0.96
F1-Score: 0.96


In [26]:
accuracy_svm_lin = model_svm_lin.score(training_set, class_set['class'])
print(accuracy_svm_lin)

0.926829268292683


In [27]:
# Train SVM model, polynomial kernel
model_svm_poly = svm.SVC(kernel = 'poly')
model_svm_poly = model_svm_poly.fit(training_set, class_set['class'])

In [28]:
start_time = time.time()
predictions_svm_poly = model_svm_poly.predict(test_set)
end_time = time.time()

In [29]:
inference_time_poly = end_time - start_time
print(f"Inference time: {inference_time_poly} seconds")

Inference time: 0.0015423297882080078 seconds


In [30]:
metrics(true_labels, predictions_svm_poly)

Confusion Matrix:
[[18  0  0  0  0  0]
 [ 0 15  1  0  0  0]
 [ 0  1 10  0  0  0]
 [ 0  4  8  0  1  0]
 [ 0  0  2  0  9  0]
 [ 0  4  2  0  0  7]]
Precision: 0.68
Recall: 0.72
F1-Score: 0.67


In [31]:
accuracy_svm_poly = model_svm_poly.score(training_set, class_set['class'])
print(accuracy_svm_poly)

0.7225609756097561


## RNN


In [32]:
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
import numpy as np

column_names = final_df_approx.columns[:-1]
X = final_df_approx.loc[:, column_names]
y = final_df_approx.loc[:, 'class']  # Simplify y extraction
y_encoded = to_categorical(y)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.3)

# Convert DataFrames to NumPy arrays and then reshape for RNN
X_train = np.array(X_train).reshape(X_train.shape[0], 1, X_train.shape[1])
X_test = np.array(X_test).reshape(X_test.shape[0], 1, X_test.shape[1])

# Build the model
model = Sequential()
model.add(SimpleRNN(50, input_shape=(1, X_train.shape[2]), activation='relu'))
model.add(Dense(y_encoded.shape[1], activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32)

# Evaluate the model
accuracy_rnn = model.evaluate(X_test, y_test)[1] * 100

print(f"Model Accuracy: {accuracy_rnn}%")

2023-12-10 18:01:50.231151: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-12-10 18:01:50.233591: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2023-12-10 18:01:50.262198: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-10 18:01:50.262215: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-10 18:01:50.263022: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

Epoch 1/100


2023-12-10 18:01:52.020362: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:274] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

In [33]:
start_time = time.time()
predictions_rnn = model.predict(X_test)
end_time = time.time()



In [35]:
inference_time_rnn = end_time - start_time
print(f"Inference time: {inference_time_rnn} seconds")

Inference time: 0.14040088653564453 seconds


## Summary

In [36]:
print(root)
print("DT : ", accuracy_dt1 * 100)
print("RF : ", accuracy_rf * 100)
print("SVM_rbf :", accuracy_svm_rbf * 100)
print("SVM_lin :", accuracy_svm_lin * 100)
print("SVM_poly :", accuracy_svm_poly * 100)
print("RNN :", accuracy_rnn)

/home/bench-user/data/mt/EMG/EMG-Signal-Classification/Src/Classification
DT :  87.8048780487805
RF :  97.5609756097561
SVM_rbf : 82.3170731707317
SVM_lin : 92.6829268292683
SVM_poly : 72.2560975609756
RNN : 91.05691313743591


In [38]:
print("DT : ", inference_time_dt)
print("RF : ", inference_time_rf)
print("SVM_rbf :", inference_time_rbf)
print("SVM_lin :", inference_time_svm_lin)
print("SVM_poly :", inference_time_poly)
print("RNN :", inference_time_rnn)

DT :  0.000736236572265625
RF :  0.033833980560302734
SVM_rbf : 0.0018963813781738281
SVM_lin : 0.0010564327239990234
SVM_poly : 0.0015423297882080078
RNN : 0.14040088653564453
