In [467]:
import os
import numpy as np
import pandas as pd

## Data Pre-processing

Initialize the dataset

In [468]:
# Root folder of the dataset; the loader joins per-class sub-folder names onto it.
data_directory = 'nouv'
# NOTE(review): initialised empty here; nothing in this cell reads it.
class_data = dict()

Create a DataFrame that will contain the signals and their respective classes

In [469]:
# Maps each sub-folder name (also the file-name prefix) to the class label
# stored in the 'class' column.
classes = {"F": "A", "N": "B", "O": "C", "Z": "D", "S": "E"}

# Signals and labels are gathered in plain lists and the DataFrame is built
# once at the end. Growing a DataFrame with pd.concat inside the loop re-copies
# every previously loaded row on each iteration.
signals = []
labels = []

for class_name, label in classes.items():
    class_folder_path = os.path.join(data_directory, class_name)
    # Files are named <prefix><3-digit index>.txt with indices 001..100.
    for file_number in range(1, 101):
        file_name = f'{class_name}{file_number:03d}.txt'
        file_path = os.path.join(class_folder_path, file_name)
        data_array = np.loadtxt(file_path)
        # NOTE(review): the divisor is the signed maximum, not max(|x|), so the
        # result is not bounded to [-1, 1], and a non-positive maximum would
        # flip the sign or divide by zero. Kept as-is to preserve the features.
        normalized_data = data_array / np.max(data_array)
        signals.append(normalized_data)
        labels.append(label)

# One row per file: 'signal' holds the normalized array, 'class' its label.
all_data = pd.DataFrame({'signal': signals, 'class': labels})

In [470]:
# Sanity check: 5 classes x 100 files each should give 500 rows, with the two columns signal/class.
all_data.shape

(500, 2)

In [471]:
# Spot-check ten random rows; no random_state is passed, so the rows shown differ on every run.
all_data.sample(10)

Unnamed: 0,signal,class
463,"[0.05945945945945946, -0.1810810810810811, -0....",E
108,"[0.2642857142857143, 0.32857142857142857, 0.33...",B
453,"[0.17845659163987138, 0.18006430868167203, 0.2...",E
332,"[-0.10236220472440945, -0.1141732283464567, -0...",D
115,"[0.26174496644295303, 0.2348993288590604, 0.23...",B
47,"[0.22549019607843138, 0.29411764705882354, 0.3...",A
483,"[-0.04678362573099415, -0.056530214424951264, ...",E
0,"[0.2764227642276423, 0.2682926829268293, 0.227...",A
240,"[0.23055555555555557, 0.25, 0.1722222222222222...",C
310,"[0.10294117647058823, 0.18382352941176472, 0.2...",D


Work on a copy of the DataFrame so that the original `all_data` stays untouched while we experiment

In [472]:
# Independent working copy: columns added to or dropped from df do not alter all_data.
df = all_data.copy(deep=True)

## Feature Extraction

In [473]:
import pywt
from scipy.stats import entropy

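We will use Multilevel Wavelet Packet Entropy (MWE) for feature extraction, as indicated in the paper. Note: the implementation below decomposes the signal with `pywt.wavedec` (a multilevel discrete wavelet transform) rather than with a full wavelet-packet tree.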

In [474]:
def _shannon_entropy_bits(energies):
    """Return the base-2 Shannon entropy of the distribution defined by ``energies``.

    The energies are normalized to sum to 1. Zero-probability entries
    contribute nothing (the 0 * log2(0) = 0 convention), and a group whose
    total energy is zero has entropy 0.0. A non-finite total yields NaN so
    that corrupt input stays visible.
    """
    energies = np.asarray(energies, dtype=float)
    total = energies.sum()
    if not np.isfinite(total):
        return float('nan')
    if total == 0:
        return 0.0
    probabilities = energies / total
    # Drop zero entries before the log: log2(0) is -inf and 0 * -inf is NaN.
    probabilities = probabilities[probabilities > 0]
    return float(-np.sum(probabilities * np.log2(probabilities)))


def multilevel_wavelet_packet_entropy(signal, wavelet='bior1.5', levels=5):
    """Compute one energy-entropy value per decomposition index of ``signal``.

    The signal is decomposed with ``pywt.wavedec`` into a list of coefficient
    arrays. For every ``level`` in ``0..levels`` the arrays at list positions
    ``level`` and ``levels - level`` are selected (a single array when both
    positions coincide), their energies (sum of squares) are normalized to a
    probability distribution, and its Shannon entropy in bits is recorded.

    NOTE(review): the selection is symmetric in ``level`` and
    ``levels - level``, so entry ``k`` of the result always equals entry
    ``levels - k``. Roughly half of the returned values are duplicates.

    Parameters
    ----------
    signal : array-like
        One-dimensional signal passed to ``pywt.wavedec``.
    wavelet : str, default 'bior1.5'
        Wavelet name passed to ``pywt.wavedec``.
    levels : int, default 5
        Decomposition level passed to ``pywt.wavedec``.

    Returns
    -------
    list of float
        ``levels + 1`` entropy values. Groups containing a zero-energy band no
        longer produce NaN; they are scored with the 0 * log2(0) = 0 convention.
    """
    coeffs = pywt.wavedec(signal, wavelet, level=levels)
    # The energy of each coefficient array is needed by several levels; compute it once.
    band_energies = [float(np.sum(np.square(band))) for band in coeffs]

    entropy_per_level = []
    for level in range(levels + 1):
        energy_at_level = [
            energy
            for index, energy in enumerate(band_energies)
            if index == level or index == levels - level
        ]
        entropy_per_level.append(_shannon_entropy_bits(energy_at_level))
    return entropy_per_level

Add five columns (`WPE1`–`WPE5`) to our DataFrame, holding the first five entropy values computed for every signal

In [475]:
# Only the first five entropy values of each signal are kept, as columns
# WPE1..WPE5 (the extractor returns levels + 1 values, i.e. six by default).
n_features = 5
wpe_columns = ['WPE' + str(k + 1) for k in range(n_features)]

# Compute every feature row first, then attach whole columns at once. This
# avoids the hard-coded row count, chained indexing (df['signal'][i]) and
# cell-by-cell .at writes.
wpe_rows = [
    multilevel_wavelet_packet_entropy(signal)[:n_features]
    for signal in df['signal']
]
# Reusing df.index keeps each feature row aligned with the signal it came from.
wpe_features = pd.DataFrame(wpe_rows, index=df.index, columns=wpe_columns)
for column in wpe_columns:
    df[column] = wpe_features[column]

In [476]:
# Spot-check ten random rows including the WPE columns; unseeded, so the rows shown differ on every run.
df.sample(10)

Unnamed: 0,signal,class,WPE1,WPE2,WPE3,WPE4,WPE5
470,"[0.10409556313993173, 0.07167235494880546, 0.0...",E,0.128228,0.745956,0.980775,0.980775,0.745956
222,"[0.03424657534246575, -0.2054794520547945, -0....",C,0.603374,0.932731,0.984286,0.984286,0.932731
458,"[-0.3102100635075721, -0.2799218368343918, -0....",E,0.553985,0.593479,0.953994,0.953994,0.593479
164,"[0.010309278350515464, -0.041237113402061855, ...",B,0.093251,0.462609,0.880427,0.880427,0.462609
175,"[-0.49606299212598426, -0.5511811023622047, -0...",B,0.057168,0.506546,0.909134,0.909134,0.506546
313,"[0.2, 0.16129032258064516, 0.10967741935483871...",D,0.15943,0.799535,0.992196,0.992196,0.799535
309,"[-0.3360655737704918, -0.3114754098360656, -0....",D,0.114197,0.923152,0.998714,0.998714,0.923152
397,"[-0.45132743362831856, -0.37168141592920356, -...",D,0.276219,0.996574,0.979109,0.979109,0.996574
387,"[-0.006493506493506494, -0.07142857142857142, ...",D,0.348888,0.982231,0.999557,0.999557,0.982231
302,"[-0.20108695652173914, -0.11956521739130435, -...",D,0.290748,0.940822,0.999999,0.999999,0.940822


In [477]:
# The raw signals are no longer needed once the WPE features exist; keep the label and features only.
df = df.drop(columns=['signal'])

In [478]:
# Spot-check five random rows of the reduced frame; unseeded, so the rows shown differ on every run.
df.sample(5)

Unnamed: 0,class,WPE1,WPE2,WPE3,WPE4,WPE5
433,E,0.916027,0.955322,0.995599,0.995599,0.955322
284,C,0.772144,0.891474,0.836075,0.836075,0.891474
371,D,0.423279,0.999971,0.99902,0.99902,0.999971
261,C,0.157154,0.87084,0.918096,0.918096,0.87084
411,E,0.714691,0.668459,0.842336,0.842336,0.668459


In [479]:
# Column 0 holds the class label; every remaining column is a feature.
feature_frame = df.iloc[:, 1:]
label_series = df.iloc[:, 0]

X = feature_frame.to_numpy()
y = label_series.to_numpy()

print(X.shape, y.shape)

(500, 5) (500,)


In [480]:
# Display the feature matrix (NumPy abbreviates the middle rows of a large array with "...").
X

array([[0.05764544, 0.46739005, 0.94184841, 0.94184841, 0.46739005],
       [0.04917578, 0.50605985, 0.91503831, 0.91503831, 0.50605985],
       [0.09391116, 0.67731773, 0.93233229, 0.93233229, 0.67731773],
       ...,
       [0.58885455, 0.90286743, 0.95341004, 0.95341004, 0.90286743],
       [0.76571052, 0.99484558, 0.99927953, 0.99927953, 0.99484558],
       [0.7409226 , 0.73940792, 0.9579845 , 0.9579845 , 0.73940792]])

## Split the data and perform a Dimensionality Reduction

In [481]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score

In [482]:
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

Split the data into train and test sets, then standardize the features (the scaler is fitted on the training set only)

In [483]:
from sklearn.preprocessing import StandardScaler

# Hold out 20% of the samples for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Learn the scaling statistics from the training set only, then apply the same
# transformation to both splits so no test information reaches the scaler.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

Dimensionality reduction using LDA

In [484]:
# Fit a supervised projection onto two discriminant components using the
# training labels, then project both splits with the fitted model.
lda = LDA(n_components=2).fit(X_train, y_train)
X_train = lda.transform(X_train)
X_test = lda.transform(X_test)

## Classification using SVM

In [485]:
# Train an RBF-kernel SVM on the LDA-projected training data.
svm_classifier = SVC(C=1.0, kernel='rbf').fit(X_train, y_train)

# Score on the held-out split. classification_report(y_test, y_pred) would
# additionally give a per-class breakdown.
y_pred = svm_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6
