In [1]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

import librosa
from IPython.display import Audio

In [2]:
# !wget -r https://getfile.dokpub.com/yandex/get/https://disk.yandex.ru/d/4R8jAFyoX4Edzg

In [3]:
# !mkdir data
# !unzip -u getfile.dokpub.com/yandex/get/https:/disk.yandex.ru/d/4R8jAFyoX4Edzg -d data

In [4]:
person=2
digit = 0

In [5]:
#Modified get_data()
def get_data_m(digit=0,person=person,index=0,target_sr=16000):
    if person < 10:
        file = f"data/voice_ger/0{person}/{digit}_0{person}_{index}.wav"
    else:
        file = f"data/voice_ger/{person}/{digit}_{person}_{index}.wav"

    data, sr = librosa.load(file)#sr=22050
    #down sampling to 8000Hz
    down_d = librosa.resample(data, orig_sr=sr, target_sr=target_sr)
    #fixed length of all data to 12000 samples
    fix_len_d = librosa.util.fix_length(down_d, size=12000)
    return fix_len_d, target_sr

In [6]:
def read_data(digit=0, person=2, index=0):
    data_per = get_data_m(digit=digit, person=person, index=index)
    return data_per

In [8]:
df_0 = pd.DataFrame()
df_1 = pd.DataFrame()

In [9]:
for i in range(0,50):
    df_0[i] = pd.DataFrame(read_data(digit=0, index=i, person=2)[0])

In [10]:
for i in range(0,50):
    df_1[i] = pd.DataFrame(read_data(digit=1, index=i, person=2)[0])

In [11]:
df_0 = df_0.add_suffix('_0')
df_1 = df_1.add_suffix('_1')

In [12]:
df = pd.concat([df_0, df_1], axis =1)

In [13]:
target = [0 if x<50 else 1 for x in range(0,100)]

In [14]:
np.unique(target, return_counts= True)

(array([0, 1]), array([50, 50]))

Давайте обучим модель БЕЗ извлечения признаков

In [15]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

In [16]:
X = df.T.values

In [17]:
Y = np.array(target)[None].T

In [18]:
Y.shape

(100, 1)

In [19]:
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test = train_test_split(X, Y, test_size=.3,
                                                shuffle=True,random_state=42)
print(X_train.shape,Y_train.shape, X_test.shape,Y_test.shape)

(70, 12000) (70, 1) (30, 12000) (30, 1)


In [26]:
clf = RandomForestClassifier(n_estimators=10)
clf.fit(X_train, Y_train)
print("Accuracy on training set is : {:.2f}".format(clf.score(X_train, Y_train)))
print("Accuracy on test set is : {:.2f}".format(clf.score(X_test, Y_test)))
Y_test_pred = clf.predict(X_test)
print(classification_report(Y_test, Y_test_pred))

Accuracy on training set is : 1.00
Accuracy on test set is : 0.83
              precision    recall  f1-score   support

           0       1.00      0.71      0.83        17
           1       0.72      1.00      0.84        13

    accuracy                           0.83        30
   macro avg       0.86      0.85      0.83        30
weighted avg       0.88      0.83      0.83        30



  return fit_method(estimator, *args, **kwargs)


In [27]:
from sklearn.metrics import confusion_matrix
confusion_matrix(Y_test, Y_test_pred)

array([[12,  5],
       [ 0, 13]])

Давайте теперь обучим модель на основе выделенных признаков

In [29]:
# !pip install tsfel

In [28]:
import tsfel

In [29]:
cfg_file = tsfel.get_features_by_domain('spectral') # 'temporal', 'statistical'

fs=1000 # частота дискретизации


In [30]:
X_train_tsfel = pd.DataFrame()

for i in tqdm(range(0,len(X_train))):
    X_train_tsfel[i] = tsfel.time_series_features_extractor(cfg_file, X_train[i], fs=fs, verbose=False).T

100%|██████████| 70/70 [00:24<00:00,  2.80it/s]


In [31]:
X_test_tsfel = pd.DataFrame()

for i in tqdm(range(0,len(X_test))):
    X_test_tsfel[i] = tsfel.time_series_features_extractor(cfg_file, X_test[i], fs=fs, verbose=False).T

100%|██████████| 30/30 [00:10<00:00,  2.86it/s]


In [32]:
X_train.shape

(70, 12000)

In [33]:
X_train_tsfel.T.values

array([[1.27411739e-08, 3.60189572e-08, 2.60033159e-08, ...,
        2.63452819e-05, 3.55551826e-05, 4.42315455e-05],
       [9.99224916e-09, 5.19895995e-08, 3.46842608e-08, ...,
        4.08546421e-05, 5.63321358e-05, 7.10137204e-05],
       [6.36961088e-09, 3.71490381e-08, 1.62710261e-08, ...,
        3.52351159e-05, 3.84520384e-05, 4.17687457e-05],
       ...,
       [8.07795423e-09, 6.19106481e-08, 2.92935674e-08, ...,
        5.22328076e-05, 6.81663788e-05, 8.08312617e-05],
       [9.71714374e-09, 3.48331020e-08, 2.58868293e-08, ...,
        9.07062635e-05, 8.54337702e-05, 7.69051253e-05],
       [7.78551978e-09, 3.95876221e-08, 1.63814571e-08, ...,
        5.33121740e-05, 4.93398638e-05, 4.49818246e-05]])

In [57]:
clf = RandomForestClassifier(n_estimators=10, max_depth=4)
clf.fit(X_train_tsfel.T.values, Y_train)
print("Accuracy on training set is : {:.2f}".format(clf.score(X_train_tsfel.values.T, Y_train)))
print("Accuracy on test set is : {:.2f}".format(clf.score(X_test_tsfel.T.values, Y_test)))
Y_test_pred = clf.predict(X_test_tsfel.T.values)
print(classification_report(Y_test, Y_test_pred))

Accuracy on training set is : 1.00
Accuracy on test set is : 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



  return fit_method(estimator, *args, **kwargs)


In [58]:
confusion_matrix(Y_test, Y_test_pred)

array([[17,  0],
       [ 0, 13]])

Вывод: при более примитивных характеристиках классификатора, использование tsfel позволяет повысить точностные характеристики, выделив наиболее существенные признаки из набора данных