In [1]:
# Работа с табличными данными

import pandas as pd
import numpy as np


# Визуализация

import plotly.express as px
import plotly.io as pio
pio.templates.default = 'plotly_dark'
pio.renderers.default = 'notebook'
from motorica.emg8.utils import fig_montage # кастомная функция визуализации


# Пайплайн

# чтение данных и разметка по фактическим жестам
from motorica.emg8.pipeline import read_emg8
from motorica.emg8.markers import BasePeakMarker, TransMarker
# создание экземпляра пайплайна на базе: 
from motorica.emg8.pipeline import create_logreg_pipeline, create_grad_logreg_pipeline
# константы
from motorica.emg8.constants import *


# Метрики
from sklearn.metrics import classification_report


# Для вывода докстрингов с форматированием markdown
from IPython.display import Markdown as md

# Работа с файлами
import os

# Для оценки скорости инференса
from time import time

# Сериализация модели (пайплайна)
import pickle

## Подгрузка и разметка данных

Список имеющихся файлов с данными:

In [2]:
# Папка с файлами данных
DATA_DIR = 'data/plt_n'

montages = sorted(filter(lambda f: f.endswith('.emg8'), os.listdir(DATA_DIR)))
montages

['2024-11-11_18-46-48.emg8',
 '2024-11-11_19-42-26.emg8',
 '2024-11-12_11-17-31.emg8',
 '2024-11-12_11-35-21.emg8',
 '2024-11-12_13-26-05.emg8',
 '2024-11-12_13-39-57.emg8']

In [3]:
montage = montages[0]
gestures_raw = pd.read_csv(os.path.join(DATA_DIR, montage), sep=' ')
fig_montage(
    gestures_raw[OMG_CH], y_cmd=gestures_raw['id'], 
    title=f"<i>{montage}</i> – исходные данные"
).show()

In [4]:
n_holdout_groups = 1

trans_marker = TransMarker(use_peaks='std', bounds_shift=0)
X_train, X_test, y_train, y_test, gestures, cv_groups = read_emg8(
    montage, dir=DATA_DIR,
    n_holdout_groups=1,
    marker=trans_marker
)

last_train_idx = gestures[GROUP_COL].drop_duplicates().index[-n_holdout_groups] - 1

print('X_train shape:', X_train.shape)
print('y_train shape:', y_train.shape)
print('X_test shape:', X_test.shape)
print('y_test shape:', y_test.shape)

state_id = gestures[['id', 'state']].drop_duplicates()
GESTURES = state_id['state'] + ' ' + state_id['id'].astype(str)
display(GESTURES)

fig_montage(
    gestures[OMG_CH], 
    y_cmd=gestures[CMD_COL], 
    y_act=gestures[TARGET],
    protocol_cycle=gestures[GROUP_COL],
    # peaks=trans_marker.peaks_std1 / 2,
    # peaks_neg=-trans_marker.peaks_std1_neg / 2,
    # grad2=marker.peaks_grad2 / 2,
    # grad2_neg=marker.peaks_grad2_neg / 2,
    # std1=marker.peaks_std1 / 2,
    # std1_neg=marker.peaks_std1_neg / 2,
    title=f"<i>{montage}</i> – разметка переходов"
).show()

X_train shape: (4369, 16)
y_train shape: (4369,)
X_test shape: (1131, 16)
y_test shape: (1131,)


125          Neutral 0
225     ThumbFingers 1
375            Close 2
525             Open 3
675            Pinch 4
825       Indication 5
975       Wrist_Flex 6
1125    Wrist_Extend 7
dtype: object

В качестве отложенной выборки оставим последний цикл протокола:

## Пайплайн

In [6]:
# model = create_logreg_pipeline(optimize=False, X=X_train, y=y_train)
model = create_grad_logreg_pipeline(exec_optimize=False, exec_fit=False, X=X_train, y=y_train)
model

In [None]:
model = 

In [16]:
# # сохранение пайплайна (сериализация)
# with open('pipeline_logreg.pkl', 'wb') as f:
#     pickle.dump(model, f)

# # восстановление пайплайна из файла
# with open('pipeline_logreg.pkl', 'rb') as f:
#     model = pickle.load(f)

In [17]:
y_train_pred = model.predict(X_train)
y_test_pred = model.predict(X_test)

In [18]:
print("Train data " + '-' * 44)
print(classification_report(y_train, y_train_pred))#, target_names=GESTURES))

Train data --------------------------------------------
              precision    recall  f1-score   support

          -7       0.00      0.00      0.00        39
          -6       0.00      0.00      0.00        36
          -5       0.00      0.00      0.00        35
          -4       0.00      0.00      0.00        37
          -3       0.00      0.00      0.00        37
          -2       0.00      0.00      0.00        36
          -1       0.00      0.00      0.00        35
           0       0.91      1.00      0.95      2853
           1       0.03      0.17      0.06        35
           2       0.07      0.38      0.12        34
           3       0.07      0.36      0.11        36
           4       0.09      0.39      0.15        38
           5       0.09      0.43      0.15        37
           6       0.08      0.42      0.14        36
           7       0.09      0.47      0.15        36
          11       0.00      0.00      0.00       153
          12       0.00  


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [19]:
print("Test data " + '-' * 45)
print(classification_report(y_test, y_test_pred))#, target_names=GESTURES))

Test data ---------------------------------------------
              precision    recall  f1-score   support

          -7       0.00      0.00      0.00         9
          -6       0.00      0.00      0.00         9
          -5       0.00      0.00      0.00         8
          -4       0.00      0.00      0.00         9
          -3       0.00      0.00      0.00         9
          -2       0.00      0.00      0.00         9
          -1       0.00      0.00      0.00         9
           0       0.84      1.00      0.91       747
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00         8
           3       0.09      0.40      0.14        10
           4       0.06      0.33      0.10         9
           5       0.07      0.33      0.11         9
           6       0.07      0.25      0.11         8
           7       0.11      0.56      0.18         9
          11       0.00      0.00      0.00        38
          12       0.00  


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



## Симуляция инференса в реальном времени на отложенной выборке

In [None]:
y_test_pred = np.empty(0)
comp_durations = np.empty(0)

for i in range(X_test.shape[0]):
    start_time = time()
    y_test_pred = np.append(y_test_pred, model.predict(X_test[i]))
    comp_duration = time() - start_time
    comp_durations = np.append(comp_durations, comp_duration)

print(f"Максимальное время: {np.round(comp_durations.max() * 1000, 2)} мс")
print(f"Среднее время: {np.round(comp_durations.mean() * 1000, 2)} мс")

print(classification_report(y_test, y_test_pred))#, target_names=GESTURES))

fig = fig_montage(
    pd.DataFrame(X_test), 
    y_cmd=gestures.loc[last_train_idx + 1:, CMD_COL].reset_index(drop=True), 
    y_true=y_test, y_pred=y_test_pred,
    title=f"{montage}<br>Результаты последовательных предсказаний примеров тестовой выборки"
)
fig.show()

Максимальное время: 2.08 мс
Среднее время: 0.33 мс
              precision    recall  f1-score   support

          -7       0.00      0.00      0.00         9
          -6       0.00      0.00      0.00         9
          -5       0.00      0.00      0.00         8
          -4       0.00      0.00      0.00         9
          -3       0.00      0.00      0.00         9
          -2       0.00      0.00      0.00         9
          -1       0.00      0.00      0.00         9
           0       0.84      1.00      0.91       747
           1       0.00      0.00      0.00        10
           2       0.00      0.00      0.00         8
           3       0.09      0.40      0.14        10
           4       0.06      0.33      0.10         9
           5       0.07      0.33      0.11         9
           6       0.07      0.25      0.11         8
           7       0.11      0.56      0.18         9
          11       0.00      0.00      0.00        38
          12       0.00      0


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.


Precision is ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.

