In [1]:
# Работа с табличными данными
import pandas as pd
import numpy as np

# Пайплайн
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin

# Преобразование признаков
from sklearn.preprocessing import MinMaxScaler, RobustScaler, StandardScaler

# Модели
from sklearn.linear_model import LogisticRegression

# Валидация
from sklearn.model_selection import cross_val_predict, cross_val_score
from sklearn.metrics import f1_score, accuracy_score, classification_report
from sklearn.model_selection import train_test_split

# Визуализация
import plotly.express as px
import plotly.io as pio
pio.templates.default = 'plotly_dark'

from collections import deque

from motorica.utils import *

from typing import Any

## Мета-информация

In [2]:
METAINFO_PATH = 'marked/selected_montages.csv'
work_metadata = read_meta_info(METAINFO_PATH)
work_metadata

Unnamed: 0_level_0,pilote_id,last_train_idx,len(train),len(test),ts_delta,ticks_per_gest,n_gestures,ACC,GYR,hi_val_sensors,mark_sensors
montage,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2023-05-15_16-16-08.palm,1,23337,23337,5810,33.0,46.0,271.0,True,True,"[3, 4, 6, 12, 13, 16, 17, 21, 22, 27, 28, 30, ...","[3, 4, 6, 12, 13, 16, 17, 21, 22, 27, 28, 30, ..."
2023-05-15_17-12-24.palm,1,23336,23336,5803,33.0,46.0,271.0,True,True,"[3, 4, 6, 12, 13, 16, 17, 21, 22, 27, 28, 30, ...","[3, 4, 6, 12, 13, 16, 17, 21, 22, 27, 28, 30, ..."
2023-06-05_16-12-38.palm,1,17939,17939,4431,33.0,30.0,361.0,True,True,"[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3...","[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3..."
2023-06-05_17-53-01.palm,1,17771,17771,4435,33.0,31.0,361.0,True,True,"[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3...","[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3..."
2023-06-20_14-43-11.palm,1,17936,17936,4441,33.0,31.0,361.0,True,True,"[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3...","[3, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 30, ..."
2023-06-20_13-30-15.palm,1,17928,17928,4435,33.0,31.0,361.0,True,True,"[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3...","[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3..."
2023-06-20_12-34-17.palm,1,17758,17758,4444,33.0,31.0,361.0,True,True,"[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3...","[3, 4, 5, 6, 12, 13, 16, 17, 21, 22, 27, 28, 3..."
2023-09-30_08-06-44.palm,2,5693,5693,5509,33.0,31.0,181.0,True,True,"[7, 9, 10, 18, 20, 23, 26, 28, 31, 34, 37, 39]","[7, 9, 10, 18, 20, 23, 26, 28, 31, 34, 37, 39]"
2023-09-29_11-03-50.palm,2,5694,5694,5511,33.0,31.0,181.0,True,True,"[7, 9, 10, 18, 20, 23, 26, 28, 31, 34, 37, 39]","[7, 9, 10, 18, 20, 23, 26, 28, 34, 37, 39]"
2023-09-29_09-20-47.palm,2,5690,5690,5507,33.0,31.0,181.0,True,True,"[7, 9, 10, 18, 20, 23, 26, 28, 31, 34, 37, 39]","[7, 9, 10, 18, 20, 23, 26, 28, 31, 34, 37, 39]"


In [3]:
def read_train_and_test(
        montage: str,
        features: List[str], 
        target_col: str = 'act_label',
        subdir: str = 'marked/'
) -> List:
    
    data_train = pd.read_csv(subdir + montage + ".train", index_col=0)
    data_test = pd.read_csv(subdir + montage + ".test", index_col=0)
    data_full = pd.read_csv(subdir + montage + ".marked", index_col=None)
    X_train = data_train.drop(target_col, axis=1)[features]
    y_train = data_train[target_col]
    X_test = data_test.drop(target_col, axis=1)[features]
    y_test = data_test[target_col]
    return X_train, X_test, y_train, y_test, data_full

# Набор признаков для обучения

In [4]:
features = [
            '0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25',
            '26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49',
            'ACC0','ACC1','ACC2',
            'GYR0','GYR1','GYR2',
            #'Pronation','act_pronation','sample','act_label_ext'
            ]

In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, LayerNormalization, Dropout
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold

# Функция для преобразования данных в последовательности
def create_sequences(data, labels, timesteps=2):
    X, y = [], []
    for i in range(len(data) - timesteps + 1):
        X.append(data[i:i + timesteps])
        y.append(labels[i + timesteps - 1])
    return np.array(X), np.array(y)

# Функция для загрузки данных монтажа
def load_montage_data(montage_name, subdir='marked/'):
    data_train = pd.read_csv(subdir + montage_name + ".train", index_col=0)
    data_test = pd.read_csv(subdir + montage_name + ".test", index_col=0)
    data_full = pd.read_csv(subdir + montage_name + ".marked", index_col=None)
    return data_train, data_test, data_full

# Загрузка метаданных
METAINFO_PATH = 'marked/selected_montages.csv'
meta_info = read_meta_info(METAINFO_PATH)

# Фильтрация монтажей второго пилота
pilot_2_montages = meta_info[meta_info['pilote_id'] == 2].index


# ------------------------------------------------------------------------------------------
# Обучать будем на всех монтажах 2-го пилота, кроме последнего, и тестируем на нём.
index_pilot_id2_list = pd.Index(['2023-09-30_08-06-44.palm', '2023-09-29_11-03-50.palm',
                  '2023-09-29_09-20-47.palm', '2023-09-13_22-14-05.palm',
                  '2023-09-12_14-59-23.palm', '2023-09-12_12-55-22.palm',
                  '2023-05-31_17-14-41.palm', '2023-05-31_15-46-37.palm',
                  '2023-05-22_20-22-01.palm', '2023-05-22_17-04-29.palm',
                  '2023-05-19_12-04-02.palm'], name='montage')

# Удаляем последний элемент
last_index_pilot_id2 = index_pilot_id2_list[-1]
pilot_2_montages = index_pilot_id2_list[:-1]

#------------------------------------------------------------------------------------------


# Инициализация списков для хранения данных
X_train_list, X_test_list, y_train_list, y_test_list = [], [], [], []

# Загрузка и предобработка данных для каждого монтажа
for montage in pilot_2_montages:
    montage_info = meta_info.loc[montage]
    # features = montage_info['hi_val_sensors'] + cols_gyr
    data_train, data_test, _ = load_montage_data(montage)

    X_train = data_train.drop('act_label', axis=1)[features]
    y_train = data_train['act_label']
    X_test = data_test.drop('act_label', axis=1)[features]
    y_test = data_test['act_label']

    scaler = MinMaxScaler()
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns
    )
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns
    )

    X_train_list.append(X_train_scaled)
    X_test_list.append(X_test_scaled)
    y_train_list.append(y_train)
    y_test_list.append(y_test)

# ------------------------------------------------------------------------------------

# Объединение данных
X_train_combined = pd.concat(X_train_list, axis=0).reset_index(drop=True)
X_test_combined = pd.concat(X_test_list, axis=0).reset_index(drop=True)
y_train_combined = pd.concat(y_train_list, axis=0).reset_index(drop=True)
y_test_combined = pd.concat(y_test_list, axis=0).reset_index(drop=True)

# Преобразование данных в массивы numpy
X_train_array = X_train_combined.values
y_train_array = y_train_combined.values
X_test_array = X_test_combined.values
y_test_array = y_test_combined.values


# ---------------------------------------------------------------------------------------- 
# Подбор оптимального timesteps

# Функция для оценки модели с использованием кросс-валидации
def evaluate_model(timesteps, X_train_array, y_train_array, X_test_array, y_test_array):
    # Создание последовательностей
    X_train_seq, y_train_seq = create_sequences(X_train_array, y_train_array, timesteps)
    X_test_seq, y_test_seq = create_sequences(X_test_array, y_test_array, timesteps)

    # Кодирование целевых меток
    encoder = OneHotEncoder(sparse_output=False)
    y_train_encoded = encoder.fit_transform(y_train_seq.reshape(-1, 1))
    y_test_encoded = encoder.transform(y_test_seq.reshape(-1, 1))

    # Построение модели
    model = Sequential([
        LSTM(64, input_shape=(timesteps, X_train_seq.shape[2]), return_sequences=False),
        Dense(32, activation='relu'),
        Dense(y_train_encoded.shape[1], activation='softmax')
    ])

    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Кросс-валидация
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    accuracies = []

    for train_index, val_index in kf.split(X_train_seq):
        X_train_fold, X_val_fold = X_train_seq[train_index], X_train_seq[val_index]
        y_train_fold, y_val_fold = y_train_encoded[train_index], y_train_encoded[val_index]

        # Обучение модели (20 эпох, batch_size=32 для регуляризации/нормализации)
        model.fit(X_train_fold, y_train_fold, epochs=20, batch_size=32, verbose=0)
        val_loss, val_accuracy = model.evaluate(X_val_fold, y_val_fold, verbose=0)
        accuracies.append(val_accuracy)

    # Оценка на тестовых данных
    model.fit(X_train_seq, y_train_encoded, epochs=20, batch_size=32, verbose=0)
    y_pred_encoded = model.predict(X_test_seq)
    y_pred = np.argmax(y_pred_encoded, axis=1)
    y_test_actual = np.argmax(y_test_encoded, axis=1)

    test_accuracy = accuracy_score(y_test_actual, y_pred)

    return np.mean(accuracies), test_accuracy

# -------------------------------------------------------------------------------------------------


# Перебор значений timesteps
timesteps_values = [1, 2, 3, 5]
best_timesteps = None
best_val_accuracy = 0
best_test_accuracy = 0

for timesteps in timesteps_values:
    val_accuracy, test_accuracy = evaluate_model(timesteps, X_train_array, y_train_array, X_test_array, y_test_array)
    print(f"Timesteps: {timesteps}, Validation Accuracy: {val_accuracy}, Test Accuracy: {test_accuracy}")
    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_test_accuracy = test_accuracy
        best_timesteps = timesteps

print(f"Best Timesteps: {best_timesteps}, Best Validation Accuracy: {best_val_accuracy}, Best Test Accuracy: {best_test_accuracy}")

  super().__init__(**kwargs)


[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 353us/step
Timesteps: 1, Validation Accuracy: 0.9529443383216858, Test Accuracy: 0.8812957206628181


  super().__init__(**kwargs)


[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 416us/step
Timesteps: 2, Validation Accuracy: 0.9668618083000183, Test Accuracy: 0.9055064740803066


  super().__init__(**kwargs)


[1m1338/1338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step
Timesteps: 3, Validation Accuracy: 0.9687222838401794, Test Accuracy: 0.904686221806708


  super().__init__(**kwargs)


KeyboardInterrupt: 

---
# Код для предсказания конкретного монтажа (ниже будет функция)



In [None]:
# Загрузка и предобработка нового файла
new_montage_name = 'montage_name'
new_data_train, new_data_test, new_data_full = load_montage_data(new_montage_name)

new_X_train = new_data_train.drop('act_label', axis=1)[features]
new_y_train = new_data_train['act_label']
new_X_test = new_data_test.drop('act_label', axis=1)[features]
new_y_test = new_data_test['act_label']

scaler = MinMaxScaler()
new_X_train_scaled = pd.DataFrame(
    scaler.fit_transform(new_X_train),
    columns=new_X_train.columns
)
new_X_test_scaled = pd.DataFrame(
    scaler.transform(new_X_test),
    columns=new_X_test.columns
)

# Преобразование данных в массивы numpy
new_X_train_array = new_X_train_scaled.values
new_y_train_array = new_y_train.values
new_X_test_array = new_X_test_scaled.values
new_y_test_array = new_y_test.values

# Создание последовательностей
new_X_train_seq, new_y_train_seq = create_sequences(new_X_train_array, new_y_train_array, best_timesteps)
new_X_test_seq, new_y_test_seq = create_sequences(new_X_test_array, new_y_test_array, best_timesteps)

# Кодирование целевых меток
encoder = OneHotEncoder(sparse_output=False)
new_y_train_encoded = encoder.fit_transform(new_y_train_seq.reshape(-1, 1))
new_y_test_encoded = encoder.transform(new_y_test_seq.reshape(-1, 1))

# Построение модели
model = Sequential([
    LSTM(64, input_shape=(best_timesteps, new_X_train_seq.shape[2]), return_sequences=False),
    Dense(32, activation='relu'),
    Dense(new_y_train_encoded.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Обучение модели
model.fit(
    new_X_train_seq, new_y_train_encoded,
    validation_data=(new_X_test_seq, new_y_test_encoded),
    epochs=20, batch_size=32, verbose=1
)

# Предсказания
new_y_pred_encoded = model.predict(new_X_test_seq)
new_y_pred = np.argmax(new_y_pred_encoded, axis=1)
new_y_test_actual = np.argmax(new_y_test_encoded, axis=1)

# Вывод метрик
print(classification_report(new_y_test_actual, new_y_pred, zero_division=0))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
              precision    recall  f1-score   support

           0       0.96      0.98      0.97      4080
           1       0.93      0.90      0.91       363
           2       0.94      0.84      0.88       321
           3       0.89      0.73      0.80       340
           4       0.81      0.88      0.84       318
           5       0.94      0.95      0.95       369

    accuracy                           0.94      5791
   macro avg       0.91      0.88      0.89      5791
weighted avg       0.94      0.94      0.94      5791



In [11]:
features = [
            '0','1','2','3','4','5','6','7','8','9','10','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25',
            '26','27','28','29','30','31','32','33','34','35','36','37','38','39','40','41','42','43','44','45','46','47','48','49',
            'ACC0','ACC1','ACC2',
            'GYR0','GYR1','GYR2',
            'Pronation','act_pronation','sample','act_label_ext'
            ]

## Проба предсказать монтаж не 2 пилота + сохранение модели в H5

In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# Функция для преобразования данных в последовательности
def create_sequences(data, labels, timesteps=2):
    X, y = [], []
    for i in range(len(data) - timesteps + 1):
        X.append(data[i:i + timesteps])
        y.append(labels[i + timesteps - 1])
    return np.array(X), np.array(y)

# Функция для загрузки данных монтажа
def load_montage_data(montage_name, subdir='marked/'):
    data_train = pd.read_csv(subdir + montage_name + ".train", index_col=0)
    data_test = pd.read_csv(subdir + montage_name + ".test", index_col=0)
    data_full = pd.read_csv(subdir + montage_name + ".marked", index_col=None)
    return data_train, data_test, data_full

# Загрузка метаданных
METAINFO_PATH = 'marked/selected_montages.csv'
meta_info = read_meta_info(METAINFO_PATH)

# Фильтрация монтажей второго пилота
pilot_2_montages = meta_info[meta_info['pilote_id'] == 2].index

# Инициализация списков для хранения данных
X_train_list, X_test_list, y_train_list, y_test_list = [], [], [], []

# Загрузка и предобработка данных для каждого монтажа
for montage in pilot_2_montages:
    montage_info = meta_info.loc[montage]
    # features = montage_info['hi_val_sensors'] + cols_gyr

    data_train, data_test, _ = load_montage_data(montage)

    X_train = data_train.drop('act_label', axis=1)[features]
    y_train = data_train['act_label']
    X_test = data_test.drop('act_label', axis=1)[features]
    y_test = data_test['act_label']

    scaler = MinMaxScaler()
    X_train_scaled = pd.DataFrame(
        scaler.fit_transform(X_train),
        columns=X_train.columns
    )
    X_test_scaled = pd.DataFrame(
        scaler.transform(X_test),
        columns=X_test.columns
    )

    X_train_list.append(X_train_scaled)
    X_test_list.append(X_test_scaled)
    y_train_list.append(y_train)
    y_test_list.append(y_test)

# Объединение данных
X_train_combined = pd.concat(X_train_list, axis=0).reset_index(drop=True)
X_test_combined = pd.concat(X_test_list, axis=0).reset_index(drop=True)
y_train_combined = pd.concat(y_train_list, axis=0).reset_index(drop=True)
y_test_combined = pd.concat(y_test_list, axis=0).reset_index(drop=True)

# Преобразование данных в массивы numpy
X_train_array = X_train_combined.values
y_train_array = y_train_combined.values
X_test_array = X_test_combined.values
y_test_array = y_test_combined.values

# Оптимальное значение timesteps
best_timesteps = 2

# Создание последовательностей
X_train_seq, y_train_seq = create_sequences(X_train_array, y_train_array, best_timesteps)
X_test_seq, y_test_seq = create_sequences(X_test_array, y_test_array, best_timesteps)

# Кодирование целевых меток
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded = encoder.fit_transform(y_train_seq.reshape(-1, 1))
y_test_encoded = encoder.transform(y_test_seq.reshape(-1, 1))

# Построение модели
model = Sequential([
    LSTM(64, input_shape=(best_timesteps, X_train_seq.shape[2]), return_sequences=False),
    Dense(32, activation='relu'),
    Dense(y_train_encoded.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Обучение модели
model.fit(
    X_train_seq, y_train_encoded,
    validation_data=(X_test_seq, y_test_encoded),
    epochs=20, batch_size=32, verbose=1
)

In [None]:
# Сохранение модели
model.save('motorica/model/lstm_model.h5')

# Загрузка модели для деплоя
loaded_model = load_model('motorica/model/lstm_model.h5')

In [None]:
# Пример использования модели для предсказания на монтаже не 2 пилота
new_montage_name = "2023-04-18_19-08-47 gestures train.palm"
new_data_train, new_data_test, new_data_full = load_montage_data(new_montage_name)

new_X_train = new_data_train.drop('act_label', axis=1)[features]
new_y_train = new_data_train['act_label']
new_X_test = new_data_test.drop('act_label', axis=1)[features]
new_y_test = new_data_test['act_label']

scaler = MinMaxScaler()
new_X_train_scaled = pd.DataFrame(
    scaler.fit_transform(new_X_train),
    columns=new_X_train.columns
)
new_X_test_scaled = pd.DataFrame(
    scaler.transform(new_X_test),
    columns=new_X_test.columns
)

# Преобразование данных в массивы numpy
new_X_train_array = new_X_train_scaled.values
new_y_train_array = new_y_train.values
new_X_test_array = new_X_test_scaled.values
new_y_test_array = new_y_test.values

# Создание последовательностей
new_X_train_seq, new_y_train_seq = create_sequences(new_X_train_array, new_y_train_array, best_timesteps)
new_X_test_seq, new_y_test_seq = create_sequences(new_X_test_array, new_y_test_array, best_timesteps)

# Кодирование целевых меток
new_y_train_encoded = encoder.transform(new_y_train_seq.reshape(-1, 1))
new_y_test_encoded = encoder.transform(new_y_test_seq.reshape(-1, 1))

# Предсказания
new_y_pred_encoded = loaded_model.predict(new_X_test_seq)
new_y_pred = np.argmax(new_y_pred_encoded, axis=1)
new_y_test_actual = np.argmax(new_y_test_encoded, axis=1)

# Вывод метрик
print(classification_report(new_y_test_actual, new_y_pred, zero_division=0))

Итого: 
1) Для пилота на котором обучена модель метрика хорошая, для других - нет, но и проблемы такой не стоит.
2) Высокие результаты как-будто связаны с переобучением, но в модели есть batch_size=32, который 

---
# Функция для предсказания на новых данных
---

In [None]:
# Функция для загрузки данных монтажа
def load_montage_data(montage_name, subdir='marked/'):
    data_train = pd.read_csv(subdir + montage_name + ".train", index_col=0)
    data_test = pd.read_csv(subdir + montage_name + ".test", index_col=0)
    data_full = pd.read_csv(subdir + montage_name + ".marked", index_col=None)
    return data_train, data_test, data_full


# Функция для преобразования данных в последовательности
def create_sequences(data, labels, timesteps=2):
    X, y = [], []
    for i in range(len(data) - timesteps + 1):
        X.append(data[i:i + timesteps])
        y.append(labels[i + timesteps - 1])
    return np.array(X), np.array(y)


# Функция для предсказания на новых данных (1 монтаж)
def predict_on_new_montage(montage_name, model_path, features, encoder, best_timesteps=2, subdir='marked/'):
    # Загрузка данных монтажа
    new_data_train, new_data_test, new_data_full = load_montage_data(montage_name, subdir)

    # Предобработка данных
    new_X_train = new_data_train.drop('act_label', axis=1)[features]
    new_y_train = new_data_train['act_label']
    new_X_test = new_data_test.drop('act_label', axis=1)[features]
    new_y_test = new_data_test['act_label']

    scaler = MinMaxScaler()
    new_X_train_scaled = pd.DataFrame(
        scaler.fit_transform(new_X_train),
        columns=new_X_train.columns
    )
    new_X_test_scaled = pd.DataFrame(
        scaler.transform(new_X_test),
        columns=new_X_test.columns
    )

    # Преобразование данных в массивы numpy
    new_X_train_array = new_X_train_scaled.values
    new_y_train_array = new_y_train.values
    new_X_test_array = new_X_test_scaled.values
    new_y_test_array = new_y_test.values

    # Создание последовательностей
    new_X_train_seq, new_y_train_seq = create_sequences(new_X_train_array, new_y_train_array, best_timesteps)
    new_X_test_seq, new_y_test_seq = create_sequences(new_X_test_array, new_y_test_array, best_timesteps)

    # Кодирование целевых меток
    new_y_train_encoded = encoder.transform(new_y_train_seq.reshape(-1, 1))
    new_y_test_encoded = encoder.transform(new_y_test_seq.reshape(-1, 1))

    # Загрузка модели
    loaded_model = load_model(model_path)

    # Предсказания
    new_y_pred_encoded = loaded_model.predict(new_X_test_seq)
    new_y_pred = np.argmax(new_y_pred_encoded, axis=1)
    new_y_test_actual = np.argmax(new_y_test_encoded, axis=1)

    # Вывод метрик
    print(classification_report(new_y_test_actual, new_y_pred, zero_division=0))

---
## Идея на будущее
* Переобучать модель для конкретных пользователей, ища подходящие параметры 
---

In [None]:
# Функция для переобучения модели с новыми данными
def retrain_model_with_new_data(new_montage_name, model_path, features, subdir='marked/'):
    # Загрузка данных монтажа
    new_data_train, new_data_test, new_data_full = load_montage_data(new_montage_name, subdir)

    # Предобработка данных
    new_X_train = new_data_train.drop('act_label', axis=1)[features]
    new_y_train = new_data_train['act_label']
    new_X_test = new_data_test.drop('act_label', axis=1)[features]
    new_y_test = new_data_test['act_label']

    scaler = MinMaxScaler()
    new_X_train_scaled = pd.DataFrame(
        scaler.fit_transform(new_X_train),
        columns=new_X_train.columns
    )
    new_X_test_scaled = pd.DataFrame(
        scaler.transform(new_X_test),
        columns=new_X_test.columns
    )

    # Преобразование данных в массивы numpy
    new_X_train_array = new_X_train_scaled.values
    new_y_train_array = new_y_train.values
    new_X_test_array = new_X_test_scaled.values
    new_y_test_array = new_y_test.values

    # Перебор значений timesteps
    timesteps_values = [1, 2, 3]
    best_timesteps = None
    best_val_accuracy = 0
    best_test_accuracy = 0
    best_model = None
    best_encoder = None

    for timesteps in timesteps_values:
        val_accuracy, test_accuracy, model, encoder = evaluate_model(
            timesteps, new_X_train_array, new_y_train_array, new_X_test_array, new_y_test_array, new_X_train_array.shape[1], len(np.unique(new_y_train_array))
        )
        print(f"Timesteps: {timesteps}, Validation Accuracy: {val_accuracy}, Test Accuracy: {test_accuracy}")
        if val_accuracy > best_val_accuracy:
            best_val_accuracy = val_accuracy
            best_test_accuracy = test_accuracy
            best_timesteps = timesteps
            best_model = model
            best_encoder = encoder

    print(f"Best Timesteps: {best_timesteps}, Best Validation Accuracy: {best_val_accuracy}, Best Test Accuracy: {best_test_accuracy}")

    # Сохранение обновленной модели
    best_model.save(model_path)

    return best_model, best_encoder, best_timesteps



# Пример использования функции для переобучения модели с новыми данными
new_montage_name = "2023-04-18_19-08-47 gestures train.palm"
best_model, best_encoder, best_timesteps = retrain_model_with_new_data(new_montage_name, 'motorica/model/lstm_model.h5', features)

# Добавление в модель DropOut, LayerNormalization (построение модели можно изменить везде на улучшенное)

In [15]:
# Объединение данных
X_train_combined = pd.concat(X_train_list, axis=0).reset_index(drop=True)
X_test_combined = pd.concat(X_test_list, axis=0).reset_index(drop=True)
y_train_combined = pd.concat(y_train_list, axis=0).reset_index(drop=True)
y_test_combined = pd.concat(y_test_list, axis=0).reset_index(drop=True)

# Преобразование данных в массивы numpy
X_train_array = X_train_combined.values
y_train_array = y_train_combined.values
X_test_array = X_test_combined.values
y_test_array = y_test_combined.values

# Оптимальное значение timesteps
best_timesteps = 2

# Создание последовательностей
X_train_seq, y_train_seq = create_sequences(X_train_array, y_train_array, best_timesteps)
X_test_seq, y_test_seq = create_sequences(X_test_array, y_test_array, best_timesteps)

# Кодирование целевых меток
encoder = OneHotEncoder(sparse_output=False)
y_train_encoded = encoder.fit_transform(y_train_seq.reshape(-1, 1))
y_test_encoded = encoder.transform(y_test_seq.reshape(-1, 1))

# Построение модели
model = Sequential([
    LSTM(64, input_shape=(best_timesteps, X_train_seq.shape[2]), return_sequences=True),
    LayerNormalization(),
    Dropout(0.2),
    LSTM(64, return_sequences=True),
    LayerNormalization(),
    Dropout(0.2),
    LSTM(64, return_sequences=False),
    LayerNormalization(),
    Dense(32, activation='relu'),
    Dense(y_train_encoded.shape[1], activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Обучение модели
model.fit(
    X_train_seq, y_train_encoded,
    validation_data=(X_test_seq, y_test_encoded),
    epochs=20, batch_size=32, verbose=1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1b0005f79d0>