In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import  LabelEncoder

<h3>Загрузка датасета</h3>

In [101]:
#загрузка датасета
df = pd.read_csv('coords_data_numerated.csv')
df = df.drop(columns=['Unnamed: 0', 'GSR'])
df.head()

Unnamed: 0,id,BreathingType,TimeStamp_sec,FirstMarkerXCoord,FirstMarkerYCoord,FirstMarkerZCoord,SecondMarkerXCoord,SecondMarkerYCoord,SecondMarkerZCoord,ThirdMarkerXCoord,ThirdMarkerYCoord,ThirdMarkerZCoord
0,1,0,0.058895,0.689,-1.453,4.417,0.881,-1.544,4.547,0.718,-1.607,4.369
1,1,0,0.111889,0.687,-1.452,4.418,0.881,-1.544,4.547,0.715,-1.607,4.371
2,1,0,0.158371,0.687,-1.451,4.419,0.881,-1.544,4.545,0.714,-1.607,4.371
3,1,0,0.262516,0.685,-1.45,4.42,0.882,-1.544,4.541,0.711,-1.608,4.373
4,1,0,0.29562,0.684,-1.449,4.42,0.883,-1.544,4.54,0.709,-1.607,4.37


In [102]:
y = df['BreathingType']
X = df.drop(columns=['BreathingType','TimeStamp_sec'])
X.tail()

Unnamed: 0,id,FirstMarkerXCoord,FirstMarkerYCoord,FirstMarkerZCoord,SecondMarkerXCoord,SecondMarkerYCoord,SecondMarkerZCoord,ThirdMarkerXCoord,ThirdMarkerYCoord,ThirdMarkerZCoord
230921,258,1.063,-1.649,3.843,1.093,-1.839,3.624,1.03,-1.867,3.859
230922,258,1.062,-1.648,3.842,1.094,-1.841,3.625,1.03,-1.867,3.857
230923,258,1.062,-1.648,3.842,1.093,-1.841,3.624,1.03,-1.868,3.855
230924,258,1.063,-1.647,3.842,1.094,-1.841,3.624,1.029,-1.868,3.853
230925,258,1.063,-1.647,3.842,1.094,-1.841,3.623,1.03,-1.868,3.853


In [103]:
#переименование столбцов
dict_renames = {
    'FirstMarkerXCoord' : 'FMX',
    'FirstMarkerYCoord' : 'FMY',
    'FirstMarkerZCoord' : 'FMZ',
    'SecondMarkerXCoord' :  'SMX',
    'SecondMarkerYCoord' : 'SMY',
    'SecondMarkerZCoord' : 'SMZ',
    'ThirdMarkerXCoord' : 'TMX',
    'ThirdMarkerYCoord' : 'TMY',
    'ThirdMarkerZCoord' : 'TMZ'
}
X = X.rename(columns=dict_renames)
X.head()

Unnamed: 0,id,FMX,FMY,FMZ,SMX,SMY,SMZ,TMX,TMY,TMZ
0,1,0.689,-1.453,4.417,0.881,-1.544,4.547,0.718,-1.607,4.369
1,1,0.687,-1.452,4.418,0.881,-1.544,4.547,0.715,-1.607,4.371
2,1,0.687,-1.451,4.419,0.881,-1.544,4.545,0.714,-1.607,4.371
3,1,0.685,-1.45,4.42,0.882,-1.544,4.541,0.711,-1.608,4.373
4,1,0.684,-1.449,4.42,0.883,-1.544,4.54,0.709,-1.607,4.37


<h3>Предобработка датасета</h3>

In [104]:
#удаление лишних временных рядов
X = X[X['id'] != 27]
X = X[X['id'] != 24]
X = X.reset_index(drop=True)
X.head()

Unnamed: 0,id,FMX,FMY,FMZ,SMX,SMY,SMZ,TMX,TMY,TMZ
0,1,0.689,-1.453,4.417,0.881,-1.544,4.547,0.718,-1.607,4.369
1,1,0.687,-1.452,4.418,0.881,-1.544,4.547,0.715,-1.607,4.371
2,1,0.687,-1.451,4.419,0.881,-1.544,4.545,0.714,-1.607,4.371
3,1,0.685,-1.45,4.42,0.882,-1.544,4.541,0.711,-1.608,4.373
4,1,0.684,-1.449,4.42,0.883,-1.544,4.54,0.709,-1.607,4.37


In [105]:
#приведение к одинаковому количеству точек для временных рядов
numeric_cols = X.drop(columns=['id'])
X2 = X.iloc[0:0]
for i in range(1, 259):
    if i in [24, 27]:
        continue
    X_id = X[X['id'] == i].reset_index(drop=True)
    for col in numeric_cols:
        X_id = X_id[:900]
        X_id = X_id.reset_index(drop=True)
    X2 = pd.concat([X2, X_id], axis=0, ignore_index=True)

In [106]:
X = X2

<h3>Вычисление новых признаков</h3>

In [107]:
from pyts.approximation import PiecewiseAggregateApproximation

In [108]:
from pyts.approximation import DiscreteFourierTransform

In [109]:
#расчет новых преобразований
X2 = X.iloc[0:0]
for i in range(1, 259):
    if i in [24, 27]:
        continue
    X_id = X[X['id'] == i].reset_index(drop=True)
    for col in numeric_cols:
        X_id[col+'_roll10_mean'] = X_id[col].rolling(10).mean()
        X_id[col+'_pct10'] = X_id[col].pct_change(periods=10)


        arr = X_id[col].to_numpy()
        arr = arr.reshape(-1,1)

        transformer = DiscreteFourierTransform()
        X_id[col+'_discrete_furier'] = transformer.fit_transform(arr)
        transformer = PiecewiseAggregateApproximation()
        X_id[col+'_piecewise_agg'] = transformer.fit_transform(arr)

        X_id = X_id[10:]
        X_id = X_id.reset_index(drop=True)
    X2 =  pd.concat([X2, X_id], axis=0, ignore_index=True)

In [110]:
X = X2

In [111]:
# dimensions = ['X', 'Y', 'Z']
# markers = ['F', 'S', 'T']
# for dimension in dimensions:
#     markers_dimension = ['FM'+dimension, 'SM'+dimension, 'TM'+dimension]
#     X['Mean_'+dimension] = X[markers_dimension].mean(axis=1)
#     X['Median_'+dimension] = X[markers_dimension].median(axis=1)
#     X['Std_'+dimension] = X[markers_dimension].std(axis=1)
# for marker in markers:
#     markers_dimension = [marker+'MX', marker+'MY', marker+'MZ']
#     X['Mean_'+ marker] = X[markers_dimension].mean(axis=1)
#     X['Median_'+ marker] = X[markers_dimension].median(axis=1)
#     X['Std_'+marker] = X[markers_dimension].std(axis=1)

<h3>Подготовка данных для библиотеки pyts</h3>

In [112]:
#преобразование датасета для библиотеки sktime
numeric_cols = X.drop(columns='id').columns
X_3d = []
for id in range(1, 259):
    if id in [24, 27]:
        continue
    X_id = X[X['id'] == id]
    instance_features = []
    for col in numeric_cols:
        lst = X_id[col].to_list()
        instance_features.append(lst)
    X_3d.append(instance_features)
X_3d = np.array(X_3d)

In [113]:
#преобразование датасета для библиотеки sktime
y_3d = []
for id in range(1, 259):
    if id in [24, 27]:
        continue
    df_id = df[df['id'] == id]
    y_3d.append(df_id['BreathingType'].iloc[0])
y_3d = np.array(y_3d)

In [114]:
from sklearn.metrics import recall_score, accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
#расчет метрик по тесовой выборке
def calculate_metrics(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print("Accuracy: ", accuracy)
    print("Recall: ", recall)
    print("Precision: ", precision)
    print("F1-score: ", f1)
    report = classification_report(y_test, y_pred)
    print(report)

In [115]:
#разделение на обучающую(80%) и тестовую (20%) выборки
X_train, X_test, y_train, y_test = train_test_split(X_3d, y_3d, test_size=0.3, random_state=42)

<h3>Тестирование моделей классификации временных рядов</h3>

<h3>BOSSVC</h3>

In [116]:
from pyts.multivariate.classification import MultivariateClassifier
from pyts.classification import BOSSVS

In [117]:
clf = MultivariateClassifier(BOSSVS(strategy='normal'))

In [118]:
details = clf.fit(X_train, y_train)

In [119]:
y_pred = clf.predict(X_test)

In [120]:
calculate_metrics(y_test, y_pred)

Accuracy:  0.3116883116883117
Recall:  0.3116883116883117
Precision:  0.3942857142857143
F1-score:  0.16971664698937425
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        30
           1       0.31      0.96      0.46        24
           2       1.00      0.04      0.08        23

    accuracy                           0.31        77
   macro avg       0.44      0.33      0.18        77
weighted avg       0.39      0.31      0.17        77



<h3>KNeighbors</h3>

In [121]:
from pyts.classification import KNeighborsClassifier

In [122]:
clf = MultivariateClassifier(KNeighborsClassifier())

In [123]:
details = clf.fit(X_train, y_train)

In [124]:
y_pred = clf.predict(X_test)

In [125]:
calculate_metrics(y_test, y_pred)

Accuracy:  0.23376623376623376
Recall:  0.23376623376623376
Precision:  0.22299511129602662
F1-score:  0.22505779577118748
              precision    recall  f1-score   support

           0       0.29      0.37      0.32        30
           1       0.17      0.17      0.17        24
           2       0.19      0.13      0.15        23

    accuracy                           0.23        77
   macro avg       0.22      0.22      0.22        77
weighted avg       0.22      0.23      0.23        77



<h3>SAXVSM</h3>

In [126]:
from pyts.classification import SAXVSM

In [127]:
clf = MultivariateClassifier(SAXVSM(window_size=64, word_size=12, n_bins=5, strategy='normal'))

In [128]:
details = clf.fit(X_train, y_train)

In [129]:
y_pred = clf.predict(X_test)

In [130]:
calculate_metrics(y_test, y_pred)

Accuracy:  0.3116883116883117
Recall:  0.3116883116883117
Precision:  0.37896389324960755
F1-score:  0.235156206159341
              precision    recall  f1-score   support

           0       0.43      0.10      0.16        30
           1       0.40      0.08      0.14        24
           2       0.29      0.83      0.43        23

    accuracy                           0.31        77
   macro avg       0.37      0.34      0.24        77
weighted avg       0.38      0.31      0.24        77



<h3>Time Series Forest</h3>

In [131]:
from pyts.classification import TimeSeriesForest

In [132]:
clf = MultivariateClassifier(TimeSeriesForest())

In [133]:
details = clf.fit(X_train, y_train)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [134]:
y_pred = clf.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.42857142857142855
Recall:  0.42857142857142855
Precision:  0.4272941451352246
F1-score:  0.4245647969052224
              precision    recall  f1-score   support

           0       0.44      0.37      0.40        30
           1       0.39      0.38      0.38        24
           2       0.45      0.57      0.50        23

    accuracy                           0.43        77
   macro avg       0.43      0.44      0.43        77
weighted avg       0.43      0.43      0.42        77



<h3>Time Series Bag of Features</h3>

In [135]:
from pyts.classification import TSBF

In [136]:
clf = MultivariateClassifier(TSBF())

In [137]:
details = clf.fit(X_train, y_train)

  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(
  warn(


In [138]:
y_pred = clf.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.23376623376623376
Recall:  0.23376623376623376
Precision:  0.23304758482624097
F1-score:  0.23323934511305824
              precision    recall  f1-score   support

           0       0.25      0.27      0.26        30
           1       0.22      0.21      0.21        24
           2       0.23      0.22      0.22        23

    accuracy                           0.23        77
   macro avg       0.23      0.23      0.23        77
weighted avg       0.23      0.23      0.23        77

