In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import  LabelEncoder

<h3>Загрузка датасета</h3>

In [69]:
#загрузка датасета
df = pd.read_csv('coords_data_numerated.csv')
df = df.drop(columns=['Unnamed: 0', 'GSR'])
df.head()

Unnamed: 0,id,BreathingType,TimeStamp_sec,FirstMarkerXCoord,FirstMarkerYCoord,FirstMarkerZCoord,SecondMarkerXCoord,SecondMarkerYCoord,SecondMarkerZCoord,ThirdMarkerXCoord,ThirdMarkerYCoord,ThirdMarkerZCoord
0,1,0,0.058895,0.689,-1.453,4.417,0.881,-1.544,4.547,0.718,-1.607,4.369
1,1,0,0.111889,0.687,-1.452,4.418,0.881,-1.544,4.547,0.715,-1.607,4.371
2,1,0,0.158371,0.687,-1.451,4.419,0.881,-1.544,4.545,0.714,-1.607,4.371
3,1,0,0.262516,0.685,-1.45,4.42,0.882,-1.544,4.541,0.711,-1.608,4.373
4,1,0,0.29562,0.684,-1.449,4.42,0.883,-1.544,4.54,0.709,-1.607,4.37


In [70]:
y = df['BreathingType']
X = df.drop(columns=['BreathingType','TimeStamp_sec'])
X.tail()

Unnamed: 0,id,FirstMarkerXCoord,FirstMarkerYCoord,FirstMarkerZCoord,SecondMarkerXCoord,SecondMarkerYCoord,SecondMarkerZCoord,ThirdMarkerXCoord,ThirdMarkerYCoord,ThirdMarkerZCoord
230921,258,1.063,-1.649,3.843,1.093,-1.839,3.624,1.03,-1.867,3.859
230922,258,1.062,-1.648,3.842,1.094,-1.841,3.625,1.03,-1.867,3.857
230923,258,1.062,-1.648,3.842,1.093,-1.841,3.624,1.03,-1.868,3.855
230924,258,1.063,-1.647,3.842,1.094,-1.841,3.624,1.029,-1.868,3.853
230925,258,1.063,-1.647,3.842,1.094,-1.841,3.623,1.03,-1.868,3.853


In [71]:
#переименование столбцов
dict_renames = {
    'FirstMarkerXCoord' : 'FMX',
    'FirstMarkerYCoord' : 'FMY',
    'FirstMarkerZCoord' : 'FMZ',
    'SecondMarkerXCoord' :  'SMX',
    'SecondMarkerYCoord' : 'SMY',
    'SecondMarkerZCoord' : 'SMZ',
    'ThirdMarkerXCoord' : 'TMX',
    'ThirdMarkerYCoord' : 'TMY',
    'ThirdMarkerZCoord' : 'TMZ'
}
X = X.rename(columns=dict_renames)
X.head()

Unnamed: 0,id,FMX,FMY,FMZ,SMX,SMY,SMZ,TMX,TMY,TMZ
0,1,0.689,-1.453,4.417,0.881,-1.544,4.547,0.718,-1.607,4.369
1,1,0.687,-1.452,4.418,0.881,-1.544,4.547,0.715,-1.607,4.371
2,1,0.687,-1.451,4.419,0.881,-1.544,4.545,0.714,-1.607,4.371
3,1,0.685,-1.45,4.42,0.882,-1.544,4.541,0.711,-1.608,4.373
4,1,0.684,-1.449,4.42,0.883,-1.544,4.54,0.709,-1.607,4.37


<h3>Предобработка датасета</h3>

In [72]:
#удаление лишних временных рядов
X = X[X['id'] != 27]
X = X[X['id'] != 24]
X = X.reset_index(drop=True)
X.head()

Unnamed: 0,id,FMX,FMY,FMZ,SMX,SMY,SMZ,TMX,TMY,TMZ
0,1,0.689,-1.453,4.417,0.881,-1.544,4.547,0.718,-1.607,4.369
1,1,0.687,-1.452,4.418,0.881,-1.544,4.547,0.715,-1.607,4.371
2,1,0.687,-1.451,4.419,0.881,-1.544,4.545,0.714,-1.607,4.371
3,1,0.685,-1.45,4.42,0.882,-1.544,4.541,0.711,-1.608,4.373
4,1,0.684,-1.449,4.42,0.883,-1.544,4.54,0.709,-1.607,4.37


In [73]:
#приведение к одинаковому количеству точек для временных рядов
numeric_cols = X.drop(columns=['id'])
X2 = X.iloc[0:0]
for i in range(1, 259):
    if i in [24, 27]:
        continue
    X_id = X[X['id'] == i].reset_index(drop=True)
    for col in numeric_cols:
        X_id = X_id[:900]
        X_id = X_id.reset_index(drop=True)
    X2 = pd.concat([X2, X_id], axis=0, ignore_index=True)

In [74]:
X = X2

<h3>Вычисление новых признаков</h3>

In [75]:
#расчет новых преобразований
X2 = X.iloc[0:0]
for i in range(1, 259):
    if i in [24, 27]:
        continue
    X_id = X[X['id'] == i].reset_index(drop=True)
    for col in numeric_cols:
        X_id[col+'_roll10_mean'] = X_id[col].rolling(10).mean()
        X_id[col+'_roll10_median'] = X_id[col].rolling(10).median()
        X_id[col+'_change10'] = X_id[col].diff(periods=10)
        X_id[col+'_pct10'] = X_id[col].pct_change(periods=10)
        X_id[col+'_furier'] = np.abs(np.fft.fft(X_id[col]))

        X_id = X_id[10:]
        X_id = X_id.reset_index(drop=True)
    X2 =  pd.concat([X2, X_id], axis=0, ignore_index=True)

In [76]:
X = X2

In [62]:
# dimensions = ['X', 'Y', 'Z']
# markers = ['F', 'S', 'T']
# for dimension in dimensions:
#     markers_dimension = ['FM'+dimension, 'SM'+dimension, 'TM'+dimension]
#     X['Mean_'+dimension] = X[markers_dimension].mean(axis=1)
#     X['Median_'+dimension] = X[markers_dimension].median(axis=1)
#     X['Std_'+dimension] = X[markers_dimension].std(axis=1)
# for marker in markers:
#     markers_dimension = [marker+'MX', marker+'MY', marker+'MZ']
#     X['Mean_'+ marker] = X[markers_dimension].mean(axis=1)
#     X['Median_'+ marker] = X[markers_dimension].median(axis=1)
#     X['Std_'+marker] = X[markers_dimension].std(axis=1)

<h3>Подготовка данных для библиотеки Sktime</h3>

In [77]:
#преобразование датасета для библиотеки sktime
numeric_cols = X.drop(columns='id').columns
X_3d = []
for id in range(1, 259):
    if id in [24, 27]:
        continue
    X_id = X[X['id'] == id]
    instance_features = []
    for col in numeric_cols:
        lst = X_id[col].to_list()
        instance_features.append(lst)
    X_3d.append(instance_features)
X_3d = np.array(X_3d)

In [78]:
#преобразование датасета для библиотеки sktime
y_3d = []
for id in range(1, 259):
    if id in [24, 27]:
        continue
    df_id = df[df['id'] == id]
    y_3d.append(df_id['BreathingType'].iloc[0])
y_3d = np.array(y_3d)

In [79]:
from sklearn.metrics import recall_score, accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import classification_report
#расчет метрик по тесовой выборке
def calculate_metrics(y_test, y_pred):
    accuracy = accuracy_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    print("Accuracy: ", accuracy)
    print("Recall: ", recall)
    print("Precision: ", precision)
    print("F1-score: ", f1)
    report = classification_report(y_test, y_pred)
    print(report)

In [80]:
#разделение на обучающую(80%) и тестовую (20%) выборки
X_train, X_test, y_train, y_test = train_test_split(X_3d, y_3d, test_size=0.3, random_state=42)

<h3>Тестирование моделей классификации временных рядов</h3>

<h4>Rocket Classifier</h4>

In [81]:
from sktime.classification.kernel_based import RocketClassifier

rocket = RocketClassifier(num_kernels=2000)

In [82]:
details = rocket.fit(X_train, y_train)

In [83]:
y_pred = rocket.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.5584415584415584
Recall:  0.5584415584415584
Precision:  0.5431719795356159
F1-score:  0.5398292456154502
              precision    recall  f1-score   support

           0       0.60      0.80      0.69        30
           1       0.55      0.50      0.52        24
           2       0.47      0.30      0.37        23

    accuracy                           0.56        77
   macro avg       0.54      0.53      0.53        77
weighted avg       0.54      0.56      0.54        77



<h3>HIVE COTE (Hierarchical Vote Collective of Transformation-based Ensembles)</h3>

In [87]:
from sktime.classification.hybrid import HIVECOTEV2

hc2 = HIVECOTEV2(time_limit_in_minutes=0.2)

In [88]:
details = hc2.fit(X_train, y_train)

In [89]:
y_pred = hc2.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.5064935064935064
Recall:  0.5064935064935064
Precision:  0.5038506194266563
F1-score:  0.5047936491258977
              precision    recall  f1-score   support

           0       0.58      0.60      0.59        30
           1       0.48      0.50      0.49        24
           2       0.43      0.39      0.41        23

    accuracy                           0.51        77
   macro avg       0.50      0.50      0.50        77
weighted avg       0.50      0.51      0.50        77



<h3>Catch22 (Canonical Time-series Characteristics)</h3>

In [90]:
from sktime.classification.feature_based import Catch22Classifier


model = Catch22Classifier()

In [None]:
details = model.fit(X_train, y_train)

In [86]:
y_pred = model.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.5974025974025974
Recall:  0.5974025974025974
Precision:  0.6005138237443138
F1-score:  0.5942201275271831
              precision    recall  f1-score   support

           0       0.66      0.63      0.64        30
           1       0.58      0.46      0.51        24
           2       0.55      0.70      0.62        23

    accuracy                           0.60        77
   macro avg       0.60      0.60      0.59        77
weighted avg       0.60      0.60      0.59        77



<h3>DrCIF (Diverse Representation Canonical Interval Forest Classifier)</h3>

In [92]:
from sktime.classification.interval_based import DrCIF

model = DrCIF(time_limit_in_minutes=0.2)

In [93]:
details = model.fit(X_train, y_train)

In [94]:
y_pred = model.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.38961038961038963
Recall:  0.38961038961038963
Precision:  0.3977792393941462
F1-score:  0.391522953475394
              precision    recall  f1-score   support

           0       0.54      0.47      0.50        30
           1       0.26      0.25      0.26        24
           2       0.36      0.43      0.39        23

    accuracy                           0.39        77
   macro avg       0.39      0.38      0.38        77
weighted avg       0.40      0.39      0.39        77



<h3>Shapelet Transform</h3>

In [95]:
from sktime.classification.shapelet_based import ShapeletTransformClassifier

model = ShapeletTransformClassifier(time_limit_in_minutes=0.2)

In [96]:
details = model.fit(X_train, y_train)

In [97]:
y_pred = model.predict(X_test)
calculate_metrics(y_test, y_pred)

Accuracy:  0.42857142857142855
Recall:  0.42857142857142855
Precision:  0.4315808329601434
F1-score:  0.42997300174747427
              precision    recall  f1-score   support

           0       0.55      0.53      0.54        30
           1       0.32      0.33      0.33        24
           2       0.39      0.39      0.39        23

    accuracy                           0.43        77
   macro avg       0.42      0.42      0.42        77
weighted avg       0.43      0.43      0.43        77

