In [None]:
pip show sktime

In [1]:
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix
from sklearn.model_selection import cross_val_predict, KFold

import sklearn
import numpy as np
import os
import sys
from joblib import parallel_backend



In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# assume X is the multivariate time-series with shape (N, T, M)
# assume y is the class labels with shape (N,)

# Read the dimensions off the data instead of relying on N, T and M having
# been defined somewhere else in the kernel.
N, T, M = X.shape
y_arr = np.asarray(y)  # positional indexing below must not depend on y's container type

k = 5  # number of nearest neighbors to consider

# Split the sample indices once. With a fixed random_state the split depends
# only on the number of samples, so this is the same partition the per-variable
# split produced, and every variable is evaluated on the same test samples.
train_idx, test_idx = train_test_split(np.arange(N), test_size=0.2, random_state=42)
y_train, y_test = y_arr[train_idx], y_arr[test_idx]

# split the multivariate time-series into separate univariate time-series, one for each variable
X_univariate = np.split(X, M, axis=2)

# list to store predicted class labels for each univariate time-series
y_pred_univariate = []

# perform KNN classification on each univariate time-series independently
for X_var in X_univariate:
    series = X_var.reshape(N, T)  # drop the trailing singleton variable axis
    X_train, X_test = series[train_idx], series[test_idx]

    # KNN classification
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(X_train, y_train)

    # predict class labels
    y_pred_var = clf.predict(X_test)
    y_pred_univariate.append(y_pred_var)

# ensemble the results by taking the majority vote across variables for each test sample.
# Votes are first encoded as 0..n_classes-1 so that np.bincount also works for
# float, negative or string labels; ties go to the smallest label, exactly as
# np.argmax(np.bincount(...)) does for non-negative integer labels.
votes = np.stack(y_pred_univariate, axis=0)  # shape (M, n_test)
classes, encoded = np.unique(votes, return_inverse=True)
encoded = encoded.reshape(votes.shape)  # inverse may come back flattened depending on NumPy version
winners = np.apply_along_axis(
    lambda col: np.argmax(np.bincount(col, minlength=classes.size)),
    axis=0,
    arr=encoded,
)
y_pred_ensemble = classes[winners]

# compute the accuracy of the final classification
accuracy = accuracy_score(y_test, y_pred_ensemble)

print("Accuracy of KNN classification on multivariate time-series:", accuracy)


In [None]:
# AlignerDTWfromDist is imported from the submodule that defines it, so the
# import does not depend on the package-level re-exports of sktime.alignment.
from sktime.alignment.dtw_python import AlignerDTWfromDist
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.dists_kernels.compose_from_align import DistFromAligner
from sktime.dists_kernels.scipy_dist import ScipyDist

# multivariate euclidean distance between vectors
eucl_dist = ScipyDist()
# dtw-d alignment = DTW alignment using multivariate euclidean distance
dtw_d_align = AlignerDTWfromDist(eucl_dist)
# dtw-d distance between time series = distance matrix of the dtw-d aligner
dtw_d_dist = DistFromAligner(dtw_d_align)

# k-NN time-series classifier using the dtw-d distance built above.
# NOTE: the variable keeps its historical name `ddtw_knn`; the distance is
# DTW-D (multivariate/dependent DTW), not derivative DTW.
ddtw_knn = KNeighborsTimeSeriesClassifier(distance=dtw_d_dist)

In [2]:
# Names of the datasets to inspect. Each name <d> is expected to have two files
# under `datasets_path`: "<d>_Dataset.npy" and "<d>_Labels.npy".
datasets = ["PHM2022_Multivar", "PHM2022_Univar_PDIN"]
datasets_path = "../datasets"

for dataset in datasets:
    # Load the samples and report their array shape.
    Dataset_name = f"{dataset}_Dataset"
    Dataset = np.load(f"{datasets_path}/{Dataset_name}.npy")
    print(Dataset.shape)

    # Load the matching labels (only loaded here, not inspected).
    Labels_name = f"{dataset}_Labels"
    Labels = np.load(f"{datasets_path}/{Labels_name}.npy")


(53785, 749, 3)
(53785, 749, 1)


In [3]:
# Base directory under which the "Results/" tree is written; adjust for your machine.
root_dir = './'

# Alternative to the package import below: put the classifier folder on sys.path.
#sys.path.append("./classifiers/")

# Folder that holds the "<name>_Dataset.npy" / "<name>_Labels.npy" files.
# The list of dataset names (`datasets`) is not redefined here; it is the one
# left in the kernel by the earlier dataset-loading cell:
#datasets = ["PHM2022_Multivar", "PHM2022_Univar_PDIN"]
datasets_path = "../datasets"

# Path of the classifier modules (not referenced by the visible code).
#algorithms = [LR_module]
algorirhms_path = "./classifiers"

# Classifier wrapper modules, imported in the same order as before.
from classifiers import (
    LR_module,
    SVM_module,
    RF_module,
    NB_module,
    TSKNN_module,
    PF_module,
)

# Number of cross-validation folds passed to the classifier modules.
n_folds = 5

# perform cross-validation for each dataset and algorithm combination
for dataset in datasets:
    # Load the samples and labels of the current dataset.
    Dataset_name = dataset + "_Dataset"
    Dataset = np.load(f"{datasets_path}/{Dataset_name}.npy")

    Labels_name = dataset + "_Labels"
    Labels = np.load(f"{datasets_path}/{Labels_name}.npy")

    # Create a folder for results.
    # exist_ok=True makes this a no-op when the folder already exists, which
    # also covers the case where another process or machine creates it in the
    # meantime. Genuine failures (e.g. missing permissions, or a regular file
    # occupying the path) now raise instead of being silently swallowed.
    results_path = root_dir + "Results/" + Dataset_name
    os.makedirs(results_path, exist_ok=True)

    # Make the threading backend (up to 100 workers) the joblib default for
    # any joblib-parallel code run inside this block.
    with parallel_backend('threading', n_jobs=100):
        # Exactly one classifier module is enabled at a time; uncomment the
        # one to run.

        #Run The Logistic Regression (LR) module
        #LR_module.LR(results_path, Dataset_name, Dataset, Labels, nb_folds= n_folds, C=10)

        #Run The Support Vector Machine (SVM) Module
        SVM_module.SVM(results_path, Dataset_name, Dataset, Labels, nb_folds=n_folds, C=10)

        #Run The Random Forest (RF) Module
        #RF_module.RF(results_path, Dataset_name, Dataset, Labels, nb_folds= n_folds, n_trees=500)

        #Run The Naive Bayes (NB) Module
        #NB_module.NB(results_path, Dataset_name, Dataset, Labels, nb_folds= n_folds)

        #Run The TSKNN Module
        #TSKNN_module.KNN(results_path, Dataset_name, Dataset, Labels, dis= 'euclidean')

        #Run The PF Module
        #PF_module.PF(results_path, Dataset_name, Dataset, Labels, nb_folds= n_folds, n_estimators= 50, n_stump_evaluations=1)



    




 The dataset shape is:(53785, 749, 3)

 The number of data samples (N) is:53785

 The number of TS length (T) is:749

 The number of TS dimention (M) is:3
SVM is not capable of doing classification on MTS. so it will be done on only the first dimension
0.9853118899321373
0.9852944206881837
[[3879    4    4    0    1    0   15    1    2    1    3    1]
 [  15  579    0    0    0   12    0    0    0    0    0   11]
 [   9    0  690    0    0    0    0    0    0    0    0    0]
 [   4    0    0  616    0    0    0    0    0    0    1    0]
 [   1    0    0    0  647    0    0    0    0    0    0    0]
 [   0   21    0    0    0  576    0    0    0    0    0    4]
 [  12    0    0    0    0    0  596    0    0    0    0    0]
 [   4    0    0    0    0    0    0  609    0    0    0    0]
 [   5    0    0    0    0    0    0    3  622    0    0    0]
 [   0    0    0    0    0    0    0    0    0  623    0    0]
 [   5    0    0    0    0    0    0    0    0    0  507    0]
 [   8    9    

0.9858696662638282
0.9858272288499511
[[3893    0    4    0    0    0   11    0    3    1    3    0]
 [  17  597    0    0    0   10    0    0    0    0    0   15]
 [   3    0  707    0    0    0    0    0    0    0    0    0]
 [   6    1    0  582    0    0    0    0    0    0    0    0]
 [   3    0    0    0  623    0    0    0    0    0    0    0]
 [   2   17    0    0    0  597    0    0    0    0    0    2]
 [   6    0    0    0    0    0  579    0    0    0    0    0]
 [   4    0    0    0    0    0    0  611    4    0    0    0]
 [   7    0    0    0    0    0    0    6  602    0    0    0]
 [   0    0    0    0    0    0    0    0    0  617    0    0]
 [   4    0    0    3    0    0    0    0    0    0  554    1]
 [  10    6    0    1    0    2    0    0    0    0    0  643]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99      3915
           1       0.96      0.93      0.95       639
           2       0.99      1.00      1.0