In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

In [3]:
datasets = [
"CWRU_12k_FE_multivar"
]

datasets_path = "../datasets"

for dataset in datasets:
    Dataset_name = dataset + "_Dataset"
    Dataset = np.load(datasets_path + "/" + Dataset_name + ".npy")
    Dataset = Dataset
    print(Dataset.shape)
    
    Labels_name = dataset + "_Labels"
    Labels = np.load(datasets_path + "/"  + Labels_name + ".npy")
    Labels = Labels

(5400, 1000, 3)


In [4]:
print(f"\n The dataset shape is:{Dataset.shape}")
N=Dataset.shape[0]
print(f"\n The number of data samples (N) is:{N}")

T=Dataset.shape[1]
print(f"\n The number of TS length (T) is:{T}")

M=Dataset.shape[2]
print(f"\n The number of TS dimention (M) is:{M}")


 The dataset shape is:(5400, 1000, 3)

 The number of data samples (N) is:5400

 The number of TS length (T) is:1000

 The number of TS dimention (M) is:3


In [5]:
# split the multivariate time-series into separate univariate time-series, one for each variable
X_univariate = np.split(Dataset, M, axis=2)
len(X_univariate)

3

In [6]:
X_univariate[0]

array([[[ 0.08527844],
        [ 0.10152196],
        [-0.07634451],
        ...,
        [-0.04142096],
        [-0.05238533],
        [-0.01299481]],

       [[-0.12791766],
        [-0.09827325],
        [ 0.10477066],
        ...,
        [-0.0998976 ],
        [-0.15309511],
        [-0.21035349]],

       [[-0.01583743],
        [ 0.07553234],
        [-0.06741058],
        ...,
        [ 0.03614182],
        [ 0.22740918],
        [-0.01299481]],

       ...,

       [[ 0.04125852],
        [ 0.09307533],
        [-0.03151242],
        ...,
        [-0.11971469],
        [-0.10200926],
        [-0.09145098]],

       [[ 0.00617253],
        [ 0.08381653],
        [ 0.08576575],
        ...,
        [ 0.06123804],
        [ 0.23910451],
        [ 0.03784739]],

       [[-0.18014056],
        [-0.0328119 ],
        [ 0.06546136],
        ...,
        [-0.12637453],
        [ 0.00633497],
        [-0.02712667]]])

In [7]:
Dataset[:,:,0].shape

(5400, 1000)

In [8]:
# list to store predicted class labels for each univariate time-series
y_pred_univariate = []

# perform KNN classification on each univariate time-series independently
for X_var in X_univariate:
    # train-test split
    X_train, X_test, y_train, y_test = train_test_split(X_var.reshape(N, T), Labels.squeeze(), test_size=0.2, shuffle=True)
    print(X_train.shape)
    print(X_test.shape)
    print(y_train.shape)
    print(y_test.shape)
    
    # KNN classification
    from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
    classifier = KNeighborsTimeSeriesClassifier(distance= "euclidean", n_jobs=10)
    classifier.fit(X_train, y_train)
    
        
    # predict class labels
    y_pred_var = classifier.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred_var)
    print(f"accuracy for dimension:{accuracy}")
    y_pred_univariate.append(y_pred_var)
    print(len(y_pred_univariate))
    


(4320, 1000)
(1080, 1000)
(4320,)
(1080,)
accuracy for dimension:0.3638888888888889
1
(4320, 1000)
(1080, 1000)
(4320,)
(1080,)
accuracy for dimension:0.3990740740740741
2
(4320, 1000)
(1080, 1000)
(4320,)
(1080,)
accuracy for dimension:0.7916666666666666
3


In [9]:
np.unique(Labels)

array([0., 1., 2., 3., 4., 5., 6., 7., 8.])

In [10]:
y_pred_univariate

[array([6., 8., 6., ..., 6., 6., 8.]),
 array([0., 6., 6., ..., 6., 1., 6.]),
 array([1., 8., 7., ..., 7., 3., 1.])]

In [48]:
y_pred_univariate[2][0]

6.0

In [14]:
# ensemble the results by taking the majority vote for each time step
#y_pred_ensemble = np.apply_along_axis(lambda x: np.argmax(np.bincount(x)), axis=0, arr=y_pred_univariate)
y_pred_ensemble = np.apply_along_axis(lambda x: np.argmax(np.bincount(np.round(x).astype(int))), axis=0, arr=y_pred_univariate)


In [12]:
y_pred_ensemble

array([0, 8, 6, ..., 6, 1, 1])

In [13]:
# compute the accuracy of the final classification
accuracy = accuracy_score(y_test, y_pred_ensemble)

f1 = f1_score(y_test, y_pred_ensemble, average='weighted')
print(f1)

print("Accuracy of KNN classification on multivariate time-series:", accuracy)


0.30643355338395634
Accuracy of KNN classification on multivariate time-series: 0.3194444444444444
