# Mind Reading Dataset

In [56]:
from aeon.datasets import load_from_tsfile
import numpy as np # for some mathematical operations
def load_data(DATA_PATH,t):
    if t=='train':
        train_x, train_y = load_from_tsfile(DATA_PATH + "/MindReading/MindReading_TRAIN.ts")
        return [train_x,train_y]
    elif t=='test':
        test_x, test_y = load_from_tsfile(DATA_PATH + "/MindReading/MindReading_TEST.ts")
        return [test_x, test_y]

In [57]:
#Test the load_data function
[data, data_y] = load_data("datasets",'train')  # 'path', 'test'/'train'

In [58]:
print('First time series:')
print(data[0],"\n")
print('First target:')
print(data_y[0],"\n")
print('Shape of train dataset:') #(samples, features,timestamps) 
print(np.shape(data))

First time series:
[[ 4.4678e-13  3.9723e-12  1.1486e-11 ...  1.9081e-11  1.8362e-11
   1.8827e-11]
 [ 1.1228e-12  4.0343e-12 -7.8352e-13 ... -4.5558e-12 -5.0939e-12
  -4.4572e-12]
 [-1.1288e-11 -8.9216e-12 -4.1732e-12 ...  5.8279e-12  9.3328e-12
   5.6202e-12]
 ...
 [-1.8000e-11 -1.7135e-11 -9.7925e-12 ... -7.3061e-12 -8.2804e-12
  -1.1478e-11]
 [-7.6181e-12 -4.2700e-12 -6.4728e-12 ... -1.0940e-11 -5.6271e-12
  -2.8647e-12]
 [-1.5248e-11 -1.4270e-11 -7.2255e-12 ...  8.7034e-12  4.5966e-12
   1.5183e-12]] 

First target:
5 

Shape of train dataset:
(727, 204, 200)


In [59]:
from tslearn.preprocessing import TimeSeriesScalerMeanVariance

def standarize(data):
    scaler = TimeSeriesScalerMeanVariance(mu=0.0, std=1.0)  # Standardize to mean=0, std=1
    scaled_data_3d = scaler.fit_transform(data)
    return scaled_data_3d
    

In [60]:
# Transform from (samples, features,timestamps) to (samples, timestamps, features) to apply standarisation
reshaped_data = np.transpose(data, (0, 2, 1))
print("Original shape:", data.shape)          # (instances, features, timepoints)
print("Reshaped shape:", reshaped_data.shape) # (instances, timepoints, features)

Original shape: (727, 204, 200)
Reshaped shape: (727, 200, 204)


In [61]:
scaled_data=standarize(reshaped_data)
# scaled_data
print('First time series:')
print(scaled_data[0],"\n")

First time series:
[[-2.17755427 -0.05407343 -2.42725565 ... -2.34694313 -0.21684994
  -1.92298355]
 [-1.4915501   0.55616434 -2.04298104 ... -2.07882616  0.54557664
  -1.78658807]
 [-0.02951625 -0.45362984 -1.27189856 ...  0.19706845  0.04395684
  -0.80413621]
 ...
 [ 1.44833714 -1.24428338  0.35215858 ...  0.9677575  -0.97331027
   1.41736674]
 [ 1.30843239 -1.3570668   0.92131176 ...  0.66576171  0.23653894
   0.84461731]
 [ 1.39891321 -1.22361724  0.31843063 ... -0.32537218  0.8655904
   0.41530628]] 



In [62]:
np.shape(scaled_data)

(727, 200, 204)

## Test 1: intrinsic metrics before vs. after Feature selection

### Before FS:
* Representation Entropy correlation based
* Variance
* Redundancy Rate RED

TO DO:
* Information Gain Ratio

In [63]:
# Representation Entropy
import numpy as np
from scipy.linalg import eigh

def compute_representation_entropy(data):
    """
    Compute Representation Entropy (RE) of a multivariate dataset.
    
    Args:
        data (numpy.ndarray): 2D-Data with shape (samples, features).
    
    Returns:
        float: Representation Entropy (RE).
    """
    # Step 1: Compute the covariance matrix of the dataset (features x features)
    #covariance_matrix = np.cov(data, rowvar=False)  # rowvar=False means variables are columns

    block_size=100
    data_centered = data - np.mean(data, axis=0)
    num_features = data.shape[1]
    covariance_matrix = np.zeros((num_features, num_features), dtype=np.float64)

    for i in range(0, num_features, block_size):
        for j in range(i, num_features, block_size):
            block_i = data_centered[:, i:i+block_size]
            block_j = data_centered[:, j:j+block_size]
            block_cov = np.dot(block_i.T, block_j) / (data.shape[0] - 1)
            covariance_matrix[i:i+block_size, j:j+block_size] = block_cov
            if i != j:
                covariance_matrix[j:j+block_size, i:i+block_size] = block_cov.T


    # # Step 2: Compute eigenvalues of the covariance matrix
    # eigenvalues = np.linalg.eigvals(covariance_matrix) THIS METHOD WAS REPLACED BC OF INSTABILITY
    eigenvalues, eigenvectors = eigh(covariance_matrix)

    # Step 3: Normalize the eigenvalues to act as probabilities
    eigenvalues_sum = np.sum(eigenvalues)
    normalized_eigenvalues = eigenvalues / eigenvalues_sum

    # Step 4: Compute Representation Entropy using the formula
    representation_entropy = -np.sum(normalized_eigenvalues * np.log(normalized_eigenvalues))

    return representation_entropy

In [64]:
#Need to flatten the Time Dimension
data_flattened = scaled_data.reshape(-1, scaled_data.shape[2])
np.shape(data_flattened)

(145400, 204)

In [65]:
compute_representation_entropy(data_flattened)

3.584673534531154

In [66]:
import pandas as pd

# calculate variance
overall_variance = data_flattened.var().mean()
print('overall variance:', overall_variance)

# Compute the correlation matrix
corr_matrix = pd.DataFrame(data_flattened).corr().abs()
# Calculate average absolute correlation (excluding the diagonal)
avg_corr = (corr_matrix.values.sum() - len(corr_matrix)) / (len(corr_matrix) * (len(corr_matrix) - 1))
redundancy_rate = avg_corr
print("Redundancy Rate (Correlation-Based):", redundancy_rate)

overall variance: 0.9999999999999994
Redundancy Rate (Correlation-Based): 0.13251974452129833


In [67]:
# Load TEST dataset
[TESTdata, TESTdata_y] = load_data("datasets",'test')  # 'path', 'test'/'train'

In [68]:
#Preprocess the TEST data

reshaped_TESTdata = np.transpose(TESTdata, (0, 2, 1))
print("Original shape:", data.shape)          # (instances, features, timepoints)
print("Reshaped shape:", reshaped_data.shape) # (instances, timepoints, features)
scaled_TESTdata=standarize(reshaped_TESTdata)

Original shape: (727, 204, 200)
Reshaped shape: (727, 200, 204)


In [69]:
# Compute all intrinsic metrics again for original data_TEST

# Compute representation entropy
before_TESTdata_flattened = scaled_TESTdata.reshape(-1, scaled_TESTdata.shape[2])
before_representation_entropy = compute_representation_entropy(before_TESTdata_flattened)
print('before Representation entropy: ', before_representation_entropy)

# calculate variance
before_overall_variance = before_TESTdata_flattened.var().mean()
print('before overall variance:', before_overall_variance)

# Compute the correlation matrix
before_corr_matrix = pd.DataFrame(before_TESTdata_flattened).corr().abs()
# Calculate average absolute correlation (excluding the diagonal)
before_avg_corr = (before_corr_matrix.values.sum() - len(before_corr_matrix)) / (len(before_corr_matrix) * (len(before_corr_matrix) - 1))
before_redundancy_rate = before_avg_corr
print("before Redundancy Rate (Correlation-Based):", before_redundancy_rate)

before Representation entropy:  3.6266400216236834
before overall variance: 1.000000000000002
before Redundancy Rate (Correlation-Based): 0.12879010061670176


In [70]:
np.shape(before_TESTdata_flattened)

(130600, 204)

### Feature Selection 1: CLeVer Hybrid

In [71]:
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import os
os.environ["OMP_NUM_THREADS"] = "1"

def compute_dcpcs(data, variance_threshold=0.8):
    """
    Compute Descriptive Common Principal Components (DCPCs) for a set of multivariate time series.

    Parameters:
    - data: ndarray of shape (num_samples, num_features, time_steps), the time-series dataset.
    - variance_threshold: float, the cumulative variance explained to determine the number of PCs.

    Returns:
    - dcpc_loadings: ndarray of shape (num_dcpcs, num_features), loadings of the DCPCs.
    """
    num_samples, num_features, time_steps = data.shape

    # Step 1: Compute PCs for each MTS item
    pc_matrices = []  # Store PC loadings for each sample
    for sample in range(num_samples):
        # Compute correlation matrix for each sample
        correlation_matrix = np.corrcoef(data[sample])
        # Perform PCA on the correlation matrix
        pca = PCA()
        pca.fit(correlation_matrix)
        pc_matrices.append(pca.components_[:pca.n_components_])

    # Step 2: Compute DCPCs across all samples using SVD
    all_pc_matrices = np.concatenate(pc_matrices, axis=0)  # Combine PC loadings from all samples
    dcpc_covariance = all_pc_matrices.T @ all_pc_matrices
    eigvals, eigvecs = np.linalg.eigh(dcpc_covariance)
    sorted_indices = np.argsort(eigvals)[::-1]
    eigvecs = eigvecs[:, sorted_indices]  # Sort eigenvectors by eigenvalues

    # Select DCPCs explaining the desired variance threshold
    cumulative_variance = np.cumsum(eigvals[sorted_indices]) / np.sum(eigvals)
    num_dcpcs = np.searchsorted(cumulative_variance, variance_threshold) + 1
    dcpc_loadings = eigvecs[:, :num_dcpcs].T

    return dcpc_loadings

def cluster_features(dcpc_loadings, n_clusters):
    """
    Cluster features based on their DCPC loadings using K-means.

    Parameters:
    - dcpc_loadings: ndarray of shape (num_dcpcs, num_features), loadings of the DCPCs.
    - n_clusters: int, number of clusters.

    Returns:
    - cluster_labels: ndarray of shape (num_features,), cluster assignments for each feature.
    """
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    cluster_labels = kmeans.fit_predict(dcpc_loadings.T)
    return cluster_labels

def rank_features(dcpc_loadings, cluster_labels):
    """
    Rank features within each cluster based on their contribution to DCPCs.

    Parameters:
    - dcpc_loadings: ndarray of shape (num_dcpcs, num_features), loadings of the DCPCs.
    - cluster_labels: ndarray of shape (num_features,), cluster assignments for each feature.

    Returns:
    - ranked_features: dict, keys are cluster labels, values are ranked feature indices.
    """
    ranked_features = {}
    for cluster in np.unique(cluster_labels):
        cluster_indices = np.where(cluster_labels == cluster)[0]
        cluster_loadings = dcpc_loadings[:, cluster_indices]
        scores = np.linalg.norm(cluster_loadings, axis=0)  # L2 norm of loadings
        ranking = cluster_indices[np.argsort(scores)[::-1]]  # Sort by descending contribution
        ranked_features[cluster] = ranking
    return ranked_features

def select_top_features(ranked_features, top_n=1):
    """
    Select top-ranked features from each cluster.

    Parameters:
    - ranked_features: dict, keys are cluster labels, values are ranked feature indices.
    - top_n: int, number of features to select from each cluster.

    Returns:
    - selected_features: list, indices of selected features.
    """
    selected_features = []
    for features in ranked_features.values():
        selected_features.extend(features[:top_n])
    return selected_features

def clever_hybrid(data, variance_threshold=0.8, n_clusters=None, top_n=1):
    """
    Perform feature selection using the CLeVer-Hybrid algorithm.

    Parameters:
    - data: ndarray of shape (num_samples, num_features, time_steps), the time-series dataset.
    - variance_threshold: float, variance threshold for selecting DCPCs.
    - n_clusters: int, number of clusters (if None, sqrt of num_features is used).
    - top_n: int, number of features to select from each cluster.

    Returns:
    - selected_features: list, indices of selected features.
    """
    num_samples, num_features, _ = data.shape
    if n_clusters is None:
        n_clusters = int(np.sqrt(num_features))

    # Step 1: Compute DCPCs
    dcpc_loadings = compute_dcpcs(data, variance_threshold)

    # Step 2: Cluster features based on DCPC loadings
    cluster_labels = cluster_features(dcpc_loadings, n_clusters)

    # Step 3: Rank features within clusters
    ranked_features = rank_features(dcpc_loadings, cluster_labels)

    # Step 4: Select top features from each cluster
    selected_features = select_top_features(ranked_features, top_n)

    return selected_features

In [72]:
scaled_data_ift= np.transpose(scaled_data, (0, 2, 1))
np.shape(scaled_data_ift)

(727, 204, 200)

In [73]:
selected_features_CLeVerH=clever_hybrid(scaled_data_ift, n_clusters=25,top_n=1)
print("Selected features CLeVer Hybrid: ", selected_features_CLeVerH)

Selected features CLeVer Hybrid:  [47, 45, 160, 190, 141, 109, 118, 132, 81, 41, 121, 51, 181, 143, 4, 169, 0, 125, 21, 20, 193, 1, 199, 5, 58]


In [74]:
np.shape(scaled_TESTdata)

(653, 200, 204)

In [75]:
# Filter the TEST Dataset according to the selected features from CLeVer
selected_TESTdata_CLeVerH = scaled_TESTdata[:, :, selected_features_CLeVerH]
print('Filtered TEST dataset shape: ', np.shape(selected_TESTdata_CLeVerH))

Filtered TEST dataset shape:  (653, 200, 25)


In [76]:
# Compute all intrinsic metrics again for selected_TESTdata_CLeVerH

# Compute representation entropy
CLEVERH_TESTdata_flattened = selected_TESTdata_CLeVerH.reshape(-1, selected_TESTdata_CLeVerH.shape[2])
CLEVERH_representation_entropy = compute_representation_entropy(CLEVERH_TESTdata_flattened)
print('CLEVER Hybrid Representation entropy: ', CLEVERH_representation_entropy)

# calculate variance
CLEVERH_overall_variance = CLEVERH_TESTdata_flattened.var().mean()
print('CLEVER Hybrid overall variance:', CLEVERH_overall_variance)

# Compute the correlation matrix
CLEVERH_corr_matrix = pd.DataFrame(CLEVERH_TESTdata_flattened).corr().abs()
# Calculate average absolute correlation (excluding the diagonal)
CLEVERH_avg_corr = (CLEVERH_corr_matrix.values.sum() - len(CLEVERH_corr_matrix)) / (len(CLEVERH_corr_matrix) * (len(CLEVERH_corr_matrix) - 1))
CLEVERH_redundancy_rate = CLEVERH_avg_corr
print("CLEVER Hybrid Redundancy Rate (Correlation-Based):", CLEVERH_redundancy_rate)

CLEVER Hybrid Representation entropy:  2.825385295447174
CLEVER Hybrid overall variance: 0.9999999999999997
CLEVER Hybrid Redundancy Rate (Correlation-Based): 0.13899326959469616


### Feature Selection 2: CLeVer Cluster

In [77]:
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import numpy as np
import matplotlib.pyplot as plt
os.environ["OMP_NUM_THREADS"] = "1"

def clever_cluster(data, n_clusters=None, variance_threshold=0.8):
    """
    CLeVer-Cluster implementation.

    Parameters:
    - data: np.array of shape (samples, features, time_steps)
    - n_clusters: Number of feature clusters (if None, heuristic is used)
    - variance_threshold: Minimum variance explained by selected PCs

    Returns:
    - selected_features: List of representative feature indices for each cluster
    """
    num_samples, num_features, time_steps = data.shape

    # Step 1: Compute DCPC loadings
    dcpc_loadings = compute_dcpcs(data, variance_threshold=variance_threshold)  # Shape: (components, features)

    # Step 2: Transpose DCPC loadings to cluster features
    feature_embeddings = dcpc_loadings.T  # Shape: (features, components)

    # Step 3: Determine number of clusters
    if n_clusters is None:
        n_clusters = int(np.sqrt(num_features))  # Heuristic for cluster count

    # Step 4: Perform K-means clustering on DCPC loadings
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    cluster_labels = kmeans.fit_predict(feature_embeddings)

    # Step 5: Select representative features (closest to cluster centroids)
    selected_features = []
    for cluster in range(n_clusters):
        cluster_indices = np.where(cluster_labels == cluster)[0]
        centroid = kmeans.cluster_centers_[cluster]

        # Find the feature closest to the centroid
        distances = np.linalg.norm(feature_embeddings[cluster_indices] - centroid, axis=1)
        representative_feature = cluster_indices[np.argmin(distances)]
        selected_features.append(representative_feature)

    return selected_features

In [78]:
selected_features_CLeVerC = clever_cluster(scaled_data_ift,n_clusters=25)
print(f'CLeVer Cluster Selected feature indices: {selected_features_CLeVerC}')

CLeVer Cluster Selected feature indices: [47, 133, 160, 190, 141, 109, 118, 132, 81, 41, 121, 51, 181, 143, 4, 169, 0, 125, 21, 20, 193, 165, 199, 5, 58]


In [79]:
np.shape(scaled_TESTdata)

(653, 200, 204)

In [80]:
# Filter the TEST Dataset according to the selected features from CLeVer
selected_TESTdata_CLeVerC = scaled_TESTdata[:, :, selected_features_CLeVerC]
print('Filtered TEST dataset shape: ', np.shape(selected_TESTdata_CLeVerC))

Filtered TEST dataset shape:  (653, 200, 25)


In [81]:
# Compute all intrinsic metrics again for selected_TESTdata_CLeVerC

# Compute representation entropy
CLEVERC_TESTdata_flattened = selected_TESTdata_CLeVerC.reshape(-1, selected_TESTdata_CLeVerC.shape[2])
CLEVERC_representation_entropy = compute_representation_entropy(CLEVERC_TESTdata_flattened)
print('CLEVER Cluster Representation entropy: ', CLEVERC_representation_entropy)

# calculate variance
CLEVERC_overall_variance = CLEVERC_TESTdata_flattened.var().mean()
print('CLEVER Cluster overall variance:', CLEVERC_overall_variance)

# Compute the correlation matrix
CLEVERC_corr_matrix = pd.DataFrame(CLEVERC_TESTdata_flattened).corr().abs()
# Calculate average absolute correlation (excluding the diagonal)
CLEVERC_avg_corr = (CLEVERC_corr_matrix.values.sum() - len(CLEVERC_corr_matrix)) / (len(CLEVERC_corr_matrix) * (len(CLEVERC_corr_matrix) - 1))
CLEVERC_redundancy_rate = CLEVERC_avg_corr
print("CLEVER Cluster Redundancy Rate (Correlation-Based):", CLEVERC_redundancy_rate)

CLEVER Cluster Representation entropy:  2.838692187395427
CLEVER Cluster overall variance: 1.0000000000000002
CLEVER Cluster Redundancy Rate (Correlation-Based): 0.13755710813746946


### Feature Selection 3: CLeVer Rank

In [82]:
import numpy as np

def clever_ranking(data, num_features_to_select=5, variance_threshold=0.8):
    """
    CLeVer Ranking method for feature selection.

    Parameters:
    - data: np.array of shape (samples, features, time_steps)
    - num_features_to_select: Number of top-ranked features to select
    - variance_threshold: Variance threshold for PCA

    Returns:
    - selected_features: List of indices of the top-ranked features
    """
    # Step 1: Compute DCPC loadings
    dcpc_loadings = compute_dcpcs(data,variance_threshold=variance_threshold)

    # Step 2: Rank features based on their contribution to the DCPCs
    feature_scores = np.linalg.norm(dcpc_loadings, axis=1)  # L2 norm of DCPC loadings
    ranked_features = np.argsort(feature_scores)[::-1]  # Sort in descending order

    # Step 3: Select top features
    selected_features = ranked_features[:num_features_to_select]

    return selected_features

In [83]:
selected_features_CLeVerR = clever_ranking(scaled_data_ift, num_features_to_select=25)
print(f'CLeVer Rank Selected feature indices: {selected_features_CLeVerR}')

CLeVer Rank Selected feature indices: [ 22   8   3  44  76  19  30  31  14 107  59  39  97   4   6  79 126  26
  42 124 123  49 103 106  33]


In [84]:
# Filter the TEST Dataset according to the selected features from CLeVer
selected_TESTdata_CLeVerR = scaled_TESTdata[:, :, selected_features_CLeVerR]
print('Filtered TEST dataset shape: ', np.shape(selected_TESTdata_CLeVerR))

Filtered TEST dataset shape:  (653, 200, 25)


In [85]:
# Compute all intrinsic metrics again for selected_TESTdata_CLeVerR

# Compute representation entropy
CLEVERR_TESTdata_flattened = selected_TESTdata_CLeVerR.reshape(-1, selected_TESTdata_CLeVerR.shape[2])
CLEVERR_representation_entropy = compute_representation_entropy(CLEVERR_TESTdata_flattened)
print('CLEVER Cluster Representation entropy: ', CLEVERR_representation_entropy)

# calculate variance
CLEVERR_overall_variance = CLEVERR_TESTdata_flattened.var().mean()
print('CLEVER Cluster overall variance:', CLEVERR_overall_variance)

# Compute the correlation matrix
CLEVERR_corr_matrix = pd.DataFrame(CLEVERR_TESTdata_flattened).corr().abs()
# Calculate average absolute correlation (excluding the diagonal)
CLEVERR_avg_corr = (CLEVERR_corr_matrix.values.sum() - len(CLEVERR_corr_matrix)) / (len(CLEVERR_corr_matrix) * (len(CLEVERR_corr_matrix) - 1))
CLEVERR_redundancy_rate = CLEVERR_avg_corr
print("CLEVER Cluster Redundancy Rate (Correlation-Based):", CLEVERR_redundancy_rate)

CLEVER Cluster Representation entropy:  2.790011001438379
CLEVER Cluster overall variance: 1.0000000000000004
CLEVER Cluster Redundancy Rate (Correlation-Based): 0.14052007920900758


## Test 2: Perform Timeseries-k-Means and evaluate clustering performance UNSUPERVISED

Clustering evaluation Metrics:
* Silhouette 
* Davies-Bouldin Index

### Before FS

DTW Time series clsutering erfolgreich 133 min
clustering of training dataset not required, commented out on last line

In [86]:
from tslearn.clustering import TimeSeriesKMeans
seed = 0
np.random.seed(seed)
print("DTW k-means")
sdtw_km = TimeSeriesKMeans(n_clusters=5,
                           metric="dtw",
                           verbose=True,
                           random_state=seed)
#y_pred = sdtw_km.fit_predict(scaled_data)

DTW k-means


Clustering should be done on the TEST data

In [87]:
np.shape(scaled_TESTdata)

(653, 200, 204)

thefit_predict(X, y=None)
Fit k-means clustering using X and then predict the closest cluster each time series in X belongs to.

Parameters:
Xarray-like of shape=(n_ts, sz, d)
n_ts: instance, sz:timestamps, d:features

In [88]:
before_y_pred = sdtw_km.fit_predict(scaled_TESTdata)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    6.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.9s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    6.2s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.7s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]

73226.986 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    5.8s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.4s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   18.3s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   24.1s


39979.520 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.4s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    5.8s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.2s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.5s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   18.2s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   23.9s


39857.772 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    5.7s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.2s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.4s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   18.1s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   23.9s


39817.676 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    6.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:   10.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   15.3s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   21.4s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   27.5s


39801.773 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    5.9s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   17.8s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   23.1s


39800.592 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    5.9s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.5s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.5s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   18.2s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   23.6s


39800.592 --> 


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    3.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    6.1s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    9.4s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:   13.4s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:   18.3s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:   23.7s


In [89]:
before_y_pred

array([1, 1, 4, 4, 3, 4, 1, 2, 3, 2, 2, 1, 4, 4, 4, 1, 4, 2, 1, 3, 3, 4,
       3, 2, 3, 3, 1, 1, 1, 2, 4, 2, 1, 1, 4, 1, 0, 0, 2, 4, 0, 1, 2, 1,
       0, 1, 0, 3, 1, 4, 4, 4, 1, 2, 4, 2, 0, 1, 4, 4, 3, 0, 3, 2, 1, 1,
       1, 1, 1, 4, 0, 2, 3, 4, 2, 4, 1, 1, 0, 1, 0, 1, 3, 2, 2, 2, 1, 1,
       4, 0, 3, 4, 1, 3, 3, 4, 3, 0, 0, 0, 4, 0, 2, 3, 0, 1, 3, 4, 0, 3,
       3, 3, 1, 1, 1, 4, 1, 2, 2, 1, 0, 1, 4, 1, 4, 3, 4, 3, 2, 4, 1, 1,
       4, 1, 3, 2, 1, 1, 4, 1, 4, 4, 1, 3, 4, 0, 2, 2, 1, 2, 0, 1, 3, 4,
       1, 1, 4, 4, 3, 1, 4, 3, 2, 1, 3, 1, 3, 0, 0, 2, 2, 4, 1, 0, 1, 4,
       3, 0, 2, 3, 1, 2, 1, 2, 0, 2, 1, 4, 1, 1, 3, 3, 4, 1, 3, 4, 1, 1,
       1, 0, 3, 2, 1, 4, 4, 4, 0, 0, 2, 0, 1, 1, 4, 1, 4, 4, 3, 3, 4, 3,
       4, 2, 3, 0, 4, 1, 1, 1, 4, 0, 4, 2, 2, 2, 3, 4, 2, 4, 1, 1, 3, 1,
       4, 0, 2, 1, 1, 3, 0, 3, 1, 4, 4, 1, 1, 4, 4, 4, 1, 2, 0, 4, 3, 4,
       2, 4, 4, 2, 4, 1, 2, 1, 3, 1, 2, 2, 3, 0, 2, 0, 3, 3, 1, 0, 2, 4,
       4, 1, 1, 4, 0, 4, 3, 2, 3, 3, 2, 1, 2, 4, 4,

In [90]:
from sklearn.metrics import silhouette_score

labels = before_y_pred  # Cluster labels from the model
# Flatten the time series for silhouette_score into (instances,timestamps*features)
scaled_TESTdata_flattened_instances = scaled_TESTdata.reshape(scaled_TESTdata.shape[0], -1)  

before_silhouette_avg = silhouette_score(scaled_TESTdata_flattened_instances, labels, metric='euclidean')
print(f"before Silhouette Score: {before_silhouette_avg:.5f}")


before Silhouette Score: 0.00491


In [91]:
from sklearn.metrics import davies_bouldin_score

before_db_index = davies_bouldin_score(scaled_TESTdata_flattened_instances, labels)
print(f"before Davies-Bouldin Index: {before_db_index:.5f}")

before Davies-Bouldin Index: 8.84594


### FS1: CLeVer Hybrid

In [92]:
np.shape(selected_TESTdata_CLeVerH)

(653, 200, 25)

In [93]:
#Clustering
CLEVER_y_pred = sdtw_km.fit_predict(selected_TESTdata_CLeVerH)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]

8380.213 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4766.426 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4747.937 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.0s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4740.144 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4735.243 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.7s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.2s


4733.682 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4732.951 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4732.677 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4732.677 --> 


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


In [94]:
# Compute clustering metrics
labels_CH = CLEVER_y_pred  # Cluster labels from the model
# Flatten the time series for silhouette_score into (instances,timestamps*features)
scaled_TESTdataCH_flattened_instances = selected_TESTdata_CLeVerH.reshape(selected_TESTdata_CLeVerH.shape[0], -1)  

CLEVERH_silhouette_avg = silhouette_score(scaled_TESTdataCH_flattened_instances, labels_CH, metric='euclidean')
print(f"CLEVER Hybrid Silhouette Score: {CLEVERH_silhouette_avg:.5f}")
CLEVERH_db_index = davies_bouldin_score(scaled_TESTdataCH_flattened_instances, labels_CH)
print(f"CLEVER Hybrid Davies-Bouldin Index: {CLEVERH_db_index:.5f}")

CLEVER Hybrid Silhouette Score: 0.00582
CLEVER Hybrid Davies-Bouldin Index: 8.36665


### FS2: CLeVer Cluster

In [95]:
#Clustering
CLEVERC_y_pred = sdtw_km.fit_predict(selected_TESTdata_CLeVerC)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]

8386.107 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.7s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.3s


4783.352 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4760.471 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.6s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4753.989 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.1s


4751.737 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4750.229 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4749.856 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4749.856 --> 


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.1s


In [96]:
# Compute clustering metrics
labels_CC = CLEVERC_y_pred  # Cluster labels from the model
# Flatten the time series for silhouette_score into (instances,timestamps*features)
scaled_TESTdataCC_flattened_instances = selected_TESTdata_CLeVerC.reshape(selected_TESTdata_CLeVerC.shape[0], -1)  

CLEVERC_silhouette_avg = silhouette_score(scaled_TESTdataCC_flattened_instances, labels_CC, metric='euclidean')
print(f"CLEVER Cluster Silhouette Score: {CLEVERC_silhouette_avg:.5f}")
CLEVERC_db_index = davies_bouldin_score(scaled_TESTdataCC_flattened_instances, labels_CC)
print(f"CLEVER Cluster Davies-Bouldin Index: {CLEVERC_db_index:.5f}")

CLEVER Cluster Silhouette Score: 0.00337
CLEVER Cluster Davies-Bouldin Index: 9.87220


### FS3: CLeVer Rank

In [97]:
#Clustering
CLEVERR_y_pred = sdtw_km.fit_predict(selected_TESTdata_CLeVerR)

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.9s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]

8257.020 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4740.994 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4722.911 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4714.943 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4709.386 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.9s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.3s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.8s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.3s


4705.970 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4703.345 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4701.401 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4700.597 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


4700.017 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.8s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.2s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.8s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.4s


4699.850 --> 

[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.1s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.3s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.5s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    1.0s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    2.0s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.6s


4699.850 --> 


[Parallel(n_jobs=1)]: Done  49 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 199 tasks      | elapsed:    0.0s
[Parallel(n_jobs=1)]: Done 449 tasks      | elapsed:    0.2s
[Parallel(n_jobs=1)]: Done 799 tasks      | elapsed:    0.4s
[Parallel(n_jobs=1)]: Done 1249 tasks      | elapsed:    0.7s
[Parallel(n_jobs=1)]: Done 1799 tasks      | elapsed:    1.1s
[Parallel(n_jobs=1)]: Done 2449 tasks      | elapsed:    1.5s
[Parallel(n_jobs=1)]: Done 3199 tasks      | elapsed:    2.0s


In [98]:
# Compute clustering metrics
labels_CR = CLEVERR_y_pred  # Cluster labels from the model
# Flatten the time series for silhouette_score into (instances,timestamps*features)
scaled_TESTdataCR_flattened_instances = selected_TESTdata_CLeVerR.reshape(selected_TESTdata_CLeVerR.shape[0], -1)  

CLEVERR_silhouette_avg = silhouette_score(scaled_TESTdataCR_flattened_instances, labels_CR, metric='euclidean')
print(f"CLEVER Rank Silhouette Score: {CLEVERR_silhouette_avg:.5f}")
CLEVERR_db_index = davies_bouldin_score(scaled_TESTdataCR_flattened_instances, labels_CR)
print(f"CLEVER Rank Davies-Bouldin Index: {CLEVERR_db_index:.5f}")

CLEVER Rank Silhouette Score: 0.00288
CLEVER Rank Davies-Bouldin Index: 9.42421


## Validierung mit clustering accuracy

### Before FS

In [99]:
# compare clustering vs labels

from sklearn.metrics import confusion_matrix
from scipy.optimize import linear_sum_assignment

def clustering_accuracy(true_labels, predicted_labels):
    # Create a confusion matrix
    cm = confusion_matrix(true_labels, predicted_labels)

    # Use the Hungarian algorithm to find the optimal assignment of clusters
    row_ind, col_ind = linear_sum_assignment(-cm)  # Maximize the matching (negative to maximize)
    
    # Calculate accuracy based on optimal matching
    accuracy = cm[row_ind, col_ind].sum() / len(true_labels)
    
    return accuracy

In [100]:
labels=TESTdata_y.astype(int)
labels

array([1, 5, 5, 2, 1, 1, 5, 2, 1, 3, 2, 1, 1, 1, 5, 1, 1, 2, 2, 4, 3, 2,
       5, 3, 1, 4, 4, 1, 5, 5, 1, 5, 1, 3, 3, 3, 2, 5, 2, 5, 1, 3, 4, 2,
       5, 2, 2, 5, 5, 1, 1, 4, 2, 4, 2, 5, 3, 2, 3, 1, 1, 1, 5, 1, 5, 2,
       2, 1, 1, 1, 2, 4, 5, 3, 3, 2, 1, 3, 1, 2, 3, 4, 3, 5, 5, 2, 3, 4,
       4, 5, 1, 1, 4, 2, 1, 5, 5, 5, 1, 5, 4, 5, 5, 4, 3, 2, 4, 1, 1, 1,
       5, 2, 1, 2, 1, 4, 2, 1, 2, 1, 4, 3, 4, 4, 1, 5, 1, 1, 1, 4, 2, 4,
       1, 3, 2, 3, 1, 4, 3, 1, 2, 2, 4, 2, 2, 4, 2, 4, 5, 1, 1, 1, 2, 5,
       2, 4, 4, 2, 1, 5, 3, 3, 1, 5, 3, 1, 2, 1, 3, 3, 4, 5, 1, 2, 4, 1,
       5, 4, 5, 2, 1, 1, 2, 2, 2, 4, 2, 4, 4, 5, 1, 4, 3, 1, 2, 1, 4, 5,
       4, 3, 1, 3, 2, 1, 3, 5, 2, 3, 3, 1, 4, 3, 2, 4, 1, 4, 4, 3, 4, 5,
       3, 1, 1, 3, 4, 5, 2, 3, 4, 3, 5, 2, 2, 2, 2, 1, 1, 1, 4, 4, 3, 4,
       3, 4, 2, 2, 1, 2, 1, 1, 5, 3, 2, 2, 4, 4, 1, 5, 1, 1, 4, 4, 2, 3,
       2, 2, 4, 2, 3, 4, 4, 5, 1, 1, 1, 2, 4, 2, 4, 4, 5, 1, 1, 5, 4, 2,
       1, 2, 2, 2, 5, 2, 1, 2, 5, 2, 2, 5, 5, 5, 2,

In [101]:
before_y_pred

array([1, 1, 4, 4, 3, 4, 1, 2, 3, 2, 2, 1, 4, 4, 4, 1, 4, 2, 1, 3, 3, 4,
       3, 2, 3, 3, 1, 1, 1, 2, 4, 2, 1, 1, 4, 1, 0, 0, 2, 4, 0, 1, 2, 1,
       0, 1, 0, 3, 1, 4, 4, 4, 1, 2, 4, 2, 0, 1, 4, 4, 3, 0, 3, 2, 1, 1,
       1, 1, 1, 4, 0, 2, 3, 4, 2, 4, 1, 1, 0, 1, 0, 1, 3, 2, 2, 2, 1, 1,
       4, 0, 3, 4, 1, 3, 3, 4, 3, 0, 0, 0, 4, 0, 2, 3, 0, 1, 3, 4, 0, 3,
       3, 3, 1, 1, 1, 4, 1, 2, 2, 1, 0, 1, 4, 1, 4, 3, 4, 3, 2, 4, 1, 1,
       4, 1, 3, 2, 1, 1, 4, 1, 4, 4, 1, 3, 4, 0, 2, 2, 1, 2, 0, 1, 3, 4,
       1, 1, 4, 4, 3, 1, 4, 3, 2, 1, 3, 1, 3, 0, 0, 2, 2, 4, 1, 0, 1, 4,
       3, 0, 2, 3, 1, 2, 1, 2, 0, 2, 1, 4, 1, 1, 3, 3, 4, 1, 3, 4, 1, 1,
       1, 0, 3, 2, 1, 4, 4, 4, 0, 0, 2, 0, 1, 1, 4, 1, 4, 4, 3, 3, 4, 3,
       4, 2, 3, 0, 4, 1, 1, 1, 4, 0, 4, 2, 2, 2, 3, 4, 2, 4, 1, 1, 3, 1,
       4, 0, 2, 1, 1, 3, 0, 3, 1, 4, 4, 1, 1, 4, 4, 4, 1, 2, 0, 4, 3, 4,
       2, 4, 4, 2, 4, 1, 2, 1, 3, 1, 2, 2, 3, 0, 2, 0, 3, 3, 1, 0, 2, 4,
       4, 1, 1, 4, 0, 4, 3, 2, 3, 3, 2, 1, 2, 4, 4,

In [102]:
ClusteringACC_before = clustering_accuracy(labels,before_y_pred)
print(f"Clustering Accuracy before FS: {ClusteringACC_before:.2f}")

Clustering Accuracy before FS: 0.23


### FS 1: CLeVer Hybrid

In [103]:
# compare clustering vs labels
ClusteringACC_CLEVER = clustering_accuracy(labels,CLEVER_y_pred)
print(f"Clustering Accuracy CLEVER: {ClusteringACC_CLEVER:.2f}")

Clustering Accuracy CLEVER: 0.23


### FS 2: CLeVer Cluster

In [104]:
ClusteringACC_CLEVERC = clustering_accuracy(labels,CLEVERC_y_pred)
print(f"Clustering Accuracy CLEVER Cluster: {ClusteringACC_CLEVERC:.2f}")

Clustering Accuracy CLEVER Cluster: 0.23


### FS 3: CLeVer Rank 

In [105]:
ClusteringACC_CLEVERR = clustering_accuracy(labels,CLEVERR_y_pred)
print(f"Clustering Accuracy CLEVER Rank: {ClusteringACC_CLEVERR:.2f}")

Clustering Accuracy CLEVER Rank: 0.23
