In [1]:
import warnings

import numpy as np
from aeon.clustering import KASBA
from aeon.datasets import load_basic_motions, load_gunpoint, load_japanese_vowels

warnings.filterwarnings("ignore")

In [2]:
# Univariate example: fit KASBA on the GunPoint train split, then assign
# cluster labels to the test split with the fitted model.
uni_X_train, uni_y_train = load_gunpoint(split="train")
uni_X_test, uni_y_test = load_gunpoint(split="test")

# Use one cluster per distinct value found in the training labels.
n_clusters = np.unique(uni_y_train).size

shape_report = (
    f"Train shape: {uni_X_train.shape}, Test shape: {uni_X_test.shape}, "
    f"n_clusters: {n_clusters}"
)
print(shape_report)

# Keep the estimator settings together in one mapping and unpack them into
# the constructor; random_state is fixed at 0.
univariate_kasba_kwargs = {
    "n_clusters": n_clusters,
    "distance": "msm",
    "ba_subset_size": 0.5,
    "initial_step_size": 0.05,
    "max_iter": 300,
    "tol": 1e-6,
    "distance_params": {"c": 1.0},
    "decay_rate": 0.1,
    "verbose": False,
    "random_state": 0,
}
univariate_KASBA = KASBA(**univariate_kasba_kwargs)

univariate_KASBA.fit(uni_X_train)
print(f"Train labels: {univariate_KASBA.labels_}")

# Label the held-out split using the model fitted above.
test_labels = univariate_KASBA.predict(uni_X_test)
print(f"Test labels: {test_labels}")

Train shape: (50, 1, 150), Test shape: (150, 1, 150), n_clusters: 2
Train labels: [1 1 1 1 1 1 0 0 0 0 1 0 0 0 1 0 1 1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 1 0
 1 0 0 1 0 0 0 0 1 1 1 1 0]
Test labels: [0 1 0 0 1 1 0 0 1 1 1 0 1 0 0 1 1 1 1 1 0 0 1 1 0 0 0 0 1 1 1 0 0 1 1 1 1
 1 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 0 1 0 0 1 1 1 0 0 0 1 1 1 0 1 1 1 1 1 1 0
 0 1 1 0 0 1 0 0 0 1 1 1 0 1 1 1 1 0 1 0 1 1 1 0 0 1 0 0 0 1 1 1 1 0 1 1 1
 1 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 1 1 1 1
 0 0]


In [3]:
# Multivariate example: same workflow as a single-channel run, applied to the
# BasicMotions train/test splits.
multi_X_train, multi_y_train = load_basic_motions(split="train")
multi_X_test, multi_y_test = load_basic_motions(split="test")

# Use one cluster per distinct value found in the training labels.
n_clusters = np.unique(multi_y_train).size

shape_report = (
    f"Train shape: {multi_X_train.shape}, Test shape: {multi_X_test.shape}, "
    f"n_clusters: {n_clusters}"
)
print(shape_report)

# Keep the estimator settings together in one mapping and unpack them into
# the constructor; random_state is fixed at 0.
multivariate_kasba_kwargs = {
    "n_clusters": n_clusters,
    "distance": "msm",
    "ba_subset_size": 0.5,
    "initial_step_size": 0.05,
    "max_iter": 300,
    "tol": 1e-6,
    "distance_params": {"c": 1.0},
    "decay_rate": 0.1,
    "verbose": False,
    "random_state": 0,
}
multivariate_KASBA = KASBA(**multivariate_kasba_kwargs)

multivariate_KASBA.fit(multi_X_train)
print(f"Train labels: {multivariate_KASBA.labels_}")

# Label the held-out split using the model fitted above.
test_labels = multivariate_KASBA.predict(multi_X_test)
print(f"Test labels: {test_labels}")

Train shape: (40, 6, 100), Test shape: (40, 6, 100), n_clusters: 4
Train labels: [0 0 0 0 0 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 1 2 2 2 1 2 2 2 1 2 2 1 1 1 2 2 1
 1 2 2]
Test labels: [0 0 0 0 0 0 0 0 0 0 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 1 1 2 2 2 2 1
 1 2 2]


In [8]:
# Unequal length example: load the JapaneseVowels splits and report how the
# per-series lengths vary before any padding is applied.
unequal_X_train, unequal_y_train = load_japanese_vowels(split="train")
unequal_X_test, unequal_y_test = load_japanese_vowels(split="test")

# Use one cluster per distinct value found in the training labels.
n_clusters = np.unique(unequal_y_train).size

print(
    f"Train n cases: {len(unequal_X_train)}, Test n cases: {len(unequal_X_test)}, "
    f"n_clusters: {n_clusters}"
)

# Collect the second-axis size of every series once per split, then print the
# extremes for each split.
train_lengths = [series.shape[1] for series in unequal_X_train]
test_lengths = [series.shape[1] for series in unequal_X_test]

print(f"Train longest series length: {max(train_lengths)}")
print(f"Train shortest series length: {min(train_lengths)}")
print(f"Test longest series length: {max(test_lengths)}")
print(f"Test shortest series length: {min(test_lengths)}")


def _pad_panel_ndims_length(X, max_len=None, value=0.0):
    n_cases = len(X)
    n_dims = X[0].shape[0]

    if max_len is None:
        max_len = max(x.shape[1] for x in X)

    padded = np.full((n_cases, n_dims, max_len), value, dtype=float)

    for i, x in enumerate(X):
        length = x.shape[1]
        padded[i, :, :length] = x

    return padded


# Find the longest series across both splits so that train and test end up
# padded to one shared length.
all_X = [*unequal_X_train, *unequal_X_test]
global_max_len = max(series.shape[1] for series in all_X)

# Pad each split with 0.0 up to the shared length; this rebinds the split
# names to the padded arrays.
unequal_X_train, unequal_X_test = (
    _pad_panel_ndims_length(split, max_len=global_max_len, value=0.0)
    for split in (unequal_X_train, unequal_X_test)
)

print(f"Padded train shape: {unequal_X_train.shape}")
print(f"Padded test shape: {unequal_X_test.shape}")

# NOTE(review): the padded shapes are reported a second time here, together
# with n_clusters; kept so the cell's printed output is unchanged.
padded_report = (
    f"Padded train shape: {unequal_X_train.shape}, Padded test "
    f"shape: {unequal_X_test.shape}, n_clusters: {n_clusters}"
)
print(padded_report)

# Keep the estimator settings together in one mapping and unpack them into
# the constructor; random_state is fixed at 0.
unequal_kasba_kwargs = {
    "n_clusters": n_clusters,
    "distance": "msm",
    "ba_subset_size": 0.5,
    "initial_step_size": 0.05,
    "max_iter": 300,
    "tol": 1e-6,
    "distance_params": {"c": 1.0},
    "decay_rate": 0.1,
    "verbose": False,
    "random_state": 0,
}
unequal_KASBA = KASBA(**unequal_kasba_kwargs)

unequal_KASBA.fit(unequal_X_train)
print(f"Train labels: {unequal_KASBA.labels_}")

# Label the padded test split using the model fitted above.
test_labels = unequal_KASBA.predict(unequal_X_test)
print(f"Test labels: {test_labels}")

Train n cases: 270, Test n cases: 370, n_clusters: 9
Train longest series length: 26
Train shortest series length: 7
Test longest series length: 29
Test shortest series length: 7
Padded train shape: (270, 12, 29)
Padded test shape: (370, 12, 29)
Padded train shape: (270, 12, 29), Padded test shape: (370, 12, 29), n_clusters: 9
Train labels: [8 8 8 8 8 8 8 5 8 5 8 3 5 5 1 5 5 8 5 5 5 8 8 8 5 5 2 5 2 5 5 5 4 4 4 4 4
 4 4 6 4 2 4 4 4 4 4 4 4 2 4 4 2 2 4 4 2 4 4 4 6 3 3 3 3 4 3 3 3 3 3 3 3 3
 3 4 3 3 3 3 3 3 4 3 3 3 2 3 3 3 6 6 6 6 6 6 6 6 6 6 6 6 4 6 6 6 6 6 4 6 4
 4 4 6 6 6 6 4 6 6 2 1 1 1 1 1 1 1 1 1 1 1 1 1 5 1 5 1 1 1 1 1 1 1 1 1 1 1
 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3 3 3 7 3
 3 7 7 7 3 7 7 7 7 7 7 7 7 7 3 7 7 7 7 7 7 7 7 7 7 1 3 2 2 2 2 2 2 2 2 2 2
 5 2 2 2 2 2 2 2 2 2 2 3 2 2 2 3 2 5 5 2 5 5 5 5 5 5 5 2 2 5 5 2 1 2 2 3 2
 3 3 5 2 1 2 5 1 2 1 2]
Test labels: [5 5 8 5 8 5 5 8 5 8 8 3 5 5 5 2 5 5 5 5 0 5 8 5 5 5 5 1 5 5 5 5 4 4 4 4 2
 4 4 4 4 4 4 2 4 4 3