In [None]:
import warnings
warnings.filterwarnings('ignore')

import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold

from sklearn.metrics import accuracy_score

from sktime.datasets import load_japanese_vowels

# Load the predefined train and test splits of the Japanese Vowels dataset.
# NOTE(review): `return_type=None` presumably selects sktime's default container
# format for X — confirm against the sktime docs.
(X_train, y_train), (X_test, y_test) = (
    load_japanese_vowels(split=subset, return_type=None)
    for subset in ("train", "test")
)

## Data preprocessing
### Transform to numpy 3D array

In [None]:
# Preview the first rows of the training data.
# NOTE(review): calling `.head()` presumes X_train is still a pandas-like object
# at this point (i.e. before any conversion to a 3D numpy array) — confirm.
X_train.head()

In [None]:
# Shape should be (num_samples, num_features, time steps)

# Show the train shape and then the test shape, one per line.
print(X_train.shape, X_test.shape, sep="\n")

### Count class frequencies

### Plot features and labels

In [None]:
# Pick four training series spaced 30 positions apart (indices 0, 30, 60, 90) and
# pair them with the category ids 1-4 that are used in the subplot titles.
# NOTE(review): this presumes the training set is ordered in runs of 30 series per
# category — confirm against y_train.
series_indices = np.arange(4) * 30
categories = np.arange(1, 5)
features = np.arange(13)

selected_series = X_train[series_indices]

fig, axes = plt.subplots(nrows=4, ncols=1, figsize=(10, 18))

# One panel per selected series; each feature channel is drawn as its own line and
# labelled with its feature index.
for ax, series, category in zip(axes, selected_series, categories):
    for j, channel in enumerate(series):
        ax.plot(channel, label=features[j])

    ax.set(title=f"Category: {category}", xlabel="Time Steps", ylabel="Values")
    ax.legend()

plt.tight_layout()
plt.show()

## Classification
### Baseline

In [None]:
from sktime.classification.dummy import DummyClassifier

# Baseline: time the initialize / fit / predict steps and report test accuracy.
start = time.time()

# Initialize baseline model

# Fit

# Predict

end = time.time()

# NOTE: `y_pred` is not assigned anywhere in this cell yet; the "Predict" step above
# has to produce it before the metrics below can be computed.
baseline_time = end - start
baseline_accuracy = round(accuracy_score(y_test, y_pred),2)

# Minutes are reported with one decimal, matching the other model cells
# (rounding to 0 decimals hides anything shorter than half a minute).
print(f"Elapsed time: {round(baseline_time,0)} seconds or {round(baseline_time/60,1)} minutes")
# Format the percentage explicitly: multiplying the rounded fraction by 100 can
# expose float noise (e.g. 0.57*100 -> 56.99999999999999).
print(f"Accuracy: {baseline_accuracy*100:.1f}%")

### BOSS (univariate)

In [None]:
from sktime.classification.dictionary_based import ContractableBOSS

# BOSS is run on a single feature: keep only feature index 0 while preserving the
# 3D layout (the `0:1` slice keeps the feature axis).
uni_X_train = X_train[:, 0:1, :]
uni_X_test = X_test[:, 0:1, :]

# Time the whole tune / fit / predict sequence.
start = time.time()

# Initialize model

# Parameters grid

# Tune model

# Fit tuned model

# Predict

end = time.time()

# NOTE: `y_pred` and `tuned_boss` are not assigned anywhere in this cell yet; the
# steps above have to produce them. `tuned_boss` must expose `best_params_`
# (presumably a fitted GridSearchCV — confirm).
boss_time = end - start
boss_accuracy = round(accuracy_score(y_test, y_pred),2)

print(f"Elapsed time: {round(boss_time,0)} seconds or {round(boss_time/60,1)} minutes")
print(tuned_boss.best_params_)
# `boss_accuracy` is a fraction in [0, 1]; scale it to a percentage before adding
# the "%" sign (the raw fraction used to be printed, e.g. "0.57%"). The explicit
# format also hides float noise such as 0.57*100 -> 56.99999999999999.
print(f"Accuracy: {boss_accuracy*100:.1f}%")

### WEASEL (univariate)

In [None]:
from sktime.classification.dictionary_based import WEASEL

# Time the whole tune / fit / predict sequence.
start = time.time()

# Initialize model

# Parameters grid

# Tune model

# Fit

# Predict

end = time.time()

# NOTE: `y_pred` and `tuned_weasel` are not assigned anywhere in this cell yet; the
# steps above have to produce them. `tuned_weasel` must expose `best_params_`
# (presumably a fitted GridSearchCV — confirm).
weasel_time = end - start
weasel_accuracy = round(accuracy_score(y_test, y_pred),2)

print(f"Elapsed time: {round(weasel_time,0)} seconds or {round(weasel_time/60,1)} minutes")
print(tuned_weasel.best_params_)
# Format the percentage explicitly: multiplying the rounded fraction by 100 can
# expose float noise (e.g. 0.57*100 -> 56.99999999999999).
print(f"Accuracy: {weasel_accuracy*100:.1f}%")

### TDE (multivariate)

TDE is far too slow to be practical here, so no TDE model is trained. I believe MUSE is a much better alternative: it is much faster and still achieves great performance.

### MUSE (multivariate)

In [None]:
from sktime.classification.dictionary_based import MUSE

# Time the initialize / fit / predict sequence.
start = time.time()

# Initialize model

# Fit

# Predict

end = time.time()

# NOTE: `y_pred` is not assigned anywhere in this cell yet; the "Predict" step above
# has to produce it before the metrics below can be computed.
muse_time = end - start
muse_accuracy = round(accuracy_score(y_test, y_pred),2)

print(f"Elapsed time: {round(muse_time,0)} seconds or {round(muse_time/60,1)} minutes")
# Format the percentage explicitly: multiplying the rounded fraction by 100 can
# expose float noise (e.g. 0.57*100 -> 56.99999999999999).
print(f"Accuracy: {muse_accuracy*100:.1f}%")

In [None]:
# Gather the per-model results; elapsed times are converted from seconds to minutes.
models = ['Baseline', 'BOSS Ensemble', 'WEASEL', 'MUSE']
accuracies = [baseline_accuracy, boss_accuracy, weasel_accuracy, muse_accuracy]
times = [seconds / 60 for seconds in (baseline_time, boss_time, weasel_time, muse_time)]

bar_width = 0.35
indices = np.arange(len(models))
half_width = bar_width / 2

# Grouped bars: accuracy to the left of each tick, time to the right.
fig, ax = plt.subplots(figsize=(12, 8))
acc_bars = ax.bar(indices - half_width, accuracies, bar_width, color='skyblue', label='Accuracy')
time_bars = ax.bar(indices + half_width, times, bar_width, color='orange', label='Time (min)')

# Write each bar's height (two decimals) 3 points above its top edge.
for bar in (*acc_bars, *time_bars):
    top = bar.get_height()
    center_x = bar.get_x() + bar.get_width() / 2
    ax.annotate(f'{top:.2f}',
                xy=(center_x, top),
                xytext=(0, 3),
                textcoords="offset points",
                ha='center', va='bottom')

ax.set_xticks(indices)
ax.set_xticklabels(models)
ax.legend(loc='best')

plt.tight_layout()