### Part of the solution is shown here

Documentation for Arsenal, which I used as my model:
https://www.sktime.net/en/stable/api_reference/auto_generated/sktime.classification.kernel_based.Arsenal.html

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import KFold, cross_val_score, train_test_split
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.kernel_based import Arsenal


In [2]:
# Load the labelled challenge data (X, y) and the unlabelled test inputs (Xtest).
# np.load on an .npz archive returns an NpzFile that keeps the underlying file
# open; the context managers close it as soon as the arrays have been read.
with np.load('challenge.npz') as data:
    X = data['arr_0']
    y = data['arr_1']

with np.load('test.npz') as test_data:
    Xtest = test_data['arr_0']

In [3]:
# Hold out 33 % of the labelled samples for validation; the fixed seed keeps the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [34]:
# Baseline: Arsenal with its default hyperparameters.
# Rationale for choosing it: Arsenal applies a large number of random kernels,
# which captures a wide range of patterns and was expected to suit a small dataset.

# The kernels are generated randomly, so the seed is fixed to make the reported
# accuracy reproducible across runs.
arsenal = Arsenal(random_state=42)
arsenal.fit(X_train, y_train)
y_pred_arsenal = arsenal.predict(X_test)

accuracy_arsenal = accuracy_score(y_test, y_pred_arsenal)
print(f"Arsenal Accuracy: {accuracy_arsenal:.2f}")

# Library defaults of an unconfigured Arsenal, printed as the reference point
# for the hyperparameter search.
default = Arsenal().get_params()
print(f"Arsenal Parameters: {default}")

Arsenal Accuracy: 0.68
Arsenal Parameters: {'contract_max_n_estimators': 100, 'max_dilations_per_kernel': 32, 'n_estimators': 25, 'n_features_per_kernel': 4, 'n_jobs': 1, 'num_kernels': 2000, 'random_state': None, 'rocket_transform': 'rocket', 'save_transformed_data': False, 'time_limit_in_minutes': 0.0}


In [40]:
# Cross-validate the default Arsenal on the training split with 5 folds.
# The seed is fixed so the fold scores are reproducible; this same instance is
# later used as the base estimator for the hyperparameter search.
arsenal_grid = Arsenal(random_state=42)
cross_val_score_arsenal = cross_val_score(arsenal_grid, X_train, y_train, cv=KFold(n_splits=5))
print(f"Cross-validation score: {cross_val_score_arsenal}")

Cross-validation score: [0.8125 0.625  0.6875 0.75   0.6875]


In [41]:
# Hyperparameter tuning for Arsenal.
# RandomizedSearchCV is used to cap the number of (expensive) Arsenal fits.
#
# NOTE(review): per the sktime Arsenal documentation, n_features_per_kernel and
# max_dilations_per_kernel appear to apply only to the MiniRocket/MultiRocket
# transforms. With the default rocket_transform='rocket' they may have no effect,
# so score differences between candidates could be pure noise -- confirm, and
# consider tuning num_kernels or rocket_transform instead.

param_grid = {
    'n_estimators': [25, 50],  # Example values for n_estimators
    'n_features_per_kernel': [4, 8],
    'max_dilations_per_kernel': [32],
}

# The grid holds only 2 * 2 * 1 = 4 combinations, fewer than RandomizedSearchCV's
# default n_iter=10. Left at the default, scikit-learn warns and tries every
# combination anyway. Capping n_iter at the grid size evaluates the same
# candidates without the warning and still limits the search to 10 samples if
# the grid is enlarged later.
n_candidates = int(np.prod([len(values) for values in param_grid.values()]))
random_search_arsenal = RandomizedSearchCV(
    arsenal_grid,
    param_grid,
    n_iter=min(10, n_candidates),
    cv=KFold(n_splits=5),
    random_state=42,  # reproducible candidate sampling
)
random_search_arsenal.fit(X_train, y_train)

print(f"Best parameters for Arsenal: {random_search_arsenal.best_params_}")



Best parameters for Arsenal: {'n_features_per_kernel': 8, 'n_estimators': 25, 'max_dilations_per_kernel': 32}


In [46]:
# Best parameters for Arsenal (from the recorded run): {'n_features_per_kernel': 8, 'n_estimators': 25, 'max_dilations_per_kernel': 32}

# RandomizedSearchCV refits the winning configuration on all of X_train by
# default (refit=True), so best_estimator_ is already trained. Fitting it again
# here would only repeat an expensive Arsenal fit.
best_arsenal = random_search_arsenal.best_estimator_

# Predict the labels of the held-out split
y_pred = best_arsenal.predict(X_test)

# Compute the accuracy score
accuracy_arsenal = accuracy_score(y_test, y_pred)
print(f"Arsenal Accuracy after Hyperparameter Tuning: {accuracy_arsenal:.2f}")

Arsenal Accuracy after Hyperparameter Tuning: 0.68


In [None]:
# Re-score the tuned Arsenal on the training split with 5-fold cross-validation.
# (Author's note: better than the hyperparameter-tuned kNN.)
cross_val_score_arsenal = cross_val_score(
    estimator=best_arsenal,
    X=X_train,
    y=y_train,
    cv=KFold(n_splits=5),
)
print(f"Cross-validation score: {cross_val_score_arsenal}")

Cross-validation score: [0.875  0.625  0.6875 0.75   0.75  ]


In [None]:
# Prediction file
# Model selection is finished, so the submission model is trained on every
# labelled sample (X, y) rather than only the training split. A fresh Arsenal
# with the tuned hyperparameters is built so best_arsenal itself stays untouched.
final_arsenal = Arsenal(**best_arsenal.get_params())
final_arsenal.fit(X, y)
y_pred = final_arsenal.predict(Xtest)
assert len(y_pred) == len(Xtest), "expected one prediction per test sample"

# NOTE(review): savetxt's default format assumes numeric labels and writes them
# in scientific notation -- confirm this is the format the consumer expects.
np.savetxt("ypred.csv", y_pred, delimiter=",")