notes on how to improve the code

- remove all unnecessary files and push to GitHub (git remote add origin https://github.com/mlgig/MTS_torch_classifiers.git; 
git branch -M main; 
git push -u origin main
)

- optional params for each class for setting hyperparams, e.g. emb size for ConvTran, n_channels for hydra

- add a non-verbose option for ConvTran

- option to return only the transformation for hydra, miniRocket and QUANT

- cite tsai in miniRocket

# load dataset

In [1]:
import numpy as np
# The .npy file stores a pickled object (hence allow_pickle=True and .item()).
# NOTE: unpickling can execute arbitrary code; only load files from a trusted source.
data = np.load('CMJ_resampled.npy', allow_pickle=True).item()

# Unpack each split into its samples (X) and labels (y)
train_split = data['train']
test_split = data['test']
X_train = train_split['X']
y_train = train_split['y']
X_test = test_split['X']
y_test = test_split['y']

# Sanity check: array shapes and the number of distinct training labels
print(X_train.shape, X_test.shape, np.unique(y_train).shape)

(419, 3, 384) (179, 3, 384) (3,)


### always convert labels into indices; the LabelEncoder keeps track of the mapping

In [2]:
from sklearn.preprocessing import LabelEncoder 
# Learn the label -> index mapping from the training labels only,
# then apply that same mapping to both splits
le = LabelEncoder().fit(y_train)
y_train, y_test = le.transform(y_train), le.transform(y_test)
le.classes_  # the original labels known to the encoder

array(['0', '1', '2'], dtype='<U1')

# quant aaltd2024 data

In [3]:
from models.aaltd2024.code.quant import QuantClassifier
from models.aaltd2024.code.utils import BatchDataset

convert the data into a PyTorch DataLoader (torch tensors)
## NOTE: be careful with QUANT, its dataLoader might be buggy

In [4]:
batch_size = 32

# Training batches are drawn in shuffled order
data_train = BatchDataset(
    X_train,
    y_train,
    batch_size=batch_size,
    shuffle=True,
)

# Test batches keep their original order
data_test = BatchDataset(
    X_test,
    y_test,
    batch_size=batch_size,
    shuffle=False,
)

# strong suggestion! always shuffle the train (dev) set, NEVER shuffle the test set

In [5]:
# Build QUANT with no constructor arguments (library defaults) and fit it on the training batches
model = QuantClassifier()
model.fit(data_train)

In [6]:
# score() is treated as an error rate here, so accuracy is its complement
error_quant = model.score(data_test)
acc_quant = 1 - error_quant

# Hard predictions and per-class probabilities for the test batches
predictions_quant = model.predict(data_test)
pred_probas_quant = model.predict_proba(data_test)

print(
    "accuracy was ", acc_quant,
    "\npredictions: ", predictions_quant[:5],
    "...\n probabilities:\n", pred_probas_quant[:5],
    "...",
)
# The three leading dimensions should agree (one row per test sample)
print(predictions_quant.shape, pred_probas_quant.shape, y_test.shape)

accuracy was  0.9217877094972067 
predictions:  [1 0 1 0 0] ...
 probabilities:
 [[0.335 0.605 0.06 ]
 [0.9   0.065 0.035]
 [0.02  0.02  0.96 ]
 [0.99  0.    0.01 ]
 [0.93  0.05  0.02 ]] ...
(179,) (179, 3) (179,)


# hydra aaltd24 GPU

In [7]:
import numpy as np
from models.aaltd2024.code.utils import Dataset as hydraDataset
from models.aaltd2024.code.hydra_gpu import HydraMultivariateGPU
from models.aaltd2024.code.ridge import RidgeClassifier
from torch.cuda import is_available as is_gpu_available

same as before: build the DataLoaders; in this case we also need the device

In [8]:
# Run on the GPU when one is available, otherwise fall back to the CPU
if is_gpu_available():
    device = "cuda"
else:
    device = "cpu"
batch_size = 32

# X_train is (n_samples, n_channels, length); the sample count is not needed here
_, n_channels, length = X_train.shape
n_classes = len(np.unique(y_train))

# Shuffle the training batches only; the test batches keep their order
data_train = hydraDataset(
    X_train, y_train, batch_size=batch_size, shuffle=True
)
data_test = hydraDataset(
    X_test, y_test, batch_size=batch_size, shuffle=False
)

note the pipeline here: the Hydra transform is passed to the ridge classifier

In [9]:
# Step 1: build the Hydra transform for this input size and move it to the chosen device
hydra = HydraMultivariateGPU(input_length=length, num_channels=n_channels)
transform = hydra.to(device)

# Step 2: hand the transform to the ridge classifier and fit on the training batches
ridge4hydra = RidgeClassifier(transform=transform, device=device)
ridge4hydra.fit(data_train, num_classes=n_classes)

In [10]:
# score() is treated as an error rate here; bring it to the host as NumPy
# before turning it into an accuracy
error_hydra = ridge4hydra.score(data_test)
score_hydra = 1 - error_hydra.cpu().numpy()

# Hard predictions and per-class probabilities for the test batches
preds_hydra = ridge4hydra.predict(data_test)
pred_probas_hydra = ridge4hydra.predict_proba(data_test)

print(
    "accuracy was ", score_hydra,
    "\npredictions: ", preds_hydra[:5],
    "...\n probabilities:\n", pred_probas_hydra[:5],
    "...",
)
print(preds_hydra.shape, pred_probas_hydra.shape, y_test.shape)


accuracy was  0.9273743033409119 
predictions:  [0 0 0 0 0] ...
 probabilities:
 [[0.5688696  0.24773626 0.18339412]
 [0.48659664 0.31500056 0.19840279]
 [0.59562397 0.23855086 0.16582514]
 [0.5287457  0.27923328 0.19202103]
 [0.45135924 0.33801287 0.2106279 ]] ...
(179,) (179, 3) (179,)


# miniRocket GPU
found a miniRocket GPU implementation in tsai, combined with the ridge GPU implementation from Angus' aaltd24 code

In [11]:
import numpy as np
from models.MyMiniRocket import MyMiniRocket
from models.aaltd2024.code.utils import Dataset as MiniRocketDataset

same as before

In [12]:
# Imported here so that the miniRocket section can be run on its own,
# without first executing the hydra section's import cell.
from torch.cuda import is_available as is_gpu_available

# X_train is unpacked as (n_samples, n_channels, seq_len)
n_samples, n_channels, seq_len = X_train.shape
n_classes = np.unique(y_train).shape[0]

# Run on the GPU when one is available, otherwise fall back to the CPU
device = "cuda" if is_gpu_available() else "cpu"
batch_size = 32

# Shuffle the training batches only; the test batches keep their order
data_train = MiniRocketDataset(X_train, y_train, batch_size=batch_size, shuffle=True)
data_test = MiniRocketDataset(X_test, y_test, batch_size=batch_size, shuffle=False)


In [13]:
# Pass the `device` variable (GPU if available, else CPU) instead of a
# hard-coded "cuda", so this cell does not crash on CPU-only machines.
miniRocket = MyMiniRocket(
    n_channels=n_channels,
    seq_len=seq_len,
    n_classes=n_classes,
    normalise=False,
    device=device,
)

# Fit on the training batches, then evaluate on the test batches.
# NOTE(review): score() is used directly as an accuracy here — confirm it
# does not return an error rate.
miniRocket.train(data_train)
acc_miniR = miniRocket.score(data_test)

In [14]:

# Hard predictions and per-class probabilities for the test batches
preds_miniR = miniRocket.predict(data_test)
pred_probas_miniR = miniRocket.predict_proba(data_test)

print(
    "accuracy was ", acc_miniR,
    "\npredictions: ", preds_miniR[:5],
    "...\n probabilities:\n", pred_probas_miniR[:5],
    "...",
)
print(preds_miniR.shape, pred_probas_miniR.shape, y_test.shape)


accuracy was  0.9441340789198875 
predictions:  [0 0 0 0 0] ...
 probabilities:
 [[0.36723945 0.3542896  0.278471  ]
 [0.41379905 0.32062802 0.26557294]
 [0.39968294 0.2963606  0.30395645]
 [0.4396199  0.24795483 0.31242526]
 [0.36138856 0.33560294 0.30300844]] ...
(179,) (179, 3) (179,)


#  ConvTran
the previously provided version made unfair use of the test set

here we also have a dev_dataset i.e. (train + validation)

this can probably be improved

In [15]:
from models.utils import load_data_ConvTran
# Build the ConvTran loaders from the raw arrays.
# dev_dataset is the train + validation data.
(
    train_loader,
    val_loader,
    dev_dataset,
    test_loader,
) = load_data_ConvTran(X_train, X_test, y_train, y_test)

# differences from before
1) optional 'save_path' argument, i.e. where to save the final model
2) still extremely verbose... add an optional verbose param?

In [None]:
# Import only the name that is used, instead of `import *`, so the notebook
# namespace is not silently filled (or overwritten) by everything in the module.
from models.convTran import build_train_ConvTran

# Train ConvTran; the final model is saved to `save_path`.
# Returns the trained model and the hyper-parameters that were used.
convTran, hyperParams = build_train_ConvTran(
    train_loader,
    val_loader,
    dev_dataset,
    save_path="ConvTran_CMJ.pth",
)


In [17]:
# NOTE(review): presumably switches the network to inference mode before
# evaluating — confirm convTran is a torch nn.Module.
convTran.eval()
# Evaluate on the held-out test loader: score (reported as accuracy below),
# hard predictions and per-class probabilities.
accuracy_convTran = convTran.score(test_loader)
preds_convTran = convTran.predict(test_loader)
probas_convTran = convTran.predict_proba(test_loader)

In [18]:
# Report the accuracy together with the first five predictions and probabilities
print(
    "accuracy was ", accuracy_convTran,
    "\npredictions: ", preds_convTran[:5],
    "...\n probabilities:\n", probas_convTran[:5],
    "...",
)
# The three leading dimensions should agree (one row per test sample)
print(preds_convTran.shape, probas_convTran.shape, y_test.shape)


accuracy was  0.8268156424581006 
predictions:  [0 0 0 0 0] ...
 probabilities:
 [[0.9900339  0.00711962 0.00284643]
 [0.9930293  0.00246044 0.00451024]
 [0.9930352  0.0054089  0.00155603]
 [0.9940414  0.00371839 0.00224025]
 [0.99298745 0.00490746 0.00210511]] ...
(179,) (179, 3) (179,)
