In [1]:
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import f1_score, accuracy_score
from torch import zeros
from tqdm.auto import tqdm
from yaml import load as load_yaml, FullLoader

from dataset import KIDataset, k_fold_cross_validator
from models.rocket import ROCKET
from processor.processor import Leif
from train import train_eval_rocket_segment
from utils.const import SEED
from utils.misc import set_random_state
from utils.path import config_path

# Seed the random number generators once up front so that a fresh
# "Restart & Run All" produces repeatable results.
set_random_state(SEED)
# NOTE(review): a `!` line runs in a separate shell process, so this most
# likely does not change the environment of the already-running kernel —
# confirm whether it is still needed (activate the env before starting Jupyter).
!conda activate eyetrackpdc

# Data parameters
# NOTE(review): BINARY_CLF is not referenced by any cell visible here —
# presumably it selects binary vs. multi-class labels; verify where it is used.
BINARY_CLF = True

# Rocket parameters (passed to ROCKET as n_kernels / normalize in the
# cross-validation cell)
NUM_KERNELS = 1000
NORMALIZE = True

# Classifier parameters (passed to RidgeClassifier as alpha, i.e. the
# regularization strength)
REG_FACTOR = 1e5

# Number of folds in cross validation
K = 5

## Initialize Processor and Datasets

In [2]:
# Read the settings for the Leif processor from the project's config directory
with open(f'{config_path}/leif.yaml', 'r') as config_file:
    config = load_yaml(config_file, Loader=FullLoader)

# Build the data processor from the loaded settings
processor = Leif(config)

# Create the train/validation split first, then the held-out test split;
# both datasets share the same processor instance
train_val_ds, test_ds = (
    KIDataset(data_processor=processor, train=True),
    KIDataset(data_processor=processor, train=False),
)

loaded dataset from C:\Users\hejpa\Documents\GitHub\eye-track-pdc\data\ki\tmp\ki-dataset-train
loaded dataset from C:\Users\hejpa\Documents\GitHub\eye-track-pdc\data\ki\tmp\ki-dataset-test


In [None]:
# K-fold cross validation of ROCKET features followed by a ridge classifier.
#
# `scores` and `accuracies` hold one value per fold (weighted F1 and accuracy).
# `predictions` collects each fold's (pred, val_batch) pair for later
# inspection. It has to be a plain list: the pairs are appended one at a time,
# and a torch tensor (what `zeros(K)` returns) has no `append` method, so the
# loop would fail with AttributeError at the end of the first fold.
scores, accuracies = zeros(K), zeros(K)
predictions = []
for i, (train_ds, val_ds) in tqdm(enumerate(k_fold_cross_validator(train_val_ds, k=K)), unit='fold', total=K):
    # Re-seed before every fold so each fold starts from the same random state
    set_random_state(SEED)

    # A fresh ROCKET transform and classifier are created for every fold.
    # assumes train_ds.x is laid out as (samples, channels, sequence length),
    # as suggested by the c_in / seq_len parameter names — TODO confirm
    pred, val_batch = train_eval_rocket_segment(train_ds, val_ds,
                                                rocket=ROCKET(c_in=train_ds.x.shape[1],
                                                              seq_len=train_ds.x.shape[2],
                                                              n_kernels=NUM_KERNELS,
                                                              normalize=NORMALIZE),
                                                clf=RidgeClassifier(alpha=REG_FACTOR, random_state=SEED))

    # Convert the validation labels once and reuse them for both metrics
    y_true = val_batch.y.numpy()

    # Compute weighted F1 score and accuracy for this fold
    scores[i] = f1_score(y_true, pred, average='weighted')
    accuracies[i] = accuracy_score(y_true, pred)
    predictions.append((pred, val_batch))

# Print average fold score
print(f'average fold f1 score: {scores.mean()}')
# Print score for each fold
print('\n'.join([f'fold {i}: {score}' for i, score in enumerate(scores)]))
# Print average fold accuracy
print(f'average fold accuracy: {accuracies.mean():.2%}')
# Print accuracy for each fold
print('\n'.join([f'fold {i}: {accuracy:.2%}' for i, accuracy in enumerate(accuracies)]))

  0%|          | 0/5 [00:00<?, ?fold/s]