In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys

# Make modules under ../src importable from this notebook
# (presumably where `rocket` and `ucr_utils` live -- confirm against the repo layout).
# The membership check keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path each time.
SRC_DIR = '../src'
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [9]:
import torch
import torch.nn.functional as F

import pandas as pd
import numpy as np

import os
from tqdm.auto import tqdm

from rocket import ROCKET, Logreg
from ucr_utils import load_ucr_seq, train_val_split
from sklearn.linear_model import RidgeClassifierCV

### Settings

In [5]:
# Root folder handed to load_ucr_seq, and the two CSV tables that list the
# datasets to evaluate (they are read in the "Load data" section).
PATH_TO_UCR = '../data/Univariate_ts/'
PATH_TO_BAKEOFF_SEQUENCES = '../data/results_ucr_bakeoff.csv'
PATH_TO_ADDITIONAL_SEQUENCES = '../data/results_ucr_additional.csv'

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

# Seed torch's global random number generator.
torch.manual_seed(0)

<torch._C.Generator at 0x7f574d0bbcf0>

### Load data

In [6]:
# Read both reference tables; each must provide a 'dataset' column, which
# becomes the index below.
bakeoff_sequences = pd.read_csv(PATH_TO_BAKEOFF_SEQUENCES)
additional_sequences = pd.read_csv(PATH_TO_ADDITIONAL_SEQUENCES)

# Stack the two tables, index them by dataset name and add the result columns.
# The placeholders are NaN rather than None so the columns are float64 instead
# of object: arithmetic on them stays numeric, and datasets that have not been
# evaluated yet propagate as NaN instead of raising TypeError on `float - None`.
all_sequences = (
    pd.concat([bakeoff_sequences, additional_sequences])
    .set_index('dataset')
    .assign(accuracy_mean_rep_ridge=np.nan, accuracy_std_rep_ridge=np.nan)
)

In [7]:
# Disabled one-off helper, kept for reference. When uncommented it loads every
# dataset in `all_sequences`, records its number of training samples, number
# of test samples and number of distinct training labels, and writes the
# resulting table to ../results/statistics.csv.
# statistics = {}
# for seq in all_sequences.index:
#     x_train, y_train, x_test, y_test = load_ucr_seq(PATH_TO_UCR, seq, device)
#     statistics[seq] =  dict(
#         n_train = x_train.size(0),
#         n_test = x_test.size(0),
#         n_classes = len(y_train.unique())
#     )

# pd.DataFrame.from_dict(statistics, orient='index').to_csv('../results/statistics.csv')

### Apply an ensemble of ROCKETs with a ridge classifier (reproducing the paper's results)

In [None]:
# Experiment configuration for this cell.
N_ROCKETS = 10                                # ROCKET transforms evaluated per dataset
N_KERNELS = 10000                             # kernels requested from each ROCKET
KERNEL_SIZES = (7, 9, 11)                     # passed to ROCKET as `kernels_sizes_set`
RIDGE_ALPHAS = 10 ** np.linspace(-3, 3, 10)   # alpha grid searched by RidgeClassifierCV
RESULT_COLUMNS = ['accuracy_mean_rep_ridge', 'accuracy_std_rep_ridge']


def _standardize_features(train_features, test_features):
    """Drop constant feature columns and z-score both splits.

    Columns whose standard deviation over the training rows is exactly zero
    are removed from both splits. The remaining columns are centred and scaled
    with the mean and standard deviation of the training split only, so no
    test-set statistic is used.

    Args:
        train_features: 2-D torch tensor, one row per training sample.
        test_features: 2-D torch tensor with the same columns as `train_features`.

    Returns:
        Tuple `(train_standardized, test_standardized)`.
    """
    constant_columns = (train_features.std(0) == 0)
    train_features = train_features[:, ~constant_columns]
    test_features = test_features[:, ~constant_columns]
    # Statistics are taken after masking, exactly as before, so the numbers
    # fed to the classifier are unchanged.
    train_mean, train_std = train_features.mean(0), train_features.std(0)
    return (train_features - train_mean) / train_std, (test_features - train_mean) / train_std


def _ridge_test_accuracy(rocket, x_train, y_train_np, x_test, y_test_np):
    """Fit RidgeClassifierCV on one ROCKET's features and return its test accuracy.

    Args:
        rocket: object exposing `generate_random_features(x)`.
        x_train, x_test: input tensors handed to `rocket.generate_random_features`.
        y_train_np, y_test_np: label arrays (numpy) for the two splits.

    Returns:
        The value of `clf.score` on the standardized test features.
    """
    # Train features are generated before test features (argument order).
    train_features, test_features = _standardize_features(
        rocket.generate_random_features(x_train),
        rocket.generate_random_features(x_test),
    )
    clf = RidgeClassifierCV(alphas=RIDGE_ALPHAS)
    clf.fit(train_features.cpu().numpy(), y_train_np)
    return clf.score(test_features.cpu().numpy(), y_test_np)


for seq in tqdm(all_sequences.index):
    x_train, y_train, x_test, y_test = load_ucr_seq(PATH_TO_UCR, seq, device)

    # The labels are the same for every ROCKET, so convert them once per
    # dataset instead of once per ensemble member.
    y_train_np, y_test_np = y_train.cpu().numpy(), y_test.cpu().numpy()

    # All transforms are constructed before any of them is used, as before.
    # Each one gets its own freshly built kernel-size tensor.
    rocket_ensemble = [
        ROCKET(input_len=x_train.size(2), n_kernels=N_KERNELS,
               kernels_sizes_set=torch.tensor(KERNEL_SIZES), device=device)
        for _ in range(N_ROCKETS)
    ]

    ensemble_acc = [
        _ridge_test_accuracy(rocket, x_train, y_train_np, x_test, y_test_np)
        for rocket in rocket_ensemble
    ]

    all_sequences.loc[seq, 'accuracy_mean_rep_ridge'] = np.mean(ensemble_acc)
    all_sequences.loc[seq, 'accuracy_std_rep_ridge'] = np.std(ensemble_acc)

# Force the result columns to float so the subtraction below is numeric even
# when they were created as object columns; entries never filled in (None)
# become NaN instead of raising TypeError.
all_sequences = all_sequences.astype({column: float for column in RESULT_COLUMNS})
all_sequences['delta_mean'] = all_sequences['accuracy_mean'] - all_sequences['accuracy_mean_rep_ridge']

 25%|████████████████████                                                            | 32/128 [50:34<2:25:21, 90.85s/it]

In [None]:
# Persist the per-dataset results. The target directory is created first:
# DataFrame.to_csv does not create missing folders, and failing here would
# throw away the long-running evaluation above.
RESULTS_CSV = '../results/reproduced_results_ridge.csv'
os.makedirs(os.path.dirname(RESULTS_CSV), exist_ok=True)
all_sequences.to_csv(RESULTS_CSV)