# ROCKET: Exceptionally Fast and Accurate Time Series Classification using Random Convolutional Kernels

This is a prototype implementation of the *ROCKET*.

### Method:
- transform time series using a large number of random convolutional kernels (random length, weights, bias, dilation, padding) to create features
- use the transformed features to train a linear classifier

##### The features are:
- the maximum value (equivalent to global max pooling)
- the proportion of positive values (*ppv*)

We create the random kernels ourselves, but use a `sklearn`'s builtin linear classifier.

---

## Coding

Imports:

In [1]:
# Numpy imports
import numpy as np

# Linear classifier imports
from sklearn.linear_model import LogisticRegression, RidgeClassifierCV

As stated in the paper, it is best to use `RidgeClassifier` over `LogisticRegression` as long as the dataset is not too large ($#examples << #features$).

For small datasets, the regularization is super-important, and `RidgeClassifierCV` allows tuning this hyper-parameter quickly.

In [2]:
# PyTorch imports
import torch
from torch import nn, utils

---

### Model

In [3]:
class ROCKET(nn.Module):
    def __init__(self, input_length, num_kernels=10000):
        super().__init__()
        
        self.kernels = self.sample(input_length, num_kernels)
    
    def sample(self, input_length, num_kernels):
        """
        Args:
        - input_length: length of input time series
        - num_kernels: number of kernels
        
        Returns randomly created kernels
        """
        kernels = []
        kernel_sizes = np.random.choice([7, 9, 11], num_kernels)
        unique, counts = np.unique(kernel_sizes, return_counts=True)
        
        for kernel_size, count in dict(zip(unique, counts)).items():
            weights = torch.normal(0, 1, size=(count, kernel_size), dtype=torch.double)
            weights -= weights.mean()
            biases = 2 * torch.rand(count, dtype=torch.double) - 1
            
            paddings = torch.randint(0, 2, (count,))

            A = np.log2((input_length - 1) / (kernel_size - 1))
            x = A * torch.rand(count)
            dilations = torch.floor(2 ** x)

            for i in range(count):
                padding = int(paddings[i] * torch.floor(dilations[i] * (kernel_size - 1) / 2))
                dilation = int(dilations[i])
                
                kernel = nn.Conv1d(in_channels=1, out_channels=1,
                                   kernel_size=kernel_size, stride=1,
                                   padding=padding, dilation=dilation)
                
                kernel.weight = nn.Parameter(weights[i].unsqueeze(0).unsqueeze(0))
                kernel.bias = nn.Parameter(biases[i].unsqueeze(0))
                
                kernels.append(kernel)
        
        return kernels
    
    def transform(self, X):
        features = torch.zeros((X.shape[0], len(self.kernels), 2))
        
        with torch.no_grad():
            for idx, kernel in enumerate(self.kernels):
                trans = kernel(X)
                
                maxv = trans.max(dim=2).values

                pos = torch.zeros_like(trans)
                pos[trans > 0] = 1
                ppv = pos.count_nonzero(dim=2) / float(trans.shape[2])
                
                features[:, idx, 0] = maxv.squeeze(-1)
                features[:, idx, 1] = ppv.squeeze(-1)
        
        return features.view(X.shape[0], 2 * len(self.kernels))

---

## Application

In [4]:
import pandas as pd

In [5]:
import pathlib

In [6]:
DATA_DIR = pathlib.Path("data/")

### FordA

In [7]:
DATASET = "FordA"

In [8]:
train = pd.read_table(DATA_DIR.joinpath(DATASET, f"{DATASET}_TRAIN.tsv"), header=None)
test = pd.read_table(DATA_DIR.joinpath(DATASET, f"{DATASET}_TEST.tsv"), header=None)

In [9]:
train.notnull().sum(axis=1).sort_values()

0       501
2393    501
2394    501
2395    501
2396    501
       ... 
1204    501
1205    501
1206    501
1236    501
3600    501
Length: 3601, dtype: int64

In [10]:
test.notnull().sum(axis=1).sort_values()

0       501
883     501
882     501
881     501
880     501
       ... 
437     501
436     501
435     501
433     501
1319    501
Length: 1320, dtype: int64

In [11]:
train.shape

(3601, 501)

In [12]:
test.shape

(1320, 501)

In [13]:
train.shape

(3601, 501)

In [14]:
train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,491,492,493,494,495,496,497,498,499,500
0,-1,-0.797172,-0.664392,-0.373015,0.040815,0.526936,0.984288,1.35312,1.578108,1.659251,...,1.120756,0.722417,0.362068,0.092083,-0.081268,-0.212573,-0.391456,-0.664392,-1.073796,-1.564343
1,1,0.804855,0.634629,0.373474,0.038343,-0.340988,-0.74086,-1.109667,-1.395357,-1.570192,...,0.386403,0.049213,-0.258138,-0.510583,-0.683647,-0.773817,-0.785255,-0.714885,-0.560443,-0.319086
2,-1,0.727985,0.111284,-0.499124,-1.068629,-1.578351,-1.990534,-2.302031,-2.503403,-2.585211,...,0.394463,0.463685,0.507735,0.517174,0.504588,0.47627,0.438513,0.394463,0.3394,0.255391
3,-1,-0.234439,-0.502157,-0.732488,-0.946128,-1.139739,-1.323336,-1.490243,-1.607077,-1.62043,...,-0.952804,-0.929437,-0.922761,-0.929437,-0.909409,-0.83597,-0.695768,-0.47879,-0.188707,0.119736
4,-1,-0.171328,-0.062285,0.235829,0.710396,1.239969,1.649823,1.876321,1.865535,1.703751,...,0.776188,0.725496,0.697453,0.731967,0.808545,0.839823,0.733046,0.43752,-0.026585,-0.602213


In [15]:
train.iloc[:, 1:].values.shape

(3601, 500)

In [16]:
train.iloc[:, 0].values.shape

(3601,)

#### Data

In [17]:
X_train = torch.DoubleTensor(train.iloc[:,1:].values).unsqueeze(1)
y_train = train.iloc[:, 0].values

In [18]:
X_test = torch.DoubleTensor(test.iloc[:,1:].values).unsqueeze(1)
y_test = test.iloc[:, 0].values

In [19]:
X_train.shape, y_train.shape

(torch.Size([3601, 1, 500]), (3601,))

In [20]:
X_train.dtype, y_train.dtype

(torch.float64, dtype('int64'))

In [21]:
X_train.type(), X_test.type()

('torch.DoubleTensor', 'torch.DoubleTensor')

### Setup

In [22]:
INPUT_LENGTH = train.shape[1]
NUM_KERNELS = 1000

In [23]:
model = ROCKET(input_length=INPUT_LENGTH, num_kernels=NUM_KERNELS)

Since we're not interested in any convolution training:

In [24]:
model.eval()

ROCKET()

#### Device

In [25]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

In [26]:
X_train = X_train.to(device)
X_test = X_test.to(device)

In [27]:
X_train.type(), X_test.type()

('torch.cuda.DoubleTensor', 'torch.cuda.DoubleTensor')

In [28]:
model = model.to(device)

for idx, kernel in enumerate(model.kernels):
    model.kernels[idx] = kernel.to(device)

In [29]:
model.kernels[0].weight.type()

'torch.cuda.DoubleTensor'

#### Train

In [30]:
%%time
features_train = model.transform(X_train).numpy()

Wall time: 16.1 s


In [31]:
features_train.shape

(3601, 2000)

In [32]:
%%time
classifier = RidgeClassifierCV()
classifier.fit(features_train, y_train)

Wall time: 8.44 s


RidgeClassifierCV(alphas=array([ 0.1,  1. , 10. ]))

#### Test

In [33]:
%%time
features_test = model.transform(X_test).numpy()

Wall time: 6.06 s


In [34]:
%%time
preds = classifier.predict(features_test)
acc = sum(preds == y_test) / float(len(y_test))

print(f"Accuracy {100*acc:.5}%")

Accuracy 92.121%
Wall time: 21 ms
