In [1]:
# -*- coding: utf-8 -*-
"""Rocket transformer."""

__author__ = "angus924"
__all__ = ["Rocket"]

import multiprocessing

import numpy as np
import pandas as pd
from numba import get_num_threads, njit, prange, set_num_threads

from sktime.transformations.base import BaseTransformer


class Rocket(BaseTransformer):
    """ROCKET.

    RandOm Convolutional KErnel Transform

    @article{dempster_etal_2019,
      author  = {Dempster, Angus and Petitjean, Francois and Webb,
      Geoffrey I},
      title   = {ROCKET: Exceptionally fast and accurate time series
      classification using random convolutional kernels},
      year    = {2019},
      journal = {arXiv:1910.13051}
    }

    Each random kernel contributes two features per instance (see
    ``_apply_kernels``), so the transformed output has ``2 * num_kernels``
    columns.

    Parameters
    ----------
    num_kernels : int, optional (default=10,000)
        Number of random convolutional kernels.
    normalise : bool, optional (default=True)
        Whether or not to normalise the input time series per instance
        (and per dimension) before applying the kernels.
    n_jobs : int, optional (default=1)
        The number of threads to use in ``transform``. Values below 1
        (e.g. ``-1``) or above the number of available processors mean
        "use all processors".
    random_state : int or None, optional (default=None)
        Random seed. Anything that is not an ``int`` is replaced by ``None``
        (no seeding) for compatibility with Numba.

    Attributes
    ----------
    n_columns : int
        Number of dimensions of the series seen in ``fit``.
    kernels : tuple of np.ndarray
        Kernel parameters as returned by ``_generate_kernels``.
    """

    _tags = {
        "univariate-only": False,
        "fit_is_empty": False,
        "scitype:transform-input": "Series",
        "scitype:transform-output": "Primitives",
        "scitype:instancewise": False,  # is this an instance-wise transform?
        "X_inner_mtype": "numpy3D",  # X reaches _fit/_transform as a 3D array
        "y_inner_mtype": "None",  # y is ignored by _fit/_transform
    }

    def __init__(self, num_kernels=10_000, normalise=True, n_jobs=1, random_state=None):
        self.num_kernels = num_kernels
        self.normalise = normalise
        self.n_jobs = n_jobs
        # Only integer seeds can be handed to the Numba-compiled generator.
        self.random_state = random_state if isinstance(random_state, int) else None
        super(Rocket, self).__init__()

    def _fit(self, X, y=None):
        """Generate random kernels adjusted to time series shape.

        Infers the series length and the number of channels / dimensions
        from the shape of the input array and generates the random kernels.
        Only ``X.shape`` is read; the values of ``X`` are not used here.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
            panel of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
        self
        """
        _, self.n_columns, n_timepoints = X.shape
        self.kernels = _generate_kernels(
            n_timepoints, self.num_kernels, self.n_columns, self.random_state
        )
        return self

    def _transform(self, X, y=None):
        """Transform input time series using random convolutional kernels.

        Parameters
        ----------
        X : 3D np.ndarray of shape = [n_instances, n_dimensions, series_length]
            panel of time series to transform
        y : ignored argument for interface compatibility

        Returns
        -------
        pandas DataFrame of shape [n_instances, 2 * num_kernels],
        transformed features
        """
        # Local import: only this method needs Numba's runtime configuration.
        from numba import config as _numba_config

        if self.normalise:
            # Small epsilon keeps constant series from dividing by zero.
            X = (X - X.mean(axis=-1, keepdims=True)) / (
                X.std(axis=-1, keepdims=True) + 1e-8
            )

        # set_num_threads rejects values above NUMBA_NUM_THREADS, which can be
        # lower than the machine's CPU count (env var, CPU affinity), so the
        # "all processors" fallback is capped by both limits.
        max_threads = min(
            multiprocessing.cpu_count(), _numba_config.NUMBA_NUM_THREADS
        )
        if self.n_jobs < 1 or self.n_jobs > max_threads:
            n_jobs = max_threads
        else:
            n_jobs = self.n_jobs

        prev_threads = get_num_threads()
        set_num_threads(n_jobs)
        try:
            t = pd.DataFrame(_apply_kernels(X.astype(np.float32), self.kernels))
        finally:
            # Always restore the caller's thread setting, even on failure.
            set_num_threads(prev_threads)
        return t


@njit(
    "Tuple((float32[:],int32[:],float32[:],int32[:],int32[:],int32[:],"
    "int32[:]))(int32,int32,int32,optional(int32))",
    cache=True,
)
def _generate_kernels(n_timepoints, num_kernels, n_columns, seed):
    """Draw the random parameters of ``num_kernels`` convolutional kernels.

    Parameters
    ----------
    n_timepoints : int
        Length of the series the kernels will be applied to.
    num_kernels : int
        Number of kernels to generate.
    n_columns : int
        Number of channels / dimensions available to sample from.
    seed : int or None
        If not None, passed to ``np.random.seed`` before any draw.

    Returns
    -------
    tuple of 1D arrays
        ``(weights, lengths, biases, dilations, paddings,
        num_channel_indices, channel_indices)``. ``weights`` and
        ``channel_indices`` are flat concatenations over all kernels; kernel
        ``i`` owns ``num_channel_indices[i] * lengths[i]`` consecutive weights
        and ``num_channel_indices[i]`` consecutive channel indices.
    """
    if seed is not None:
        np.random.seed(seed)

    # Every kernel gets a length picked at random from these candidates.
    candidate_lengths = np.array((7, 9, 11), dtype=np.int32)
    lengths = np.random.choice(candidate_lengths, num_kernels).astype(np.int32)

    # Number of channels each kernel reads: 2 ** U(0, log2(limit + 1)),
    # truncated to an integer when stored in the int32 array.
    num_channel_indices = np.zeros(num_kernels, dtype=np.int32)
    for i in range(num_kernels):
        limit = min(n_columns, lengths[i])
        num_channel_indices[i] = 2 ** np.random.uniform(0, np.log2(limit + 1))

    channel_indices = np.zeros(num_channel_indices.sum(), dtype=np.int32)

    # Total weight count = sum over kernels of (length * number of channels).
    weights = np.zeros(
        np.int32(
            np.dot(lengths.astype(np.float32), num_channel_indices.astype(np.float32))
        ),
        dtype=np.float32,
    )
    biases = np.zeros(num_kernels, dtype=np.float32)
    dilations = np.zeros(num_kernels, dtype=np.int32)
    paddings = np.zeros(num_kernels, dtype=np.int32)

    a1 = 0  # for weights
    a2 = 0  # for channel_indices

    for i in range(num_kernels):

        _length = lengths[i]
        _num_channel_indices = num_channel_indices[i]

        # Standard-normal weights for every (channel, position) of this kernel.
        _weights = np.random.normal(0, 1, _num_channel_indices * _length).astype(
            np.float32
        )

        b1 = a1 + (_num_channel_indices * _length)
        b2 = a2 + _num_channel_indices

        # Mean-centre the weights of each channel separately.
        a3 = 0  # for weights (per channel)
        for _ in range(_num_channel_indices):
            b3 = a3 + _length
            _weights[a3:b3] = _weights[a3:b3] - _weights[a3:b3].mean()
            a3 = b3

        weights[a1:b1] = _weights

        # Distinct channels for this kernel (sampled without replacement).
        channel_indices[a2:b2] = np.random.choice(
            np.arange(0, n_columns), _num_channel_indices, replace=False
        )

        biases[i] = np.random.uniform(-1, 1)

        # Dilation: 2 ** U(0, log2((n_timepoints - 1) / (length - 1))),
        # truncated to an integer.
        dilation = 2 ** np.random.uniform(
            0, np.log2((n_timepoints - 1) / (_length - 1))
        )
        dilation = np.int32(dilation)
        dilations[i] = dilation

        # Either pad by half the dilated kernel span or not at all,
        # depending on whether randint(2) returns 1.
        padding = ((_length - 1) * dilation) // 2 if np.random.randint(2) == 1 else 0
        paddings[i] = padding

        a1 = b1
        a2 = b2

    return (
        weights,
        lengths,
        biases,
        dilations,
        paddings,
        num_channel_indices,
        channel_indices,
    )


@njit(fastmath=True, cache=True)
def _apply_kernel_univariate(X, weights, length, bias, dilation, padding):
    """Convolve one single-channel kernel over one series.

    Parameters
    ----------
    X : 1D array
        The series (one channel of one instance).
    weights : 1D array
        Kernel weights, one per kernel position.
    length : int
        Number of kernel positions.
    bias : float
        Value added to every convolution output.
    dilation : int
        Step between consecutive kernel positions in the series.
    padding : int
        Number of implicit zeros on each side of the series.

    Returns
    -------
    (float32, float32)
        Proportion of convolution outputs that are positive, and the
        maximum convolution output.
    """
    n_timepoints = len(X)

    output_length = (n_timepoints + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    # np.NINF was removed in NumPy 2.0; -np.inf is the same value and works
    # on every NumPy version.
    _max = -np.inf

    end = (n_timepoints + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            # Positions that fall into the padding contribute nothing.
            if index > -1 and index < n_timepoints:
                _sum = _sum + weights[j] * X[index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return np.float32(_ppv / output_length), np.float32(_max)


@njit(fastmath=True, cache=True)
def _apply_kernel_multivariate(
    X, weights, length, bias, dilation, padding, num_channel_indices, channel_indices
):
    """Convolve one multi-channel kernel over one multivariate series.

    Parameters
    ----------
    X : 2D array of shape (n_columns, n_timepoints)
        One instance with all of its channels.
    weights : 2D array indexed as [channel, position]
        Kernel weights for each selected channel.
    length : int
        Number of kernel positions.
    bias : float
        Value added to every convolution output.
    dilation : int
        Step between consecutive kernel positions in the series.
    padding : int
        Number of implicit zeros on each side of the series.
    num_channel_indices : int
        Number of channels this kernel reads.
    channel_indices : 1D int array
        Rows of ``X`` read by this kernel, aligned with the first axis of
        ``weights``.

    Returns
    -------
    (float32, float32)
        Proportion of convolution outputs that are positive, and the
        maximum convolution output.
    """
    n_columns, n_timepoints = X.shape

    output_length = (n_timepoints + (2 * padding)) - ((length - 1) * dilation)

    _ppv = 0
    # np.NINF was removed in NumPy 2.0; -np.inf is the same value and works
    # on every NumPy version.
    _max = -np.inf

    end = (n_timepoints + padding) - ((length - 1) * dilation)

    for i in range(-padding, end):

        _sum = bias

        index = i

        for j in range(length):

            # Positions that fall into the padding contribute nothing.
            if index > -1 and index < n_timepoints:

                # Sum the contribution of every channel at this position.
                for k in range(num_channel_indices):
                    _sum = _sum + weights[k, j] * X[channel_indices[k], index]

            index = index + dilation

        if _sum > _max:
            _max = _sum

        if _sum > 0:
            _ppv += 1

    return np.float32(_ppv / output_length), np.float32(_max)


@njit(
    "float32[:,:](float32[:,:,:],Tuple((float32[::1],int32[:],float32[:],"
    "int32[:],int32[:],int32[:],int32[:])))",
    parallel=True,
    fastmath=True,
    cache=True,
)
def _apply_kernels(X, kernels):
    """Apply every kernel to every instance and collect the features.

    Parameters
    ----------
    X : 3D float32 array of shape (n_instances, n_columns, n_timepoints)
        Panel of series to transform.
    kernels : tuple of 1D arrays
        ``(weights, lengths, biases, dilations, paddings,
        num_channel_indices, channel_indices)`` in the flat layout produced
        by ``_generate_kernels``.

    Returns
    -------
    2D float32 array of shape (n_instances, 2 * num_kernels)
        For kernel ``k``, columns ``2k`` and ``2k + 1`` hold the two values
        returned by the per-kernel routine.
    """
    weights = kernels[0]
    lengths = kernels[1]
    biases = kernels[2]
    dilations = kernels[3]
    paddings = kernels[4]
    num_channel_indices = kernels[5]
    channel_indices = kernels[6]

    n_instances = X.shape[0]
    num_kernels = len(lengths)

    # Two features per kernel.
    features = np.zeros((n_instances, num_kernels * 2), dtype=np.float32)

    # Instances are independent, so the outer loop runs in parallel.
    for inst in prange(n_instances):

        w_start = 0  # offset of the current kernel in `weights`
        c_start = 0  # offset of the current kernel in `channel_indices`

        for k in range(num_kernels):

            n_channels = num_channel_indices[k]
            k_length = lengths[k]

            w_stop = w_start + n_channels * k_length
            c_stop = c_start + n_channels
            f_start = 2 * k
            f_stop = f_start + 2

            if n_channels == 1:
                # Single-channel kernel: work on the one selected row.
                features[inst, f_start:f_stop] = _apply_kernel_univariate(
                    X[inst, channel_indices[c_start]],
                    weights[w_start:w_stop],
                    k_length,
                    biases[k],
                    dilations[k],
                    paddings[k],
                )
            else:
                # Multi-channel kernel: view its weights as (channel, position).
                kernel_weights = weights[w_start:w_stop].reshape(
                    (n_channels, k_length)
                )
                features[inst, f_start:f_stop] = _apply_kernel_multivariate(
                    X[inst],
                    kernel_weights,
                    k_length,
                    biases[k],
                    dilations[k],
                    paddings[k],
                    n_channels,
                    channel_indices[c_start:c_stop],
                )

            w_start = w_stop
            c_start = c_stop

    return features.astype(np.float32)

In [2]:
# Working directory of the notebook kernel (shown as the cell output).
from pathlib import Path

path = Path.cwd()
path

WindowsPath('g:/My Drive/1-time-series classification in manufacturing/Code')

In [3]:
## Load the train/test splits and report the shape of the combined panel
path_dir = 'g:/My Drive/1-time-series classification in manufacturing/Datasets'
DSname = 'Etching_dataset'
path = f"{path_dir}/{DSname}/"

# Features are loaded as stored; labels are flattened to 1D vectors.
X_train, X_test = (np.load(path + fname) for fname in ('X_train.npy', 'X_test.npy'))
y_train, y_test = (
    np.load(path + fname).reshape((-1,)) for fname in ('y_train.npy', 'y_test.npy')
)

# Stack train and test along the sample axis to describe the whole dataset.
ts = np.concatenate((X_train, X_test), axis=0)

print(f"\n The dataset shape is:{ts.shape}")
print(f"\n The number of data samples (N) is:{ts.shape[0]}")
print(f"\n The number of TS length (T) is:{ts.shape[1]}")
print(f"\n The number of TS dimention (M) is:{ts.shape[2]}")


 The dataset shape is:(129, 112, 19)

 The number of data samples (N) is:129

 The number of TS length (T) is:112

 The number of TS dimention (M) is:19


In [4]:
# Reorder the axes from (N, T, M) to (n_instances, n_dimensions, series_length),
# the layout documented for Rocket's input.
# The swapped arrays are taken from `ts` (train followed by test, still in the
# original layout) instead of from X_train / X_test themselves, so re-running
# this cell does not flip the axes back. The sample axis is untouched by the
# swap, so `n_train` is the same on every run.
n_train = X_train.shape[0]
X_train = np.swapaxes(ts[:n_train], 1, 2)
X_test = np.swapaxes(ts[n_train:], 1, 2)

In [11]:
# Kernels are drawn at random, so fix the seed to make the features below
# reproducible across kernel restarts. Rocket's own _fit reads only the shape
# of its input; n_jobs=-1 asks for all available processors.
tranformer = Rocket(n_jobs=-1, num_kernels=2002, random_state=42)
X_test_t = tranformer.fit_transform(X_test)

In [12]:
# Shape of the test panel after the axis swap
X_test.shape

(26, 19, 112)

In [14]:
# Transformed test features: one row per instance, two columns per kernel
X_test_t

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,3994,3995,3996,3997,3998,3999,4000,4001,4002,4003
0,0.658537,7.738423,0.392857,7.90082,0.098039,2.199746,0.241071,4.738143,0.491071,6.531595,...,0.375,8.045144,0.486111,14.517509,0.401786,6.404209,0.8125,4.75951,0.298077,5.525523
1,0.670732,6.932831,0.339286,7.75491,0.098039,2.031164,0.303571,6.8461,0.535714,7.235452,...,0.410714,9.09806,0.375,14.165912,0.455357,5.934387,0.848214,5.642019,0.298077,5.054339
2,0.597561,8.78651,0.366071,6.649922,0.098039,1.931009,0.303571,5.85868,0.580357,7.380602,...,0.410714,10.986277,0.458333,12.965539,0.392857,6.99521,0.848214,5.816529,0.240385,5.78091
3,0.658537,9.408181,0.330357,5.504756,0.107843,2.020632,0.285714,9.295912,0.535714,7.24221,...,0.419643,9.855467,0.486111,18.611555,0.4375,6.524563,0.883929,5.013162,0.25,6.91359
4,0.658537,11.14635,0.383929,6.946706,0.107843,1.934609,0.285714,8.030827,0.482143,9.884404,...,0.401786,8.958074,0.388889,20.284845,0.446429,4.193433,0.839286,5.404426,0.240385,5.384228
5,0.695122,9.807899,0.357143,5.312036,0.098039,1.874545,0.3125,7.932706,0.535714,7.608558,...,0.375,8.033159,0.513889,12.157357,0.410714,5.657496,0.875,6.22421,0.230769,7.245308
6,0.670732,9.855147,0.3125,4.767713,0.117647,2.163522,0.276786,6.008308,0.580357,5.676947,...,0.383929,8.53651,0.458333,19.991161,0.4375,6.494547,0.875,4.558971,0.307692,5.439619
7,0.682927,10.65685,0.3125,9.404527,0.098039,2.183964,0.241071,7.680702,0.517857,9.589148,...,0.401786,9.395132,0.347222,21.234489,0.419643,5.438184,0.866071,5.388599,0.201923,4.043894
8,0.646341,9.233398,0.375,10.40685,0.098039,1.824014,0.258929,8.84764,0.589286,11.312515,...,0.383929,9.092612,0.402778,16.249327,0.419643,6.248868,0.8125,5.901835,0.355769,5.329819
9,0.682927,11.198006,0.357143,6.539944,0.098039,2.085662,0.285714,12.815554,0.517857,7.735493,...,0.446429,13.095354,0.375,19.086979,0.473214,5.433387,0.821429,4.840829,0.288462,5.936163
