In [49]:
import numpy as np
import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import Dataset, DataLoader

import pytorch_lightning as pl

In [45]:
from typing import Any, Dict, List, Optional, Union, Tuple


class BaseDataset(Dataset):
    def __init__(self, x, y) -> None:
        super().__init__()
        self.x = x
        self.y = y
        
        assert len(self.x) == len(self.y)
        
    def __len__(self):
        return len(self.x)
    
    def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
        return torch.from_numpy(self.x[index]).float(), torch.from_numpy(np.array([self.y[index]]))


def same_padding1d(sequence_length: int, kernel_size: int, stride: Optional[int] = 1, dilation: Optional[int] = 1):
    p = (sequence_length - 1) * stride + (kernel_size - 1) * dilation + 1 - sequence_length
    return p // 2, p - p // 2


class Pad1d(nn.ConstantPad1d):
    def __init__(self, padding: Any, value: Optional[float] = 0.):
        super().__init__(padding, value)


class SameConv(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: Optional[Union[Tuple[int], int]] = 3,
        stride: Optional[int] = 1,
        dilation: Optional[int] = 1,
        bias: Optional[bool] = False
    ) -> None:
        super().__init__()
        self.kernel_size, self.stride, self.dilation = kernel_size, stride, dilation
        # Create the conv module that will be used for same padding
        self.conv1d_same = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            dilation=dilation,
            bias=bias
        )
        self.weight = self.conv1d_same.weight
        if bias == True:
            self.bias = self.conv1d_same.bias
        self.pad = Pad1d
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        self.padding = same_padding1d(x.shape[-1], self.kernel_size, dilation=self.dilation) # Stride: will not be used on padding calculation
        return self.conv1d_same(self.pad(self.padding)(x))


def Conv1d(
    in_channels: int,
    out_channels: int,
    kernel_size: Optional[Union[Tuple[int], int]] = None,
    stride: Optional[int] = 1,
    padding: Optional[Union[str, int]] = 'same',
    dilation: Optional[int] = 1,
    bias: Optional[bool] = False
) -> nn.Module:
    if padding == 'same':
        if kernel_size % 2 == 1:
            conv = nn.Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                padding=kernel_size // 2 * dilation,
                dilation=dilation,
                bias=bias
            )
        else:
            conv = SameConv(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=kernel_size,
                stride=stride,
                dilation=dilation,
                bias=bias
            )
    else:
        conv = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            bias=bias
        )

    return conv


class DeepSVDDAutoEncoder(pl.LightningModule):
    def __init__(self, sequence_length: int, in_channels: int, representation_dim: int = 32) -> None:
        super().__init__()

        self.sequence_length = sequence_length
        self.in_channels = in_channels
        self.representation_dim = representation_dim

        # --- Encoder --- #
        self.encoder_conv1 = Conv1d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=5,
            bias=False,
        )
        self.encoder_bn1 = nn.BatchNorm1d(num_features=8, eps=1e-04, affine=False)
        self.encoder_conv2 = Conv1d(
            in_channels=8,
            out_channels=4,
            kernel_size=5,
            bias=False,
        )
        self.encoder_bn2 = nn.BatchNorm1d(num_features=4, eps=1e-04, affine=False)
        self.encoder_linear = nn.Linear(self.sequence_length * 4, self.representation_dim, bias=False)
        
        # --- Decoder --- #
        self.decoder_linear = nn.Linear(self.representation_dim, self.sequence_length * 4, bias=False)
        self.decoder_conv1 = Conv1d(
            in_channels=4,
            out_channels=4,
            kernel_size=5,
            bias=False,
        )
        self.decoder_bn1 = nn.BatchNorm1d(num_features=4, eps=1e-04, affine=False)
        self.decoder_conv2 = Conv1d(
            in_channels=4,
            out_channels=8,
            kernel_size=5,
            bias=False,
        )
        self.decoder_bn2 = nn.BatchNorm1d(num_features=8, eps=1e-04, affine=False)
        self.decoder_conv3 = Conv1d(
            in_channels=8,
            out_channels=1,
            kernel_size=5,
            bias=False,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        z = F.leaky_relu(self.encoder_bn1(self.encoder_conv1(x)))
        z = F.leaky_relu(self.encoder_bn2(self.encoder_conv2(z)))
        z = z.view(z.size(0), -1)
        z = self.encoder_linear(z) # Final representation output for encoder

        x_hat = self.decoder_linear(z)
        x_hat = x_hat.view(z.size(0), 4, self.sequence_length)
        x_hat = F.leaky_relu(self.decoder_bn1(self.decoder_conv1(x_hat)))
        x_hat = F.leaky_relu(self.decoder_bn2(self.decoder_conv2(x_hat)))
        x_hat = self.decoder_conv3(x_hat) # Final reconstruction output for decoder

        return x_hat, z
    
    def configure_optimizers(self) -> Any:
        # Set optimizer for the autoencoder task
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4, weight_decay=1e-6, amsgrad=False)
        # Set learning rate scheduler
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[250], gamma=0.1)
        return [optimizer], [scheduler]
    
    def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
        x, y = batch
        x_hat, z = self(x)
        
        loss = torch.sum((x_hat - x) ** 2, dim=tuple(range(1, x_hat.dim())))
        loss = torch.mean(loss)
        
        self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)

        return loss
    
    def test_step(self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> List[Dict[str, Any]]:
        x, y = batch
        x_hat, z = self(x)

        loss = torch.sum((x_hat - x) ** 2, dim=tuple(range(1, x_hat.dim())))
        loss = torch.mean(loss)
        
        self.log('test_loss', loss, prog_bar=True, on_step=False, on_epoch=True)

        return


class DeepSVDD(pl.LightningModule):
    def __init__(self, sequence_length: int, in_channels: int, representation_dim: int = 32) -> None:
        super().__init__()

        self.sequence_length = sequence_length
        self.in_channels = in_channels
        self.representation_dim = representation_dim
        
        self.R = torch.tensor(0.0, device=self.device)
        self.nu = 0.1
        self.center = None

        # --- Encoder --- #
        self.encoder_conv1 = Conv1d(
            in_channels=in_channels,
            out_channels=8,
            kernel_size=5,
            bias=False,
        )
        self.encoder_bn1 = nn.BatchNorm1d(num_features=8, eps=1e-04, affine=False)
        self.encoder_conv2 = Conv1d(
            in_channels=8,
            out_channels=4,
            kernel_size=5,
            bias=False,
        )
        self.encoder_bn2 = nn.BatchNorm1d(num_features=4, eps=1e-04, affine=False)
        self.encoder_linear = nn.Linear(self.sequence_length * 4, self.representation_dim, bias=False)
        
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        z = F.leaky_relu(self.encoder_bn1(self.encoder_conv1(x)))
        z = F.leaky_relu(self.encoder_bn2(self.encoder_conv2(z)))
        z = z.view(z.size(0), -1)
        z = self.encoder_linear(z)

        return z

    def init_center(self, loader: DataLoader, eps: Optional[float] = 0.01) -> torch.Tensor:
        n_samples = 0
        center = torch.zeros(self.representation_dim, device=self.device)

        self.eval()
        with torch.no_grad():
            for (x, y) in loader:
                x = x.to(self.device)
                z = self(x)

                n_samples += z.shape[0]
                center += torch.sum(z, dim=0)

        center /= n_samples

        center[(abs(center) < eps) & (center < 0)] = -eps
        center[(abs(center) < eps) & (center > 0)] = eps
        
        return center
    
    def get_radius(self, distance: torch.Tensor, nu: float):
        return np.quantile(np.sqrt(distance.clone().data.cpu().numpy()), 1 - nu)

    def configure_optimizers(self) -> Any:
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-4, weight_decay=1e-6, amsgrad=False)
        scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[150], gamma=0.1)
        return [optimizer], [scheduler]
    
    def training_step(self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> torch.Tensor:
        x, y = batch
        z = self(x)
        
        distance = torch.sum((z - self.center) ** 2, dim=1)
        scores = distance - self.R ** 2
        loss = self.R ** 2 + (1 / self.nu) * torch.mean(torch.max(torch.zeros_like(scores), scores))

        self.log('train_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
        
        if self.current_epoch >= 10:
            self.R.data = torch.tensor(self.get_radius(distance, self.nu), device=self.device)
        
        return loss
    
    def test_step(self, batch: Tuple[torch.Tensor, torch.Tensor], batch_idx: int) -> List[Dict[str, Any]]:
        x, y = batch
        z = self(x)

        distance = torch.sum((z - self.center) ** 2, dim=1)
        scores = distance - self.R ** 2

        preds = torch.max(torch.zeros_like(scores), scores).tolist()
        preds = np.array([1 if pred > 0 else -1 for pred in preds])

        self.log('accuracy_score', accuracy_score(preds, y.cpu().numpy()))
        self.log('f1', f1_score(preds, y.cpu().numpy()))
        self.log('recall', recall_score(preds, y.cpu().numpy()))
        self.log('precision', precision_score(preds, y.cpu().numpy()))

        return


In [46]:
UCR_DATASETS = [
    'Adiac',
    'ArrowHead',
    'Beef',
    'BeetleFly',
    'BirdChicken',
    'Car',
    'CBF',
    'ChlorineConcentration',
    'CinCECGTorso',
    'Coffee',
    'Computers',
    'CricketX',
    'CricketY',
    'CricketZ',
    'DiatomSizeReduction',
    'DistalPhalanxOutlineAgeGroup',
    'DistalPhalanxOutlineCorrect',
    'DistalPhalanxTW',
    'Earthquakes',
    'ECG200',
    'ECG5000',
    'ECGFiveDays',
    'ElectricDevices',
    'FaceAll',
    'FaceFour',
    'FacesUCR',
    'FiftyWords',
    'Fish',
    'FordA',
    'FordB',
    'GunPoint',
    'Ham',
    'HandOutlines',
    'Haptics',
    'Herring',
    'InlineSkate',
    'InsectWingbeatSound',
    'ItalyPowerDemand',
    'LargeKitchenAppliances',
    'Lightning2',
    'Lightning7',
    'Mallat',
    'Meat',
    'MedicalImages',
    'MiddlePhalanxOutlineAgeGroup',
    'MiddlePhalanxOutlineCorrect',
    'MiddlePhalanxTW',
    'MoteStrain',
    'NonInvasiveFetalECGThorax1',
    'NonInvasiveFetalECGThorax2',
    'OliveOil',
    'OSULeaf',
    'PhalangesOutlinesCorrect',
    'Phoneme',
    'Plane',
    'ProximalPhalanxOutlineAgeGroup',
    'ProximalPhalanxOutlineCorrect',
    'ProximalPhalanxTW',
    'RefrigerationDevices',
    'ScreenType',
    'ShapeletSim',
    'ShapesAll',
    'SmallKitchenAppliances',
    'SonyAIBORobotSurface1',
    'SonyAIBORobotSurface2',
    'StarLightCurves',
    'Strawberry',
    'SwedishLeaf',
    'Symbols',
    'SyntheticControl',
    'ToeSegmentation1',
    'ToeSegmentation2',
    'Trace',
    'TwoLeadECG',
    'TwoPatterns',
    'UWaveGestureLibraryAll',
    'UWaveGestureLibraryX',
    'UWaveGestureLibraryY',
    'UWaveGestureLibraryZ',
    'Wafer',
    'Wine',
    'WordSynonyms',
    'Worms',
    'WormsTwoClass',
    'Yoga',
    'ACSF1',
    'BME',
    'Chinatown',
    'Crop',
    'EOGHorizontalSignal',
    'EOGVerticalSignal',
    'EthanolLevel',
    'FreezerRegularTrain',
    'FreezerSmallTrain',
    'Fungi',
    'GunPointAgeSpan',
    'GunPointMaleVersusFemale',
    'GunPointOldVersusYoung',
    'HouseTwenty',
    'InsectEPGRegularTrain',
    'InsectEPGSmallTrain',
    'MixedShapesRegularTrain',
    'MixedShapesSmallTrain',
    'PigAirwayPressure',
    'PigArtPressure',
    'PigCVP',
    'PowerCons',
    'Rock',
    'SemgHandGenderCh2',
    'SemgHandMovementCh2',
    'SemgHandSubjectCh2',
    'SmoothSubspace',
    'UMD'
]

In [47]:
results = {
    'dataset': [],
    'model': [],
    'label': [],
    'accuracy': [],
    'f1': [],
    'recall': [],
    'precision': [],
}

In [51]:
for dataset in UCR_DATASETS:
    print(f'Starting experiments with {dataset} dataset...')
    # Load the data from .tsv files
    train_data = np.genfromtxt(f'../data/ucr/{dataset}/{dataset}_TRAIN.tsv')
    x_train, y_train = train_data[:, 1:], train_data[:, 0]
    
    test_data = np.genfromtxt(f'../data/ucr/{dataset}/{dataset}_TEST.tsv')
    x_test, y_test = test_data[:, 1:], test_data[:, 0]
    
    unique_labels = np.unique(y_train)
    for label in unique_labels:
        print(f'\tClassifying the label {label}...')
        # Filter samples from positive label
        x_train_ = x_train[y_train == label]
        y_train_ = y_train[y_train == label]

        y_test_ = np.array([1 if y_true == label else -1 for y_true in y_test])
        
        # Apply z normalization
        std_ = x_train_.std(axis=1, keepdims=True)
        std_[std_ == 0] = 1.0
        x_train_ = (x_train_ - x_train_.mean(axis=1, keepdims=True)) / std_
        
        std_ = x_test.std(axis=1, keepdims=True)
        std_[std_ == 0] = 1.0
        x_test = (x_test - x_test.mean(axis=1, keepdims=True)) / std_
    
        
        x_train_ = np.expand_dims(x_train_, axis=1)
        x_test = np.expand_dims(x_test, axis=1)
        
        train_set = BaseDataset(x=x_train_, y=y_train_)
        test_set = BaseDataset(x=x_test, y=y_test_)
        
        train_loader = DataLoader(train_set, batch_size=16)
        test_loader = DataLoader(test_set, batch_size=16)

        # Train the autoencoder to learn the data representation over the new space
        autoencoder = DeepSVDDAutoEncoder(x_train_.shape[-1], in_channels=1)
        trainer = pl.Trainer(max_epochs=350, accelerator='gpu', devices=-1)
        trainer.fit(autoencoder, train_dataloaders=train_loader)
        
        deepsvdd = DeepSVDD(sequence_length=x_train_.shape[-1], in_channels=1)
        deepsvdd.to(torch.device('cuda'))

        center = deepsvdd.init_center(train_loader)
        deepsvdd.center = center

        trainer_deepsvdd = pl.Trainer(max_epochs=350, accelerator='gpu', devices=-1)
        trainer_deepsvdd.fit(deepsvdd, train_dataloaders=train_loader)
        
        metrics = trainer_deepsvdd.test(deepsvdd, dataloaders=test_loader)[0]
        
        results['dataset'].append(dataset)
        results['model'].append('deepsvdd')
        results['label'].append(label)
        results['accuracy'].append(metrics['accuracy_score'].item())
        results['f1'].append(metrics['f1'].item())
        results['recall'].append(metrics['recall'].item())
        results['precision'].append(metrics['precision'].item())




GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name           | Type        | Params
------------------------------------------------
0  | encoder_conv1  | Conv1d      | 40    
1  | encoder_bn1    | BatchNorm1d | 0     
2  | encoder_conv2  | Conv1d      | 160   
3  | encoder_bn2    | BatchNorm1d | 0     
4  | encoder_linear | Linear      | 22.5 K
5  | decoder_linear | Linear      | 22.5 K
6  | decoder_conv1  | Conv1d      | 80    
7  | decoder_bn1    | BatchNorm1d | 0     
8  | 

Starting experiments with Adiac dataset...
	Classifying the label 1.0...
Epoch 349: 100%|██████████| 1/1 [00:00<00:00, 201.35it/s, v_num=14, train_loss=7.590]

`Trainer.fit` stopped: `max_epochs=350` reached.


Epoch 349: 100%|██████████| 1/1 [00:00<00:00, 89.40it/s, v_num=14, train_loss=7.590] 


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name           | Type        | Params
-----------------------------------------------
0 | encoder_conv1  | Conv1d      | 40    
1 | encoder_bn1    | BatchNorm1d | 0     
2 | encoder_conv2  | Conv1d      | 160   
3 | encoder_bn2    | BatchNorm1d | 0     
4 | encoder_linear | Linear      | 22.5 K
-----------------------------------------------
22.7 K    Trainable params
0         Non-trainable params
22.7 K    Total params
0.091     To

Epoch 349: 100%|██████████| 1/1 [00:00<00:00, 203.86it/s, v_num=15, train_loss=0.0448]

`Trainer.fit` stopped: `max_epochs=350` reached.


Epoch 349: 100%|██████████| 1/1 [00:00<00:00, 112.17it/s, v_num=15, train_loss=0.0448]

You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]



Testing DataLoader 0: 100%|██████████| 25/25 [00:00<00:00, 173.22it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
     accuracy_score         0.1585677749360614
           f1              0.016505835070601133
        precision           0.12276214833759591
         recall            0.008853039543576628
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


KeyError: 'dataset'