In [None]:
##########################
#### standard library ####
##########################
import os
import sys
import time
import warnings
import random
from typing import List, Tuple, Dict, Any, Union, Optional, Callable
import shutil
# warnings.filterwarnings("ignore")

###################
#### 3rd party ####
###################
import torch
import torchaudio
import torchvision
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import AdamW
from torch.utils.data.sampler import WeightedRandomSampler
from torch.utils.data import DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.cuda.amp import GradScaler, autocast #amp = automatic mixed precision
import lightning as L
import numpy as np
import pandas as pd
from tqdm import tqdm
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt

######################
#### my own files ####
######################
sys.path.append(os.path.dirname(os.getcwd()))
sys.path.append(os.path.dirname(os.path.dirname(os.getcwd())))
from utils.utils import print_name, print_shape
from rocket import Rocket, RocketFeatures
from ridge_loocv import fit_ridge_LOOCV

# Keep printed NumPy arrays compact: 3 digits of floating-point precision, and
# summarise (with "...") any array holding more than 5 elements.
np.set_printoptions(precision=3, threshold=5) # Print options

# Config

In [None]:
class CFG:
    """Static configuration namespace read by the cells of this notebook.

    All values are plain class attributes, accessed as ``CFG.<name>``;
    the class is never instantiated in the visible cells.
    """

    # --- filesystem locations (absolute, machine-specific paths) ---
    data_dir = "/home/nikita/Code/zephyrox/Data/Ford/"
    logs_dir = "/home/nikita/Code/zephyrox/Data/Ford/logs/"

    # --- reproducibility and hardware ---
    seed = 42
    device = "cpu"  # if torch.cuda.is_available() else 'cpu'

    # --- training loop sizes ---
    epochs = 30
    batch_size = 64
    n_folds = 5

    # --- optimisation: optimizer class plus cosine-annealing bounds ---
    optimizer = torch.optim.AdamW  # AdamW, Adam
    lr = 1e-4       # initial learning rate
    lr_min = 1e-6   # floor handed to the cosine scheduler as eta_min
    weight_decay = 1e-3
    gradient_clip_val = 10.0

    # --- model size ---
    n_kernels = 10_000

## 🌱 Seed Everything

In [None]:
def set_seed(seed: int):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU plus CUDA), exports ``PYTHONHASHSEED``, and switches cuDNN to
    deterministic, non-benchmarking mode.

    Args:
        seed (int): Value handed unchanged to each seeding hook.
    """
    # NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so this export
    # affects processes launched afterwards, not the hashing of this kernel.
    os.environ["PYTHONHASHSEED"] = str(seed)

    # Every hook takes the same integer, so apply them in a single pass.
    seeding_hooks = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for hook in seeding_hooks:
        hook(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False
    
# Seed all generators once, before any data loading or model construction below.
set_seed(CFG.seed)

## (binary) Time Series Classification Data

In [None]:
# Print the different datasets
from aeon.datasets.tsc_datasets import multivariate, univariate, univariate_equal_length
from aeon.datasets import load_classification

def get_aeon_dataset(
        dataset_name:str,
        ):
    """Fetch the train and test splits of one dataset through aeon's
    ``load_classification``.

    Args:
        dataset_name (str): Dataset identifier forwarded to
            ``load_classification``.

    Returns:
        Tuple: ``(X_train, y_train, X_test, y_test)``.
    """
    # Load "train" first, then "test"; each call yields an (X, y) pair.
    (X_train, y_train), (X_test, y_test) = (
        load_classification(dataset_name, split=split_name)
        for split_name in ("train", "test")
    )
    return X_train, y_train, X_test, y_test

# Load the "FaceDetection" train/test splits into notebook-level variables.
# NOTE(review): CFG.data_dir / CFG.logs_dir mention "Ford" while this cell
# loads "FaceDetection" -- confirm which dataset is intended.
X_train, y_train, X_test, y_test = get_aeon_dataset("FaceDetection")

# Dataset

In [None]:
class MTSCDataset(torch.utils.data.Dataset):
    """In-memory dataset of ``(sample, label)`` pairs stored as float32.

    Args:
        X: Array exposing ``astype`` and ``len``, indexed by sample along its
            first axis (presumably ``(N, D, T)`` time series -- confirm
            against the caller).
        y: Array of labels castable to float32 (numeric values or numeric
            strings), one per sample.
        transform: Optional callable applied to each sample's features when
            it is fetched. Labels are never transformed. ``None`` (default)
            returns the stored features unchanged.

    Raises:
        ValueError: If ``X`` and ``y`` do not hold the same number of samples.
    """
    def __init__(self, X, y, transform=None):
        # Fail early: a mismatch would otherwise surface later as an
        # IndexError or as silently ignored trailing labels.
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.X = X.astype(np.float32)
        self.y = y.astype(np.float32)
        # Previously accepted but discarded; keep it so __getitem__ can apply it.
        self.transform = transform

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for ``idx``, transforming the features if requested."""
        sample = self.X[idx]
        if self.transform is not None:
            sample = self.transform(sample)
        return sample, self.y[idx]

## ⚡ PyTorch Lightning

In [None]:
class RocketBinaryClassification(L.LightningModule):
    """Lightning module that trains a ``Rocket`` model as a binary classifier.

    The wrapped model is built as ``Rocket(D, T, n_kernels, 1)`` and trained
    with ``BCEWithLogitsLoss``. Loss and accuracy are logged under
    ``train_*`` and ``val_*`` keys.

    Args:
        D: First constructor argument forwarded to ``Rocket``.
        T: Second constructor argument forwarded to ``Rocket``.
        n_kernels: Third constructor argument forwarded to ``Rocket``.

    NOTE(review): the caller passes ``D, T`` from ``X_train.shape[1:]``,
    presumably (channels, series length) -- confirm against ``Rocket``.
    """
    def __init__(self, D, T, n_kernels):
        super().__init__()
        self.model = Rocket(D, T, n_kernels, 1)
        self.loss = nn.BCEWithLogitsLoss()
        self.sigmoid = nn.Sigmoid()


    def _shared_step(self, batch, stage):
        """Compute the loss for one batch and log ``{stage}_loss`` / ``{stage}_acc``."""
        X, labels = batch
        # Flatten to one logit per sample. A bare ``squeeze()`` would also drop
        # the batch axis for a single-sample batch, and BCEWithLogitsLoss would
        # then reject the scalar logits against ``(1,)`` labels.
        # Assumes the model emits one logit per sample -- TODO confirm.
        logits = self.model(X).reshape(-1)
        loss = self.loss(logits, labels)

        # Accuracy: round the sigmoid probability to 0/1 and compare with the labels.
        acc = (self.sigmoid(logits).round() == labels).float().mean()
        self.log(f"{stage}_loss", loss)
        self.log(f"{stage}_acc", acc)
        return loss


    def training_step(self, batch, batch_idx):
        """Return the training loss for ``batch``; logs ``train_loss`` and ``train_acc``."""
        return self._shared_step(batch, "train")


    def validation_step(self, batch, batch_idx):
        """Return the validation loss for ``batch``; logs ``val_loss`` and ``val_acc``."""
        return self._shared_step(batch, "val")


    def configure_optimizers(self):
        """Build the optimizer named by ``CFG.optimizer`` plus a cosine-annealing scheduler.

        Returns:
            dict: ``{"optimizer": ..., "lr_scheduler": ...}``.
        """
        optimizer = CFG.optimizer(
                self.model.parameters(),  #TODO remove linear
                lr=CFG.lr,
                weight_decay=CFG.weight_decay
            )
        # Anneal from CFG.lr to CFG.lr_min over CFG.epochs scheduler steps.
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
                optimizer, 
                T_max=CFG.epochs, 
                eta_min=CFG.lr_min
            )
        return {"optimizer": optimizer, "lr_scheduler": scheduler}

In [None]:
def train_model(X_train, y_train, X_test, y_test):
    """Fit a ``RocketBinaryClassification`` model with a Lightning ``Trainer``.

    Both splits are wrapped in ``MTSCDataset``; the test split is handed to
    ``trainer.fit`` as the validation loader. Hyperparameters come from ``CFG``.

    Args:
        X_train: 3-D array of training samples (its shape is unpacked into
            three values; presumably ``(N, D, T)`` -- confirm).
        y_train: Training labels.
        X_test: Samples used for validation during fitting.
        y_test: Labels used for validation during fitting.

    Returns:
        None. The fitted model and trainer are not returned.
    """
    _, n_dims, n_steps = X_train.shape

    # Datasets and loaders: only the training loader shuffles.
    train_dataset = MTSCDataset(X_train, y_train)
    test_dataset = MTSCDataset(X_test, y_test)
    shared_loader_kwargs = dict(batch_size=CFG.batch_size, num_workers=1)
    train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_kwargs)
    test_loader = DataLoader(test_dataset, shuffle=False, **shared_loader_kwargs)

    # Build the module, then initialise the Rocket biases from the first
    # training sample (kept as a batch of one via the 0:1 slice).
    model = RocketBinaryClassification(n_dims, n_steps, CFG.n_kernels)
    first_sample = torch.from_numpy(train_dataset.X[0:1])
    model.model.init_biases(first_sample)
    model = model.to(CFG.device)

    accelerator = "gpu" if CFG.device == "cuda" else "cpu"
    trainer = L.Trainer(
        accelerator=accelerator,
        max_epochs=CFG.epochs,
        gradient_clip_val=CFG.gradient_clip_val,
        num_sanity_val_steps=0,
    )
    trainer.fit(model, train_loader, test_loader)

# Run training. NOTE(review): train_model passes the test split to trainer.fit
# as the validation loader, so the logged val_* metrics are computed on the test data.
train_model(X_train, y_train, X_test, y_test)