In [1]:
from typing import List, Dict, Set, Any, Optional, Tuple, Literal, Callable
import numpy as np
import torch
from torch import Tensor
import sigkernel
import os
import sys
import tslearn
import tslearn.metrics
import ksig

from kernels.abstract_base import TimeSeriesKernel, StaticKernel
from kernels.static_kernels import LinearKernel, RBFKernel, PolyKernel
from kernels.integral import StaticIntegralKernel
from kernels.sig_pde import SigPDEKernel
from kernels.sig_trunc import TruncSigKernel
from kernels.gak import GlobalAlignmentKernel, sigma_gak
from kernels.flattened_static import FlattenedStaticKernel
from kernels.reservoir import ReservoirKernel

from features.signature import sig

In [18]:
##########################################
#########    randomized sigs   ###########
##########################################


from typing import List, Dict, Set, Any, Optional, Tuple, Literal, Callable
import torch
from torch import Tensor
from torch.nn.functional import relu
from torch.nn.functional import tanh

import os
import sys
from kernels.abstract_base import TimeSeriesKernel, StaticKernel
from kernels.static_kernels import RBFKernel

    

def randomized_sig(
        X:Tensor,
        A:Tensor,
        b:Tensor,
        Y_0:Tensor,
    ):
    """
    Randomized signature of a (batched) time series X, with identity
    activation function.

    Args:
        X (Tensor): Input tensor of shape (N, T, d).
        A (Tensor): Tensor of shape (M, M, d). Random matrix.
        b (Tensor): Tensor of shape (M, d). Random bias.
        Y_0 (Tensor): Initial value of the randomized signature.
            Tensor of shape (M).
    """
    N, T, d = X.shape
    diff = X.diff(dim=-2) # shape (N, T-1, d)
    Y_0 = torch.tile(Y_0, (N, 1)) # shape (N, M)

    #iterate y[t+1] = y[t] + ...
    Z = torch.tensordot(Y_0, A, dims=1) + b #shape (N, M, d)
    Y = Y_0 + (Z * diff[:, 0:1, :]).sum(dim=-1) # shape (N, M)
    for t in range(1, T-1):
        Z = torch.tensordot(Y, A, dims=1) + b
        Y = Y + (Z * diff[:, t:t+1, :]).sum(dim=-1)
    return Y



# @torch.jit.script
def randomized_sig_ReLU(
        X:Tensor,
        A:Tensor,
        b:Tensor,
        Y_0:Tensor,
    ):
    """
    Randomized signature of a (batched) time series X, with ReLU
    activation function.

    Args:
        X (Tensor): Input tensor of shape (N, T, d).
        A (Tensor): Tensor of shape (M, M, d). Random matrix.
        b (Tensor): Tensor of shape (M, d). Random bias.
        Y_0 (Tensor): Initial value of the randomized signature.
            Tensor of shape (M).
    """
    N, T, d = X.shape
    diff = X.diff(dim=1) # shape (N, T-1, d)
    Y_0 = torch.tile(Y_0, (N, 1)) # shape (N, M)

    #iterate y[t+1] = y[t] + ...
    Z = torch.tensordot(relu(Y_0), A, dims=1) + b[None] # shape (N, M, d)
    Y = Y_0 + (Z * diff[:, 0, :].unsqueeze(-2)).sum(dim=-1) # shape (N, M)
    for t in range(1, T-1):
        Z = torch.tensordot(relu(Y), A, dims=1) + b[None]
        Y = Y + (Z * diff[:, t, :].unsqueeze(-2)).sum(dim=-1)
    return Y



# @torch.jit.script
def randomized_sig_tanh(
        X:Tensor,
        A:Tensor,
        b:Tensor,
        Y_0:Tensor,
    ):
    """
    Randomized signature of a (batched) time series X, with tanh
    activation function.

    Args:
        X (Tensor): Input tensor of shape (N, T, d).
        A (Tensor): Tensor of shape (M, M, d). Random matrix.
        b (Tensor): Tensor of shape (M, d). Random bias.
        Y_0 (Tensor): Initial value of the randomized signature.
            Tensor of shape (M).
    """
    N, T, d = X.shape
    diff = X.diff(dim=1) # shape (N, T-1, d)
    Y_0 = torch.tile(Y_0, (N, 1)) # shape (N, M)

    #iterate y[t+1] = y[t] + ...
    Z = torch.tensordot(tanh(Y_0), A, dims=1) + b[None] # shape (N, M, d)
    Y = Y_0 + (Z * diff[:, 0, :].unsqueeze(-2)).sum(dim=-1) # shape (N, M)
    for t in range(1, T-1):
        Z = torch.tensordot(tanh(Y), A, dims=1) + b[None]
        Y = Y + (Z * diff[:, t, :].unsqueeze(-2)).sum(dim=-1)
    return Y


class RandomizedSigKernel(TimeSeriesKernel):
    def __init__(
            self,
            n_features = 100,
            activation:Literal["identity", "relu", "tanh"] = "relu",
            seed:int = 0,
            max_batch:int = 1000,
            normalize:bool = False,
        ):
        """
        The randomized signature kernel of two time series of 
        shape (T_i, d).

        Args:
            n_features (int): Number of features.
            activation (str): Activation function.
            seed (int): Random seed.
            max_batch (int, optional): Max batch size for computations.
            normalize (bool, optional): If True, normalizes the kernel.
        """
        super().__init__(max_batch, normalize)
        self.n_features = n_features
        self.activation = activation
        self.seed = seed
        self.has_initialized = False


    def _init_given_input(
            self, 
            X: Tensor
        ):
        """
        Initializes the random matrices and biases used in the 
        randomized signature kernel.

        Args:
            X (Tensor): Example input tensor of shape (N, T, d) of 
                timeseries.
        """
        # Get shape, dtype and device info.
        N, T, d = X.shape
        device = X.device
        dtype = X.dtype
        
        # Create a generator and set the seed
        gen = torch.Generator(device=device).manual_seed(self.seed)
        
        # Initialize the random matrices and biases
        self.A = torch.randn(self.n_features, 
                             self.n_features, 
                             d, 
                             device=device,
                             dtype=dtype,
                             generator=gen
                             ) / np.sqrt(self.n_features)
        # self.b = torch.randn(self.n_features,
        #                      d,
        #                      device=device,
        #                      dtype=dtype,
        #                      generator=gen
        #                      )
        self.b = torch.zeros(self.n_features,
                             d,
                             device=device,
                             dtype=dtype,)

        self.Y_0 = torch.randn(self.n_features,
                               device=device,
                               dtype=dtype,
                               generator=gen)


    def _gram(
            self, 
            X: Tensor, 
            Y: Tensor,
            diag: bool,
        ):
        if not self.has_initialized:
            self._init_given_input(X)
            self.has_initialized = True

        fun = randomized_sig if self.activation == "identity" else \
              randomized_sig_ReLU if self.activation == "relu" else \
              randomized_sig_tanh
        
        feat_X = fun(X, self.A, self.b, self.Y_0)
        feat_Y = fun(Y, self.A, self.b, self.Y_0)

        if diag:
            return (feat_X * feat_Y).mean(dim=-1)
        else:
            return feat_X @ feat_Y.t() / self.n_features
        

#############
## test it ##
#############


def test_randsig():
    N= 3
    N2= 2
    T, d = 30, 2
    torch.manual_seed(0)
    X = torch.randn(N,  T, d, dtype=torch.float64).to("cuda").cumsum(dim=1) / np.sqrt(d)
    Y = torch.randn(N2, T, d, dtype=torch.float64).to("cuda").cumsum(dim=1) / np.sqrt(d)

    n_features = 200
    n_MC = 10
    out = []
    for i in range(n_MC):
        ker = RandomizedSigKernel(n_features=n_features, 
                                activation="identity", 
                                normalize=False)
        out.append(ker(X, Y))
    out = torch.stack(out).mean(dim=0)
    print(f"\nT={T}, d={d}")
    print(f"path development n_features={n_features}, n_MC = {n_MC}")
    print(out)

    dyadic_order = 3
    sigkernel = SigPDEKernel(LinearKernel(),
                            dyadic_order=dyadic_order,
                                normalize=False)
    out = sigkernel(X, Y)
    print(f"\npde sigker dyadic_order={dyadic_order}")
    print(out)

    trunc_level=15
    sigtrunc = TruncSigKernel(LinearKernel(),
                            trunc_level=trunc_level,
                            geo_order = 1,)
    out = sigtrunc(X, Y)
    print(f"\nnon-geometric trunc sigker trunc_level={trunc_level}")
    print(out)

    lineartruncsig = TruncSigKernel(LinearKernel(),
                            trunc_level=trunc_level,
                            geo_order = trunc_level,)
    out = lineartruncsig(X, Y)
    print(f"\npiecewise linear trunc sigker trunc_level={trunc_level}")
    print(out)

    sig_X = sig(X, trunc_level)
    sig_Y = sig(Y, trunc_level)
    out = sig_X @ sig_Y.t()
    print(f"\nexact signature inner product trunc_level={trunc_level}")
    print(out)

    def function():
        ker(X, Y)

    import timeit
    # Measure the execution time of function
    execution_time = timeit.timeit(function, number=100)
    print("Execution time of function:", execution_time)

test_randsig()


T=30, d=2
path development n_features=200, n_MC = 10
tensor([[  2723383.8437,   4388336.5323],
        [ -3114313.6835,   -505699.7463],
        [-36695762.9923,  -2316757.4759]], device='cuda:0',
       dtype=torch.float64)

pde sigker dyadic_order=3
tensor([[   5937.1830, 3242989.9209],
        [  -6623.9208,  158561.4732],
        [ 595235.0906, -271875.4427]], device='cuda:0', dtype=torch.float64)

non-geometric trunc sigker trunc_level=15
tensor([[-22475.6242,  -3424.4065],
        [  -866.7473,   -988.0156],
        [   343.7322,   3667.5261]], device='cuda:0', dtype=torch.float64)

piecewise linear trunc sigker trunc_level=15
tensor([[   5331.2835, 3033761.0354],
        [  -6323.8684,  139105.0875],
        [ 390713.5333, -173587.0705]], device='cuda:0', dtype=torch.float64)

exact signature inner product trunc_level=15
tensor([[   5330.2835, 3033760.0354],
        [  -6324.8684,  139104.0875],
        [ 390712.5333, -173588.0705]], device='cuda:0', dtype=torch.float64)
Execut

In [19]:
# test naive randsig
import numpy as np
import torch
from torch import Tensor
from torch.nn.functional import tanh
from typing import List, Dict, Set, Any, Optional, Tuple, Literal, Callable

@torch.jit.script
def randomized_sig_tanh(
        X:Tensor,
        A:Tensor,
        b:Tensor,
        Y_0:Tensor,
    ):
    """
    Randomized signature of a (batched) time series X, with tanh
    activation function.

    Args:
        X (Tensor): Input tensor of shape (N, T, d).
        A (Tensor): Tensor of shape (M, M, d). Random matrix.
        b (Tensor): Tensor of shape (M, d). Random bias.
        Y_0 (Tensor): Initial value of the randomized signature.
            Tensor of shape (M).
    """
    N, T, d = X.shape
    diff = X.diff(dim=1) # shape (N, T-1, d)
    Y_0 = torch.tile(Y_0, (N, 1)) # shape (N, M)

    #iterate y[t+1] = y[t] + ...
    Z = torch.tensordot(tanh(Y_0), A, dims=1) + b[None] # shape (N, M, d)
    Y = Y_0 + (Z * diff[:, 0:1, :]).sum(dim=-1) # shape (N, M)
    for t in range(1, T-1):
        Z = torch.tensordot(tanh(Y), A, dims=1) + b[None]
        Y = Y + (Z * diff[:, t:t+1, :]).sum(dim=-1)
    return Y



def naive_rand_sig(
        X:Tensor,
        A:Tensor,
        b:Tensor,
        Y_0:Tensor,
        activation:Callable,
    ):
    """
    The naive non-optimized version of rand sigs

    Args:
        X (Tensor): Input tensor of shape (N, T, d).
        A (Tensor): Tensor of shape (M, M, d). Random matrix.
        b (Tensor): Tensor of shape (M, d). Random bias.
        Y_0 (Tensor): Initial value of the randomized signature.
            Tensor of shape (M).
        activation (Callable): Activation function.
    """
    N, T, d = X.shape
    diff = X.diff(dim=-2) # shape (N, T-1, d)
    Y = Y_0[None].repeat(N, 1) # shape (N, M)
    Z = Y.clone()

    for t in range(T-1):
        v = [] #for each dim of the control path
        for k in range(d):
            summand =  (activation(Z) @ A[:,:, k] + b[None, :, k])
            summand = summand * diff[:, t:t+1, k]
            v.append(summand)
        Z = Z + torch.stack(v, dim=-1).sum(dim=-1)
    return Z


def test_naive_randsig():
    N,T,d = 3, 30, 2
    torch.manual_seed(0)
    X = torch.randn(N,  T, d, dtype=torch.float64).to("cuda").cumsum(dim=1) / np.sqrt(d)
    n_features = 40
    A = torch.randn(n_features, n_features, d, dtype=torch.float64).to("cuda") / np.sqrt(n_features)
    b = torch.zeros(n_features, d, dtype=torch.float64).to("cuda")
    Y_0 = torch.randn(n_features, dtype=torch.float64).to("cuda")

    out_naive = naive_rand_sig(X, A, b, Y_0, torch.tanh)
    print("naive\n", out_naive)

    out = randomized_sig_tanh(X, A, b, Y_0)
    print("out\n", out)

    print("diff", out_naive - out)


test_naive_randsig()

naive
 tensor([[ -4.8191,   1.1076,  -3.6456,  -5.6884,   6.6305,   5.9853,   5.5638,
           3.1855,   4.8021,  -2.7075,  -1.8026,   4.1848,  -1.7182,   0.5929,
          13.5506,   5.5576,   3.0846,   1.1335,  -5.0735,  -5.3065,  -0.7329,
           0.7478,  -1.2011,  -3.3103,   3.6241,  -2.9897,  -9.7440,  -2.9861,
          -9.8475,  -3.4611,  -1.9100,  -7.3128,  -2.5018,  -5.7883,  -8.3317,
           1.8492,  -3.7105,  -9.3530,  -1.3751,  -1.7755],
        [ -7.2429,  11.6536,   1.8351,  -5.6309,  12.3161,  -6.3678,  -0.6727,
          -5.2734,  -0.4374,  -2.5425,   2.1544,  -6.5285,   6.8743,  -9.8720,
         -12.8083,  -5.6890,  -8.5065,  -2.4751,  10.6474,   8.8322,   8.1747,
          -8.2879,   9.5290,   5.3229,  -7.5562,  -1.0465,   2.3181,   4.8677,
          -0.3393,  -3.5337,  -0.0770,   9.1050,  17.1236, -11.5461,  -4.3280,
          -4.0708,   0.2352,  -4.9608,   8.0505, -12.4527],
        [  4.3682,  -3.6289,  -1.8262, -10.1565,   0.3523,   6.9981,  -3.7521,
    

In [None]:
import ksig
import timeit

#### Test GAK ####
N= 8
N2= 20
T, d = 20, 2
torch.manual_seed(0)
X = torch.randn(N,  T, d, dtype=torch.float64).to("cuda") / d
Y = torch.randn(N2, T, d, dtype=torch.float64).to("cuda") / d
X_np = X.cpu().numpy()
Y_np = Y.cpu().numpy()

sigma = tslearn.metrics.sigma_gak(X_np)
gak = tslearn.metrics.cdist_gak
ksigker = ksig.kernels.GlobalAlignmentKernel(static_kernel=ksig.static.kernels.RBFKernel(bandwidth=sigma))
mine = GlobalAlignmentKernel(RBFKernel(sigma=sigma), normalize=True, max_batch=50000)
# ksigker = ksig.kernels.SignatureKernel(static_kernel=ksig.static.kernels.RBFKernel(bandwidth=sigma), n_levels=5, order=1)
# mine = TruncSigKernel(RBFKernel(sigma=sigma), normalize=False, trunc_level=5, geo_order=1, max_batch=50000)
# ksigker = ksig.kernels.SignatureKernel(static_kernel=ksig.static.kernels.LinearKernel(), n_levels=5, order=1)
# mine = TruncSigKernel(LinearKernel(), normalize=True, trunc_level=5, geo_order=1, max_batch=50000)

# print(tslearn.metrics.sigma_gak(X_np))
# print(sigma_gak(X))
# out = gak(X, X, sigma=sigma)
# print(out)

out2 = ksigker(X_np, X_np)
print(out2)
out3 = mine(X, X)
#print(out)
print(out3)
print(np.mean(np.abs(out2 - out3.cpu().numpy())))

def function1():
    gak(X, X_np, sigma=sigma)

def function2():
    ksigker(X_np, X_np)

def function3():
    with torch.no_grad():
        mine(X, Y)

# # Measure the execution time of function 1
# execution_time1 = timeit.timeit(function1, number=1)
# print("Execution time of function 1:", execution_time1)

# Measure the execution time of function 2
execution_time2 = timeit.timeit(function2, number=1)
print("Execution time of function 2:", execution_time2)
# Measure the execution time of function 3
execution_time3 = timeit.timeit(function3, number=1)
print("Execution time of function 3:", execution_time3)

In [None]:
#iterate over antidiagonals with s,t>0
T1 = 5
T2 = 4
for diag in range(2, T1+T2-1):
    for s in range(max(1, diag - T2 + 1), min(diag, T1)):
        t = diag - s
        print(s,t)
    print("\n")

In [None]:
# test for loop indices
import time
import torch
import itertools
from kernels.static_kernels import LinearKernel

lin_ker = LinearKernel()

def placeholder_ker(X:Tensor, Y:Tensor, diag:bool=False):
    return lin_ker.time_gram(X, Y, diag)[...,0,0]


def test_indices(X:Tensor, 
                 Y:Tensor,
                 diag:bool,
                max_batch:int, 
    ):
    device = X.device
    N1, T, d = X.shape
    N2, _, _ = Y.shape

    # split into batches. FASTEST METHOD NO BATCH
    t1 = time.perf_counter()
    result = placeholder_ker(X, Y)
    t2 = time.perf_counter()
    print("time NOBATCH\t", t1-t2)

    # split into batches BY INDICES
    t1 = time.perf_counter()
    if diag:
        indices = torch.arange(N1, device=device).tile(2,1) # shape (2, N)
    else:
        indices = torch.cartesian_prod(torch.arange(N1, device=device), 
                                    torch.arange(N2, device=device)).T #shape (2, N1*N2)
    split = torch.split(indices, max_batch, dim=1)
    result = [placeholder_ker(X[ix], Y[iy], diag=True) for ix,iy in split]
    t2 = time.perf_counter()
    print("time INDEX\t", t1-t2)

    # split into batches VIA SPLIT
    t1 = time.perf_counter()
    split_X = torch.split(X, max_batch, dim=0)
    Y_max_batch = max(1, max_batch//N1)
    split_Y = torch.split(Y, Y_max_batch, dim=0)
    result = [placeholder_ker(ix, iy) for ix,iy in itertools.product(split_X, split_Y)]
    if max_batch >= N1:
        result = torch.cat(result, dim=1)
    else:
        result = torch.cat(result, dim=0).reshape(N1, N2)
    t2 = time.perf_counter()
    print("time SPLIT\t", t1-t2)

X = torch.randn(200, 7, 10)
Y = torch.randn(300, 7, 10)
test_indices(X, Y, False, 10000)

#split = torch.split(X, )

In [None]:
#### test dimensions of TimeSeriesKernels ####

N=3
N2=4
T, d = 7, 5
X = torch.randn(N, T, d) / d**0.5
Y = torch.randn(N2, T, d) / d**0.5
inputs = [
    (X, X),
    (X, Y),
    (X[0], X[0]),
    (X[0], Y[0]),
    (X[0], Y),
    (X, Y[0]),
]
diag_inputs = [
    (X, X),
    (Y, Y),
    (X[:min(N,N2)], Y[:min(N,N2)]),
    (X[0], X[0]),
    (X[0], Y[0]),
]
def test_kernel(ker: TimeSeriesKernel, inputs, diag=False):
    print(ker)
    for X, Y in inputs:
        out = ker(X, Y, diag, normalize=False)
        out_normalize = ker(X, Y, diag, normalize=True)
        # print(out, "out")
        # print(out_normalize, "out, normalize")
        print(out.shape)
    print()


sigker = TruncSigKernel(static_kernel=RBFKernel(), 
                        trunc_level=6, 
                        geo_order=1,
                        only_last=False,)
test_kernel(sigker, inputs)
test_kernel(sigker, diag_inputs, True)

sigpde = SigPDEKernel(static_kernel=RBFKernel(),
                     dyadic_order=3,)
test_kernel(sigpde, inputs)
test_kernel(sigpde, diag_inputs, True)

intker = StaticIntegralKernel(static_kernel=RBFKernel())
test_kernel(intker, inputs)
test_kernel(intker, diag_inputs, True)

pde = SigPDEKernel(static_kernel=RBFKernel(), dyadic_order=2)
test_kernel(pde, inputs)
test_kernel(pde, diag_inputs, True)

gak = GlobalAlignmentKernel(static_kernel=RBFKernel(sigma=sigma_gak(X)))
test_kernel(gak, inputs)
test_kernel(gak, diag_inputs, True)

flat = FlattenedStaticKernel(static_kernel=LinearKernel())
test_kernel(flat, inputs)
test_kernel(flat, diag_inputs, True)

res = ReservoirKernel()
test_kernel(res, inputs)
test_kernel(res, diag_inputs, True)

In [None]:
import numpy as np
import ksig

# Number of signature levels to use.
n_levels = 5 

# Use the RBF kernel for vector-valued data as static (base) kernel.
static_kernel = ksig.static.kernels.RBFKernel() 

# Instantiate the signature kernel, which takes as input the static kernel.
n_levels = 5
order = 1
sig_kernel = ksig.kernels.SignatureKernel(n_levels=n_levels, order=order, static_kernel=static_kernel)

# Generate 10 sequences of length 50 with 5 channels.
n_seq, l_seq, n_feat = 10, 50, 5 
X = np.random.randn(n_seq, l_seq, n_feat)

# Sequence kernels take as input an array of sequences of ndim == 3,
# and work as a callable for computing the kernel matrix. 
K_XX = sig_kernel(X)  # K_XX has shape (10, 10).

# The diagonal kernel entries can also be computed.
K_X = sig_kernel(X, diag=True)  # K_X has shape (10,).

# Generate another array of 8 sequences of length 20 and 5 features.
n_seq2, l_seq2 = 8, 20
Y = np.random.randn(n_seq2, l_seq2, n_feat)

# Compute the kernel matrix between arrays X and Y.
K_XY = sig_kernel(X, Y)  # K_XY has shape (10, 8)
K_XY

In [None]:
#Test that iisig gives the same result as mine
import iisignature
import numpy as np
import ksig

# Number of signature levels to use.
normalize=False
trunc_level = 5
geo_order = 5
N=2
N2= 2
T, d = 20, 2
torch.manual_seed(0)
X = torch.randn(N, T, d, dtype=torch.float64).to("cuda") / d
Y = torch.randn(N2, T, d, dtype=torch.float64).to("cuda") / d
X_np = X.cpu().numpy()
Y_np = Y.cpu().numpy()

ksigker = ksig.kernels.SignatureKernel(static_kernel=ksig.static.kernels.LinearKernel(), 
                                       normalize=normalize,
                                       n_levels=trunc_level, 
                                       order=geo_order)
mine = TruncSigKernel(LinearKernel(scale=1), 
                      normalize=normalize, 
                      trunc_level=trunc_level, 
                      geo_order=geo_order, 
                      max_batch=50000)

#test
out1 = ksigker(X_np, Y_np)
out2 = mine(X, Y)
featuresX = iisignature.sig(X_np, trunc_level)
featuresY = iisignature.sig(Y_np, trunc_level)
out3 = 1+np.dot(featuresX, featuresY.T)
print("ksig", out1)
print("\nmine", out2)
print("\niisig", out3)
print(np.mean(np.abs(out1 - out2.cpu().numpy())))
print(np.mean(np.abs(out1 - out3)))
print(np.mean(np.abs(out2.cpu().numpy() - out3)))