In [1]:
from typing import List, Dict, Set, Any, Optional, Tuple, Literal, Callable
import numpy as np
import torch
from torch import Tensor
import sigkernel
import os
import sys
import tslearn
import tslearn.metrics
import ksig
from tqdm import tqdm

from kernels.abstract_base import TimeSeriesKernel, StaticKernel
from kernels.static_kernels import LinearKernel, RBFKernel, PolyKernel
from kernels.integral import StaticIntegralKernel
from kernels.sig_pde import SigPDEKernel
from kernels.sig_trunc import TruncSigKernel
from kernels.gak import GlobalAlignmentKernel, sigma_gak

from features.random_fourier import RBF_RandomFourierFeatures
from features.random_sig_fourier import TRP_RFSF_Gaussian, TRP_RFSF_Linear

In [8]:
from typing import List, Dict, Set, Any, Optional, Tuple, Literal, Callable
import torch
from torch import Tensor
import signatory


def sig(
    X: Tensor,
    trunc_level: int,
):
    """
    Intended to compute the truncated signature of time series of
    shape (T, d), with optional batch support.

    NOTE(review): only the signature and this docstring are visible in
    this cell; no implementation follows, so as shown the function
    returns ``None``. The cell's recorded output is a
    ``ModuleNotFoundError`` for ``signatory``, which the cell imports
    before this definition -- confirm where the real body lives.

    Args:
        X (Tensor): Tensor of shape (..., T, d) of time series.
        trunc_level (int): Signature truncation level.

    Returns:
        Tensor: Tensor of shape (..., D) where
            D = 1 + d + d^2 + ... + d^trunc_level.
    """


ModuleNotFoundError: No module named 'signatory'

In [3]:
##########################################################
#### Linear TRP-RFSF features  vs  Vanilla Sig kernel ####
##########################################################

def LINEAR_trp_vs_kernel(
    N: int = 3,
    N2: int = 2,
    T: int = 20,
    d: int = 2,
    trunc_level: int = 5,
    n_features: Optional[int] = None,
    MC_iter: int = 10000,
    dtype: torch.dtype = torch.float32,
    device: Optional[str] = None,
    seed: Optional[int] = None,
):
    """
    Compare the exact truncated signature kernel (linear static kernel)
    with a Monte Carlo average of linear TRP-RFSF feature inner products.

    Two random batches X (N, T, d) and Y (N2, T, d) are drawn, the exact
    Gram matrix ``TruncSigKernel(LinearKernel(), ...)(X, Y)`` is printed,
    and then ``MC_iter`` independent ``TRP_RFSF_Linear`` feature maps are
    sampled. For each one the estimate ``1 + feat_X @ feat_Y.T`` is
    recorded. The mean over all draws and the first single draw are
    printed for comparison.

    Called without arguments, this reproduces the original hard-coded
    experiment (on a CUDA machine).

    Args:
        N: Number of time series in X.
        N2: Number of time series in Y.
        T: Length of every time series.
        d: State-space dimension of every time series.
        trunc_level: Signature truncation level used by both the exact
            kernel and the random features.
        n_features: Number of random features per draw. ``None`` keeps
            the original choice ``n_features = d``.
        MC_iter: Number of independent feature-map draws to average.
        dtype: Floating point dtype of the sampled data.
        device: Device to run on. ``None`` selects ``"cuda"`` when
            available and falls back to ``"cpu"`` otherwise, instead of
            crashing on machines without a GPU.
        seed: Optional seed passed to ``torch.manual_seed`` before any
            sampling. ``None`` leaves the global RNG state untouched.

    Raises:
        ValueError: If ``MC_iter`` is smaller than 1 (there would be
            nothing to average).
    """
    if MC_iter < 1:
        raise ValueError("MC_iter must be at least 1")
    if n_features is None:
        n_features = d
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if seed is not None:
        # NOTE(review): this only makes the run reproducible if the
        # TRP-RFSF maps draw from torch's global RNG -- TODO confirm.
        torch.manual_seed(seed)

    # Sample on CPU first so that seeded data does not depend on the device.
    X = torch.randn(N, T, d, dtype=dtype).to(device) / np.sqrt(d)
    Y = torch.randn(N2, T, d, dtype=dtype).to(device) / np.sqrt(d)

    # exact sig kernel
    sigker = TruncSigKernel(LinearKernel(), normalize=False, trunc_level=trunc_level, geo_order=1, max_batch=50000)
    K = sigker(X, Y)
    print("exact\n", K)

    # trp: a fresh random feature map per Monte Carlo iteration
    # NOTE(review): assumes the feature map follows the device/dtype of
    # its input, as no device is passed to its constructor -- TODO confirm.
    res = []
    for _ in tqdm(range(MC_iter)):
        trp = TRP_RFSF_Linear(trunc_level, n_features, only_last=True)
        feat_X = trp(X)
        feat_Y = trp(Y)
        # the constant 1 is added to the feature inner product, as in the
        # original comparison against the exact kernel
        res.append(1 + feat_X @ feat_Y.T)
    res = torch.stack(res)
    example = res[0]
    mean = res.mean(dim=0)
    print("mean\n", mean)
    print("example\n", example)

# LINEAR_trp_vs_kernel()
# # exact
# #  tensor([[ 4627.0444, -5744.9629],
# #         [ 9709.9150, -7802.3965],
# #         [-1727.5579,  2563.8979]], device='cuda:0')
# # 100%|██████████| 10000/10000 [00:15<00:00, 641.01it/s] # n_features = 500, trunc_level = 5
# # mean
# #  tensor([[ 4624.3896, -5737.0781],
# #         [ 9733.1250, -7796.5479],
# #         [-1733.9088,  2575.4778]], device='cuda:0')

exact
 tensor([[ 2581.8730,  -235.9912],
        [-2129.7358,  -272.5004],
        [ 6718.6289,  -402.8711]], device='cuda:0')


100%|██████████| 10000/10000 [00:14<00:00, 669.59it/s]

mean
 tensor([[ 2396.6672,  -120.0564],
        [-1895.8381,  -362.5847],
        [ 6214.2769,   193.0739]], device='cuda:0')
example
 tensor([[   247.9467,    763.2634],
        [  1037.1696,   3726.6875],
        [ -2612.9407, -10172.9092]], device='cuda:0')





In [4]:
#######################################################
#### Gaussian TRP-RFSF features  vs  SigRBF kernel ####
#######################################################

def GAUSSIAN_trp_vs_kernel(
    N: int = 3,
    N2: int = 2,
    T: int = 20,
    d: int = 2,
    trunc_level: int = 5,
    n_features: int = 5000,
    sigma: float = 1.0,
    MC_iter: int = 10000,
    dtype: torch.dtype = torch.float32,
    device: Optional[str] = None,
    seed: Optional[int] = None,
):
    """
    Compare the exact truncated signature kernel (RBF static kernel)
    with a Monte Carlo average of Gaussian TRP-RFSF feature inner products.

    Two random batches X (N, T, d) and Y (N2, T, d) are drawn, the exact
    Gram matrix ``TruncSigKernel(RBFKernel(sigma=sigma), ...)(X, Y)`` is
    printed, and then ``MC_iter`` independent ``TRP_RFSF_Gaussian``
    feature maps are sampled. For each one the estimate
    ``1 + feat_X @ feat_Y.T`` is recorded. The mean over all draws and
    the first single draw are printed for comparison.

    Called without arguments, this reproduces the original hard-coded
    experiment (on a CUDA machine).

    Args:
        N: Number of time series in X.
        N2: Number of time series in Y.
        T: Length of every time series.
        d: State-space dimension of every time series.
        trunc_level: Signature truncation level used by both the exact
            kernel and the random features.
        n_features: Number of random features per draw.
        sigma: Value passed as ``sigma`` to both ``RBFKernel`` and
            ``TRP_RFSF_Gaussian``.
        MC_iter: Number of independent feature-map draws to average.
        dtype: Floating point dtype of the sampled data.
        device: Device to run on. ``None`` selects ``"cuda"`` when
            available and falls back to ``"cpu"`` otherwise, instead of
            crashing on machines without a GPU.
        seed: Optional seed passed to ``torch.manual_seed`` before any
            sampling. ``None`` leaves the global RNG state untouched.

    Raises:
        ValueError: If ``MC_iter`` is smaller than 1 (there would be
            nothing to average).
    """
    if MC_iter < 1:
        raise ValueError("MC_iter must be at least 1")
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if seed is not None:
        # NOTE(review): this only makes the run reproducible if the
        # TRP-RFSF maps draw from torch's global RNG -- TODO confirm.
        torch.manual_seed(seed)

    # Sample on CPU first so that seeded data does not depend on the device.
    X = torch.randn(N, T, d, dtype=dtype).to(device) / np.sqrt(d)
    Y = torch.randn(N2, T, d, dtype=dtype).to(device) / np.sqrt(d)

    # exact sig kernel
    sigker = TruncSigKernel(RBFKernel(sigma=sigma), normalize=False, trunc_level=trunc_level, geo_order=1, max_batch=50000)
    K = sigker(X, Y)
    print("exact\n", K)

    # trp: a fresh random feature map per Monte Carlo iteration
    # NOTE(review): assumes the feature map follows the device/dtype of
    # its input, as no device is passed to its constructor -- TODO confirm.
    res = []
    for _ in tqdm(range(MC_iter)):
        trp = TRP_RFSF_Gaussian(trunc_level, n_features, sigma, only_last=True)
        feat_X = trp(X)
        feat_Y = trp(Y)
        # the constant 1 is added to the feature inner product, as in the
        # original comparison against the exact kernel
        res.append(1 + feat_X @ feat_Y.T)
    res = torch.stack(res)
    example = res[0]
    mean = res.mean(dim=0)
    print("mean\n", mean)
    print("example\n", example)

# GAUSSIAN_trp_vs_kernel()
# # exact
# #  tensor([[ -8.7953, -30.1428],
# #         [ -4.1622,  -4.5515],
# #         [-25.4650,  42.8830]], device='cuda:0')
# # 100%|██████████| 10000/10000 [05:55<00:00, 28.10it/s] # n_features=5000, trunc_level=5
# # mean
# #  tensor([[ -9.9236, -30.5771],
# #         [ -4.4291,  -4.5940],
# #         [-25.3243,  43.3625]], device='cuda:0')

In [5]:
############################################################
#### Test RBF_RandomFourierFeatures vs exact RBF kernel ####
############################################################
def rff_vs_exact_RBFKernel(
    N: int = 3,
    N2: int = 2,
    d: int = 10,
    sigma: float = 1,
    n_features: int = 1000,
    N_MC: int = 10000,
    method: str = "cos(x)sin(x)",
    dtype: torch.dtype = torch.float64,
    device: Optional[str] = None,
    seed: Optional[int] = None,
):
    """
    Compare the exact RBF kernel with a Monte Carlo average of
    ``RBF_RandomFourierFeatures`` inner products.

    Two random batches X (N, d) and Y (N2, d) are drawn and the exact
    Gram matrix ``RBFKernel(sigma=sigma)(X, Y)`` is computed. Then
    ``N_MC`` independent random Fourier feature maps are sampled and
    ``feat_X @ feat_Y.T`` is averaged over all draws. The exact matrix,
    the averaged estimate, their difference and the mean absolute
    difference are printed.

    Called without arguments, this reproduces the original hard-coded
    experiment (on a CUDA machine).

    Args:
        N: Number of points in X.
        N2: Number of points in Y.
        d: Dimension of every point.
        sigma: Value passed as ``sigma`` to both ``RBFKernel`` and
            ``RBF_RandomFourierFeatures``.
        n_features: Number of random Fourier features per draw.
        N_MC: Number of independent feature-map draws to average.
        method: Passed through as ``method`` to
            ``RBF_RandomFourierFeatures``. The original cell used
            ``"cos(x)sin(x)"`` and listed ``"cos(x + b)"`` as the
            commented-out alternative.
        dtype: Floating point dtype of the sampled data.
        device: Device to run on. ``None`` selects ``"cuda"`` when
            available and falls back to ``"cpu"`` otherwise, instead of
            crashing on machines without a GPU.
        seed: Optional seed passed to ``torch.manual_seed`` before any
            sampling. ``None`` leaves the global RNG state untouched.

    Raises:
        ValueError: If ``N_MC`` is smaller than 1 (there would be
            nothing to average).
    """
    if N_MC < 1:
        raise ValueError("N_MC must be at least 1")
    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    if seed is not None:
        # NOTE(review): this only makes the run reproducible if the
        # feature map draws from torch's global RNG -- TODO confirm.
        torch.manual_seed(seed)

    # Sample on CPU first so that seeded data does not depend on the device.
    X = torch.randn(N, d, dtype=dtype).to(device) / np.sqrt(d)
    Y = torch.randn(N2, d, dtype=dtype).to(device) / np.sqrt(d)

    # Exact RBF kernel
    k = RBFKernel(sigma=sigma)
    K = k(X, Y)

    # Approximate RBF kernel using RBF_RandomFourierFeatures:
    # a fresh random feature map per Monte Carlo iteration
    res = []
    for _ in range(N_MC):
        RFF = RBF_RandomFourierFeatures(n_features=n_features,
                                        sigma=sigma,
                                        method=method,
                                        )
        feat_X = RFF(X)
        feat_Y = RFF(Y)
        res.append(feat_X @ feat_Y.T)
    K_rff = torch.mean(torch.stack(res), dim=0)

    print("K\n", K)
    print("K_rff\n", K_rff)
    print("diff\n", K - K_rff)
    print("diffmean\n", torch.mean(torch.abs(K - K_rff)))
    # Observation from the original author: the averaged RFF estimate does
    # not get closer than about 1e-5 to the exact kernel.
    # NOTE(review): presumably residual Monte Carlo error, which only
    # shrinks with more draws/features -- TODO confirm.
    
#rff_vs_exact_RBFKernel()

In [6]:
#####################################
######### FROM KSIG LIBRARY #########
#####################################
import numpy as np
import ksig

def ksig_readme():
    """
    Walk through the ksig README example on small random data.

    Builds a ``ksig.kernels.SignatureFeatures`` kernel from random Fourier
    features and a tensorized random projection, evaluates it on two
    random batches of sequences, and prints its Gram matrices next to
    those of ``ksig.kernels.SignatureKernel`` with an RBF static kernel.
    """
    # Shared settings: signature levels, normalization flag, and the
    # number of components used by both the RFF map and the projection.
    n_levels = 5
    normalize = False
    n_components = 100

    # Random-feature signature kernel (RFF static features + TRP).
    rff_map = ksig.static.features.RandomFourierFeatures(n_components=n_components)
    trp = ksig.projections.TensorizedRandomProjection(n_components=n_components)
    feature_kernel = ksig.kernels.SignatureFeatures(
        n_levels=n_levels,
        static_features=rff_map,
        projection=trp,
        normalize=normalize,
    )

    # First batch: 3 sequences of length 20 with 10 channels, scaled by
    # 1/sqrt(number of channels).
    n_feat = 10
    seqs_x = np.random.randn(3, 20, n_feat) / np.sqrt(n_feat)

    # The feature kernel has to be fitted once before it can be evaluated.
    feature_kernel.fit(seqs_x)
    gram_xx_trp = feature_kernel(seqs_x)

    # Second batch: 4 sequences of the same length and channel count.
    seqs_y = np.random.randn(4, 20, n_feat) / np.sqrt(n_feat)

    # Cross Gram matrix; no second fit is required.
    gram_xy_trp = feature_kernel(seqs_x, seqs_y)

    # The explicit feature matrices can also be computed directly
    # (one row per input sequence).
    feats_x = feature_kernel.transform(seqs_x)
    feats_y = feature_kernel.transform(seqs_y)
    print("P_X", feats_x)

    # Reference: signature kernel with an RBF static kernel.
    rbf = ksig.static.kernels.RBFKernel()
    reference_kernel = ksig.kernels.SignatureKernel(
        n_levels=n_levels,
        order=1,
        static_kernel=rbf,
        normalize=normalize,
    )
    gram_xx_exact = reference_kernel(seqs_x)
    gram_xy_exact = reference_kernel(seqs_x, seqs_y)

    # Print approximate and reference matrices side by side.
    for label, gram in (
        ("\n\nTRP XX\n", gram_xx_trp),
        ("Exact XX\n", gram_xx_exact),
        ("\n\nTRP XY\n", gram_xy_trp),
        ("Exact XY\n", gram_xy_exact),
    ):
        print(label, gram)

ksig_readme()

P_X [[ 1.00000000e+00 -3.59423506e-03 -9.07996261e-02 ...  8.40732859e-01
   5.30873379e+00  2.69755228e-01]
 [ 1.00000000e+00 -1.93983825e-02  5.37721907e-02 ... -4.71890160e+00
   1.59428865e+00 -1.14701394e+00]
 [ 1.00000000e+00 -7.26373305e-02 -1.01710501e-03 ... -3.97536125e-01
  -2.62673210e-01 -6.75785009e-01]]


TRP XX
 [[259.35195906  -5.04945709  -0.91706391]
 [ -5.04945709 470.53149444  13.50306053]
 [ -0.91706391  13.50306053 183.76779889]]
Exact XX
 [[266.96674815   6.09468627   6.73704003]
 [  6.09468627 417.85548768   4.0151835 ]
 [  6.73704003   4.0151835  164.36345817]]


TRP XY
 [[  1.85705796  14.55856174  12.49293309  37.6238032 ]
 [ 69.12108422  21.17174776   4.64703248 -11.5869424 ]
 [ 15.17577528  -5.66409298   2.21091271   3.41681129]]
Exact XY
 [[7.10066583 8.11644864 6.70422197 6.06963588]
 [8.21421106 5.85429156 5.74094593 6.56326078]
 [7.37560119 5.40776958 4.61642167 6.67836765]]


In [7]:
#########################
## KSIG test TRP vs DP ##
#########################
import ksig
import numpy as np

def trp_vs_dp():
    """
    Time ksig signature-feature kernels with two different projections.

    Fits one ``SignatureFeatures`` kernel using a tensorized random
    projection (TRP) and one using a diagonal projection (DP) on the same
    random data, sharing a single random Fourier feature map, then prints
    the total time of 10 Gram-matrix evaluations for each.
    """
    import timeit

    # 10 random sequences of length 150 with 100 channels.
    X = np.random.randn(10, 150, 100)
    n_levels = 10       # number of signature levels
    n_components = 100  # dimension of the RFF map and of the TRP

    # One RFF map instance is shared by both kernels.
    rff_map = ksig.static.features.RandomFourierFeatures(n_components=n_components)
    tensorized = ksig.projections.TensorizedRandomProjection(n_components=n_components)
    diagonal = ksig.projections.DiagonalProjection()

    trp_kernel = ksig.kernels.SignatureFeatures(
        n_levels=n_levels, static_features=rff_map, projection=tensorized
    )
    dp_kernel = ksig.kernels.SignatureFeatures(
        n_levels=n_levels, static_features=rff_map, projection=diagonal
    )

    # Fit order is kept (TRP first, then DP) because the RFF map is shared.
    trp_kernel.fit(X)
    dp_kernel.fit(X)

    # Time 10 evaluations of each Gram matrix: DP first, then TRP.
    dp_seconds = timeit.timeit(lambda: dp_kernel(X, X), number=10)
    print("Execution time of dp\t:", dp_seconds)
    trp_seconds = timeit.timeit(lambda: trp_kernel(X, X), number=10)
    print("Execution time of trp\t:", trp_seconds)
#trp_vs_dp()