In [2]:
import numpy as np

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '3'

import sys
sys.path.append('..') # add to path parent dir of gpsig

import tensorflow as tf

gpus = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(gpus[0], True)

import gpflow
import gpsig

from sklearn.model_selection import ParameterGrid

from mmd_utils import *

### Noise vs noise (same distribution)

In [3]:
num_data1 = 1000
num_data2 = 2000
num_features = 2
min_len = 10
max_len = 100

# variable length
X_0 = [np.random.randn(np.random.randint(min_len, max_len), num_features) for i in range(num_data1)]
X_1 = [np.random.randn(np.random.randint(min_len, max_len), num_features) for i in range(num_data2)]

#### Kernels for vector-valued data

In [4]:
# Linear kernel from gpflow.kernels
params_grid = ParameterGrid({'tabulation' : ['interp', 'pad']})
d_lin = mmd_with_gpflow_kernel(gpflow.kernels.Linear, X_0, X_1, params_grid, name='Linear')

# RBF kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_rbf = mmd_with_gpflow_kernel(gpflow.kernels.RBF, X_0, X_1, params_grid, name='RBF')

# MA12 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma12 = mmd_with_gpflow_kernel(gpflow.kernels.Matern12, X_0, X_1, params_grid, name='MA12')

# MA32 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma32 = mmd_with_gpflow_kernel(gpflow.kernels.Matern32, X_0, X_1, params_grid, name='MA32')

# MA52 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma52 = mmd_with_gpflow_kernel(gpflow.kernels.Matern52, X_0, X_1, params_grid, name='MA52')

Kernel: Linear
------------------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))


Time elapsed: 1.76
Time per iteration: 0.88
Number of combinations: 2
Found parameters: {'tabulation': 'interp'}
MMD: 3.069e-04
------------------

Kernel: RBF
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 16.06
Time per iteration: 0.73
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 1.783e-04
------------------

Kernel: MA12
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 16.07
Time per iteration: 0.73
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 8.139e-05
------------------

Kernel: MA32
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 16.01
Time per iteration: 0.73
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 1.206e-04
------------------

Kernel: MA52
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 16.43
Time per iteration: 0.75
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 1.368e-04
------------------



#### Signature kernels

In [5]:
# LR-Sig-Linear from gpsig.kernels
params_grid = ParameterGrid({'num_levels' : [2, 3, 4, 5], 'add_time': [True, False], 'num_lags' : [0, 1]})
d_lrsiglin = mmd_with_gpsig_kernel(gpsig.kernels.SignatureLinear, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-Lin')

# LR-Sig-RBF from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigrbf = mmd_with_gpsig_kernel(gpsig.kernels.SignatureRBF, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-RBF')

# LR-Sig-MA12 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma12 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern12, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA12')

# LR-Sig-MA32 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma32 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern32, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA32')

# LR-Sig-MA52 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma52 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern52, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA52')

Kernel: LR-Sig-Lin
------------------


HBox(children=(IntProgress(value=0, max=16), HTML(value='')))


Time elapsed: 24.13
Time per iteration: 1.51
Number of combinations: 16
Found parameters: {'add_time': True, 'num_lags': 0, 'num_levels': 2}
MMD: -5.460e-05
------------------

Kernel: LR-Sig-RBF
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 131.75
Time per iteration: 1.50
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 0.1, 'num_levels': 3}
MMD: 1.816e-04
------------------

Kernel: LR-Sig-MA12
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 130.86
Time per iteration: 1.49
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 0.01, 'num_levels': 2}
MMD: 2.550e-04
------------------

Kernel: LR-Sig-MA32
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 128.44
Time per iteration: 1.46
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 0.01, 'num_levels': 5}
MMD: 9.795e-05
------------------

Kernel: LR-Sig-MA52
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 131.25
Time per iteration: 1.49
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 0.1, 'num_levels': 2}
MMD: 1.608e-04
------------------



### Signal vs signal
Load binary ($y \in \{0, 1\}$) time series classification dataset `NetFlow` and compute $MMD_U[P(x|y=0) \Vert P(x|y=1)]$

In [6]:
data = load_netflow_dataset()

X = list(np.squeeze(data['X_train'])) + list(np.squeeze(data['X_test']))
Y = list(np.squeeze(data['y_train'])) + list(np.squeeze(data['y_test']))

X_0 = [x for i, x in enumerate(X) if Y[i]==0] 
X_1 = [x for i, x in enumerate(X) if Y[i]==1] 

In [7]:
# Linear kernel from gpflow.kernels
params_grid = ParameterGrid({'tabulation' : ['interp', 'pad']})
d_lin = mmd_with_gpflow_kernel(gpflow.kernels.Linear, X_0, X_1, params_grid, name='Linear')

# RBF kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_rbf = mmd_with_gpflow_kernel(gpflow.kernels.RBF, X_0, X_1, params_grid, name='RBF')

# MA12 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma12 = mmd_with_gpflow_kernel(gpflow.kernels.Matern12, X_0, X_1, params_grid, name='MA12')

# MA32 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma32 = mmd_with_gpflow_kernel(gpflow.kernels.Matern32, X_0, X_1, params_grid, name='MA32')

# MA52 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma52 = mmd_with_gpflow_kernel(gpflow.kernels.Matern52, X_0, X_1, params_grid, name='MA52')

Kernel: Linear
------------------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))


Time elapsed: 1.28
Time per iteration: 0.64
Number of combinations: 2
Found parameters: {'tabulation': 'interp'}
MMD: 9.290e-03
------------------

Kernel: RBF
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 13.40
Time per iteration: 0.61
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 5.485e-03
------------------

Kernel: MA12
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 13.34
Time per iteration: 0.61
Number of combinations: 22
Found parameters: {'lengthscales': 100.0, 'tabulation': 'interp'}
MMD: 1.161e-02
------------------

Kernel: MA32
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 13.41
Time per iteration: 0.61
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 8.702e-03
------------------

Kernel: MA52
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 13.36
Time per iteration: 0.61
Number of combinations: 22
Found parameters: {'lengthscales': 10.0, 'tabulation': 'interp'}
MMD: 7.903e-03
------------------



#### Signature kernels

In [8]:
# LR-Sig-Linear from gpsig.kernels
params_grid = ParameterGrid({'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsiglin = mmd_with_gpsig_kernel(gpsig.kernels.SignatureLinear, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-Lin')

# LR-Sig-RBF from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigrbf = mmd_with_gpsig_kernel(gpsig.kernels.SignatureRBF, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-RBF')

# LR-Sig-MA12 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma12 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern12, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA12')

# LR-Sig-MA32 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma32 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern32, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA32')

# LR-Sig-MA52 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma52 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern52, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA52')

Kernel: LR-Sig-Lin
------------------


HBox(children=(IntProgress(value=0, max=8), HTML(value='')))


Time elapsed: 12.80
Time per iteration: 1.60
Number of combinations: 8
Found parameters: {'add_time': False, 'num_levels': 4}
MMD: 2.370e-01
------------------

Kernel: LR-Sig-RBF
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 142.68
Time per iteration: 1.62
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 0.1, 'num_levels': 4}
MMD: 7.433e-01
------------------

Kernel: LR-Sig-MA12
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 143.13
Time per iteration: 1.63
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 1.0, 'num_levels': 3}
MMD: 7.453e-01
------------------

Kernel: LR-Sig-MA32
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 144.08
Time per iteration: 1.64
Number of combinations: 88
Found parameters: {'add_time': True, 'lengthscales': 0.1, 'num_levels': 2}
MMD: 5.672e-01
------------------

Kernel: LR-Sig-MA52
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 144.73
Time per iteration: 1.64
Number of combinations: 88
Found parameters: {'add_time': False, 'lengthscales': 0.1, 'num_levels': 5}
MMD: 6.635e-01
------------------



### Signal vs noise
Compare `NetFlow` with noise

In [9]:
X_0 = X

num_data1 = len(X)
num_data2 = 1000
num_features = X[0].shape[1]
min_len = np.min([len(x) for x in X])
maxn_len = np.max([len(x) for x in X])

# variable length
X_1 = [np.random.randn(np.random.randint(min_len, max_len), num_features) for i in range(num_data2)]

In [10]:
# Linear kernel from gpflow.kernels
params_grid = ParameterGrid({'tabulation' : ['interp', 'pad']})
d_lin = mmd_with_gpflow_kernel(gpflow.kernels.Linear, X_0, X_1, params_grid, name='Linear')

# RBF kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_rbf = mmd_with_gpflow_kernel(gpflow.kernels.RBF, X_0, X_1, params_grid, name='RBF')

# MA12 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma12 = mmd_with_gpflow_kernel(gpflow.kernels.Matern12, X_0, X_1, params_grid, name='MA12')

# MA32 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma32 = mmd_with_gpflow_kernel(gpflow.kernels.Matern32, X_0, X_1, params_grid, name='MA32')

# MA52 kernel from gpflow.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'tabulation' : ['interp', 'pad']})
d_ma52 = mmd_with_gpflow_kernel(gpflow.kernels.Matern52, X_0, X_1, params_grid, name='MA52')

Kernel: Linear
------------------


HBox(children=(IntProgress(value=0, max=2), HTML(value='')))


Time elapsed: 2.21
Time per iteration: 1.10
Number of combinations: 2
Found parameters: {'tabulation': 'interp'}
MMD: 2.417e-01
------------------

Kernel: RBF
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 23.14
Time per iteration: 1.05
Number of combinations: 22
Found parameters: {'lengthscales': 100.0, 'tabulation': 'interp'}
MMD: 5.855e-02
------------------

Kernel: MA12
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 23.24
Time per iteration: 1.06
Number of combinations: 22
Found parameters: {'lengthscales': 100.0, 'tabulation': 'interp'}
MMD: 8.880e-02
------------------

Kernel: MA32
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 22.86
Time per iteration: 1.04
Number of combinations: 22
Found parameters: {'lengthscales': 100.0, 'tabulation': 'interp'}
MMD: 8.726e-02
------------------

Kernel: MA52
------------------


HBox(children=(IntProgress(value=0, max=22), HTML(value='')))


Time elapsed: 23.25
Time per iteration: 1.06
Number of combinations: 22
Found parameters: {'lengthscales': 100.0, 'tabulation': 'interp'}
MMD: 7.824e-02
------------------



#### Signature kernels

In [12]:
# LR-Sig-Linear from gpsig.kernels
params_grid = ParameterGrid({'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsiglin = mmd_with_gpsig_kernel(gpsig.kernels.SignatureLinear, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-Lin')

# LR-Sig-RBF from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigrbf = mmd_with_gpsig_kernel(gpsig.kernels.SignatureRBF, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-RBF')

# LR-Sig-MA12 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma12 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern12, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA12')

# LR-Sig-MA32 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma32 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern32, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA32')

# LR-Sig-MA52 from gpsig.kernels
params_grid = ParameterGrid({'lengthscales' : np.logspace(-5, 5, 11), 'num_levels' : [2, 3, 4, 5], 'add_time': [True, False]})
d_lrsigma52 = mmd_with_gpsig_kernel(gpsig.kernels.SignatureMatern52, X_0, X_1, params_grid, low_rank=True, batch_size=100, name='LR-Sig-MA52')

Kernel: LR-Sig-Lin
------------------


HBox(children=(IntProgress(value=0, max=8), HTML(value='')))


Time elapsed: 21.79
Time per iteration: 2.72
Number of combinations: 8
Found parameters: {'add_time': True, 'num_levels': 4}
MMD: 1.012e+00
------------------

Kernel: LR-Sig-RBF
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 244.75
Time per iteration: 2.78
Number of combinations: 88
Found parameters: {'add_time': True, 'lengthscales': 0.1, 'num_levels': 4}
MMD: 1.133e+00
------------------

Kernel: LR-Sig-MA12
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 245.56
Time per iteration: 2.79
Number of combinations: 88
Found parameters: {'add_time': True, 'lengthscales': 0.1, 'num_levels': 5}
MMD: 1.007e+00
------------------

Kernel: LR-Sig-MA32
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 245.62
Time per iteration: 2.79
Number of combinations: 88
Found parameters: {'add_time': True, 'lengthscales': 0.1, 'num_levels': 5}
MMD: 1.116e+00
------------------

Kernel: LR-Sig-MA52
------------------


HBox(children=(IntProgress(value=0, max=88), HTML(value='')))


Time elapsed: 249.70
Time per iteration: 2.84
Number of combinations: 88
Found parameters: {'add_time': True, 'lengthscales': 0.1, 'num_levels': 4}
MMD: 1.084e+00
------------------

