In [1]:
!pip install keras jax

Collecting ml-dtypes (from keras)
  Using cached ml_dtypes-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (21 kB)
Using cached ml_dtypes-0.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (4.5 MB)
Installing collected packages: ml-dtypes
  Attempting uninstall: ml-dtypes
    Found existing installation: ml-dtypes 0.3.2
    Uninstalling ml-dtypes-0.3.2:
      Successfully uninstalled ml-dtypes-0.3.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.16.1 requires ml-dtypes~=0.3.1, but you have ml-dtypes 0.5.0 which is incompatible.[0m[31m
[0mSuccessfully installed ml-dtypes-0.5.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpyt

In [2]:
!pip install iisignature iisignature_tensorflow_2 keras_sig pandas

Collecting ml-dtypes (from keras<4.0,>=3.0.0->keras_sig)
  Using cached ml_dtypes-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Using cached ml_dtypes-0.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)
Installing collected packages: ml-dtypes
  Attempting uninstall: ml-dtypes
    Found existing installation: ml_dtypes 0.5.0
    Uninstalling ml_dtypes-0.5.0:
      Successfully uninstalled ml_dtypes-0.5.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
jax 0.4.38 requires ml_dtypes>=0.4.0, but you have ml-dtypes 0.3.2 which is incompatible.[0m[31m
[0mSuccessfully installed ml-dtypes-0.3.2
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.0[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run:

!pip install nvidia-cudnn-cu12 --upgrade

In [3]:
import os
# Keras and backend configuration.
# The backend is selected through the KERAS_BACKEND environment variable; per
# the Keras documentation it has to be set before `keras` is first imported.
BACKEND = "tensorflow"
os.environ.update({"KERAS_BACKEND": BACKEND})

In [4]:
import numpy as np
import pandas as pd
import time
import json
from datetime import datetime
import platform
from typing import Callable, List, Dict
import keras_sig
import tensorflow as tf
from IPython.display import display

def get_cpu_info():
    """Return a human-readable description of the host CPU.

    On Linux, the value of the first ``model name`` entry in ``/proc/cpuinfo``
    is returned. If the platform is not Linux, the file cannot be read, or no
    such entry exists, the function falls back to ``platform.processor()`` and,
    if that is empty, ``platform.machine()``.

    Returns:
        str: The CPU description.
    """
    if platform.system() == "Linux":
        try:
            with open('/proc/cpuinfo', 'r') as f:
                for line in f:
                    if 'model name' in line:
                        # Split on the first colon only, so a value that itself
                        # contains ':' is returned intact.
                        _, sep, value = line.partition(':')
                        if sep:
                            return value.strip()
        except OSError:
            # Best effort: an unreadable /proc/cpuinfo means we use the
            # generic fallback below. Other exceptions are not swallowed.
            pass
    return platform.processor() or platform.machine()

def time_function(func: Callable, number: int = 10) -> tuple[float, float]:
    """Time a zero-argument callable: one initial call, then ``number`` repeats.

    Args:
        func: Callable invoked with no arguments; its return value is discarded.
        number: How many timed calls to average after the initial call.

    Returns:
        A ``(compilation_time, execution_time)`` pair, both in milliseconds.
        ``compilation_time`` is the duration of the very first call (intended
        to absorb any one-off compilation cost); ``execution_time`` is the mean
        duration of the following ``number`` calls.

    Raises:
        ValueError: If ``number`` is smaller than 1 (the mean would be undefined).
    """
    if number < 1:
        raise ValueError("number must be at least 1")

    # perf_counter is monotonic and high-resolution, unlike the wall clock
    # returned by time.time(), so it is the right tool for measuring intervals.
    # First call to compile
    compilation_start = time.perf_counter()
    func()
    compilation_time = (time.perf_counter() - compilation_start) * 1000

    # Subsequent calls for execution time
    times = []
    for _ in range(number):
        start = time.perf_counter()
        func()
        times.append((time.perf_counter() - start) * 1000)  # Convert to milliseconds

    return compilation_time, np.mean(times)

def run_benchmark(batch_size: int, seq_len: int, n_features: int, depth: int) -> Dict:
    """Benchmark ``keras_sig.signature`` for a single configuration.

    Random float32 paths of shape ``(batch_size, seq_len, n_features)`` are
    drawn, converted to a TensorFlow tensor, and the signature computation at
    the given ``depth`` is timed with ``time_function``.

    Returns:
        Dict holding the four input parameters plus ``compilation_time`` and
        ``execution_time`` as reported by ``time_function``.
    """
    # Build the input batch once so that only the signature call is timed.
    path_shape = (batch_size, seq_len, n_features)
    random_paths = np.random.randn(*path_shape).astype(np.float32)
    path_tensor = tf.convert_to_tensor(random_paths)

    def compute_signature():
        return keras_sig.signature(path_tensor, depth)

    first_call_ms, mean_call_ms = time_function(compute_signature)

    return dict(
        batch_size=batch_size,
        seq_len=seq_len,
        n_features=n_features,
        depth=depth,
        compilation_time=first_call_ms,
        execution_time=mean_call_ms,
    )

def run_parameter_sweep():
    """Run benchmarks varying one parameter at a time.

    Batch size, sequence length and depth are swept in that order; while one
    parameter is swept the others stay at their defaults. The feature count is
    never swept and always uses its default.

    Side effects:
        Prints progress, writes the results to a timestamped CSV file and the
        system metadata to a timestamped JSON file in the working directory.

    Returns:
        ``(df, metadata)``: a DataFrame with one row per benchmarked
        configuration, and the metadata dict that was written to JSON.
    """
    from importlib import metadata as importlib_metadata

    # Default parameters; every sweep overrides exactly one of them.
    defaults = {
        'batch_size': 128,
        'seq_len': 100,
        'n_features': 3,
        'depth': 4,
    }

    # (keyword passed to run_benchmark, label for the progress line, values)
    sweeps = [
        ('batch_size', 'batch size', [32, 64, 128, 256, 512]),
        ('seq_len', 'sequence length', [50, 100, 200, 500, 1000]),
        ('depth', 'depth', [2, 3, 4, 5, 6]),
    ]

    # Display TensorFlow configuration
    print("TensorFlow devices:", tf.config.list_physical_devices())
    gpu_devices = tf.config.list_physical_devices('GPU')
    is_gpu = len(gpu_devices) > 0
    print("GPU Available:", is_gpu)

    results = []
    for param_name, label, values in sweeps:
        print(f"\nVarying {label}...")
        for value in values:
            results.append(run_benchmark(**{**defaults, param_name: value}))
            print(f"Completed {param_name}={value}")

    # Convert to DataFrame
    df = pd.DataFrame(results)

    # Save results
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    device_type = "gpu" if is_gpu else "cpu"
    csv_filename = f'tensorflow_signature_benchmarks_{device_type}_{timestamp}.csv'
    df.to_csv(csv_filename, index=False)

    # Record the keras_sig version that is actually installed rather than a
    # hard-coded string that can silently drift from the environment.
    try:
        keras_sig_version = importlib_metadata.version('keras_sig')
    except importlib_metadata.PackageNotFoundError:
        keras_sig_version = 'unknown'

    # Save metadata about the system
    metadata = {
        'cpu_info': get_cpu_info(),
        'tensorflow_version': tf.__version__,
        'keras_sig_version': keras_sig_version,
        'gpu_available': is_gpu,
        'gpu_devices': [device.name for device in gpu_devices],
        'timestamp': timestamp,
        'system': platform.system(),
        'python_version': platform.python_version(),
        'platform': platform.platform()
    }

    with open(f'tensorflow_signature_benchmarks_{device_type}_metadata_{timestamp}.json', 'w') as f:
        json.dump(metadata, f, indent=4)

    return df, metadata

# Run the TensorFlow parameter sweep and keep its outputs as notebook globals.
print("Starting TensorFlow signature benchmarks...")
df, metadata = run_parameter_sweep()
print("\nBenchmarks complete. Results saved to CSV.")

# Echo the recorded metadata, one "key: value" line per entry.
print("\nSystem information:")
for entry in metadata.items():
    print("{}: {}".format(*entry))

display(df)

2025-01-05 23:39:28.850913: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-01-05 23:39:31.250401: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-01-05 23:39:31.257271: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-01-05 23:39:3

Starting TensorFlow signature benchmarks...
TensorFlow devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
GPU Available: True

Varying batch size...


2025-01-05 23:39:31.272286: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-01-05 23:39:31.272476: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-01-05 23:39:31.385844: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:998] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Completed batch_size=32
Completed batch_size=64
Completed batch_size=128
Completed batch_size=256
Completed batch_size=512

Varying sequence length...
Completed seq_len=50
Completed seq_len=100
Completed seq_len=200
Completed seq_len=500
Completed seq_len=1000

Varying depth...
Completed depth=2
Completed depth=3
Completed depth=4
Completed depth=5
Completed depth=6

Benchmarks complete. Results saved to CSV.

System information:
cpu_info: AMD Ryzen 9 5900X 12-Core Processor
tensorflow_version: 2.16.1
keras_sig_version: 1.0.2
gpu_available: True
gpu_devices: ['/physical_device:GPU:0']
timestamp: 20250105_233934
system: Linux
python_version: 3.11.0rc1
platform: Linux-5.15.0-102-generic-x86_64-with-glibc2.35


Unnamed: 0,batch_size,seq_len,n_features,depth,compilation_time,execution_time
0,32,100,3,4,52.453279,16.492653
1,64,100,3,4,16.370535,16.290522
2,128,100,3,4,16.453028,16.391492
3,256,100,3,4,16.476631,16.332078
4,512,100,3,4,17.096519,16.365337
5,128,50,3,4,16.474247,16.453505
6,128,100,3,4,16.405821,16.378903
7,128,200,3,4,16.448021,16.306758
8,128,500,3,4,16.875505,16.377831
9,128,1000,3,4,16.724586,16.412354


In [5]:
import keras
import numpy as np
import pandas as pd
import time
import json
from datetime import datetime
import platform
from iisignature_tensorflow_2 import SigLayer as iisigSigLayer

def get_cpu_info():
    """Return a human-readable description of the host CPU.

    On Linux, the value of the first ``model name`` entry in ``/proc/cpuinfo``
    is returned. If the platform is not Linux, the file cannot be read, or no
    such entry exists, the function falls back to ``platform.processor()`` and,
    if that is empty, ``platform.machine()``.

    Returns:
        str: The CPU description.
    """
    if platform.system() == "Linux":
        try:
            with open('/proc/cpuinfo', 'r') as f:
                for line in f:
                    if 'model name' in line:
                        # Split on the first colon only, so a value that itself
                        # contains ':' is returned intact.
                        _, sep, value = line.partition(':')
                        if sep:
                            return value.strip()
        except OSError:
            # Best effort: an unreadable /proc/cpuinfo means we use the
            # generic fallback below. Other exceptions are not swallowed.
            pass
    return platform.processor() or platform.machine()

class SigNet(keras.Model):
    """Three-stage model: dense projection, signature layer, dense readout.

    Args:
        in_channels: Accepted for interface compatibility; not used here.
        out_dimension: Number of units of the final ``Dense`` layer.
        sig_input_size: Number of units of the first ``Dense`` layer, whose
            output is what the signature layer receives.
        sig_depth: Passed as the single positional argument to
            ``sig_layer_class``.
        sig_layer_class: Callable that builds the signature layer.
    """

    def __init__(self, in_channels, out_dimension, sig_input_size, sig_depth, sig_layer_class):
        super().__init__()
        self.dense1 = keras.layers.Dense(units=sig_input_size)
        self.signature = sig_layer_class(sig_depth)
        self.linear = keras.layers.Dense(units=out_dimension)

    def call(self, inputs):
        """Apply the projection, the signature layer and the readout in order."""
        projected = self.dense1(inputs)
        return self.linear(self.signature(projected))

def create_data(num_sample, seq_len, n_feature, n_ahead, seed=None):
    """Generate random standard-normal inputs and targets as float32 arrays.

    Args:
        num_sample: Number of samples (first axis of both arrays).
        seq_len: Second axis of ``X``.
        n_feature: Third axis of ``X``.
        n_ahead: Second axis of ``y``.
        seed: Optional seed. When ``None`` (the default) the values are drawn
            from NumPy's global random state exactly as before; otherwise a
            dedicated ``np.random.default_rng(seed)`` generator is used so the
            data is reproducible.

    Returns:
        ``(X, y)`` with shapes ``(num_sample, seq_len, n_feature)`` and
        ``(num_sample, n_ahead)``, both ``float32``.
    """
    if seed is None:
        # Legacy path: global RNG, so an outer np.random.seed() is still honoured.
        X = np.random.randn(num_sample, seq_len, n_feature)
        y = np.random.randn(num_sample, n_ahead)
    else:
        rng = np.random.default_rng(seed)
        X = rng.standard_normal((num_sample, seq_len, n_feature))
        y = rng.standard_normal((num_sample, n_ahead))
    return X.astype(np.float32), y.astype(np.float32)

def measure_compilation_time(model, X, y, batch_size, use_jit=True):
    """Compile ``model`` and time a first ``predict`` call on one batch.

    Args:
        model: Object exposing Keras-style ``compile`` and ``predict`` methods.
        X: Input data; only the first ``batch_size`` entries are used.
        y: Unused; kept so existing callers do not break.
        batch_size: Number of leading samples of ``X`` passed to ``predict``.
        use_jit: Forwarded to ``model.compile`` as ``jit_compile``. When false,
            no ``predict`` call is made, so the returned time is close to zero.

    Returns:
        float: Elapsed seconds around the (optional) first ``predict`` call.
    """
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss="mse",
        jit_compile=use_jit
    )

    sample_X = X[:batch_size]
    # perf_counter is monotonic and high-resolution, unlike time.time().
    compilation_start = time.perf_counter()
    if use_jit:
        model.predict(sample_X, verbose=0)
    compilation_time = time.perf_counter() - compilation_start

    return compilation_time

def train_model(model, X, y, batch_size, epochs=10):
    """Fit ``model`` and report the final loss and timing.

    Args:
        model: Object exposing a Keras-style ``fit`` whose result has a
            ``history`` dict containing a ``'loss'`` list.
        X: Training inputs, passed to ``fit`` unchanged.
        y: Training targets, passed to ``fit`` unchanged.
        batch_size: Forwarded to ``fit``.
        epochs: Number of epochs; must be at least 1.

    Returns:
        dict with ``final_loss`` (last recorded loss as a float),
        ``training_time`` (seconds spent in ``fit``) and ``avg_epoch_time``
        (``training_time / epochs``).

    Raises:
        ValueError: If ``epochs`` is smaller than 1. Without this check the
            function would fail only after ``fit`` has run, with an
            IndexError or ZeroDivisionError.
    """
    if epochs < 1:
        raise ValueError("epochs must be at least 1")

    # perf_counter is monotonic and high-resolution, unlike time.time().
    training_start = time.perf_counter()
    history = model.fit(
        X, y,
        batch_size=batch_size,
        epochs=epochs,
        verbose=0
    )
    training_time = time.perf_counter() - training_start

    return {
        'final_loss': float(history.history['loss'][-1]),
        'training_time': training_time,
        'avg_epoch_time': training_time/epochs
    }

def _detect_device_type():
    """Return "gpu" if the active Keras backend reports a GPU, otherwise "cpu"."""
    backend_name = keras.backend.backend()
    try:
        if backend_name == "tensorflow":
            import tensorflow as tf
            return "gpu" if tf.config.list_physical_devices("GPU") else "cpu"
        if backend_name == "jax":
            import jax
            return "gpu" if jax.default_backend() == "gpu" else "cpu"
    except ImportError:
        # Backend library not importable here: fall through to the default.
        pass
    return "cpu"


def _save_benchmark_outputs(results, run_settings, device_type, timestamp):
    """Write the results CSV and metadata JSON; return ``(df, metadata)``."""
    from importlib import metadata as importlib_metadata

    # Record the keras_sig version that is actually installed rather than a
    # hard-coded string that can silently drift from the environment.
    try:
        keras_sig_version = importlib_metadata.version('keras_sig')
    except importlib_metadata.PackageNotFoundError:
        keras_sig_version = 'unknown'

    df = pd.DataFrame(results)
    csv_filename = f'keras_benchmarks_{device_type}_{timestamp}.csv'
    df.to_csv(csv_filename, index=False)

    metadata = {
        'cpu_info': get_cpu_info(),
        'keras_backend': keras.backend.backend(),
        'keras_version': keras.__version__,
        'keras_sig_version': keras_sig_version,
        # NOTE(review): still hard-coded, and no iisignature layer is
        # benchmarked by this function - confirm which package this refers to.
        'iisignature_version': '0.1.0',
        'timestamp': timestamp,
        'system': platform.system(),
        'python_version': platform.python_version(),
        'platform': platform.platform(),
        **run_settings
    }

    with open(f'keras_benchmarks_{device_type}_metadata_{timestamp}.json', 'w') as f:
        json.dump(metadata, f, indent=4)

    return df, metadata


def run_benchmarks():
    """Train ``SigNet`` over a grid of settings and record timings.

    Every combination of sequence length, signature input size and depth is
    run for each configured signature layer. For each run a fresh random
    dataset and model are created, the first-predict time is measured with
    ``measure_compilation_time`` and the training time with ``train_model``.

    Side effects:
        Prints progress. After every run the accumulated results are written
        to one CSV/JSON pair whose name carries the timestamp taken when the
        sweep started, so the same files are overwritten as a checkpoint
        instead of a new pair being created per run. The file names are
        labelled "gpu" or "cpu" according to whether the active backend
        actually reports a GPU.

    Returns:
        ``(df, metadata)``: a DataFrame with one row per run and the metadata
        dict that was written to JSON.
    """
    from itertools import product

    seq_lens = [100, 200, 350, 500]
    sig_input_sizes = [2, 4, 6, 10]
    depths = [2, 3, 4]

    batch_size = 128
    n_feature = 20
    n_ahead = 10
    epochs = 10
    # Same for every configuration, so computed once up front.
    num_sample = batch_size * 100 - 35

    results = []
    sig_layers = {
        'keras_sig': (keras_sig.SigLayer, True),  # (layer_class, use_jit)
    }

    run_settings = {
        'batch_size': batch_size,
        'n_feature': n_feature,
        'n_ahead': n_ahead,
        'epochs': epochs
    }
    # One timestamp and device label per sweep keeps all checkpoints in one file pair.
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    device_type = _detect_device_type()

    total_runs = len(seq_lens) * len(sig_input_sizes) * len(depths) * len(sig_layers)
    current_run = 0

    # product() iterates in the same order as the equivalent nested loops.
    for seq_len, sig_input_size, depth in product(seq_lens, sig_input_sizes, depths):
        for layer_name, (layer_class, use_jit) in sig_layers.items():
            current_run += 1
            print(f"\nRun {current_run}/{total_runs}")
            print(f"Parameters: seq_len={seq_len}, sig_input_size={sig_input_size}, depth={depth}")
            print(f"Implementation: {layer_name} (JIT: {use_jit})")

            X, y = create_data(num_sample, seq_len, n_feature, n_ahead)
            model = SigNet(n_feature, n_ahead, sig_input_size, depth, layer_class)

            compilation_time = measure_compilation_time(model, X, y, batch_size, use_jit)
            training_results = train_model(model, X, y, batch_size, epochs)

            results.append({
                'seq_len': seq_len,
                'sig_input_size': sig_input_size,
                'depth': depth,
                'implementation': layer_name,
                'jit_enabled': use_jit,
                'compilation_time': compilation_time,
                'training_time': training_results['training_time'],
                'avg_epoch_time': training_results['avg_epoch_time'],
                'final_loss': training_results['final_loss']
            })

            # Release the model and backend state before the next configuration.
            del model
            keras.backend.clear_session()

            # Checkpoint so partial results survive an interrupted sweep.
            _save_benchmark_outputs(results, run_settings, device_type, timestamp)

    return _save_benchmark_outputs(results, run_settings, device_type, timestamp)

# Run the Keras training benchmarks and keep the outputs as notebook globals.
print("Starting Keras signature benchmarks...")
df, metadata = run_benchmarks()
print("\nBenchmarks complete. Results saved to CSV and JSON files.")
print("\nSystem information:")
for key, value in metadata.items():
    print(f"{key}: {value}")

# Lift pandas' row/column truncation so the complete results table is shown.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
print("\nBenchmark Results:")
# Rich display instead of print(), matching how the TensorFlow sweep results are shown.
display(df)

Starting Keras signature benchmarks...

Run 1/48
Parameters: seq_len=100, sig_input_size=2, depth=2
Implementation: keras_sig (JIT: True)


I0000 00:00:1736120375.739739  111940 service.cc:145] XLA service 0x7f28840045f0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1736120375.739777  111940 service.cc:153]   StreamExecutor device (0): NVIDIA GeForce RTX 4090, Compute Capability 8.9
2025-01-05 23:39:35.783525: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8906
I0000 00:00:1736120375.929715  111940 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2025-01-05 23:39:37.162269: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.



Run 2/48
Parameters: seq_len=100, sig_input_size=2, depth=3
Implementation: keras_sig (JIT: True)

Run 3/48
Parameters: seq_len=100, sig_input_size=2, depth=4
Implementation: keras_sig (JIT: True)

Run 4/48
Parameters: seq_len=100, sig_input_size=4, depth=2
Implementation: keras_sig (JIT: True)

Run 5/48
Parameters: seq_len=100, sig_input_size=4, depth=3
Implementation: keras_sig (JIT: True)

Run 6/48
Parameters: seq_len=100, sig_input_size=4, depth=4
Implementation: keras_sig (JIT: True)

Run 7/48
Parameters: seq_len=100, sig_input_size=6, depth=2
Implementation: keras_sig (JIT: True)

Run 8/48
Parameters: seq_len=100, sig_input_size=6, depth=3
Implementation: keras_sig (JIT: True)

Run 9/48
Parameters: seq_len=100, sig_input_size=6, depth=4
Implementation: keras_sig (JIT: True)

Run 10/48
Parameters: seq_len=100, sig_input_size=10, depth=2
Implementation: keras_sig (JIT: True)

Run 11/48
Parameters: seq_len=100, sig_input_size=10, depth=3
Implementation: keras_sig (JIT: True)

Run 1