In [1]:
import torch
import sigkernel2
import sigkernel
import csv
import timeit
import math
# torch.cuda.device is a context manager that switches the current GPU; it is
# not a device handle. torch.device is the type accepted by `device=` arguments.
device = torch.device('cuda')

In [2]:
def generate(batch_size, length, dimension, device = torch.device('cpu')):
    """Sample a batch of random walks with standard-normal increments.

    Args:
        batch_size: size of the first tensor dimension (number of walks).
        length: size of the second tensor dimension (steps per walk).
        dimension: size of the third tensor dimension (coordinates per step).
        device: torch device the tensor is created on; CPU by default.

    Returns:
        A double-precision tensor of shape (batch_size, length, dimension)
        holding the cumulative sum of the increments along dim 1.
    """
    shape = (batch_size, length, dimension)
    increments = torch.randn(shape, dtype = torch.double, device = device)
    # Summing the increments along the step axis turns noise into walks.
    return increments.cumsum(dim=1)

In [3]:
# Kernel objects used by the warm-up cell. Each wraps an RBF static kernel
# built with argument 1 and receives 2 as its second constructor argument
# (presumably the dyadic order, as later cells pass `dyadic_order=` - confirm).
rbf_improved = sigkernel2.RBFKernel(1)
sig = sigkernel2.SigKernel(rbf_improved, 2)    # implementation from sigkernel2

rbf_baseline = sigkernel.RBFKernel(1)
sig1 = sigkernel.SigKernel(rbf_baseline, 2)    # implementation from sigkernel

In [4]:
# Warm-up: exercise every code path once so that JIT compilation happens
# before any benchmark is timed.
X = generate(1, 128, 7, device = torch.device('cuda:0'))

# Same call order as writing the four flag combinations out by hand:
# (strided, alt_scheme) = (False, False), (True, False), (False, True), (True, True).
for alt_scheme in (False, True):
    for strided in (False, True):
        sig.compute_kernel(X, X, strided=strided, alt_scheme=alt_scheme)

# Last expression of the cell, so the baseline kernel value is displayed.
sig1.compute_kernel(X, X)



tensor([1.3791e+32], device='cuda:0', dtype=torch.float64)

In [72]:
# GPU timing benchmark at dyadic order 0: baseline (sigkernel) vs. improved
# (sigkernel2) on batches of 100 seven-dimensional random walks.
# Writes one CSV row per timing repetition holding the mean seconds per call.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 0

# Build both kernels from the variable that is also written to the CSV, so the
# "Dyadic Order" column always matches what was actually benchmarked.
sig = sigkernel2.SigKernel(sigkernel2.RBFKernel(1), dyadic_order = dyadic_order)
sig1 = sigkernel.SigKernel(sigkernel.RBFKernel(1), dyadic_order = dyadic_order)

# Path lengths 16, 32, ..., 512, plus 1023.
lengths = [16 * 2**i for i in range(6)]
lengths.append(1023)

execs = 100  # calls per timing repetition
reps = 10    # timing repetitions per (implementation, length)

with open("../speciale/rplots/bench_gpu_low.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for l in lengths:

        x = generate(100, l, 7, device = torch.device('cuda:0'))

        impl = {
            "Baseline": lambda: sig1.compute_kernel(x, x),
            "Improved": lambda: sig.compute_kernel(x, x, strided=False, alt_scheme=False)
        }

        for name, func in impl.items():
            def timed_call(func=func):
                func()
                # CUDA work is queued asynchronously; wait for it so the
                # timer covers the computation and not just the launch.
                torch.cuda.synchronize()

            # Drain work queued earlier (input generation, the previous
            # implementation) so it is not charged to this measurement.
            torch.cuda.synchronize()
            timing_results = timeit.repeat(timed_call, number=execs, repeat=reps)

            # Each entry is the total for `execs` calls; store the per-call mean.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, l, dyadic_order, run, result / execs])

In [73]:
# GPU timing benchmark at dyadic order 1: baseline (sigkernel) vs. improved
# (sigkernel2) on batches of 100 seven-dimensional random walks.
# Writes one CSV row per timing repetition holding the mean seconds per call.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 1

# The kernels must be built with the same dyadic order that is written to the
# CSV; hard-coding 0 here would benchmark order 0 while labelling it order 1.
sig = sigkernel2.SigKernel(sigkernel2.RBFKernel(1), dyadic_order = dyadic_order)
sig1 = sigkernel.SigKernel(sigkernel.RBFKernel(1), dyadic_order = dyadic_order)

# Path lengths 16, 32, ..., 256, plus 511.
lengths = [16 * 2**i for i in range(5)]
lengths.append(511)

execs = 100  # calls per timing repetition
reps = 10    # timing repetitions per (implementation, length)

with open("../speciale/rplots/bench_gpu_low_1.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for l in lengths:

        x = generate(100, l, 7, device = torch.device('cuda:0'))

        impl = {
            "Baseline": lambda: sig1.compute_kernel(x, x),
            "Improved": lambda: sig.compute_kernel(x, x, strided=False, alt_scheme=False)
        }

        for name, func in impl.items():
            def timed_call(func=func):
                func()
                # CUDA work is queued asynchronously; wait for it so the
                # timer covers the computation and not just the launch.
                torch.cuda.synchronize()

            # Drain work queued earlier (input generation, the previous
            # implementation) so it is not charged to this measurement.
            torch.cuda.synchronize()
            timing_results = timeit.repeat(timed_call, number=execs, repeat=reps)

            # Each entry is the total for `execs` calls; store the per-call mean.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, l, dyadic_order, run, result / execs])

In [32]:
# CPU vs. GPU timing of the baseline (sigkernel) implementation at dyadic
# order 0 on a single seven-dimensional random walk per length.
# Writes one CSV row per timing repetition holding the mean seconds per call.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 0

sig1 = sigkernel.SigKernel(sigkernel.RBFKernel(1), dyadic_order = dyadic_order)

# Path lengths 16, 32, ..., 512, plus 1023. Defined before the warm-up so the
# warm-up does not depend on a loop variable left behind by another cell.
lengths = [16 * 2**i for i in range(6)]
lengths.append(1023)

# Warm up the CPU code path once before timing.
# NOTE(review): the recorded output of this cell is
# "TypeError: cython_backend.sigkernel_cython() takes exactly one argument (2 given)".
# That looks like it originates in the installed sigkernel CPU backend rather
# than in this cell - confirm the installed sigkernel build.
x_warmup = generate(1, lengths[0], 7, device = torch.device('cpu'))
sig1.compute_kernel(x_warmup, x_warmup)

execs = 100  # calls per timing repetition
reps = 10    # timing repetitions per (implementation, length)

with open("../speciale/rplots/bench_gpu_cpu.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for l in lengths:

        # Same sample on both devices: generated on the GPU, copied to the host.
        x = generate(1, l, 7, device = torch.device('cuda:0'))
        x_h = x.cpu()

        def baseline_gpu():
            sig1.compute_kernel(x, x)
            # CUDA work is queued asynchronously; wait for it so the timer
            # covers the computation and not just the launch.
            torch.cuda.synchronize()

        impl = {
            "Baseline - CPU": lambda: sig1.compute_kernel(x_h, x_h),
            "Baseline - GPU": baseline_gpu
        }

        for name, func in impl.items():
            timing_results = timeit.repeat(func, number=execs, repeat=reps)

            # Each entry is the total for `execs` calls; store the per-call mean.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, l, dyadic_order, run, result / execs])

TypeError: cython_backend.sigkernel_cython() takes exactly one argument (2 given)

In [24]:
def bench_memory(func):
    """Run `func` once and return the peak CUDA memory PyTorch recorded, in MiB.

    The allocator cache is emptied and the peak counter reset before the call.
    NOTE(review): tensors that are already alive when this runs (for example
    the benchmark inputs) presumably still count towards the peak - confirm.

    Args:
        func: zero-argument callable to measure; its return value is discarded.

    Returns:
        `torch.cuda.max_memory_allocated()` after the call, divided by 1024**2.
    """
    bytes_per_mib = 1024 ** 2

    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()

    func()

    peak_bytes = torch.cuda.max_memory_allocated()
    return peak_bytes / bytes_per_mib

In [28]:
# Peak-memory benchmark: baseline (sigkernel) vs. improved (sigkernel2) for
# several dyadic orders on one fixed batch of 100 seven-dimensional walks.
orders = [0, 1, 2, 3]
path_length = 128  # used for the input and written to the "Length" column

with open("../speciale/rplots/bench_gpu_mem.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Result"])

    x = generate(100, path_length, 7, device = torch.device('cuda:0'))

    # Named `order`, not `ord`: a loop variable called `ord` would shadow the
    # builtin for every cell run afterwards in the same kernel session.
    for order in orders:
        sig = sigkernel2.SigKernel(sigkernel2.RBFKernel(1), order)
        sig1 = sigkernel.SigKernel(sigkernel.RBFKernel(1), order)

        impl = {
            "Baseline": lambda: sig1.compute_kernel(x, x),
            "Improved": lambda: sig.compute_kernel(x, x, strided=False, alt_scheme=False)
        }

        for name, func in impl.items():
            # Single call per implementation; bench_memory reports the peak in MiB.
            result = bench_memory(func)

            writer.writerow([name, path_length, order, result])