In [1]:
import torch
import sigkernel2
import sigkernel
import csv
import timeit
import math
# Descriptor for the default CUDA device. `torch.device` is the object that
# tensor factories and `.to()` accept; `torch.cuda.device` is a context manager
# for switching the current device and cannot be used as a device argument.
device = torch.device('cuda')

In [3]:
def generate(batch_size, length, dimension, device = torch.device('cpu')):
  """Sample a batch of Gaussian random walks.

  Standard-normal increments of shape (batch_size, length, dimension) are
  drawn in double precision on `device` and then summed cumulatively along
  the length axis (dim 1).

  Returns:
    A torch.double tensor of shape (batch_size, length, dimension).
  """
  increments = torch.randn(
    batch_size, length, dimension, dtype = torch.double, device = device
  )
  return increments.cumsum(dim = 1)

In [7]:
# Kernel objects used by the warm-up cell: `sig` comes from sigkernel2 and
# `sig1` from sigkernel. Each wraps a linear static kernel built with 0.05 and
# is given 1 as its second argument (presumably the dyadic order, going by the
# keyword used in the benchmark cells -- confirm).
static_improved = sigkernel2.LinearKernel(0.05)
sig = sigkernel2.SigKernel(static_improved, 1)
static_baseline = sigkernel.LinearKernel(0.05)
sig1 = sigkernel.SigKernel(static_baseline, 1)

In [5]:
# Warm up to ensure JIT compilation
# Every compute_kernel variant is called once on a tiny batch so that one-off
# start-up cost is paid here rather than inside a timed benchmark cell.
X = generate(2, 128, 7, device = torch.device('cuda:0'))

# Same call order as writing the four combinations out by hand:
# (strided, alt_scheme) = (F, F), (T, F), (F, T), (T, T).
for alt_scheme in (False, True):
    for strided in (False, True):
        sig.compute_kernel(X, X, strided=strided, alt_scheme=alt_scheme)

# The two `lean` variants, unstrided first.
for lean_options in ({"lean": True}, {"strided": True, "lean": True}):
    sig.compute_kernel(X, X, **lean_options)

sig1.compute_kernel(X, X)

# Rebind X to a larger batch: 100 paths, 1024 steps, 7 channels.
X = generate(100, 1024, 7, device = torch.device('cuda:0'))



In [42]:
# Benchmark: "Baseline" (sigkernel) against "Improved" (sigkernel2, lean=True)
# on the GPU at dyadic order 0 over a range of path lengths. The mean
# wall-clock time of one call is written to bench_gpu_low.csv, one row per
# timing sample.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 0

# Both kernels are built from the variable above, so the order written to the
# CSV is by construction the order that was benchmarked.
sig = sigkernel2.SigKernel(sigkernel2.LinearKernel(0.05), dyadic_order = dyadic_order)
sig1 = sigkernel.SigKernel(sigkernel.LinearKernel(0.05), dyadic_order = dyadic_order)

# 16, 32, ..., 512, then 1023. `t` is a second name for the same list.
# NOTE(review): the last length is 1023 rather than 1024 -- presumably a size
# limit of one of the implementations; confirm.
lengths = t = [16 * 2**i for i in range(6)]
lengths.append(1023)

execs = 100  # calls per timing sample
reps = 10    # timing samples per (implementation, length)

with open("../speciale/rplots/bench_gpu_low.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for length in lengths:
        print(f"\rLength: {length}", end="")

        x = generate(100, length, 7, device = torch.device('cuda:0'))

        impl = {
            "Baseline": lambda: sig1.compute_kernel(x, x),
            "Improved": lambda: sig.compute_kernel(x, x, lean=True)
        }

        for name, func in impl.items():
            # CUDA work is queued asynchronously, so a bare call can return
            # before the GPU has finished. Synchronise inside the timed
            # statement so each sample covers the whole computation, and in
            # the untimed setup so leftover work is not charged to the sample.
            timing_results = timeit.repeat(
                lambda: (func(), torch.cuda.synchronize()),
                setup=torch.cuda.synchronize,
                number=execs,
                repeat=reps,
            )

            # One CSV row per sample, holding the mean time of a single call.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, length, dyadic_order, run, result / execs])

Length: 1023

In [43]:
# Benchmark: "Baseline" (sigkernel) against "Improved" (sigkernel2, lean=True)
# on the GPU at dyadic order 1 over a range of path lengths. The mean
# wall-clock time of one call is written to bench_gpu_low_1.csv, one row per
# timing sample.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 1

sig = sigkernel2.SigKernel(sigkernel2.LinearKernel(0.05), dyadic_order = dyadic_order)
sig1 = sigkernel.SigKernel(sigkernel.LinearKernel(0.05), dyadic_order = dyadic_order)

# 16, 32, ..., 256, then 511. `t` is a second name for the same list.
lengths = t = [16 * 2**i for i in range(5)]
lengths.append(511)

execs = 100  # calls per timing sample
reps = 10    # timing samples per (implementation, length)

with open("../speciale/rplots/bench_gpu_low_1.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for length in lengths:
        print(f"\rLength: {length}", end="")

        x = generate(100, length, 7, device = torch.device('cuda:0'))

        impl = {
            "Baseline": lambda: sig1.compute_kernel(x, x),
            "Improved": lambda: sig.compute_kernel(x, x, lean=True)
        }

        for name, func in impl.items():
            # CUDA work is queued asynchronously, so a bare call can return
            # before the GPU has finished. Synchronise inside the timed
            # statement so each sample covers the whole computation, and in
            # the untimed setup so leftover work is not charged to the sample.
            timing_results = timeit.repeat(
                lambda: (func(), torch.cuda.synchronize()),
                setup=torch.cuda.synchronize,
                number=execs,
                repeat=reps,
            )

            # One CSV row per sample, holding the mean time of a single call.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, length, dyadic_order, run, result / execs])

Length: 16



Length: 1024

In [53]:
# Benchmark: the sigkernel baseline on CPU tensors against the same data on
# the GPU, at dyadic order 0. The mean wall-clock time of one call is written
# to bench_gpu_cpu.csv, one row per timing sample.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 0

sig1 = sigkernel.SigKernel(sigkernel.LinearKernel(0.05), dyadic_order = dyadic_order)

# 16, 32, ..., 512, then 1023. `t` is a second name for the same list.
lengths = t = [16 * 2**i for i in range(6)]
lengths.append(1023)

execs = 100  # calls per timing sample
reps = 10    # timing samples per (implementation, length)

with open("../speciale/rplots/bench_gpu_cpu.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for length in lengths:
        print(f"\rLength: {length}", end="")

        # The same random walks on both devices: sampled on the GPU, then
        # copied to host memory for the CPU run.
        x = generate(10, length, 7, device = torch.device('cuda:0'))
        x_h = x.cpu()

        impl = {
            "Baseline - CPU": lambda: sig1.compute_kernel(x_h, x_h),
            "Baseline - GPU": lambda: sig1.compute_kernel(x, x)
        }

        for name, func in impl.items():
            # CUDA work is queued asynchronously, so the GPU variant can
            # return before the device has finished. Synchronise inside the
            # timed statement so each sample covers the whole computation, and
            # in the untimed setup so leftover work is not charged to it. For
            # the CPU variant there is no queued work to wait for.
            timing_results = timeit.repeat(
                lambda: (func(), torch.cuda.synchronize()),
                setup=torch.cuda.synchronize,
                number=execs,
                repeat=reps,
            )

            # One CSV row per sample, holding the mean time of a single call.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, length, dyadic_order, run, result / execs])

Length: 16



Length: 1023

In [45]:
def bench_memory(func):
    """Call `func` once and return the peak CUDA memory allocated, in MiB.

    The CUDA cache is emptied and the peak-memory statistics are reset before
    the call; the value returned is `torch.cuda.max_memory_allocated()` read
    right after the call, converted from bytes to MiB.
    """
    bytes_per_mib = 1024 ** 2

    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats()
    func()

    peak_bytes = torch.cuda.max_memory_allocated()
    return peak_bytes / bytes_per_mib

In [46]:
# Peak GPU memory of "Baseline" (sigkernel) and "Improved" (sigkernel2,
# lean=True) as the dyadic order grows, measured on one fixed batch of paths.
# Results are written to bench_gpu_mem.csv.
orders = [0, 1, 2, 3, 4]

# Single source of truth for the path length: the same value is used to
# generate the batch and to fill the "Length" column, so the CSV cannot
# disagree with the data that was actually measured.
length = 64

with open("../speciale/rplots/bench_gpu_mem.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Result"])

    x = generate(100, length, 7, device = torch.device('cuda:0'))

    # `order` rather than `ord`, which would shadow the builtin of that name.
    for order in orders:
        print(f"\rOrder: {order}", end="")
        sig = sigkernel2.SigKernel(sigkernel2.LinearKernel(0.05), order)
        sig1 = sigkernel.SigKernel(sigkernel.LinearKernel(0.05), order)

        impl = {
            "Baseline": lambda: sig1.compute_kernel(x, x),
            "Improved": lambda: sig.compute_kernel(x, x, lean=True)
        }

        for name, func in impl.items():
            # Peak allocated memory (MiB) reported by bench_memory for one call.
            result = bench_memory(func)

            writer.writerow([name, length, order, result])

Order: 4

In [54]:
# Peak GPU memory of both implementations as the path length grows, with 0
# passed as the kernels' second argument and recorded in the "Dyadic Order"
# column. One row per (implementation, length) goes to bench_gpu_mem_len.csv.
lengths = [32 * 2**k for k in range(5)] + [1023]

with open("../speciale/rplots/bench_gpu_mem_len.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Result"])

    for n_steps in lengths:
        print(f"\rLength: {n_steps}", end="")
        x = generate(100, n_steps, 7, device = torch.device('cuda:0'))
        sig = sigkernel2.SigKernel(sigkernel2.LinearKernel(0.05), 0)
        sig1 = sigkernel.SigKernel(sigkernel.LinearKernel(0.05), 0)

        candidates = (
            ("Baseline", lambda: sig1.compute_kernel(x, x)),
            ("Improved", lambda: sig.compute_kernel(x, x, lean=True)),
        )

        # Rows are produced lazily: each candidate is measured with
        # bench_memory and its row written before the next one runs.
        writer.writerows(
            [label, n_steps, 0, bench_memory(thunk)] for label, thunk in candidates
        )

Length: 1023

In [13]:
# Benchmark: sigkernel2 with strided=True against the default (unstrided)
# call, both with lean=True, at dyadic order 0. The mean wall-clock time of
# one call is written to bench_strided.csv, one row per timing sample.
torch.manual_seed(42)

if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

dyadic_order = 0

sig = sigkernel2.SigKernel(sigkernel2.LinearKernel(0.05), dyadic_order = dyadic_order)

# `t` is a second name for the same list.
lengths = t = [32, 64, 128, 256, 512, 768, 1024, 1280, 1536, 1792, 2048]

execs = 100  # calls per timing sample
reps = 10    # timing samples per (implementation, length)

with open("../speciale/rplots/bench_strided.csv", "w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["Implementation", "Length", "Dyadic Order", "Run", "Result"])

    for length in lengths:
        print(f"\rLength: {length}", end="")

        x = generate(10, length, 7, device = torch.device('cuda:0'))

        impl = {
            "Strided": lambda: sig.compute_kernel(x, x, strided=True, lean=True),
            "Unstrided": lambda: sig.compute_kernel(x, x, lean=True)
        }

        for name, func in impl.items():
            # CUDA work is queued asynchronously, so a bare call can return
            # before the GPU has finished. Synchronise inside the timed
            # statement so each sample covers the whole computation, and in
            # the untimed setup so leftover work is not charged to the sample.
            timing_results = timeit.repeat(
                lambda: (func(), torch.cuda.synchronize()),
                setup=torch.cuda.synchronize,
                number=execs,
                repeat=reps,
            )

            # One CSV row per sample, holding the mean time of a single call.
            for run, result in enumerate(timing_results, start=1):
                writer.writerow([name, length, dyadic_order, run, result / execs])

Length: 32



Length: 5118