In [12]:
import time

import torch
import torch_scatter as ts

In [13]:
def get_uniform_data(n, type):
    output = type(n).fill_(0)
    index = torch.arange(0, n, out=torch.LongTensor())
    input = type(n).fill_(1)
    return output, index, input

def get_even_data(n, type):
    output = type(n).fill_(0)
    index = torch.LongTensor(n).fill_(0)
    input = type(n).fill_(1)
    return output, index, input

In [15]:
def benchmark(name, output, index, input):
    func = getattr(ts, name)
    runtimes = []
    for type in types:
        runtime = 0
        for i in range(num_runs):
            t = time.process_time()
            func(output, index, input, 0)
            runtime += time.process_time() - t
        runtime /= num_runs
        runtimes.append(runtime)
    return runtimes

def benchmark_pytorch_scatter_add(output, index, input):
    runtimes = []
    for type in types:
        runtime = 0
        for i in range(num_runs):
            t = time.process_time()
            output.scatter_add_(0, index, input)
            runtime += time.process_time() - t
        runtime /= num_runs
        runtimes.append(runtime)
    return runtimes

In [14]:
types = [torch.FloatTensor, torch.DoubleTensor, torch.ByteTensor, torch.CharTensor,
         torch.ShortTensor, torch.IntTensor, torch.LongTensor]

n = 1000000
num_runs = 100

# CPU Benchmark

## Compare to PyTorch `scatter_add_`

In [16]:
output, index, input = get_uniform_data(n, type=torch.FloatTensor)
runtimes = benchmark_pytorch_scatter_add(output, index, input)
print(runtimes)

output, index, input = get_even_data(n, type=torch.FloatTensor)
runtimes = benchmark_pytorch_scatter_add(output, index, input)
print(runtimes)

[0.0013608833600000025, 0.0010018577800000373, 0.001011737080000028, 0.0010304318000000912, 0.0010575933199999277, 0.001046791089999779, 0.001026226260000076]
[0.0024379739199999763, 0.0020916335999999623, 0.0021022859400000016, 0.0020662273700000535, 0.002074936339999951, 0.002058829469999992, 0.002079201120000054]


In [17]:
output, index, input = get_uniform_data(n, type=torch.FloatTensor)
runtimes = benchmark('scatter_add_', output, index, input)
print(runtimes)

output, index, input = get_even_data(n, type=torch.FloatTensor)
runtimes = benchmark('scatter_add_', output, index, input)
print(runtimes)

[0.00494133797000007, 0.004897300620000032, 0.0045747565699999805, 0.004568737810000112, 0.004554663379999937, 0.004549899970000002, 0.004568819980000019]
[0.0049159168400001365, 0.004562161230000079, 0.004574221990000033, 0.004564846730000127, 0.004551143499999953, 0.00455917282999998, 0.004575252730000017]


## Compare internally

# GPU Benchmark

## Compare to PyTorch `scatter_add_`

## Compare internally