In [2]:
import numpy as np
import pandas as pd
import torch

import matplotlib.pyplot as plt

import time

#### In this notebook, we will benchmark some basic operations with tensors on the cpu and on the gpu

# Benchmarking Tensor Operations



## PART 1

In [6]:
# Create three compatible random tensors 
x = torch.tensor([1, 2, 3])
y = torch.zeros(2, 3)
z = torch.randn(2, 3)

In [7]:
# element-wise addition of two tensors
a = x + y
print(a)

tensor([[1., 2., 3.],
        [1., 2., 3.]])


In [8]:
# element-wise multiplication of two tensors
b = y * z
print(b)

tensor([[0., -0., -0.],
        [-0., 0., -0.]])


In [9]:
# matrix multiplication of two tensors
c = y @ z.T
print(c)

tensor([[0., 0.],
        [0., 0.]])


## PART 2

In [10]:
def benchmark(op, *args):
    start = time.time()
    op(*args)
    end = time.time()
    return end - start

In [11]:
def run_benchmarks(op, sizes):
    for size in sizes:
        x = torch.randn(size, size)
        y = torch.randn(size, size)
        z = torch.randn(size, size)

        cpu_time = benchmark(op, x, y, z)

        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()
            z = z.cuda()

            gpu_time = benchmark(op, x, y, z)

            print(f"{op.__name__}: size={size}, CPU={cpu_time:.4f}s, GPU={gpu_time:.4f}s")
        else:
            print(f"{op.__name__}: size={size}, CPU={cpu_time:.4f}s")


In [14]:
sizes = [100, 200, 300, 400, 500, 600, 700, 800, 900, 1000]

for op in [lambda x, y, z: x + y, lambda x, y, z: y * z, lambda x, y, z: y @ z.T]:
    run_benchmarks(op, sizes)

<lambda>: size=100, CPU=0.0000s, GPU=0.0001s
<lambda>: size=200, CPU=0.0005s, GPU=0.0001s
<lambda>: size=300, CPU=0.0001s, GPU=0.0000s
<lambda>: size=400, CPU=0.0001s, GPU=0.0004s
<lambda>: size=500, CPU=0.0002s, GPU=0.0001s
<lambda>: size=600, CPU=0.0007s, GPU=0.0000s
<lambda>: size=700, CPU=0.0011s, GPU=0.0001s
<lambda>: size=800, CPU=0.0006s, GPU=0.0001s
<lambda>: size=900, CPU=0.0011s, GPU=0.0000s
<lambda>: size=1000, CPU=0.0013s, GPU=0.0001s
<lambda>: size=100, CPU=0.0000s, GPU=0.0001s
<lambda>: size=200, CPU=0.0001s, GPU=0.0000s
<lambda>: size=300, CPU=0.0001s, GPU=0.0000s
<lambda>: size=400, CPU=0.0001s, GPU=0.0000s
<lambda>: size=500, CPU=0.0002s, GPU=0.0000s
<lambda>: size=600, CPU=0.0003s, GPU=0.0001s
<lambda>: size=700, CPU=0.0004s, GPU=0.0001s
<lambda>: size=800, CPU=0.0005s, GPU=0.0000s
<lambda>: size=900, CPU=0.0008s, GPU=0.0000s
<lambda>: size=1000, CPU=0.0007s, GPU=0.0000s
<lambda>: size=100, CPU=0.0003s, GPU=4.2019s
<lambda>: size=200, CPU=0.0009s, GPU=0.0001s
<lambda>