In [1]:
import torch
import numpy as np
import time
import cv2
import os

In [2]:
### PyTorch

In [3]:
const255torch = torch.tensor(255.0)
const128torch = torch.tensor(128.0)

In [4]:
### Nested

In [5]:
def darken_blend_8_torch(base, active):
  return torch.where(torch.greater(base, active), active, base)

In [6]:
def color_burn_8_torch(base, active):
  return torch.where(torch.eq(active, 0), const255torch, const255torch - (const255torch - base) / active)

In [7]:
def lighten_blend_8_torch(base, active):
  return torch.where(torch.less(base, active), active, base)

In [8]:
def color_dodge_8_torch(base, active):
  return torch.where(torch.eq(active, const255torch), const255torch, base / (const255torch - active))

In [9]:
def overlay_blend_8_torch(base, active):
  return torch.where(torch.greater_equal(base, const128torch), 2 * base + base - 2 * base * base / const255torch - const128torch, 2 * base * base / const128torch)

In [10]:
def multiply_blend_8_torch(base, active):
  return base * active / const255torch

In [11]:
def linear_burn_8_torch(base, active):
  return base + active - const255torch

In [12]:
def screen_blend_8_torch(base, active):
  return base + active - base * active / const255torch

In [13]:
def linear_dodge_8_torch(base, active):
  return base + active

In [14]:
### Single

In [15]:
def normal_blend_f_torch(base, active, opacity):
  return opacity * active + (1-opacity)*base

In [16]:
def normal_blend_8_torch(base, active, opacity):
  return opacity * active + (const255torch - opacity) * base

In [17]:
### Runner

In [18]:
size = 512
np.random.seed(1)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

list1 = np.random.rand(size).astype('float32')
list2 = np.random.rand(size).astype('float32')
list3 = np.random.rand(size)
list1 = torch.from_numpy(list1).to(device)
list2 = torch.from_numpy(list2).to(device)

int1 = torch.from_numpy(np.random.rand(1).astype('float32')).to(device)

In [19]:
folder = "./data/"

img_files = [os.path.join(folder, f) for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]

bases = []
actives = []

for _file in img_files:
    img = cv2.imread(_file, cv2.IMREAD_GRAYSCALE)
    rnd = np.random.rand(*img.shape).astype('float32')

    bases.append(img)
    actives.append(rnd)

In [20]:
def runner(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        b = torch.from_numpy(bases[i]).to(device)
        a = torch.from_numpy(actives[i]).to(device)
        start_time = time.perf_counter()
        f(b, a)
        end_time = time.perf_counter()
        del a
        del b
        total_time += (end_time - start_time) * 1000
    return total_time

def mat_timer(bases, actives, f):
    runs = 5
    times = []
    for _ in range(runs):
        times.append(runner(bases, actives, f))
    times = np.array(times)
    print(f"{np.average(times)}ms +/- {np.std(times)}ms")

In [21]:
mat_timer(bases, actives, darken_blend_8_torch)

94.31348419748247ms +/- 6.106954591227638ms


In [22]:
mat_timer(bases, actives, color_burn_8_torch)

231.12374413758516ms +/- 4.916791931777994ms


In [23]:
mat_timer(bases, actives, lighten_blend_8_torch)

90.37575870752335ms +/- 1.0758306560045938ms


In [24]:
mat_timer(bases, actives, color_dodge_8_torch)

194.29245134815574ms +/- 3.0867953378090833ms


In [25]:
mat_timer(bases, actives, overlay_blend_8_torch) 

335.55151484906673ms +/- 2.671636618538504ms


In [26]:
mat_timer(bases, actives, multiply_blend_8_torch)

59.6296947915107ms +/- 0.5539678870297035ms


In [27]:
mat_timer(bases, actives, linear_burn_8_torch)

59.55152353271842ms +/- 0.6578866682008498ms


In [28]:
mat_timer(bases, actives, screen_blend_8_torch)

110.6073911767453ms +/- 0.2183459339447234ms


In [29]:
mat_timer(bases, actives, linear_dodge_8_torch)

31.730333203449845ms +/- 0.2485366967520119ms


In [30]:
%timeit list3 = normal_blend_f_torch(list1, list2, int1)

10.2 µs ± 60.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [31]:
%timeit list3 = normal_blend_8_torch(list1, list2, int1)

9.43 µs ± 29.6 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)
