In [1]:
import torch
import numpy as np
import time
import cv2
import os

In [2]:
rng = np.random.default_rng(1)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [3]:
folder = "./data/"

img_files = [os.path.join(folder, f) for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]

bases = []
actives = []

for _file in img_files:
    img = cv2.imread(_file, cv2.IMREAD_GRAYSCALE).astype(np.uint8)
    rnd = (rng.random(img.shape, dtype = np.float32).astype(np.uint8) * 255).astype(np.uint8)
    bases.append(img)
    actives.append(rnd)

In [4]:
def mat_runner(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        b = torch.from_numpy(bases[i]).to(dtype=torch.uint8).to(device)
        a = torch.from_numpy(actives[i]).to(dtype=torch.uint8).to(device)
        start_time = time.perf_counter()
        f(b, a)
        end_time = time.perf_counter()
        del a
        del b
        total_time += (end_time - start_time) * 1000
    return total_time

def vec_runner_int(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        base = bases[i].flatten()
        active = actives[i].flatten()
        b = torch.from_numpy(base).to(dtype=torch.uint8).to(device)
        a = torch.from_numpy(active).to(dtype=torch.uint8).to(device)
        opacity = torch.from_numpy(rng.random(1, dtype = np.float32).astype(np.uint8)).to(dtype=torch.uint8).to(device)
        start_time = time.perf_counter()
        f(b, a, opacity)
        end_time = time.perf_counter()
        del a
        del b
        total_time += (end_time - start_time) * 1000
    return total_time

def vec_runner_float(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        base = bases[i].flatten().astype(np.float32)
        active = actives[i].flatten().astype(np.float32)
        b = torch.from_numpy(base).to(dtype=torch.float32).to(device)
        a = torch.from_numpy(active).to(dtype=torch.float32).to(device)
        opacity = torch.from_numpy(rng.random(1, dtype = np.float32)).to(dtype=torch.float32).to(device)
        start_time = time.perf_counter()
        f(b, a, opacity)
        end_time = time.perf_counter()
        del a
        del b
        total_time += (end_time - start_time) * 1000
    return total_time



def timer(input1, input2, f, runner):
    runs = 10
    times = []
    for _ in range(runs):
        times.append(runner(input1, input2, f))
    times = np.array(times)
    print(f"{f.__name__}")
    print(f"{np.average(times)}ms +/- {np.std(times)}ms")

In [33]:
### Nested

In [13]:
def darken_blend_8_torch(base, active):
  return torch.where(torch.greater(base, active), active, base)

In [35]:
def color_burn_8_torch(base, active):
  return torch.where(torch.eq(active, 0), 255, (255) - (((255) - (base)) // (active)))

In [36]:
def lighten_blend_8_torch(base, active):
  return torch.where(torch.less(base, active), active, base)

In [37]:
def color_dodge_8_torch(base, active):
  return torch.where(torch.eq(active, 255), 255, (base) // ((255) - (active)))

In [38]:
def overlay_blend_8_torch(base, active):
  return torch.where(torch.greater_equal(base, 128), ((((2) * (base)) + (base)) - ((((2) * (base)) * (base)) // (255))) - (255), (((2) * (base)) * (base)) // (255))

In [39]:
def multiply_blend_8_torch(base, active):
  return ((base) * (active)) // (255)

In [40]:
def linear_burn_8_torch(base, active):
  return ((base) + (active)) - (255)

In [41]:
def screen_blend_8_torch(base, active):
  return ((base) + (active)) - (((base) * (active)) // (255))

In [42]:
def linear_dodge_8_torch(base, active):
  return (base) + (active)

In [43]:
### Single

In [44]:
def normal_blend_f_torch(base, active, opacity):
  return ((opacity) * (active)) + (((1) - (opacity)) * (base))

In [45]:
def normal_blend_8_torch(base, active, opacity):
  return ((opacity) * (active)) + (((255) - (opacity)) * (base))

In [14]:
timer(bases, actives, darken_blend_8_torch, mat_runner)

darken_blend_8_torch
1.798919402062893ms +/- 0.7368922269424595ms


In [47]:
timer(bases, actives, color_burn_8_torch, mat_runner)

color_burn_8_torch
18.854814488440752ms +/- 0.19408251504103768ms


In [48]:
timer(bases, actives, lighten_blend_8_torch, mat_runner)

lighten_blend_8_torch
5.7126987259835005ms +/- 0.04121597265064468ms


In [49]:
timer(bases, actives, color_dodge_8_torch, mat_runner)

color_dodge_8_torch
15.784840658307076ms +/- 0.22839506571085957ms


In [50]:
timer(bases, actives, overlay_blend_8_torch, mat_runner)

overlay_blend_8_torch
34.80057017877698ms +/- 0.24415805462922735ms


In [51]:
timer(bases, actives, multiply_blend_8_torch, mat_runner)

multiply_blend_8_torch
6.23552929610014ms +/- 0.10504753311250051ms


In [52]:
timer(bases, actives, linear_burn_8_torch, mat_runner)

linear_burn_8_torch
5.649316171184182ms +/- 0.07622874558392068ms


In [53]:
timer(bases, actives, screen_blend_8_torch, mat_runner)

screen_blend_8_torch
11.120167281478643ms +/- 0.13777829044284606ms


In [54]:
timer(bases, actives, linear_dodge_8_torch, mat_runner)

linear_dodge_8_torch
2.6704106014221907ms +/- 0.0470401843385709ms


In [55]:
timer(bases, actives, normal_blend_f_torch, vec_runner_float)

normal_blend_f_torch
13.975820783525705ms +/- 0.13354431660815236ms


In [56]:
timer(bases, actives, normal_blend_8_torch, vec_runner_int)

normal_blend_8_torch
11.908606812357903ms +/- 0.266826537819847ms


In [5]:
torch.manual_seed(0)

def mat_runner_float(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        base = bases[i].astype(np.float32)
        active = actives[i].astype(np.float32)
        b = torch.from_numpy(base).to(dtype=torch.float32).to(device)
        a = torch.from_numpy(active).to(dtype=torch.float32).to(device)
        opacity = torch.from_numpy(rng.random(1, dtype = np.float32)).to(dtype=torch.float32).to(device)
        start_time = time.perf_counter()
        f(b, a, opacity)
        end_time = time.perf_counter()
        del a
        del b
        total_time += (end_time - start_time) * 1000
    return total_time

In [16]:
def dissolve_blend_8_torch(base, active, opacity):
    return torch.where((opacity) - (((torch.randint(1, 2147483647, base.shape, dtype=torch.int32, device='cuda') % (100)) + (1)) // (100)) >= 0, active, base)

In [17]:
timer(bases, actives, dissolve_blend_8_torch, mat_runner_float)

dissolve_blend_8_torch
260.7847813698754ms +/- 0.3539786605415327ms
