In [1]:
import tensorflow as tf
import numpy as np
import time
import cv2
import os

2024-01-12 17:37:23.429820: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-01-12 17:37:23.530250: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-01-12 17:37:23.530282: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-01-12 17:37:23.544651: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-01-12 17:37:23.581928: I tensorflow/core/platform/cpu_feature_guar

In [2]:
rng = np.random.default_rng(1)

In [3]:
folder = "./data/"

img_files = [os.path.join(folder, f) for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]

bases = []
actives = []

for _file in img_files:
    img = cv2.imread(_file, cv2.IMREAD_GRAYSCALE).astype(np.uint8)
    rnd = (rng.random(img.shape, dtype = np.float32) * 255).astype(np.uint8)
    bases.append(img)
    actives.append(rnd)

In [4]:
tf.random.set_seed(0)

def mat_runner(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        with tf.device('/GPU:0'):
            b = tf.convert_to_tensor(bases[i], np.uint8)
            a = tf.convert_to_tensor(actives[i], np.uint8)
            start_time = time.perf_counter()
            f(b, a)
            end_time = time.perf_counter()
            del a
            del b
            
        total_time += (end_time - start_time) * 1000
    return total_time

def mat_runner_float(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        base = bases[i].astype(np.float32)
        active = actives[i].astype(np.float32)
        with tf.device('/GPU:0'):
            b = tf.convert_to_tensor(base, np.float32)
            a = tf.convert_to_tensor(active, np.float32)
            opacity = tf.convert_to_tensor(rng.random(1, dtype = np.float32), np.float32)
            start_time = time.perf_counter()
            f(b, a, opacity)
            end_time = time.perf_counter()
            del a
            del b
        
        total_time += (end_time - start_time) * 1000
    return total_time

def vec_runner_int(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        base = bases[i].flatten()
        active = actives[i].flatten()
        with tf.device('/GPU:0'):
            b = tf.convert_to_tensor(base, np.uint8)
            a = tf.convert_to_tensor(active, np.uint8)
            opacity = tf.convert_to_tensor(rng.random(1, dtype = np.float32).astype(np.uint8), np.uint8)
            start_time = time.perf_counter()
            f(b, a, opacity)
            end_time = time.perf_counter()
            del a
            del b
        
        total_time += (end_time - start_time) * 1000
    return total_time

def vec_runner_float(bases, actives, f):
    total_time = 0
    for i in range(len(bases)):
        base = bases[i].flatten().astype(np.float32)
        active = actives[i].flatten().astype(np.float32)
        with tf.device('/GPU:0'):
            b = tf.convert_to_tensor(base, np.float32)
            a = tf.convert_to_tensor(active, np.float32)
            assert a.device.endswith('GPU:0')
            assert b.device.endswith('GPU:0')
            opacity = tf.convert_to_tensor(rng.random(1, dtype = np.float32), np.float32)
            start_time = time.perf_counter()
            f(b, a, opacity)
            end_time = time.perf_counter()
            del a
            del b
        
        total_time += (end_time - start_time) * 1000
    return total_time
    
def timer(input1, input2, f, runner):
    runs = 10
    times = []
    for _ in range(runs):
        times.append(runner(input1, input2, f))
    times = np.array(times)
    print(f"{f.__name__}")
    print(f"{np.average(times)}ms +/- {np.std(times)}ms")

In [None]:
### Tensorflow

In [None]:
def dissolve_blend_8_tf(base, active, opacity):
    return tf.where(tf.greater_equal(opacity - ((tf.random.uniform(tf.shape(base), 1, 2147483647) % 100) + 1) / 100, 0), active, base) 

In [None]:
def darken_blend_8_tf(base, active):
  return tf.where(tf.greater(base, active), active, base)

In [None]:
def color_burn_8_tf(base, active):
  return tf.where(tf.equal(active, 0), 255, (255) - (((255) - (base)) // (active)))

In [None]:
def lighten_blend_8_tf(base, active):
  return tf.where(tf.less(base, active), active, base)

In [None]:
def color_dodge_8_tf(base, active):
  return tf.where(tf.equal(active, 255), 255, (base) // ((255) - (active)))

In [None]:
def overlay_blend_8_tf(base, active):
  return tf.where(tf.greater_equal(base, 128), ((((2) * (base)) + (base)) - ((((2) * (base)) * (base)) // (255))) - (255), (((2) * (base)) * (base)) // (255))

In [None]:
def multiply_blend_8_tf(base, active):
  return ((base) * (active)) // (255)

In [None]:
def linear_burn_8_tf(base, active):
  return ((base) + (active)) - (255)

In [None]:
def screen_blend_8_tf(base, active):
  return ((base) + (active)) - (((base) * (active)) // (255))

In [None]:
def linear_dodge_8_tf(base, active):
  return (base) + (active)

In [None]:
def normal_blend_f_tf(base, active, opacity):
  return ((opacity) * (active)) + (((1) - (opacity)) * (base))

In [None]:
def normal_blend_8_tf(base, active, opacity):
  return ((opacity) * (active)) + (((255) - (opacity)) * (base))

In [108]:
timer(bases, actives, darken_blend_8_tf, mat_runner)

darken_blend_8_tf
214.20361720956862ms +/- 5.394366716981458ms


In [109]:
timer(bases, actives, color_burn_8_tf, mat_runner)

color_burn_8_tf
733.1378387287259ms +/- 5.817201229482697ms


In [110]:
timer(bases, actives, lighten_blend_8_tf, mat_runner)

lighten_blend_8_tf
214.54936317168176ms +/- 3.055144172695127ms


In [111]:
timer(bases, actives, color_dodge_8_tf, mat_runner)

color_dodge_8_tf
571.1265732999891ms +/- 6.46689854019949ms


In [112]:
timer(bases, actives, overlay_blend_8_tf, mat_runner)

overlay_blend_8_tf
1511.6393396165222ms +/- 13.745712152811908ms


In [113]:
timer(bases, actives, multiply_blend_8_tf, mat_runner)

multiply_blend_8_tf
260.3905257768929ms +/- 4.007972900205248ms


In [114]:
timer(bases, actives, linear_burn_8_tf, mat_runner)

linear_burn_8_tf
285.41208803653717ms +/- 5.8392000442114ms


In [115]:
timer(bases, actives, screen_blend_8_tf, mat_runner)

screen_blend_8_tf
510.63010660000145ms +/- 6.889555390328346ms


In [116]:
timer(bases, actives, linear_dodge_8_tf, mat_runner)

linear_dodge_8_tf
138.55432313866913ms +/- 2.4073303672681443ms


In [117]:
timer(bases, actives, normal_blend_f_tf, vec_runner_float)

normal_blend_f_tf
571.01951953955ms +/- 5.335591010085858ms


In [118]:
timer(bases, actives, normal_blend_8_tf, vec_runner_int)

normal_blend_8_tf
598.486645705998ms +/- 8.31914628954157ms


In [26]:
timer(bases, actives, dissolve_blend_8_tf, mat_runner_float)

<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'float32'>
<dtype: 'flo