<a href="https://colab.research.google.com/github/takayama-rado/trado_samples/blob/main/colab_files/exp_track_affine_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Load library

In [1]:
# Standard modules.
import gc
import sys
import time
from functools import partial

# CV/ML.
import numpy as np

import tensorflow as tf

In [2]:
print(f"Python:{sys.version}")
print(f"Numpy:{np.__version__}")
print(f"Tensorflow:{tf.__version__}")

Python:3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]
Numpy:1.23.5
Tensorflow:2.14.0


# 2. Load data

In [3]:
!wget https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static.npy

--2023-10-30 06:44:44--  https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static.npy
Resolving github.com (github.com)... 140.82.112.3
Connecting to github.com (github.com)|140.82.112.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static.npy [following]
--2023-10-30 06:44:44--  https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2300608 (2.2M) [application/octet-stream]
Saving to: ‘finger_far0_non_static.npy’


2023-10-30 06:44:45 (28.0 MB/s) - ‘finger_far0_non_static.npy’ saved [2300608/2300608]



In [4]:
!wget https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static_affine.npy

--2023-10-30 06:44:45--  https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static_affine.npy
Resolving github.com (github.com)... 140.82.113.4
Connecting to github.com (github.com)|140.82.113.4|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static_affine.npy [following]
--2023-10-30 06:44:45--  https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static_affine.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2300608 (2.2M) [application/octet-stream]
Saving to: ‘finger_far0_non_static_affine.npy’


2023-10-30 06:44:45 (24.9 MB/s) - ‘finger_far0_non_static_affine.

In [5]:
!ls

finger_far0_non_static_affine.npy  finger_far0_non_static.npy  sample_data


# 3. Evaluation settings

In [6]:
def get_perf_str(val):
    token_si = ["", "m", "µ", "n", "p"]
    exp_si = [1, 1e3, 1e6, 1e9, 1e12]
    perf_str = f"{val:3g}s"
    si = ""
    sval = val
    for token, exp in zip(token_si, exp_si):
        if val * exp > 1.0:
            si = token
            sval = val * exp
            break
    perf_str = f"{sval:3g}{si}s"
    return perf_str

In [7]:
def print_perf_time(intervals, top_k=None):
    if top_k is not None:
        intervals = np.sort(intervals)[:top_k]
    min = intervals.min()
    max = intervals.max()
    mean = intervals.mean()
    std = intervals.std()

    smin = get_perf_str(min)
    smax = get_perf_str(max)
    mean = get_perf_str(mean)
    std = get_perf_str(std)
    if top_k:
        print(f"Top {top_k} summary: Max {smax}, Min {smin}, Mean +/- Std {mean} +/- {std}")
    else:
        print(f"Overall summary: Max {smax}, Min {smin}, Mean +/- Std {mean} +/- {std}")

In [8]:
class PerfMeasure():
    def __init__(self,
                 trials=100,
                 top_k=10):
        self.trials = trials
        self.top_k = top_k

    def __call__(self, func):
        gc.collect()
        gc.disable()
        intervals = []
        for _ in range(self.trials):
            start = time.perf_counter()
            func()
            end = time.perf_counter()
            intervals.append(end - start)
        intervals = np.array(intervals)
        print_perf_time(intervals)
        if self.top_k:
            print_perf_time(intervals, self.top_k)
        gc.enable()
        gc.collect()

In [9]:
TRIALS = 100
TOPK = 10
pmeasure = PerfMeasure(TRIALS, TOPK)

# 4. Implement affine transformation

## 4.1 Based on define-by-run (eager execution).

In [10]:
def get_affine_matrix_2d_tf_eager(center,
                                  trans,
                                  scale,
                                  rot,
                                  skew,
                                  dtype=tf.float32):
    center_m = tf.identity([[1.0, 0.0, -center[0]],
                            [0.0, 1.0, -center[1]],
                            [0.0, 0.0, 1.0]])
    scale_m = tf.identity([[scale[0], 0.0, 0.0],
                           [0.0, scale[1], 0.0],
                           [0.0, 0.0, 1.0]])
    _cos = tf.math.cos(rot)
    _sin = tf.math.sin(rot)
    rot_m = tf.identity([[_cos, -_sin, 0.0],
                         [_sin, _cos, 0],
                         [0.0, 0.0, 1.0]])
    _tan = tf.math.tan(skew)
    skew_m = tf.identity([[1.0, _tan[0], 0.0],
                          [_tan[1], 1.0, 0.0],
                          [0.0, 0.0, 1.0]])
    move = center + trans
    trans_m = tf.identity([[1.0, 0.0, move[0]],
                           [0.0, 1.0, move[1]],
                           [0.0, 0.0, 1.0]])
    # Cast to required dtype.
    center_m = tf.cast(center_m, dtype=dtype)
    scale_m = tf.cast(scale_m, dtype=dtype)
    rot_m = tf.cast(rot_m, dtype=dtype)
    skew_m = tf.cast(skew_m, dtype=dtype)
    trans_m = tf.cast(trans_m, dtype=dtype)
    # Make affine matrix.
    mat = tf.eye(3, 3, dtype=dtype)
    mat = tf.linalg.matmul(center_m, mat)
    mat = tf.linalg.matmul(scale_m, mat)
    mat = tf.linalg.matmul(rot_m, mat)
    mat = tf.linalg.matmul(skew_m, mat)
    mat = tf.linalg.matmul(trans_m, mat)
    return tf.cast(mat, dtype=dtype)

In [11]:
def apply_affine_tf_eager(inputs, mat):
    xy = tf.gather(inputs, [0, 1], axis=-1)
    # Can not determine shape.
    # shape = xy.shape
    # ones = tf.ones([shape[0], shape[1], 1], dtype=xy.dtype)
    ones = tf.ones_like(xy[:, :, :1])
    xy = tf.concat([xy, ones], axis=-1)
    xy = tf.einsum("...j,ij", xy, mat)
    # Remove homogeneous coordinate.
    xy = xy[:, :, :-1]

    # Transpose for scatter.
    # `[T, J, C] -> [C, T, J]`
    inputs = tf.transpose(inputs, [2, 0, 1])
    xy = tf.transpose(xy, [2, 0, 1])
    indices = tf.constant([0, 1], dtype=tf.int32)[..., None]
    inputs = tf.tensor_scatter_nd_update(inputs, indices, xy)
    # `[C, T, J] -> [T, J, C]`
    inputs = tf.transpose(inputs, [1, 2, 0])
    return inputs

In [12]:
# Load data.
trackfile = "./finger_far0_non_static.npy"
reffile = "./finger_far0_non_static_affine.npy"
trackdata = np.load(trackfile).astype(np.float32)
refdata = np.load(reffile).astype(np.float32)
print(trackdata.shape)

# Remove person axis.
trackdata = trackdata[0]
refdata = refdata[0]

(1, 130, 553, 4)


In [13]:
# Get affine matrix.
center = np.array([638.0, 389.0])
trans = np.array([100.0, 0.0])
scale = np.array([2.0, 0.5])
rot = float(np.radians(15.0))
skew = np.radians([15.0, 15.0])
dtype = tf.float32
print("Parameters")
print("Center:", center)
print("Trans:", trans)
print("Scale:", scale)
print("Rot:", rot)
print("Skew:", skew)

Parameters
Center: [638. 389.]
Trans: [100.   0.]
Scale: [2.  0.5]
Rot: 0.2617993877991494
Skew: [0.26179939 0.26179939]


In [14]:
def perf_wrap_func(trackdata, center, trans, scale, rot, skew, dtype):
    mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
    newtrack = apply_affine_tf_eager(trackdata, mat)

In [15]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf_eager(testtrack, mat)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

# Evaluate difference.
diff = (np.round(newtrack.numpy()) - np.round(refdata)).sum()
print(f"Sum of error:{diff}")

testtrack = trackdata.copy().astype(np.float32)

print("Time after second call.")
testtrack = trackdata.copy()
target_fn = partial(perf_wrap_func,
                    trackdata=testtrack,
                    center=center, trans=trans, scale=scale, rot=rot, skew=skew,
                    dtype=dtype)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 204.563ms, Min 204.563ms, Mean +/- Std 204.563ms +/-   0s
Sum of error:0.0
Time after second call.
Overall summary: Max 43.2781ms, Min 18.1728ms, Mean +/- Std 20.6453ms +/- 3.22142ms
Top 10 summary: Max 18.8043ms, Min 18.1728ms, Mean +/- Std 18.5944ms +/- 168.285µs


In [16]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf_eager(testtrack[:-1], mat)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

testtrack = trackdata.copy().astype(np.float32)

print("Time after second call.")
testtrack = trackdata.copy()
target_fn = partial(perf_wrap_func,
                    trackdata=testtrack[:-1],
                    center=center, trans=trans, scale=scale, rot=rot, skew=skew,
                    dtype=dtype)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 26.7066ms, Min 26.7066ms, Mean +/- Std 26.7066ms +/-   0s
Time after second call.
Overall summary: Max 86.6408ms, Min 22.0264ms, Mean +/- Std 40.6909ms +/- 13.1015ms
Top 10 summary: Max 28.081ms, Min 22.0264ms, Mean +/- Std 25.3254ms +/- 2.30584ms


# 4.2 Based on define-and-run (graph execution)

In [17]:
@tf.function
def get_affine_matrix_2d_tf(center,
                            trans,
                            scale,
                            rot,
                            skew,
                            dtype=tf.float32):
    center_m = tf.identity([[1.0, 0.0, -center[0]],
                            [0.0, 1.0, -center[1]],
                            [0.0, 0.0, 1.0]])
    scale_m = tf.identity([[scale[0], 0.0, 0.0],
                           [0.0, scale[1], 0.0],
                           [0.0, 0.0, 1.0]])
    _cos = tf.math.cos(rot)
    _sin = tf.math.sin(rot)
    rot_m = tf.identity([[_cos, -_sin, 0.0],
                         [_sin, _cos, 0],
                         [0.0, 0.0, 1.0]])
    _tan = tf.math.tan(skew)
    skew_m = tf.identity([[1.0, _tan[0], 0.0],
                          [_tan[1], 1.0, 0.0],
                          [0.0, 0.0, 1.0]])
    move = center + trans
    trans_m = tf.identity([[1.0, 0.0, move[0]],
                           [0.0, 1.0, move[1]],
                           [0.0, 0.0, 1.0]])
    # Cast to required dtype.
    center_m = tf.cast(center_m, dtype=dtype)
    scale_m = tf.cast(scale_m, dtype=dtype)
    rot_m = tf.cast(rot_m, dtype=dtype)
    skew_m = tf.cast(skew_m, dtype=dtype)
    trans_m = tf.cast(trans_m, dtype=dtype)
    # Make affine matrix.
    mat = tf.eye(3, 3, dtype=dtype)
    mat = tf.linalg.matmul(center_m, mat)
    mat = tf.linalg.matmul(scale_m, mat)
    mat = tf.linalg.matmul(rot_m, mat)
    mat = tf.linalg.matmul(skew_m, mat)
    mat = tf.linalg.matmul(trans_m, mat)
    return tf.cast(mat, dtype=dtype)

In [18]:
@tf.function(input_signature=(
    tf.TensorSpec(shape=[None, None, 4], dtype=tf.float32),
    tf.TensorSpec(shape=[3, 3], dtype=tf.float32),))
def apply_affine_tf(inputs, mat):
    xy = tf.gather(inputs, [0, 1], axis=-1)
    # Can not determine shape.
    # shape = xy.shape
    # ones = tf.ones([shape[0], shape[1], 1], dtype=xy.dtype)
    ones = tf.ones_like(xy[:, :, :1])
    xy = tf.concat([xy, ones], axis=-1)
    xy = tf.einsum("...j,ij", xy, mat)
    # Remove homogeneous coordinate.
    xy = xy[:, :, :-1]

    # Transpose for scatter.
    # `[T, J, C] -> [C, T, J]`
    inputs = tf.transpose(inputs, [2, 0, 1])
    xy = tf.transpose(xy, [2, 0, 1])
    indices = tf.constant([0, 1], dtype=tf.int32)[..., None]
    inputs = tf.tensor_scatter_nd_update(inputs, indices, xy)
    # `[C, T, J] -> [T, J, C]`
    inputs = tf.transpose(inputs, [1, 2, 0])
    return inputs

In [19]:
def perf_wrap_func(trackdata, center, trans, scale, rot, skew, dtype):
    mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
    newtrack = apply_affine_tf(trackdata, mat)

In [20]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf(testtrack, mat)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

# Evaluate difference.
diff = (np.round(newtrack.numpy()) - np.round(refdata)).sum()
print(f"Sum of error:{diff}")

testtrack = trackdata.copy().astype(np.float32)

print("Time after second call.")
testtrack = trackdata.copy()
target_fn = partial(perf_wrap_func,
                    trackdata=testtrack,
                    center=center, trans=trans, scale=scale, rot=rot, skew=skew,
                    dtype=dtype)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 895.778ms, Min 895.778ms, Mean +/- Std 895.778ms +/-   0s
Sum of error:0.0
Time after second call.
Overall summary: Max 73.0112ms, Min 16.8224ms, Mean +/- Std 29.15ms +/- 7.81842ms
Top 10 summary: Max 22.767ms, Min 16.8224ms, Mean +/- Std 20.7323ms +/- 1.64511ms


In [21]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf_eager(testtrack[:-1], mat)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

testtrack = trackdata.copy().astype(np.float32)

print("Time after second call.")
testtrack = trackdata.copy()
target_fn = partial(perf_wrap_func,
                    trackdata=testtrack[:-1],
                    center=center, trans=trans, scale=scale, rot=rot, skew=skew,
                    dtype=dtype)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 82.4818ms, Min 82.4818ms, Mean +/- Std 82.4818ms +/-   0s
Time after second call.
Overall summary: Max 69.1991ms, Min 17.3533ms, Mean +/- Std 30.7778ms +/- 8.9923ms
Top 10 summary: Max 22.3045ms, Min 17.3533ms, Mean +/- Std 19.5941ms +/- 1.71975ms


# 5. Application to randomized transformation

## 5.1. Implementation2: Call JIT function from a python process.

In [22]:
class RandomAffineTransform2D_TF():
    def __init__(self,
                 center_joints,
                 apply_ratio,
                 trans_range,
                 scale_range,
                 rot_range,
                 skew_range,
                 random_seed=None,
                 dtype=tf.float32):

        self.center_joints = center_joints
        if isinstance(self.center_joints, int):
            self.center_joints = [self.center_joints]

        self.apply_ratio = apply_ratio
        self.trans_range = trans_range
        self.scale_range = scale_range
        self.rot_range = np.radians(rot_range).tolist()
        self.skew_range = np.radians(skew_range).tolist()
        self.dtype = dtype
        if random_seed is not None:
            self.rng = tf.random.Generator.from_seed(random_seed)
        else:
            self.rng = tf.random.get_global_generator()

    def __call__(self, inputs):
        if self.rng.uniform((), minval=0.0, maxval=1.0) >= self.apply_ratio:
            return inputs

        temp = tf.gather(inputs, self.center_joints, axis=1)
        shape = temp.shape
        temp = tf.reshape(temp, [-1, len(self.center_joints), shape[-1]])
        mask = tf.where(tf.reduce_sum(temp, axis=(1, 2)) != 0, 1.0, 0.0)
        mask = tf.cast(mask, dtype=self.dtype)

        temp = temp * mask[:, None, None]
        mask_sum = tf.reduce_sum(mask)
        # `[T, J, C] -> [J, C] -> [C]`
        center = tf.reduce_sum(temp, axis=0) / mask_sum
        center = tf.reduce_mean(center, axis=0)
        # Use only x and y.
        center = center[:2]

        trans = self.rng.uniform((2,), minval=self.trans_range[0], maxval=self.trans_range[1])
        scale = self.rng.uniform((2,), self.scale_range[0], self.scale_range[1])
        rot = self.rng.uniform((), self.rot_range[0], self.rot_range[1])
        skew = self.rng.uniform((2,), self.skew_range[0], self.skew_range[1])

        mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=self.dtype)

        # Apply transformation.
        inputs = apply_affine_tf(inputs, mat)
        return inputs

In [23]:
aug_fn = RandomAffineTransform2D_TF(
    center_joints=[11, 12],
    apply_ratio=1.0,
    trans_range=[-100.0, 100.0],
    scale_range=[0.5, 2.0],
    rot_range=[-30.0, 30.0],
    skew_range=[-30.0, 30.0],
    dtype=dtype)

In [24]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
temp = aug_fn(testtrack)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

testtrack = trackdata.copy().astype(np.float32)
print("Time after second call.")
target_fn = partial(aug_fn, inputs=testtrack)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 532.927ms, Min 532.927ms, Mean +/- Std 532.927ms +/-   0s
Time after second call.
Overall summary: Max 131.862ms, Min 30.534ms, Mean +/- Std 54.5549ms +/- 20.7024ms
Top 10 summary: Max 39.0679ms, Min 30.534ms, Mean +/- Std 36.5016ms +/- 2.63933ms


In [25]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
temp = aug_fn(trackdata.copy().astype(np.float32)[:-1])
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

testtrack = trackdata.copy().astype(np.float32)
print("Time after second call.")
target_fn = partial(aug_fn, inputs=testtrack[:-1])
pmeasure(target_fn)

Time of first call.
Overall summary: Max 66.6588ms, Min 66.6588ms, Mean +/- Std 66.6588ms +/-   0s
Time after second call.
Overall summary: Max 126.075ms, Min 22.4291ms, Mean +/- Std 32.7429ms +/- 18.0349ms
Top 10 summary: Max 22.9823ms, Min 22.4291ms, Mean +/- Std 22.7506ms +/- 172.265µs


## 5.2. Implementation2: Apply JIT compile to whole affine process.

In [26]:
class RandomAffineTransform2D_TF_JIT():
    def __init__(self,
                 center_joints,
                 apply_ratio,
                 trans_range,
                 scale_range,
                 rot_range,
                 skew_range,
                 random_seed=None,
                 dtype=tf.float32):

        self.center_joints = center_joints
        if isinstance(self.center_joints, int):
            self.center_joints = [self.center_joints]

        self.apply_ratio = apply_ratio
        self.trans_range = trans_range
        self.scale_range = scale_range
        self.rot_range = np.radians(rot_range).tolist()
        self.skew_range = np.radians(skew_range).tolist()
        self.dtype = dtype
        if random_seed is not None:
            self.rng = tf.random.Generator.from_seed(random_seed)
        else:
            self.rng = tf.random.get_global_generator()

    @tf.function(input_signature=(tf.TensorSpec(shape=[None, None, 4], dtype=tf.float32),))
    def __call__(self, inputs):
        if self.rng.uniform((), minval=0.0, maxval=1.0) >= self.apply_ratio:
            retval = inputs
        else:
            temp = tf.gather(inputs, self.center_joints, axis=1)
            shape = temp.shape
            temp = tf.reshape(temp, [-1, len(self.center_joints), shape[-1]])
            mask = tf.where(tf.reduce_sum(temp, axis=(1, 2)) != 0, 1.0, 0.0)
            mask = tf.cast(mask, dtype=self.dtype)

            temp = temp * mask[:, None, None]
            mask_sum = tf.reduce_sum(mask)
            # `[T, J, C] -> [J, C] -> [C]`
            center = tf.reduce_sum(temp, axis=0) / mask_sum
            center = tf.reduce_mean(center, axis=0)
            # Use only x and y.
            center = center[:2]

            trans = self.rng.uniform((2,), minval=self.trans_range[0], maxval=self.trans_range[1])
            scale = self.rng.uniform((2,), self.scale_range[0], self.scale_range[1])
            rot = self.rng.uniform((), self.rot_range[0], self.rot_range[1])
            skew = self.rng.uniform((2,), self.skew_range[0], self.skew_range[1])

            mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=self.dtype)

            # Apply transformation.
            inputs = apply_affine_tf(inputs, mat)
            retval = inputs
        return retval

In [27]:
aug_fn = RandomAffineTransform2D_TF_JIT(
    center_joints=[11, 12],
    apply_ratio=1.0,
    trans_range=[-100.0, 100.0],
    scale_range=[0.5, 2.0],
    rot_range=[-30.0, 30.0],
    skew_range=[-30.0, 30.0],
    dtype=dtype)

In [28]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
temp = aug_fn(testtrack)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

testtrack = trackdata.copy().astype(np.float32)
print("Time after second call.")
target_fn = partial(aug_fn, inputs=testtrack)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 409.075ms, Min 409.075ms, Mean +/- Std 409.075ms +/-   0s
Time after second call.
Overall summary: Max 28.6829ms, Min 17.5935ms, Mean +/- Std 24.8284ms +/- 1.49898ms
Top 10 summary: Max 24.0776ms, Min 17.5935ms, Mean +/- Std 22.1906ms +/- 2.34517ms


In [29]:
testtrack = trackdata.copy().astype(np.float32)

# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
temp = aug_fn(testtrack[:-1])
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

testtrack = trackdata.copy().astype(np.float32)
print("Time after second call.")
target_fn = partial(aug_fn, inputs=testtrack[:-1])
pmeasure(target_fn)

Time of first call.
Overall summary: Max 16.2957ms, Min 16.2957ms, Mean +/- Std 16.2957ms +/-   0s
Time after second call.
Overall summary: Max 20.75ms, Min 13.231ms, Mean +/- Std 14.4953ms +/- 1.39201ms
Top 10 summary: Max 13.628ms, Min 13.231ms, Mean +/- Std 13.4536ms +/- 126.572µs
