<a href="https://colab.research.google.com/github/takayama-rado/trado_samples/blob/main/colab_files/exp_track_affine_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Load library

In [1]:
# Standard modules.
import sys
import time
from pathlib import Path

# CV/ML.
import numpy as np

import tensorflow as tf

In [2]:
print(f"Python:{sys.version}")
print(f"Numpy:{np.__version__}")
print(f"Tensorflow:{tf.__version__}")

Python:3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]
Numpy:1.23.5
Tensorflow:2.13.0


# 2. Load data

In [3]:
!wget https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static.npy

--2023-10-17 02:41:31--  https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static.npy
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static.npy [following]
--2023-10-17 02:41:31--  https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2300608 (2.2M) [application/octet-stream]
Saving to: ‘finger_far0_non_static.npy’


2023-10-17 02:41:31 (27.7 MB/s) - ‘finger_far0_non_static.npy’ saved [2300608/2300608]



In [4]:
!wget https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static_affine.npy

--2023-10-17 02:41:31--  https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static_affine.npy
Resolving github.com (github.com)... 140.82.113.3
Connecting to github.com (github.com)|140.82.113.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static_affine.npy [following]
--2023-10-17 02:41:32--  https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static_affine.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2300608 (2.2M) [application/octet-stream]
Saving to: ‘finger_far0_non_static_affine.npy’


2023-10-17 02:41:32 (25.0 MB/s) - ‘finger_far0_non_static_affine.

In [5]:
!ls

finger_far0_non_static_affine.npy  finger_far0_non_static.npy  sample_data


# 3. Implement affine transformation

## 3.1 Based on define-by-run (eager execution).

In [6]:
def get_affine_matrix_2d_tf_eager(center,
                                  trans,
                                  scale,
                                  rot,
                                  skew,
                                  dtype=tf.float32):
    """Compose a 3x3 homogeneous 2D affine matrix (eager version).

    The factors act on a point in this order: shift by ``-center``, scale,
    rotate, skew, and finally shift by ``center + trans``.

    Args:
        center: Indexable pair ``(x, y)`` used as the pivot.
        trans: Indexable pair ``(x, y)`` added to ``center`` for the last shift.
        scale: Indexable pair of per-axis scale factors.
        rot: Scalar rotation angle in radians.
        skew: Indexable pair of skew angles in radians.
        dtype: dtype every factor is cast to, and the dtype of the result.

    Returns:
        A ``[3, 3]`` tensor of ``dtype``.
    """
    cos_r = tf.math.cos(rot)
    sin_r = tf.math.sin(rot)
    tan_s = tf.math.tan(skew)
    shift = center + trans

    # Factors are listed in the order they are applied to a point.
    factors = (
        # Move the pivot to the origin.
        tf.identity([[1.0, 0.0, -center[0]],
                     [0.0, 1.0, -center[1]],
                     [0.0, 0.0, 1.0]]),
        # Per-axis scaling.
        tf.identity([[scale[0], 0.0, 0.0],
                     [0.0, scale[1], 0.0],
                     [0.0, 0.0, 1.0]]),
        # Rotation.
        tf.identity([[cos_r, -sin_r, 0.0],
                     [sin_r, cos_r, 0.0],
                     [0.0, 0.0, 1.0]]),
        # Skew.
        tf.identity([[1.0, tan_s[0], 0.0],
                     [tan_s[1], 1.0, 0.0],
                     [0.0, 0.0, 1.0]]),
        # Move the pivot back and add the translation.
        tf.identity([[1.0, 0.0, shift[0]],
                     [0.0, 1.0, shift[1]],
                     [0.0, 0.0, 1.0]]),
    )

    # Left-multiply each factor onto the running product, starting from identity.
    mat = tf.eye(3, 3, dtype=dtype)
    for factor in factors:
        mat = tf.linalg.matmul(tf.cast(factor, dtype=dtype), mat)
    return tf.cast(mat, dtype=dtype)

In [7]:
def apply_affine_tf_eager(inputs, mat):
    """Apply a 3x3 affine matrix to channels 0 and 1 of ``inputs`` (eager version).

    Args:
        inputs: Rank-3 array or tensor; the transposes below treat its axes as
            ``[T, J, C]``. Channels 0 and 1 are taken as x and y.
        mat: ``[3, 3]`` homogeneous transformation matrix.

    Returns:
        A ``[T, J, C]`` tensor in which channels 0 and 1 hold the transformed
        coordinates and the remaining channels are copied from ``inputs``.
    """
    coords = tf.gather(inputs, [0, 1], axis=-1)
    # Append the homogeneous coordinate. `ones_like` on a slice is used so the
    # code does not need to know the sizes of the leading axes.
    homogeneous = tf.concat([coords, tf.ones_like(coords[:, :, :1])], axis=-1)
    # out[..., i] = sum_j homogeneous[..., j] * mat[i, j]
    moved = tf.einsum("...j,ij", homogeneous, mat)
    # Drop the homogeneous coordinate again.
    moved = moved[:, :, :-1]

    # The scatter update addresses the leading axis, so put channels first:
    # `[T, J, C] -> [C, T, J]`
    channel_ids = tf.constant([[0], [1]], dtype=tf.int32)
    channels_first = tf.transpose(inputs, [2, 0, 1])
    moved = tf.transpose(moved, [2, 0, 1])
    updated = tf.tensor_scatter_nd_update(channels_first, channel_ids, moved)
    # `[C, T, J] -> [T, J, C]`
    return tf.transpose(updated, [1, 2, 0])

In [8]:
# Read the raw tracking data and the reference result it is compared against.
trackfile = Path("./finger_far0_non_static.npy")
reffile = Path("./finger_far0_non_static_affine.npy")
trackdata, refdata = (np.load(path) for path in (trackfile, reffile))
print(trackdata.shape)

# Drop the leading person axis from both arrays.
trackdata, refdata = trackdata[0], refdata[0]

(1, 130, 553, 4)


In [9]:
# Fixed transformation parameters shared by the benchmark cells below.
# Angles are given in degrees and converted to radians.
dtype = tf.float32
center = np.array([638.0, 389.0])
trans = np.array([100.0, 0.0])
scale = np.array([2.0, 0.5])
rot = float(np.radians(15.0))
skew = np.radians([15.0, 15.0])

print("Parameters")
for label, value in (("Center:", center),
                     ("Trans:", trans),
                     ("Scale:", scale),
                     ("Rot:", rot),
                     ("Skew:", skew)):
    print(label, value)

Parameters
Center: [638. 389.]
Trans: [100.   0.]
Scale: [2.  0.5]
Rot: 0.2617993877991494
Skew: [0.26179939 0.26179939]


In [10]:
trial = 10
testtrack = trackdata.copy().astype(np.float32)

# Time the very first invocation separately; it may include one-time set-up cost.
tic = time.perf_counter()
mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf_eager(testtrack, mat)
interval = time.perf_counter() - tic
print(f"Time of first call:{interval}")

# Compare with the reference after rounding both sides.
diff = np.sum(np.round(newtrack.numpy()) - np.round(refdata))

testtrack = trackdata.copy().astype(np.float32)

# Average over repeated invocations.
tic = time.perf_counter()
for _ in range(trial):
    mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
    newtrack = apply_affine_tf_eager(testtrack, mat)
interval = time.perf_counter() - tic
print(f"Average time:{interval / trial}")

print(f"Sum of error:{diff}")

Time of first call:0.33390725200001725
Average time:0.04138189829999987
Sum of error:0.0


In [11]:
trial = 10
testtrack = trackdata.copy().astype(np.float32)

# Same benchmark as above, but the input is one frame shorter along the first axis.
tic = time.perf_counter()
mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf_eager(testtrack[:-1], mat)
interval = time.perf_counter() - tic
print(f"Time of first call:{interval}")

testtrack = trackdata.copy().astype(np.float32)

# Average over repeated invocations with the shortened input.
tic = time.perf_counter()
for _ in range(trial):
    mat = get_affine_matrix_2d_tf_eager(center, trans, scale, rot, skew, dtype=dtype)
    newtrack = apply_affine_tf_eager(testtrack[:-1], mat)
interval = time.perf_counter() - tic
print(f"Average time:{interval / trial}")

Time of first call:0.03481286999999611
Average time:0.030859349700000392


## 3.2 Based on define-and-run (graph execution).

In [12]:
@tf.function
def get_affine_matrix_2d_tf(center,
                            trans,
                            scale,
                            rot,
                            skew,
                            dtype=tf.float32):
    """Compose a 3x3 homogeneous 2D affine matrix (``tf.function`` version).

    The factors act on a point in this order: shift by ``-center``, scale,
    rotate, skew, and finally shift by ``center + trans``.

    Args:
        center: Indexable pair ``(x, y)`` used as the pivot.
        trans: Indexable pair ``(x, y)`` added to ``center`` for the last shift.
        scale: Indexable pair of per-axis scale factors.
        rot: Scalar rotation angle in radians.
        skew: Indexable pair of skew angles in radians.
        dtype: dtype every factor is cast to, and the dtype of the result.

    Returns:
        A ``[3, 3]`` tensor of ``dtype``.
    """
    cos_r = tf.math.cos(rot)
    sin_r = tf.math.sin(rot)
    tan_s = tf.math.tan(skew)
    shift = center + trans

    # Factors are listed in the order they are applied to a point.
    factors = (
        # Move the pivot to the origin.
        tf.identity([[1.0, 0.0, -center[0]],
                     [0.0, 1.0, -center[1]],
                     [0.0, 0.0, 1.0]]),
        # Per-axis scaling.
        tf.identity([[scale[0], 0.0, 0.0],
                     [0.0, scale[1], 0.0],
                     [0.0, 0.0, 1.0]]),
        # Rotation.
        tf.identity([[cos_r, -sin_r, 0.0],
                     [sin_r, cos_r, 0.0],
                     [0.0, 0.0, 1.0]]),
        # Skew.
        tf.identity([[1.0, tan_s[0], 0.0],
                     [tan_s[1], 1.0, 0.0],
                     [0.0, 0.0, 1.0]]),
        # Move the pivot back and add the translation.
        tf.identity([[1.0, 0.0, shift[0]],
                     [0.0, 1.0, shift[1]],
                     [0.0, 0.0, 1.0]]),
    )

    # Left-multiply each factor onto the running product, starting from identity.
    mat = tf.eye(3, 3, dtype=dtype)
    for factor in factors:
        mat = tf.linalg.matmul(tf.cast(factor, dtype=dtype), mat)
    return tf.cast(mat, dtype=dtype)

In [13]:
@tf.function(input_signature=(
    tf.TensorSpec(shape=[None, None, 4], dtype=tf.float32),
    tf.TensorSpec(shape=[3, 3], dtype=tf.float32),))
def apply_affine_tf(inputs, mat):
    """Apply a 3x3 affine matrix to channels 0 and 1 of ``inputs`` (``tf.function`` version).

    The input signature fixes ``inputs`` to float32 ``[None, None, 4]`` and
    ``mat`` to float32 ``[3, 3]``.

    Args:
        inputs: ``[T, J, 4]`` tensor. Channels 0 and 1 are taken as x and y.
        mat: ``[3, 3]`` homogeneous transformation matrix.

    Returns:
        A ``[T, J, 4]`` tensor in which channels 0 and 1 hold the transformed
        coordinates and the remaining channels are copied from ``inputs``.
    """
    coords = tf.gather(inputs, [0, 1], axis=-1)
    # Append the homogeneous coordinate. The leading axes are not statically
    # known here, so the ones are derived from a slice with `ones_like`.
    homogeneous = tf.concat([coords, tf.ones_like(coords[:, :, :1])], axis=-1)
    # out[..., i] = sum_j homogeneous[..., j] * mat[i, j]
    moved = tf.einsum("...j,ij", homogeneous, mat)
    # Drop the homogeneous coordinate again.
    moved = moved[:, :, :-1]

    # The scatter update addresses the leading axis, so put channels first:
    # `[T, J, C] -> [C, T, J]`
    channel_ids = tf.constant([[0], [1]], dtype=tf.int32)
    channels_first = tf.transpose(inputs, [2, 0, 1])
    moved = tf.transpose(moved, [2, 0, 1])
    updated = tf.tensor_scatter_nd_update(channels_first, channel_ids, moved)
    # `[C, T, J] -> [T, J, C]`
    return tf.transpose(updated, [1, 2, 0])

In [14]:
dtype = tf.float32
trial = 10
testtrack = trackdata.copy().astype(np.float32)

# Time the very first invocation separately; it may be slow because the
# computation graph is constructed on that call.
tic = time.perf_counter()
mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf(testtrack, mat)
interval = time.perf_counter() - tic
print(f"Time of first call:{interval}")

# Compare with the reference after rounding both sides.
diff = np.sum(np.round(newtrack.numpy()) - np.round(refdata))

testtrack = trackdata.copy().astype(np.float32)

# Average over repeated invocations.
tic = time.perf_counter()
for _ in range(trial):
    mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
    newtrack = apply_affine_tf(testtrack, mat)
interval = time.perf_counter() - tic
print(f"Average time:{interval / trial}")

print(f"Sum of error:{diff}")

Time of first call:1.6754202719999967
Average time:0.030969702300001244
Sum of error:0.0


In [15]:
testtrack = trackdata.copy().astype(np.float32)
trial = 10

# Re-run the graph benchmark with an input that is one frame shorter, to see
# how the `tf.function` versions behave when the input length changes.
# The first call is timed separately. It must use `apply_affine_tf` (not the
# eager variant) so that it measures the same function as the loop below.
start = time.perf_counter()
mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
newtrack = apply_affine_tf(testtrack[:-1], mat)
interval = time.perf_counter() - start
print(f"Time of first call:{interval}")

testtrack = trackdata.copy().astype(np.float32)

# Average over repeated invocations with the shortened input.
start = time.perf_counter()
for _ in range(trial):
    mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=dtype)
    newtrack = apply_affine_tf(testtrack[:-1], mat)
interval = time.perf_counter() - start
print(f"Average time:{interval / trial}")

Time of first call:0.05241142500000251
Average time:0.025285513899999047


# 4. Application to randomized transformation

## 4.1. Implementation1: Call JIT functions from a Python process.

In [16]:
class RandomAffineTransform2D_TF():
    """Random 2D affine augmentation driven from ordinary Python code.

    ``__call__`` itself is not wrapped in ``tf.function``; it calls the
    compiled helpers ``get_affine_matrix_2d_tf`` and ``apply_affine_tf``.

    Args:
        center_joints: Joint index, or list of joint indices, whose average
            position is used as the transformation centre.
        apply_ratio: The transformation is skipped when a uniform sample from
            ``[0, 1)`` is greater than or equal to this value.
        trans_range: ``[min, max]`` for the random translation.
        scale_range: ``[min, max]`` for the random scale.
        rot_range: ``[min, max]`` for the random rotation, in degrees.
        skew_range: ``[min, max]`` for the random skew, in degrees.
        random_seed: Seed for a dedicated generator; when ``None`` the global
            TensorFlow generator is used.
        dtype: dtype used for the validity mask and the affine matrix.
    """

    def __init__(self,
                 center_joints,
                 apply_ratio,
                 trans_range,
                 scale_range,
                 rot_range,
                 skew_range,
                 random_seed=None,
                 dtype=tf.float32):

        # Accept a single joint index as well as a list of indices.
        if isinstance(center_joints, int):
            center_joints = [center_joints]
        self.center_joints = center_joints

        self.apply_ratio = apply_ratio
        self.trans_range = trans_range
        self.scale_range = scale_range
        # Angular ranges are stored in radians.
        self.rot_range = np.radians(rot_range).tolist()
        self.skew_range = np.radians(skew_range).tolist()
        self.dtype = dtype
        if random_seed is None:
            self.rng = tf.random.get_global_generator()
        else:
            self.rng = tf.random.Generator.from_seed(random_seed)

    def __call__(self, inputs):
        """Randomly transform ``inputs`` (``[T, J, C]``) or return it unchanged."""
        draw = self.rng.uniform((), minval=0.0, maxval=1.0)
        if draw >= self.apply_ratio:
            return inputs

        # Collect the centre joints of every frame.
        num_centers = len(self.center_joints)
        picked = tf.gather(inputs, self.center_joints, axis=1)
        picked = tf.reshape(picked, [-1, num_centers, picked.shape[-1]])

        # A frame is counted only if its centre joints do not sum to zero.
        # NOTE(review): if no frame is counted, the division below is by zero.
        valid = tf.cast(tf.reduce_sum(picked, axis=(1, 2)) != 0, dtype=self.dtype)
        picked = picked * valid[:, None, None]

        # Average over counted frames, then over joints:
        # `[T, J, C] -> [J, C] -> [C]`
        center = tf.reduce_sum(picked, axis=0) / tf.reduce_sum(valid)
        center = tf.reduce_mean(center, axis=0)
        # Use only x and y.
        center = center[:2]

        # Sample the transformation parameters.
        trans = self.rng.uniform((2,), minval=self.trans_range[0], maxval=self.trans_range[1])
        scale = self.rng.uniform((2,), minval=self.scale_range[0], maxval=self.scale_range[1])
        rot = self.rng.uniform((), minval=self.rot_range[0], maxval=self.rot_range[1])
        skew = self.rng.uniform((2,), minval=self.skew_range[0], maxval=self.skew_range[1])

        mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=self.dtype)

        # Apply transformation.
        return apply_affine_tf(inputs, mat)

In [17]:
# Always apply the augmentation (apply_ratio=1.0); angular ranges are in degrees.
aug_config = dict(
    center_joints=[11, 12],
    apply_ratio=1.0,
    trans_range=[-100.0, 100.0],
    scale_range=[0.5, 2.0],
    rot_range=[-30.0, 30.0],
    skew_range=[-30.0, 30.0],
    dtype=dtype,
)
aug_fn = RandomAffineTransform2D_TF(**aug_config)

In [18]:
augtracks = []
trial = 10

# Time the very first invocation separately; it may be slow because the
# computation graph is constructed on that call.
tic = time.perf_counter()
temp = aug_fn(trackdata.copy().astype(np.float32))
interval = time.perf_counter() - tic
print(f"Time of first call:{interval}")

# Average over repeated invocations, keeping every augmented result.
tic = time.perf_counter()
for _ in range(trial):
    augmented = aug_fn(trackdata.copy().astype(np.float32))
    augtracks.append(augmented)
interval = time.perf_counter() - tic
print(f"Average time:{interval / trial}")

Time of first call:0.38601433599998813
Average time:0.03773833449999984


In [19]:
trial = 10
augtracks = []

# Re-run the benchmark with an input that is one frame shorter.
# The first call is timed separately.
start = time.perf_counter()
temp = aug_fn(trackdata.copy().astype(np.float32)[:-1])
interval = time.perf_counter() - start
print(f"Time of first call:{interval}")

# Slice the INPUT (not the output) so the loop measures the same shortened
# input as the first call above.
start = time.perf_counter()
for _ in range(trial):
    augtracks.append(aug_fn(trackdata.copy().astype(np.float32)[:-1]))
interval = time.perf_counter() - start
print(f"Average time:{interval / trial}")

Time of first call:0.040777968999975656
Average time:0.04661312759999987


## 4.2. Implementation2: Apply JIT compile to whole affine process.

In [20]:
class RandomAffineTransform2D_TF_JIT():
    """Random 2D affine augmentation whose whole ``__call__`` is a ``tf.function``.

    Args:
        center_joints: Joint index, or list of joint indices, whose average
            position is used as the transformation centre.
        apply_ratio: The transformation is skipped when a uniform sample from
            ``[0, 1)`` is greater than or equal to this value.
        trans_range: ``[min, max]`` for the random translation.
        scale_range: ``[min, max]`` for the random scale.
        rot_range: ``[min, max]`` for the random rotation, in degrees.
        skew_range: ``[min, max]`` for the random skew, in degrees.
        random_seed: Seed for a dedicated generator; when ``None`` the global
            TensorFlow generator is used.
        dtype: dtype used for the validity mask and the affine matrix.
    """

    def __init__(self,
                 center_joints,
                 apply_ratio,
                 trans_range,
                 scale_range,
                 rot_range,
                 skew_range,
                 random_seed=None,
                 dtype=tf.float32):

        # Accept a single joint index as well as a list of indices.
        if isinstance(center_joints, int):
            center_joints = [center_joints]
        self.center_joints = center_joints

        self.apply_ratio = apply_ratio
        self.trans_range = trans_range
        self.scale_range = scale_range
        # Angular ranges are stored in radians.
        self.rot_range = np.radians(rot_range).tolist()
        self.skew_range = np.radians(skew_range).tolist()
        self.dtype = dtype
        if random_seed is None:
            self.rng = tf.random.get_global_generator()
        else:
            self.rng = tf.random.Generator.from_seed(random_seed)

    @tf.function(input_signature=(tf.TensorSpec(shape=[None, None, 4], dtype=tf.float32),))
    def __call__(self, inputs):
        """Randomly transform ``inputs`` (float32 ``[T, J, 4]``) or return it unchanged."""
        draw = self.rng.uniform((), minval=0.0, maxval=1.0)
        # Both branches assign `retval` and there is a single return at the end.
        if draw >= self.apply_ratio:
            retval = inputs
        else:
            # Collect the centre joints of every frame.
            num_centers = len(self.center_joints)
            picked = tf.gather(inputs, self.center_joints, axis=1)
            picked = tf.reshape(picked, [-1, num_centers, picked.shape[-1]])

            # A frame is counted only if its centre joints do not sum to zero.
            # NOTE(review): if no frame is counted, the division below is by zero.
            valid = tf.cast(tf.reduce_sum(picked, axis=(1, 2)) != 0, dtype=self.dtype)
            picked = picked * valid[:, None, None]

            # Average over counted frames, then over joints:
            # `[T, J, C] -> [J, C] -> [C]`
            center = tf.reduce_sum(picked, axis=0) / tf.reduce_sum(valid)
            center = tf.reduce_mean(center, axis=0)
            # Use only x and y.
            center = center[:2]

            # Sample the transformation parameters.
            trans = self.rng.uniform((2,), minval=self.trans_range[0], maxval=self.trans_range[1])
            scale = self.rng.uniform((2,), minval=self.scale_range[0], maxval=self.scale_range[1])
            rot = self.rng.uniform((), minval=self.rot_range[0], maxval=self.rot_range[1])
            skew = self.rng.uniform((2,), minval=self.skew_range[0], maxval=self.skew_range[1])

            mat = get_affine_matrix_2d_tf(center, trans, scale, rot, skew, dtype=self.dtype)

            # Apply transformation.
            retval = apply_affine_tf(inputs, mat)
        return retval

In [21]:
# Always apply the augmentation (apply_ratio=1.0); angular ranges are in degrees.
aug_config = dict(
    center_joints=[11, 12],
    apply_ratio=1.0,
    trans_range=[-100.0, 100.0],
    scale_range=[0.5, 2.0],
    rot_range=[-30.0, 30.0],
    skew_range=[-30.0, 30.0],
    dtype=dtype,
)
aug_fn = RandomAffineTransform2D_TF_JIT(**aug_config)

In [22]:
augtracks = []
trial = 10

# Time the very first invocation separately; it may be slow because the
# computation graph is constructed on that call.
tic = time.perf_counter()
temp = aug_fn(trackdata.copy().astype(np.float32))
interval = time.perf_counter() - tic
print(f"Time of first call:{interval}")

# Average over repeated invocations, keeping every augmented result.
tic = time.perf_counter()
for _ in range(trial):
    augmented = aug_fn(trackdata.copy().astype(np.float32))
    augtracks.append(augmented)
interval = time.perf_counter() - tic
print(f"Average time:{interval / trial}")

Time of first call:0.8682217840000135
Average time:0.027948200599999494


In [23]:
trial = 10
augtracks = []

# Re-run the benchmark with an input that is one frame shorter.
# The first call is timed separately.
start = time.perf_counter()
temp = aug_fn(trackdata.copy().astype(np.float32)[:-1])
interval = time.perf_counter() - start
print(f"Time of first call:{interval}")

# Slice the INPUT (not the output) so the loop measures the same shortened
# input as the first call above.
start = time.perf_counter()
for _ in range(trial):
    augtracks.append(aug_fn(trackdata.copy().astype(np.float32)[:-1]))
interval = time.perf_counter() - start
print(f"Average time:{interval / trial}")

Time of first call:0.018088719000019182
Average time:0.021224944200000095
