<a href="https://colab.research.google.com/github/takayama-rado/trado_samples/blob/main/colab_files/exp_track_interp_tensorflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Load library

In [1]:
# Standard modules.
import gc
import sys
import time
from functools import partial

# CV/ML.
import numpy as np

import tensorflow as tf

In [2]:
print(f"Python:{sys.version}")
print(f"Numpy:{np.__version__}")
print(f"Tensorflow:{tf.__version__}")

Python:3.10.12 (main, Jun 11 2023, 05:26:28) [GCC 11.4.0]
Numpy:1.23.5
Tensorflow:2.14.0


# 2. Load data

In [3]:
!wget https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static.npy

--2023-10-30 04:05:00--  https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static.npy
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static.npy [following]
--2023-10-30 04:05:01--  https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2300608 (2.2M) [application/octet-stream]
Saving to: ‘finger_far0_non_static.npy’


2023-10-30 04:05:01 (26.4 MB/s) - ‘finger_far0_non_static.npy’ saved [2300608/2300608]



In [4]:
!wget https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static_interp.npy

--2023-10-30 04:05:01--  https://github.com/takayama-rado/trado_samples/raw/main/test_data/finger_far0_non_static_interp.npy
Resolving github.com (github.com)... 140.82.114.3
Connecting to github.com (github.com)|140.82.114.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static_interp.npy [following]
--2023-10-30 04:05:01--  https://raw.githubusercontent.com/takayama-rado/trado_samples/main/test_data/finger_far0_non_static_interp.npy
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2300608 (2.2M) [application/octet-stream]
Saving to: ‘finger_far0_non_static_interp.npy’


2023-10-30 04:05:02 (9.47 MB/s) - ‘finger_far0_non_static_interp.

In [5]:
!ls

finger_far0_non_static_interp.npy  finger_far0_non_static.npy  sample_data


# 3. Evaluation settings

In [6]:
def get_perf_str(val):
    token_si = ["", "m", "µ", "n", "p"]
    exp_si = [1, 1e3, 1e6, 1e9, 1e12]
    perf_str = f"{val:3g}s"
    si = ""
    sval = val
    for token, exp in zip(token_si, exp_si):
        if val * exp > 1.0:
            si = token
            sval = val * exp
            break
    perf_str = f"{sval:3g}{si}s"
    return perf_str

In [7]:
def print_perf_time(intervals, top_k=None):
    if top_k is not None:
        intervals = np.sort(intervals)[:top_k]
    min = intervals.min()
    max = intervals.max()
    mean = intervals.mean()
    std = intervals.std()

    smin = get_perf_str(min)
    smax = get_perf_str(max)
    mean = get_perf_str(mean)
    std = get_perf_str(std)
    if top_k:
        print(f"Top {top_k} summary: Max {smax}, Min {smin}, Mean +/- Std {mean} +/- {std}")
    else:
        print(f"Overall summary: Max {smax}, Min {smin}, Mean +/- Std {mean} +/- {std}")

In [8]:
class PerfMeasure():
    def __init__(self,
                 trials=100,
                 top_k=10):
        self.trials = trials
        self.top_k = top_k

    def __call__(self, func):
        gc.collect()
        gc.disable()
        intervals = []
        for _ in range(self.trials):
            start = time.perf_counter()
            func()
            end = time.perf_counter()
            intervals.append(end - start)
        intervals = np.array(intervals)
        print_perf_time(intervals)
        if self.top_k:
            print_perf_time(intervals, self.top_k)
        gc.enable()
        gc.collect()

In [9]:
TRIALS = 100
TOPK = 10
pmeasure = PerfMeasure(TRIALS, TOPK)

# 4. Tensorflow implementation

## 4.1 Implementation based on define-by-run (eager execution)

In [10]:
def matrix_interp_tf_eager(track):
    orig_shape = tf.shape(track)
    tlength = orig_shape[0]
    mask = track[:, 0, -1] != 0
    valid = tf.reduce_sum(tf.cast(mask, dtype=tf.int32))
    if valid == tlength:
        y = track
    else:
        xs = tf.where(mask)
        xs = tf.reshape(xs, [valid])
        # determine the output data type
        ys = tf.reshape(track, [tlength, -1])
        ys = tf.gather(ys, xs, axis=0)
        x = tf.range(tlength)
        dtype_ys = ys.dtype

        # normalize data types
        xs = tf.cast(xs, dtype_ys)
        x = tf.cast(x, dtype_ys)

        # pad control points for extrapolation
        xs = tf.concat([[xs.dtype.min], xs, [xs.dtype.max]], axis=0)
        ys = tf.concat([ys[:1], ys, ys[-1:]], axis=0)

        # compute slopes, pad at the edges to flatten
        sloops = (ys[1:] - ys[:-1]) / tf.expand_dims((xs[1:] - xs[:-1]), axis=-1)
        sloops = tf.pad(sloops[:-1], [(1, 1), (0, 0)])

        # solve for intercepts
        intercepts = ys - sloops * tf.expand_dims(xs, axis=-1)

        # search for the line parameters at each input data point
        # create a grid of the inputs and piece breakpoints for thresholding
        # rely on argmax stopping on the first true when there are duplicates,
        # which gives us an index into the parameter vectors
        idx = tf.math.argmax(tf.expand_dims(xs, axis=-2) > tf.expand_dims(x, axis=-1), axis=-1)
        sloop = tf.gather(sloops, idx, axis=0)
        intercept = tf.gather(intercepts, idx, axis=0)

        # apply the linear mapping at each input data point
        y = sloop * tf.expand_dims(x, axis=-1) + intercept
        y = tf.cast(y, dtype_ys)
        y = tf.reshape(y, orig_shape)
    return y


def partsbased_interp_tf_eager(trackdata):
    num_joints = trackdata.shape[1]
    trackdata = tf.convert_to_tensor(trackdata)
    pose = tf.gather(trackdata, tf.range(0, 33), axis=1)
    lhand = tf.gather(trackdata, tf.range(33, 33+21), axis=1)
    rhand = tf.gather(trackdata, tf.range(33+21, 33+21+21), axis=1)
    face = tf.gather(trackdata, tf.range(33+21+21, num_joints), axis=1)

    pose = matrix_interp_tf_eager(pose)
    lhand = matrix_interp_tf_eager(lhand)
    rhand = matrix_interp_tf_eager(rhand)
    face = matrix_interp_tf_eager(face)
    return tf.concat([pose, lhand, rhand, face], axis=1)

In [11]:
trackdata = np.load("finger_far0_non_static.npy")
reftrack = np.load("finger_far0_non_static_interp.npy")
# Remove person axis.
trackdata = trackdata[0]
reftrack = reftrack[0]

In [12]:
# Tensorflow.
# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
newtrack = partsbased_interp_tf_eager(trackdata)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

diff = (reftrack - newtrack.numpy()).sum()
print(f"Sum of error:{diff}")

print("Time after second call.")
target_fn = partial(partsbased_interp_tf_eager, trackdata=trackdata)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 120.998ms, Min 120.998ms, Mean +/- Std 120.998ms +/-   0s
Sum of error:-6.935119145623503e-12
Time after second call.
Overall summary: Max 61.8187ms, Min 11.2679ms, Mean +/- Std 20.8628ms +/- 8.64964ms
Top 10 summary: Max 12.2807ms, Min 11.2679ms, Mean +/- Std 11.7345ms +/- 329.671µs


In [13]:
# Tensorflow.
# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
newtrack = partsbased_interp_tf_eager(trackdata[:-1])
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

diff = (reftrack[:-1] - newtrack.numpy()).sum()
print(f"Sum of error:{diff}")

print("Time after second call.")
target_fn = partial(partsbased_interp_tf_eager, trackdata=trackdata[:-1])
pmeasure(target_fn)

Time of first call.
Overall summary: Max 30.9785ms, Min 30.9785ms, Mean +/- Std 30.9785ms +/-   0s
Sum of error:-6.935119145623503e-12
Time after second call.
Overall summary: Max 75.0154ms, Min 15.6051ms, Mean +/- Std 23.0584ms +/- 10.6375ms
Top 10 summary: Max 17.3485ms, Min 15.6051ms, Mean +/- Std 16.8033ms +/- 521.555µs


# 4.2 Implementation based on define-and-run (tf.function without input_signature)

In [14]:
# If input_signature is omitted, the re-tracing is performed when a tensor's shape is changed.
@tf.function
def matrix_interp_tf(track):
    orig_shape = tf.shape(track)
    tlength = orig_shape[0]
    mask = track[:, 0, -1] != 0
    valid = tf.reduce_sum(tf.cast(mask, dtype=tf.int32))
    if valid == tlength:
        y = track
    else:
        xs = tf.where(mask)
        xs = tf.reshape(xs, [valid])
        # determine the output data type
        ys = tf.reshape(track, [tlength, -1])
        ys = tf.gather(ys, xs, axis=0)
        x = tf.range(tlength)
        dtype_ys = ys.dtype

        # normalize data types
        xs = tf.cast(xs, dtype_ys)
        x = tf.cast(x, dtype_ys)

        # pad control points for extrapolation
        xs = tf.concat([[xs.dtype.min], xs, [xs.dtype.max]], axis=0)
        ys = tf.concat([ys[:1], ys, ys[-1:]], axis=0)

        # compute slopes, pad at the edges to flatten
        sloops = (ys[1:] - ys[:-1]) / tf.expand_dims((xs[1:] - xs[:-1]), axis=-1)
        sloops = tf.pad(sloops[:-1], [(1, 1), (0, 0)])

        # solve for intercepts
        intercepts = ys - sloops * tf.expand_dims(xs, axis=-1)

        # search for the line parameters at each input data point
        # create a grid of the inputs and piece breakpoints for thresholding
        # rely on argmax stopping on the first true when there are duplicates,
        # which gives us an index into the parameter vectors
        idx = tf.math.argmax(tf.expand_dims(xs, axis=-2) > tf.expand_dims(x, axis=-1), axis=-1)
        sloop = tf.gather(sloops, idx, axis=0)
        intercept = tf.gather(intercepts, idx, axis=0)

        # apply the linear mapping at each input data point
        y = sloop * tf.expand_dims(x, axis=-1) + intercept
        y = tf.cast(y, dtype_ys)
        y = tf.reshape(y, orig_shape)
    return y


def partsbased_interp_tf(trackdata):
    num_joints = trackdata.shape[1]
    trackdata = tf.convert_to_tensor(trackdata)
    pose = tf.gather(trackdata, tf.range(0, 33), axis=1)
    lhand = tf.gather(trackdata, tf.range(33, 33+21), axis=1)
    rhand = tf.gather(trackdata, tf.range(33+21, 33+21+21), axis=1)
    face = tf.gather(trackdata, tf.range(33+21+21, num_joints), axis=1)

    pose = matrix_interp_tf(pose)
    lhand = matrix_interp_tf(lhand)
    rhand = matrix_interp_tf(rhand)
    face = matrix_interp_tf(face)
    return tf.concat([pose, lhand, rhand, face], axis=1)

In [15]:
# Tensorflow.
# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
newtrack = partsbased_interp_tf(trackdata)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

diff = (reftrack - newtrack.numpy()).sum()
print(f"Sum of error:{diff}")

print("Time after second call.")
target_fn = partial(partsbased_interp_tf, trackdata=trackdata)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 1.5834s, Min 1.5834s, Mean +/- Std 1.5834s +/-   0s
Sum of error:-6.935119145623503e-12
Time after second call.
Overall summary: Max 70.1215ms, Min 8.67015ms, Mean +/- Std 21.3508ms +/- 10.4774ms
Top 10 summary: Max 9.71076ms, Min 8.67015ms, Mean +/- Std 9.19527ms +/- 332.419µs


In [16]:
# Tensorflow.
# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
newtrack = partsbased_interp_tf(trackdata[:-1])
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

diff = (reftrack[:-1] - newtrack.numpy()).sum()
print(f"Sum of error:{diff}")

print("Time after second call.")
target_fn = partial(partsbased_interp_tf, trackdata=trackdata[:-1])
pmeasure(target_fn)

Time of first call.
Overall summary: Max 1.73487s, Min 1.73487s, Mean +/- Std 1.73487s +/-   0s
Sum of error:-6.935119145623503e-12
Time after second call.
Overall summary: Max 36.0208ms, Min 7.17009ms, Mean +/- Std 14.598ms +/- 5.84049ms
Top 10 summary: Max 8.39623ms, Min 7.17009ms, Mean +/- Std 7.99962ms +/- 385.734µs


## 4.3 Implementation based on define-and-run (tf.function with input_signature)

In [17]:
# If input_signature is omitted, the re-tracing is performed when a tensor's shape is changed.
@tf.function(input_signature=(tf.TensorSpec(shape=[None, None, 4], dtype=tf.float64),))
def matrix_interp_tf(track):
    orig_shape = tf.shape(track)
    tlength = orig_shape[0]
    mask = track[:, 0, -1] != 0
    valid = tf.reduce_sum(tf.cast(mask, dtype=tf.int32))
    if valid == tlength:
        y = track
    else:
        xs = tf.where(mask)
        xs = tf.reshape(xs, [valid])
        # determine the output data type
        ys = tf.reshape(track, [tlength, -1])
        ys = tf.gather(ys, xs, axis=0)
        x = tf.range(tlength)
        dtype_ys = ys.dtype

        # normalize data types
        xs = tf.cast(xs, dtype_ys)
        x = tf.cast(x, dtype_ys)

        # pad control points for extrapolation
        xs = tf.concat([[xs.dtype.min], xs, [xs.dtype.max]], axis=0)
        ys = tf.concat([ys[:1], ys, ys[-1:]], axis=0)

        # compute slopes, pad at the edges to flatten
        sloops = (ys[1:] - ys[:-1]) / tf.expand_dims((xs[1:] - xs[:-1]), axis=-1)
        sloops = tf.pad(sloops[:-1], [(1, 1), (0, 0)])

        # solve for intercepts
        intercepts = ys - sloops * tf.expand_dims(xs, axis=-1)

        # search for the line parameters at each input data point
        # create a grid of the inputs and piece breakpoints for thresholding
        # rely on argmax stopping on the first true when there are duplicates,
        # which gives us an index into the parameter vectors
        idx = tf.math.argmax(tf.expand_dims(xs, axis=-2) > tf.expand_dims(x, axis=-1), axis=-1)
        sloop = tf.gather(sloops, idx, axis=0)
        intercept = tf.gather(intercepts, idx, axis=0)

        # apply the linear mapping at each input data point
        y = sloop * tf.expand_dims(x, axis=-1) + intercept
        y = tf.cast(y, dtype_ys)
        y = tf.reshape(y, orig_shape)
    return y


def partsbased_interp_tf(trackdata):
    num_joints = trackdata.shape[1]
    trackdata = tf.convert_to_tensor(trackdata)
    pose = tf.gather(trackdata, tf.range(0, 33), axis=1)
    lhand = tf.gather(trackdata, tf.range(33, 33+21), axis=1)
    rhand = tf.gather(trackdata, tf.range(33+21, 33+21+21), axis=1)
    face = tf.gather(trackdata, tf.range(33+21+21, num_joints), axis=1)

    pose = matrix_interp_tf(pose)
    lhand = matrix_interp_tf(lhand)
    rhand = matrix_interp_tf(rhand)
    face = matrix_interp_tf(face)
    return tf.concat([pose, lhand, rhand, face], axis=1)

In [18]:
# Tensorflow.
# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
newtrack = partsbased_interp_tf(trackdata)
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

diff = (reftrack - newtrack.numpy()).sum()
print(f"Sum of error:{diff}")

print("Time after second call.")
target_fn = partial(partsbased_interp_tf, trackdata=trackdata)
pmeasure(target_fn)

Time of first call.
Overall summary: Max 328.611ms, Min 328.611ms, Mean +/- Std 328.611ms +/-   0s
Sum of error:-6.935119145623503e-12
Time after second call.
Overall summary: Max 33.595ms, Min 8.34474ms, Mean +/- Std 10.9117ms +/- 3.95531ms
Top 10 summary: Max 8.8418ms, Min 8.34474ms, Mean +/- Std 8.63372ms +/- 153.86µs


In [19]:
# Tensorflow.
# The 1st call may be slow because of the computation graph construction.
print(f"Time of first call.")
start = time.perf_counter()
newtrack = partsbased_interp_tf(trackdata[:-1])
interval = time.perf_counter() - start
print_perf_time(np.array([interval]))

diff = (reftrack[:-1] - newtrack.numpy()).sum()
print(f"Sum of error:{diff}")

print("Time after second call.")
target_fn = partial(partsbased_interp_tf, trackdata=trackdata[:-1])
pmeasure(target_fn)

Time of first call.
Overall summary: Max 19.1748ms, Min 19.1748ms, Mean +/- Std 19.1748ms +/-   0s
Sum of error:-6.935119145623503e-12
Time after second call.
Overall summary: Max 29.0962ms, Min 5.7848ms, Mean +/- Std 7.69065ms +/- 4.21244ms
Top 10 summary: Max 5.92441ms, Min 5.7848ms, Mean +/- Std 5.87683ms +/- 44.1624µs
