First, let's install Startai via pip and import it alongside the other frameworks we'll be using.

In [None]:
!pip install -q startai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.8/143.8 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m20.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import startai
import torch
import tensorflow as tf
import numpy as np

Here we create a basic TensorFlow Keras model containing a single LSTM layer as an example.

We can then convert this model to PyTorch by transpiling with startai.transpile and providing some input arguments for the model.

In [None]:
# keep model construction and transpilation on the CPU device
with tf.device("/CPU:0"):
  # example input handed to both the keras build step and the transpiler
  sample_input = tf.random.uniform((5, 2, 40))

  # keras model consisting of a single LSTM layer
  tf_lstm = tf.keras.Sequential(
    [
      tf.keras.layers.LSTM(40),
    ]
  )
  tf_lstm.build(sample_input.shape)

  # convert the keras model to torch, passing the example input as its args
  torch_lstm = startai.transpile(
    tf_lstm,
    source="tensorflow",
    to="torch",
    args=(sample_input,),
  )

  return _transpile(
  return _transpile(


Now that we've transpiled the model to PyTorch, let's verify that the results produced by the new PyTorch model match those produced by the original Keras model (within a small numerical tolerance).

We'll use an input tensor with a different shape from the input the model was transpiled with, to verify that the transpiled model is compatible with dynamic input shapes.

In [None]:
# draw one random batch with torch and mirror it into a tf tensor, so that
# both models are fed exactly the same values
torch_input = torch.rand(10, 100, 40)
tf_input = tf.constant(torch_input.detach().cpu().numpy())

# wrap the original tensorflow model in tf.function
tf_lstm = tf.function(tf_lstm)

# run the original model, then the transpiled one, on the shared batch
tf_output = tf_lstm(tf_input)
torch_output = torch_lstm(torch_input)

# check that the outputs agree up to an absolute tolerance of 1e-6
# NOTE(review): the [0] index means only the first entry of each output is
# compared; confirm whether the whole output should be checked instead
np.allclose(
  tf_output[0].numpy(),
  torch_output[0].detach().cpu().numpy(),
  atol=1e-6,
)

True

Finally, let's benchmark the transpiled torch model compared to the original. Here we benchmark on both CPU and GPU over 1000 inference runs.

In [None]:
# run some benchmarks

from time import perf_counter

N_RUNS = 1000
N_WARMUP = 10  # untimed calls, so one-off setup cost stays out of the measurement


def _time_runs(model, inputs, finish=None):
  """Time N_RUNS calls of `model(inputs)`.

  Args:
    model: callable invoked as `model(inputs)`.
    inputs: the single argument passed to `model` on every call.
    finish: optional callable that receives the output of the most recent
      call; use it to block until pending device work has completed before
      the clock is started and before it is stopped.

  Returns:
    Elapsed wall-clock seconds for the N_RUNS timed calls, rounded to 4
    decimal places.
  """
  out = None
  for _ in range(N_WARMUP):
    out = model(inputs)
  if finish is not None and out is not None:
    finish(out)

  start = perf_counter()
  for _ in range(N_RUNS):
    out = model(inputs)
  if finish is not None and out is not None:
    finish(out)
  return round(perf_counter() - start, 4)


def _tf_finish(out):
  """Copy the first entry of a tf output back to the host.

  Reading a value back is assumed to wait for any device work still queued
  for it -- TODO confirm for the TensorFlow version in use.
  """
  out[0].numpy()


def _torch_cuda_finish(_):
  """Block until all queued CUDA kernels have finished."""
  # CUDA kernels are launched asynchronously; without this the timer would
  # stop before the queued work has actually run
  torch.cuda.synchronize()


# benchmark on CPU
with tf.device("/CPU:0"):
  torch_lstm = torch_lstm.to("cpu")

  tf_inputs = tf.random.normal([10, 100, 40])
  torch_inputs = torch.from_numpy(tf_inputs.numpy()).to("cpu")

  # time the tf lstm
  tf_time = _time_runs(tf_lstm, tf_inputs, finish=_tf_finish)

  # time the transpiled torch lstm
  torch_time = _time_runs(torch_lstm, torch_inputs)

  print(f'(CPU)  tensorflow lstm time: {tf_time} seconds  transpiled torch lstm time: {torch_time} seconds')

  cpu_speedup = round(tf_time / torch_time, 3)


# benchmark on GPU (skipped when torch cannot see a CUDA device, instead of
# failing on the .cuda() calls)
gpu_speedup = None
if torch.cuda.is_available():
  with tf.device("/GPU:0"):
    torch_lstm = torch_lstm.cuda()

    tf_inputs = tf.random.normal([10, 100, 40])
    torch_inputs = torch.from_numpy(tf_inputs.numpy()).cuda()

    # time the original tf lstm
    tf_time = _time_runs(tf_lstm, tf_inputs, finish=_tf_finish)

    # time the transpiled torch lstm
    torch_time = _time_runs(torch_lstm, torch_inputs, finish=_torch_cuda_finish)

    print(f'(GPU)  tensorflow lstm time: {tf_time} seconds  transpiled torch lstm time: {torch_time} seconds')

    gpu_speedup = round(tf_time / torch_time, 3)
else:
  print('(GPU)  benchmark skipped: no CUDA device is available to torch')


# report the measured speedups of the transpiled torch lstm over tensorflow's lstm
print(f'\ntranspiled torch lstm is {cpu_speedup}x faster than tensorflow\'s lstm on CPU')
if gpu_speedup is not None:
  print(f'transpiled torch lstm is {gpu_speedup}x faster than tensorflow\'s lstm on GPU')

(CPU)  tensorflow lstm time: 5.5017 seconds  transpiled torch lstm time: 2.1101 seconds
(GPU)  tensorflow lstm time: 1.7519 seconds  transpiled torch lstm time: 0.901 seconds

transpiled torch lstm is 2.607x faster than tensorflow's lstm on CPU
transpiled torch lstm is 1.944x faster than tensorflow's lstm on GPU


We can see that the transpiled PyTorch model is significantly faster than the original Keras model on both CPU and GPU :)