In [None]:
!pip install -q startai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.4/16.4 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.6/44.6 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.8/143.8 kB[0m [31m18.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m756.0/756.0 kB[0m [31m58.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m44.8 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import startai
import torch
import tensorflow as tf
import numpy as np

In [None]:
# Reference model: a single-layer PyTorch LSTM with 2 input features and
# 2 hidden units, placed on the GPU.
torch_lstm = torch.nn.LSTM(input_size=2, hidden_size=2, num_layers=1).cuda()

# Sample input handed to the transpiler alongside the layer; with the LSTM's
# default (batch_first=False) layout this is (seq_len=5, batch=2, features=2).
x = torch.rand(5, 2, 2).to("cuda")

# Convert the torch layer into its TensorFlow counterpart.
tf_lstm = startai.transpile(
    torch_lstm,
    source="torch",
    to="tensorflow",
    args=(x,),
)



In [None]:
# Run the original torch LSTM on a fresh random batch.
x = torch.rand(20, 32, 2).to("cuda")
original_output = torch_lstm(x)

# Feed exactly the same values to the transpiled TensorFlow layer.
x = tf.constant(x.cpu().numpy())
transpiled_output = tf_lstm(x)

# Compare element [0] of each result (for torch.nn.LSTM this is the output
# sequence) within a small absolute tolerance; the boolean is the cell's output.
torch_result = original_output[0].detach().cpu()
tf_result = transpiled_output[0].numpy()
np.allclose(torch_result, tf_result, atol=1e-7)

True

In [None]:
# run some benchmarks
from time import perf_counter

x = torch.rand((20, 32, 2)).cuda()
N_RUNS = 1000

# time the original torch lstm
# CUDA kernels are launched asynchronously: without synchronizing, the clock
# can stop while queued kernels are still running, so the measured time would
# understate the real cost. Synchronize before starting and before stopping.
torch_lstm(x)  # warm-up call, mirroring the warm-up calls of the tf functions below
torch.cuda.synchronize()
s = perf_counter()
for _ in range(N_RUNS):
  torch_lstm(x)
torch.cuda.synchronize()
original_torch_time = perf_counter() - s

# compile transpiled tf lstm
x = tf.constant(x.cpu().numpy())
tf_lstm = tf.autograph.experimental.do_not_convert(tf_lstm)
compiled_tf_lstm = tf.function(tf_lstm)
compiled_tf_lstm(x)  # first call traces/compiles the function, keeping that cost out of the timing

# time the transpiled tf lstm
# NOTE(review): the tf loops are timed without an explicit device sync; if tf
# also returns before the GPU work has finished, these timings may be
# understated as well -- confirm.
s = perf_counter()
for _ in range(N_RUNS):
  compiled_tf_lstm(x)
transpiled_tf_time = perf_counter() - s

# time tf's own lstm layer (also compiled) for comparison
original_tf_lstm = tf.keras.layers.LSTM(2, time_major=True, return_sequences=True)
original_tf_lstm = tf.function(original_tf_lstm)
original_tf_lstm(x)  # first call builds the layer and traces the function

s = perf_counter()
for _ in range(N_RUNS):
  original_tf_lstm(x)
original_tf_time = perf_counter() - s

# report both tf timings relative to the torch baseline (a ratio above 1 means
# slower than torch), so the transpiled lstm can be compared with tf's own lstm layer
print(f'transpiled tf time is {transpiled_tf_time / original_torch_time}x slower than torch\'s lstm')
print(f'original tf lstm time is {original_tf_time / original_torch_time}x slower than torch\'s lstm')

transpiled tf time is 4.480074623755541x slower than torch's lstm
original tf lstm time is 2.362692848996253x slower than torch's lstm
