In [1]:
import numpy as np
import numpy.typing as npt

In [2]:
# Needed on Colab: without this workaround the kernel crashes,
# see: https://github.com/taichi-dev/taichi/issues/235
import os, json, signal, time

if 'libtcmalloc' in os.environ.get('LD_PRELOAD', ''):
  kernel_fn = '/usr/local/share/jupyter/kernels/python3/kernel.json'
  # Read and rewrite the kernel spec through context managers so both file
  # handles are closed (and the new spec is flushed to disk) before the
  # kernel manager is terminated below.
  with open(kernel_fn) as spec_file:
    spec = json.load(spec_file)
  # Blank out LD_PRELOAD for future kernels, keeping any other env entries
  # the spec already defines.
  spec['env'] = {**(spec.get('env') or {}), 'LD_PRELOAD': ''}
  with open(kernel_fn, 'w') as spec_file:
    json.dump(spec, spec_file)
  print("Installed Taichi workaround. Don't wait for this cell to finish,")
  print('just REFRESH the browser tab and RUN this cell again.', flush=True)
  # Give the messages above a moment to reach the browser.
  time.sleep(0.5)
  # killing the kernel manager so that specs get reloaded
  os.kill(os.getppid(), signal.SIGTERM)
else:
  print('Kernel is Taichi-ready!')

Kernel is Taichi-ready!


In [3]:
# Pinned to the Taichi version this notebook was run with (see the install log
# and the "[Taichi] version" banner); %pip targets the running kernel's environment.
%pip install taichi==1.4.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting taichi
  Downloading taichi-1.4.0-cp38-cp38-manylinux_2_27_x86_64.whl (32.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.6/32.6 MB[0m [31m35.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorama
  Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Collecting rich
  Downloading rich-13.1.0-py3-none-any.whl (238 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m238.4/238.4 KB[0m [31m27.5 MB/s[0m eta [36m0:00:00[0m
Collecting commonmark<0.10.0,>=0.9.0
  Downloading commonmark-0.9.1-py2.py3-none-any.whl (51 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.1/51.1 KB[0m [31m6.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: commonmark, rich, colorama, taichi
Successfully installed colorama-0.4.6 commonmark-0.9.1 rich-13.1.0 taichi-1.4.0


In [4]:
# Seeded generator so every "Restart & Run All" benchmarks the same matrix.
SEED = 0
a = np.random.default_rng(SEED).random((10000, 10000))

In [5]:
def strange_conv(a: npt.NDArray[np.float64]) -> np.float64:
    """Fold a 2-D array into a single exponentially weighted value.

    The outer loop runs ``a.shape[1] - 5`` times and its index is used as the
    first array index; the inner loop index is used as the second array index.
    The inner bound starts at ``a.shape[0] - 3`` and shrinks by 4 after every
    outer iteration, so later rows contribute fewer (eventually zero) samples.

    Each sample combines four neighbouring entries and is blended into the
    running value as ``0.2 * previous + 0.8 * sample``; the running value
    starts at 1.0 and is returned after the last iteration.
    """
    first_extent = a.shape[0]
    second_extent = a.shape[1]

    acc = 1.
    col_stop = first_extent - 3
    for row in range(second_extent - 5):
        for col in range(col_stop):
            sample = 1.5 * a[row+1, col+2] - a[row+5, col+3] * a[row, col] + 0.2 * a[row+4, col]
            acc = 0.2 * acc + 0.8 * sample
        # The next row scans four fewer columns than this one did.
        col_stop -= 4

    return acc

In [6]:
import taichi as ti
# Reset first, then initialise on the CUDA backend (the log printed by this
# cell reports "Starting on arch=cuda").
# NOTE(review): ti.reset() presumably clears state left by an earlier run of
# this cell so it can be re-executed — confirm against the Taichi docs.
ti.reset()
ti.init(arch=ti.cuda)

[Taichi] version 1.4.0, llvm 15.0.4, commit fbe92fd8, linux, python 3.8.16
[Taichi] Starting on arch=cuda


In [7]:
# Taichi port of strange_conv: same recurrence and the same shrinking inner
# bound, written with an explicit x_stop instead of a reused loop variable.
@ti.kernel
def strange_conv_taichi(a: ti.types.ndarray()) -> ti.types.float64:
    # Extents of the first and second axes of the input.
    x = a.shape[0]
    y = a.shape[1]

    # Running value, created as f64 to match the declared return type.
    t = ti.f64(1.)

    # Upper bound of the inner loop for the first outer iteration.
    x_stop = x - 3
    # Request serial execution of the loop that follows: t is carried from one
    # iteration to the next, so the iteration order matters.
    # NOTE(review): presumably Taichi would otherwise parallelise this
    # outermost loop — confirm against the Taichi docs.
    ti.loop_config(serialize=True)
    for y_i in range(y - 5):
        for x_i in range(x_stop):
            # Every operand is cast to f64 explicitly before it is combined.
            c = ti.f64(1.5) * ti.f64(a[y_i+1, x_i+2]) - ti.f64(a[y_i+5, x_i+3]) * ti.f64(a[y_i, x_i]) + ti.f64(0.2) * ti.f64(a[y_i+4, x_i])
            t = ti.f64(0.2) * t + ti.f64(0.8) * c
        # strange_conv's inner loop leaves x at (bound - 1), so its next
        # range(x - 3) stops 4 earlier; reproduce that shrink here.
        x_stop -= 4

    return t

In [8]:
def compare_result(arr):
    """Run both implementations on ``arr`` and report whether they agree.

    Returns a 3-tuple ``(reference, ported, equal)`` where ``reference`` comes
    from ``strange_conv``, ``ported`` from ``strange_conv_taichi`` and
    ``equal`` is the result of comparing the two with ``==``.
    """
    reference = strange_conv(arr)
    ported = strange_conv_taichi(arr)
    outcome = (reference, ported, reference == ported)
    return outcome


In [11]:
# Run both implementations on the shared matrix `a`; the cell output is the
# (strange_conv result, strange_conv_taichi result, equal?) tuple.
compare_result(a)

(0.17600231102452576, 0.17600231102452576, True)

In [12]:
%%timeit
# Baseline timing: the Python-loop implementation on the full matrix `a`.
strange_conv(a)

14.9 s ± 506 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit
# Timing of the Taichi kernel on the same matrix `a`, for comparison with the baseline.
strange_conv_taichi(a)

1.82 s ± 39.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
