In [None]:
#|hide
#|eval: false
! [ -e /content ] && pip install -Uqq fastrl['dev'] pyvirtualdisplay && \
                     apt-get install -y xvfb python-opengl > /dev/null 2>&1 
# NOTE: IF YOU SEE VERSION ERRORS, IT IS SAFE TO IGNORE THEM. COLAB IS BEHIND IN SOME OF THE PACKAGE VERSIONS

In [None]:
#|hide
#|eval: false
# Imported explicitly so this cell does not rely on a star-import (or a later cell) to provide `os`.
import os

from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    from nbdev.showdoc import *
    from nbdev.imports import *
    # Environment sanity checks, skipped when the IN_TEST environment variable is set.
    # IN_NOTEBOOK / IN_COLAB / IN_IPYTHON are expected to be provided by the star-imports above.
    if not os.environ.get("IN_TEST", None):
        assert IN_NOTEBOOK
        assert not IN_COLAB
        assert IN_IPYTHON
else:
    # A virtual display is needed for colab
    from pyvirtualdisplay import Display
    # NOTE(review): this rebinds the name `display`, which IPython also uses for its
    # rich-display helper; consider renaming if any cell needs to call display(...).
    display = Display(visible=0, size=(400, 300))
    display.start()

In [None]:
# Python native modules
import os
from copy import deepcopy
# Third party libs
from fastcore.all import *
import numpy as np
from torch import Tensor
# Local modules


# Speed
> Some obvious / not so obvious notes on speed

## Numpy to Tensor Performance

In [None]:
# Sample "image" for the timing cells: random integers drawn from [0, 255) with shape (240, 320, 3).
img = np.random.randint(low=0, high=255, size=(240, 320, 3))

In [None]:
%%timeit
#|eval: false
# Baseline: time generating a fresh random integer array of shape (240, 320, 3).
img=np.random.randint(0,255,size=(240, 320, 3))

1.61 ms ± 23.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
%%timeit
#|eval: false
# Reference point: time a deep copy of `img`.
deepcopy(img)

240 µs ± 4.64 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
%%timeit
#|eval: false
# Time building a Tensor directly from the numpy array.
Tensor(img)

79.2 µs ± 3.19 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [None]:
%%timeit
#|eval: false
# Same conversion, but with the numpy array wrapped in a Python list first.
Tensor([img])

  


135 ms ± 3.56 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


You will notice that if you wrap a numpy array in a Python list, it completely kills the performance. The solution is to
just add a batch dim to the existing array (e.g. with `np.expand_dims`) and pass the array directly.

In [None]:
%%timeit
#|eval: false
# Add the batch dimension on the numpy side (axis 0) and pass the array itself to Tensor.
Tensor(np.expand_dims(img,0))

85.6 µs ± 4.48 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In fact we can just test this with python lists...

In [None]:
%%timeit
#|eval: false
# Smallest nested-list case: a 1x1 Python list.
Tensor([[1]])

6.75 µs ± 95.6 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [None]:
# Nested Python list holding a single row of 270000 ones (the list-input case for the Tensor timing).
test_arr = [[1 for _ in range(270000)]]

In [None]:
%%timeit
#|eval: false
# Times Tensor construction from `test_arr` as currently bound; the name is rebound to a
# numpy array in a later cell, so the result depends on cell execution order.
Tensor(test_arr)

9.55 ms ± 221 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# The same single row of 270000 ones, this time as a numpy array of shape (1, 270000).
test_arr = np.full(shape=(1, 270000), fill_value=1)

In [None]:
%%timeit
#|eval: false
# Times Tensor construction from `test_arr` as currently bound; the name was bound to a nested
# Python list in an earlier cell, so the result depends on cell execution order.
Tensor(test_arr)

88 µs ± 5.93 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


It is horrifying just how much of a performance hit this causes... So we will be avoiding Python list inputs 
to Tensors from now on...

In [None]:
#|hide
#|eval: false
# Export step: when not running in Colab, import and call nbdev_export().
from fastcore.imports import in_colab

# Since colab still requires tornado<6, we don't want to import nbdev if we don't have to
if not in_colab():
    # Imported inside the branch so Colab never has to import nbdev.
    from nbdev import nbdev_export
    nbdev_export()