In [1]:
from tensorflow.keras.datasets import mnist

# Load MNIST as two (images, labels) pairs: a training split and a test split.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
# Tensor slicing
# Select images 10 through 99 (the stop index 100 is excluded); the two
# trailing `:` keep every row and every column of each selected image.
my_slice = train_images[10:100, :, :]
print(my_slice.shape)

# Keep every image, but only the rows and columns from index 14 onward.
my_slice = train_images[:, 14:, 14:]
print(my_slice.shape)

(90, 28, 28)
(60000, 14, 14)


# Tensor operations

## Element-wise operations

Operations like `relu` and `+` are element-wise operations: the operator is applied independently to each element of the tensor. Because no element depends on any other, these operations are easy to parallelize.

This is what we will compare next: a naive sequential implementation written with Python loops versus NumPy's optimized, vectorized implementation.

In [2]:
# A naive relu implementation for 2D tensors
def naive_relu(x):
    """Return an element-wise ReLU of a 2D tensor using plain Python loops.

    Every entry is replaced by ``max(entry, 0)``. The computation runs on a
    copy, so the array passed in is left untouched.

    Args:
        x: A rank-2 array exposing ``.shape``, ``.copy()`` and 2D indexing
            (e.g. a NumPy ndarray).

    Returns:
        A copy of ``x`` in which each entry is ``max(entry, 0)``.

    Raises:
        AssertionError: If ``x`` is not rank 2.
    """
    assert len(x.shape) == 2

    # Work on a copy so the caller's tensor is not modified.
    result = x.copy()
    n_rows, n_cols = result.shape

    for row in range(n_rows):
        for col in range(n_cols):
            result[row, col] = max(result[row, col], 0)

    return result

def naive_add(x, y):
    """Return the element-wise sum of two 2D tensors using plain Python loops.

    The result matches ``x + y`` for NumPy arrays, including the result dtype:
    the working copy is created in the dtype NumPy would promote ``x`` and
    ``y`` to, so adding a float tensor to an integer tensor does not silently
    truncate the sums back to integers.

    Args:
        x: A rank-2 NumPy array.
        y: A NumPy array with the same shape as ``x``.

    Returns:
        A new array of the same shape holding ``x[i, j] + y[i, j]``. Neither
        input is modified.

    Raises:
        AssertionError: If ``x`` is not rank 2 or the shapes differ.
    """
    # Local import so the function does not depend on where the notebook
    # happens to import NumPy relative to this definition.
    import numpy as np

    assert len(x.shape) == 2
    assert x.shape == y.shape

    # astype() always returns a copy here, so the caller's tensor is untouched;
    # using the promoted dtype avoids lossy casts on item assignment below.
    result = x.astype(np.result_type(x, y))

    n_rows, n_cols = result.shape
    for i in range(n_rows):
        for j in range(n_cols):
            result[i, j] += y[i, j]

    return result

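# NumPy has highly optimized versions of these operations: element-wise ops
# run as vectorized compiled loops, and linear-algebra routines use a low-level
# BLAS (Basic Linear Algebra Subprograms) library.

# Compare the naive add/relu implementation with NumPy's `+` and `np.maximum`.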
import time
import numpy as np

# Two random 200x1000 operands shared by both timing runs.
x = np.random.random((200, 1000))
y = np.random.random((200, 1000))

# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump if the system
# clock is adjusted mid-measurement.
t0 = time.perf_counter()

# Vectorized NumPy version: add, then relu via np.maximum.
for _ in range(100):
    z = x + y
    z = np.maximum(z, 0)

print("NumPy operations took: ", time.perf_counter() - t0)

t0 = time.perf_counter()

# Same computation with the pure-Python loop implementations.
for _ in range(100):
    z = naive_add(x, y)
    z = naive_relu(z)

print("Naive implementation took: ", time.perf_counter() - t0)

NumPy operations took:  0.11495780944824219
