# Manipulating tensors

## Basic operations: +, -, *, /
**tf.add(), tf.subtract(), tf.multiply(), tf.divide()**

In [2]:
import tensorflow as tf
tensor = tf.constant([[10, 7], [3, 4]])
tensor

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[10,  7],
       [ 3,  4]], dtype=int32)>

In [3]:
tensor + 10

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[20, 17],
       [13, 14]], dtype=int32)>

In [4]:
tensor * 10

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[100,  70],
       [ 30,  40]], dtype=int32)>

In [5]:
tensor - 10

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 0, -3],
       [-7, -6]], dtype=int32)>

In [6]:
tensor / 10

<tf.Tensor: shape=(2, 2), dtype=float64, numpy=
array([[1. , 0.7],
       [0.3, 0.4]])>

In [7]:
# alternatively use the built-in functions
# IMPORTANT: These are much fast on GPU/TPU

In [8]:
tf.add(tensor, 10)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[20, 17],
       [13, 14]], dtype=int32)>

In [9]:
tf.multiply(tensor, 10)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[100,  70],
       [ 30,  40]], dtype=int32)>

In [10]:
tf.subtract(tensor, 10)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 0, -3],
       [-7, -6]], dtype=int32)>

In [11]:
tf.divide(tensor, 10)

<tf.Tensor: shape=(2, 2), dtype=float64, numpy=
array([[1. , 0.7],
       [0.3, 0.4]])>

## Matrix multiplication: tf.matmul
In machine learning, matrix multiplication is one of the most common tensor operations

See: http://matrixmultiplication.xyz/
See: https://www.mathsisfun.com/algebra/matrix-multiplying.html


In [12]:
# this example comes from http://matrixmultiplication.xyz/
matrix_1 = tf.constant([
    [1, 2, 1],
    [0, 1, 0],
    [2, 3, 4]
])
matrix_2 = tf.constant([
    [2, 5],
    [6, 7],
    [1, 8]
])
tf.matmul(matrix_1, matrix_2)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[15, 27],
       [ 6,  7],
       [26, 63]], dtype=int32)>

In [13]:
# alternatively use @ symbol (Python symbol for matrix multiplication)
matrix_1 @ matrix_2

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[15, 27],
       [ 6,  7],
       [26, 63]], dtype=int32)>

**Matrix Rules**
1. The inner dimensions must match. E.g. (3x3)(3x2) The inner dimmensions(3, 3) match
2. The resulting shape is the same as the outside dimensions. E.g. 3x2 in our example above


## Reshaping tensors: tf.reshape(), tf.transpose()

In [14]:
matrix = tf.constant([
    [1, 2, 3],
    [4, 5, 6]
])
matrix

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)>

In [15]:
tf.reshape(matrix, shape=(3, 2)) # reshapes the matrix using the values from left to right, top to bottom

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 2],
       [3, 4],
       [5, 6]], dtype=int32)>

In [16]:
tf.transpose(matrix) # flips the matrix so that x is y and y is x

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 4],
       [2, 5],
       [3, 6]], dtype=int32)>

## Dot Product

Watch  http://matrixmultiplication.xyz/
Notice how the first step is to flip the axes of the second matrix. That's a transpose.
Then notice that the dot product is applied.

Thus, you can do matrix multiplication with these two steps

In [17]:
matrix_1

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[1, 2, 1],
       [0, 1, 0],
       [2, 3, 4]], dtype=int32)>

In [18]:
matrix_2

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[2, 5],
       [6, 7],
       [1, 8]], dtype=int32)>

In [19]:
tf.matmul(matrix_1, matrix_2)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[15, 27],
       [ 6,  7],
       [26, 63]], dtype=int32)>

In [20]:
matrix_2_transposed = tf.transpose(matrix_2)
tf.tensordot(matrix_1, matrix_2_transposed, axes=[[1], [1]])

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[15, 27],
       [ 6,  7],
       [26, 63]], dtype=int32)>

In [21]:
# alternatively you can use the axes parameter to do matrix multiplication in one step using the dot product
tf.tensordot(matrix_1, matrix_2, axes=1)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[15, 27],
       [ 6,  7],
       [26, 63]], dtype=int32)>

## Changing data types

In [22]:
B = tf.constant([1.7, 7.4])
B.dtype

tf.float32

In [25]:
C = tf.constant([1, 2])
C.dtype

tf.int32

In [26]:
D = tf.constant(['a', 'b'])
D.dtype

tf.string

In [27]:
# Modern accelerators can run 16-bit dtypes much faster than 32-bit
# 16-bit dtype takes up half the memory
# when dealing with millions of data points this can be quite significant
B_16 = tf.cast(B, dtype=tf.float16)
B_16.dtype

tf.float16

In [29]:
C_16 = tf.cast(C, dtype=tf.int16)
C_16.dtype

tf.int16

## Aggregating tensors: minimum, maximum, mean, sum

In [30]:
# Get the absolute values
D = tf.constant([-7, -10])
D

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([ -7, -10], dtype=int32)>

In [31]:
tf.abs(D)

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([ 7, 10], dtype=int32)>

In [33]:
import numpy as np
Large = tf.constant(np.random.randint(0, 100, size=50))
tf.size(Large), Large.shape, Large.ndim

(<tf.Tensor: shape=(), dtype=int32, numpy=50>, TensorShape([50]), 1)

In [34]:
Large

<tf.Tensor: shape=(50,), dtype=int64, numpy=
array([28,  6,  6, 17, 40, 11, 42, 51, 90, 35, 82,  9, 92, 49, 63, 33, 18,
       66, 20, 73, 39, 69,  7,  2, 13, 66, 44,  7, 40, 83, 77, 86, 69, 37,
       50, 12, 98, 50, 50, 40, 26, 59, 98, 46, 39, 92, 11, 19, 11, 38])>

In [35]:
tf.reduce_min(Large)

<tf.Tensor: shape=(), dtype=int64, numpy=2>

In [36]:
tf.reduce_max(Large)

<tf.Tensor: shape=(), dtype=int64, numpy=98>

In [37]:
tf.reduce_mean(Large)

<tf.Tensor: shape=(), dtype=int64, numpy=44>

In [38]:
tf.reduce_sum(Large)

<tf.Tensor: shape=(), dtype=int64, numpy=2209>

## Variance and Standard Deviation

In [41]:
import tensorflow_probability as tfp
tfp.stats.variance(Large)

<tf.Tensor: shape=(), dtype=int64, numpy=792>

In [47]:
# alternatively we don't need a separate module, but note that we must first convert integers to floats
tf.math.reduce_variance(tf.cast(Large, dtype=tf.float32))

<tf.Tensor: shape=(), dtype=float32, numpy=792.9076>

In [43]:
tf.math.reduce_std(tf.cast(Large, dtype=tf.float32)) # Notice that the integers must be cast to float for standard deviation to be computed

<tf.Tensor: shape=(), dtype=float32, numpy=28.158615>

## Positional minimum and maximum: argmax, argmin

In [48]:
# What index does the maximum value occur?
tf.random.set_seed(42)
F = tf.random.uniform(shape=[50])
F

<tf.Tensor: shape=(50,), dtype=float32, numpy=
array([0.6645621 , 0.44100678, 0.3528825 , 0.46448255, 0.03366041,
       0.68467236, 0.74011743, 0.8724445 , 0.22632635, 0.22319686,
       0.3103881 , 0.7223358 , 0.13318717, 0.5480639 , 0.5746088 ,
       0.8996835 , 0.00946367, 0.5212307 , 0.6345445 , 0.1993283 ,
       0.72942245, 0.54583454, 0.10756552, 0.6767061 , 0.6602763 ,
       0.33695042, 0.60141766, 0.21062577, 0.8527372 , 0.44062173,
       0.9485276 , 0.23752594, 0.81179297, 0.5263394 , 0.494308  ,
       0.21612847, 0.8457197 , 0.8718841 , 0.3083862 , 0.6868038 ,
       0.23764038, 0.7817228 , 0.9671384 , 0.06870162, 0.79873943,
       0.66028714, 0.5871513 , 0.16461694, 0.7381023 , 0.32054043],
      dtype=float32)>

In [50]:
argmax = tf.argmax(F) # the index of the largest value
argmax

<tf.Tensor: shape=(), dtype=int64, numpy=42>

In [51]:
F[argmax]

<tf.Tensor: shape=(), dtype=float32, numpy=0.9671384>

In [54]:
tf.reduce_max(F)

<tf.Tensor: shape=(), dtype=float32, numpy=0.9671384>

In [55]:
tf.argmin(F) # the index location of the smallest value

<tf.Tensor: shape=(), dtype=int64, numpy=16>

## Squeezing tensors (removing 1-dimensional axes)

In [56]:
G = tf.constant(tf.random.uniform(shape=[50]), shape=(1, 1, 1, 1, 50))
G # notice how the 50 values are all in the fifth dimension and all prior dimensions are 1 dimensional: shape (1, 1, 1, 1, 50)

<tf.Tensor: shape=(1, 1, 1, 1, 50), dtype=float32, numpy=
array([[[[[0.68789124, 0.48447883, 0.9309944 , 0.252187  , 0.73115396,
           0.89256823, 0.94674826, 0.7493341 , 0.34925628, 0.54718256,
           0.26160395, 0.69734323, 0.11962581, 0.53484344, 0.7148968 ,
           0.87501776, 0.33967495, 0.17377627, 0.4418521 , 0.9008261 ,
           0.13803864, 0.12217975, 0.5754491 , 0.9417181 , 0.9186585 ,
           0.59708476, 0.6109482 , 0.82086265, 0.83269787, 0.8915849 ,
           0.01377225, 0.49807465, 0.57503664, 0.6856195 , 0.75972784,
           0.908944  , 0.40900218, 0.8765154 , 0.53890026, 0.42733097,
           0.401173  , 0.66623247, 0.16348064, 0.18220246, 0.97040176,
           0.06139731, 0.53034747, 0.9869994 , 0.4746945 , 0.8646754 ]]]]],
      dtype=float32)>

In [57]:
G.shape

TensorShape([1, 1, 1, 1, 50])

In [58]:
G_squeezed = tf.squeeze(G)
G_squeezed

<tf.Tensor: shape=(50,), dtype=float32, numpy=
array([0.68789124, 0.48447883, 0.9309944 , 0.252187  , 0.73115396,
       0.89256823, 0.94674826, 0.7493341 , 0.34925628, 0.54718256,
       0.26160395, 0.69734323, 0.11962581, 0.53484344, 0.7148968 ,
       0.87501776, 0.33967495, 0.17377627, 0.4418521 , 0.9008261 ,
       0.13803864, 0.12217975, 0.5754491 , 0.9417181 , 0.9186585 ,
       0.59708476, 0.6109482 , 0.82086265, 0.83269787, 0.8915849 ,
       0.01377225, 0.49807465, 0.57503664, 0.6856195 , 0.75972784,
       0.908944  , 0.40900218, 0.8765154 , 0.53890026, 0.42733097,
       0.401173  , 0.66623247, 0.16348064, 0.18220246, 0.97040176,
       0.06139731, 0.53034747, 0.9869994 , 0.4746945 , 0.8646754 ],
      dtype=float32)>

## One-Hot Encoding: Turning categories into numbers
Example:

red, green, blue

1, 0, 0

0, 1, 0

0, 0, 1

In [59]:
some_list = [0, 1, 2, 3] # example 0 could be red, 1 could be green, 2 could be blue, and 3 could be purple
tf.one_hot(some_list, depth=len(some_list))

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]], dtype=float32)>

In [61]:
# let's say we have 100 categories. We can quickly create the indexes and then one-hot encode them
num_categories = 100
indices = tf.range(0, 100)

<tf.Tensor: shape=(100,), dtype=int32, numpy=
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
      dtype=int32)>

In [62]:
one_hot=tf.one_hot(indices, depth=num_categories)
one_hot

<tf.Tensor: shape=(100, 100), dtype=float32, numpy=
array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], dtype=float32)>

## More tensor math: See tf.math for all possibilities
See https://www.tensorflow.org/api_docs/python/tf/math

In [63]:
tf.square(indices)

<tf.Tensor: shape=(100,), dtype=int32, numpy=
array([   0,    1,    4,    9,   16,   25,   36,   49,   64,   81,  100,
        121,  144,  169,  196,  225,  256,  289,  324,  361,  400,  441,
        484,  529,  576,  625,  676,  729,  784,  841,  900,  961, 1024,
       1089, 1156, 1225, 1296, 1369, 1444, 1521, 1600, 1681, 1764, 1849,
       1936, 2025, 2116, 2209, 2304, 2401, 2500, 2601, 2704, 2809, 2916,
       3025, 3136, 3249, 3364, 3481, 3600, 3721, 3844, 3969, 4096, 4225,
       4356, 4489, 4624, 4761, 4900, 5041, 5184, 5329, 5476, 5625, 5776,
       5929, 6084, 6241, 6400, 6561, 6724, 6889, 7056, 7225, 7396, 7569,
       7744, 7921, 8100, 8281, 8464, 8649, 8836, 9025, 9216, 9409, 9604,
       9801], dtype=int32)>

In [64]:
tf.sqrt(tf.cast(indices, dtype=tf.float32))

<tf.Tensor: shape=(100,), dtype=float32, numpy=
array([0.       , 1.       , 1.4142135, 1.7320508, 2.       , 2.236068 ,
       2.4494898, 2.6457512, 2.828427 , 3.       , 3.1622777, 3.3166249,
       3.4641016, 3.6055512, 3.7416575, 3.8729835, 4.       , 4.1231055,
       4.2426405, 4.358899 , 4.472136 , 4.582576 , 4.690416 , 4.7958317,
       4.8989797, 5.       , 5.0990195, 5.196152 , 5.2915025, 5.3851647,
       5.477226 , 5.5677643, 5.656854 , 5.7445626, 5.8309517, 5.91608  ,
       6.       , 6.0827627, 6.164414 , 6.244998 , 6.3245554, 6.4031243,
       6.4807405, 6.5574384, 6.6332498, 6.708204 , 6.78233  , 6.8556547,
       6.928203 , 7.       , 7.071068 , 7.1414285, 7.2111025, 7.28011  ,
       7.3484693, 7.4161983, 7.483315 , 7.5498343, 7.615773 , 7.6811457,
       7.745967 , 7.81025  , 7.8740077, 7.937254 , 8.       , 8.062258 ,
       8.124039 , 8.185352 , 8.246211 , 8.306623 , 8.3666   , 8.426149 ,
       8.485281 , 8.5440035, 8.602325 , 8.6602545, 8.717798 , 8.774964 ,
   