<a href="https://colab.research.google.com/github/noamsw/TensorFlow/blob/main/TensorFlow2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import tensorflow as tf

In [None]:
# learning how to index tensors
tensor = tf.constant([1,2,3,4,5,6,7,8,9])
tensor

<tf.Tensor: shape=(9,), dtype=int32, numpy=array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int32)>

In [None]:
# tensors are indexed like regular arrays, i.e. 0 indexed
# each index will be a tensor itself
tensor[0]

<tf.Tensor: shape=(), dtype=int32, numpy=1>

In [None]:
tensor[0:4]

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([1, 2, 3, 4], dtype=int32)>

In [None]:
# we can create tensors with ranges of numbers
# the range acts the same as python ranges
# range(startindex (included), endindex(not included), steps)
range_tensor = tf.range(0,5)
range_tensor

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 1, 2, 3, 4], dtype=int32)>

In [None]:
# exploring how indexing works in 2D tensors
tensor_2D = tf.random.uniform([4,3], minval=-100, maxval=100, dtype=tf.int32)
tensor_2D

<tf.Tensor: shape=(4, 3), dtype=int32, numpy=
array([[ 96, -60,  24],
       [ 91,  39, -67],
       [-70, -97, -78],
       [ 65,  64,  57]], dtype=int32)>

In [None]:
# suppose we want the first 3 rows and 2 cols:
sub_tensor = tensor_2D[0:3, 0:2]
sub_tensor

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[38, 51],
       [12,  6],
       [25, 51]], dtype=int32)>

In [None]:
# select only a specific row
subrow_tensor = tensor_2D[2,:]
subrow_tensor

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([25, 51,  6], dtype=int32)>

In [None]:
#  similiarly for a column
subcol_tensor = tensor_2D[:, 1]
subcol_tensor

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([51,  6, 51, 22], dtype=int32)>

In [None]:
# another way to select an entire row/col is to use three dots ...
subcol_tensor = tensor_2D[...,1]
subcol_tensor

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([51,  6, 51, 22], dtype=int32)>

In [None]:
# indicies with 3d tensors
tensor_3D = tf.random.uniform([4,2,3], minval=0, maxval=100, dtype=tf.int32)
tensor_3D

<tf.Tensor: shape=(4, 2, 3), dtype=int32, numpy=
array([[[17, 35, 64],
        [85, 91, 77]],

       [[18, 27, 87],
        [25, 26, 64]],

       [[14, 90, 10],
        [79, 42, 17]],

       [[ 2, 47, 76],
        [70, 48, 95]]], dtype=int32)>

In [None]:
# in essence the selection is the same, however there are now 3 selections to specify
subtensor_3D = tensor_3D[0:2, :, 0:2]
subtensor_3D

<tf.Tensor: shape=(2, 2, 2), dtype=int32, numpy=
array([[[17, 35],
        [85, 91]],

       [[18, 27],
        [25, 26]]], dtype=int32)>

In [None]:
# the same type of three dot selection works
subtensor_3D = tensor_3D[:,...,1]
subtensor_3D

<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[35, 91],
       [27, 26],
       [90, 42],
       [47, 48]], dtype=int32)>

In [None]:
# lets start exploring some basic math functions
# start with abs function
# this simply turns all values into postive values
tensor_abs = tf.abs(tensor_2D)
print(tensor_2D)
print(tensor_abs)

tf.Tensor(
[[ 96 -60  24]
 [ 91  39 -67]
 [-70 -97 -78]
 [ 65  64  57]], shape=(4, 3), dtype=int32)
tf.Tensor(
[[96 60 24]
 [91 39 67]
 [70 97 78]
 [65 64 57]], shape=(4, 3), dtype=int32)


In [None]:
# abs value will also turn complex numbers into a noncomplex number
# return the magnitude of the complex number
complex_abs = tf.abs(tf.constant([4+3j]))
print(complex_abs)

tf.Tensor([5.], shape=(1,), dtype=float64)


In [None]:
# addition function wil perform element wise addition
tensor1 = tf.constant([1,2,3,4])
tensor2 = tf.constant([5,6,7,8])
tensor_sum = tf.add(tensor1, tensor2)
print(tensor_sum)
# division
print(tf.divide(tensor2, tensor1))
# there is multiplication as well

tf.Tensor([ 6  8 10 12], shape=(4,), dtype=int32)
tf.Tensor([5.         3.         2.33333333 2.        ], shape=(4,), dtype=float64)


In [None]:
# divide no Nan, which will turn infinity into 0
tensor1 = tf.constant([1,2,3,0])
tensor2 = tf.constant([5,6,7,8])
# regulare divide will return inf
print(tf.divide(tensor2, tensor1))
print(tf.math.divide_no_nan(tensor2, tensor1))

tf.Tensor([5.         3.         2.33333333        inf], shape=(4,), dtype=float64)
tf.Tensor([5.         3.         2.33333333 0.        ], shape=(4,), dtype=float64)


In [None]:
# when dividing shapes must be compatable
tensor3 = tf.constant([4])
tensor3_stretched = tf.constant([4,4,4,4])
print(tf.divide(tensor1, tensor3))
print(tf.divide(tensor1, tensor3_stretched))
tensor4 = tf.constant([2,4])
tensor5 = tf.constant([[2,1], [4,5]])
print(tf.divide(tensor5, tensor4))

tf.Tensor([0.25 0.5  0.75 0.  ], shape=(4,), dtype=float64)
tf.Tensor([0.25 0.5  0.75 0.  ], shape=(4,), dtype=float64)
tf.Tensor(
[[1.   0.25]
 [2.   1.25]], shape=(2, 2), dtype=float64)


In [None]:
# what is happening is tf is broadcasting the smaller tensor
# stretching it out in a way that makes sense
tensor = tf.constant([[3],[4],[5]])
tensor2 = tf.constant([1,1,1])
multiplied = tf.multiply(tensor2, tensor)
print(multiplied)
print(tensor.shape)
print(tensor2.shape)
print(multiplied.shape)

tf.Tensor(
[[3 3 3]
 [4 4 4]
 [5 5 5]], shape=(3, 3), dtype=int32)
(3, 1)
(3,)
(3, 3)


In [None]:
# maiximum, which will also broadcast
print(tf.math.maximum(tensor, tensor2))

tf.Tensor(
[[3 3 3]
 [4 4 4]
 [5 5 5]], shape=(3, 3), dtype=int32)


In [None]:
# argmax, return the index of the max argument along an axis
tensor = tf.constant([[ 96, -60,  24],
                      [ 91,  39, -67],
                      [-70, -97, -78],
                      [ 65,  64,  57]])
print(tensor.shape)
# this is not very intuitive
# it works a little backwards to how i would think of it
# perhaps think of it like 0 means we want a row that contains the index of max value in each row
# along columns:
argmax = tf.math.argmax(tensor, 0)
print(argmax)
# along rows:
argmax = tf.math.argmax(tensor, 1)
print(argmax)

(4, 3)
tf.Tensor([0 3 3], shape=(3,), dtype=int64)
tf.Tensor([0 0 0 0], shape=(4,), dtype=int64)


In [None]:
# when working on higher dimensional arrays, it gets a little more complicated
# for instance in a 3 dimensional array,
# arg max on 1 will give back a array with a array of max index in each row
tensor = tf.constant([[[2,32,2],
                       [9,2,20]],
                      [[1,1,1],
                       [21,1,9]],
                      [[1,1,1],
                       [21,1,90]]])
print(tensor.shape)
print(tf.argmax(tensor, 1))

(3, 2, 3)
tf.Tensor(
[[1 0 1]
 [1 0 1]
 [1 0 1]], shape=(3, 3), dtype=int64)


In [None]:
# equal method which checks if the values are the same element wise in each tensor
tensor1 = tf.constant([2,4])
tensor2 = tf.constant([2,5])
print(tf.math.equal(tensor1, tensor2))

tf.Tensor([ True False], shape=(2,), dtype=bool)


In [None]:
# raise each element in the tensor to a power element wise
print(tf.pow(tensor1, 2))
# works on tensors as well, raising each elemnt in t1 to each element at the same index in t2
print(tf.math.pow(tensor1, tensor2))

tf.Tensor([ 4 16], shape=(2,), dtype=int32)
tf.Tensor([   4 1024], shape=(2,), dtype=int32)


In [None]:
# reduce sum takes a tensor and a computes the sum of elements along an axis
print(tensor)
print(tf.reduce_sum(tensor, 0))
# if no axis, the sum of all elements is returned
print(tf.reduce_sum(tensor))

tf.Tensor(
[[[ 2 32  2]
  [ 9  2 20]]

 [[ 1  1  1]
  [21  1  9]]

 [[ 1  1  1]
  [21  1 90]]], shape=(3, 2, 3), dtype=int32)
tf.Tensor(
[[  4  34   4]
 [ 51   4 119]], shape=(2, 3), dtype=int32)
tf.Tensor(216, shape=(), dtype=int32)


In [68]:
# reduce max will return the actual max values
tensor = tf.constant([[ 96, -60,  24],
                      [ 91,  39, -67],
                      [-70, -97, -78],
                      [ 65,  64,  57]], dtype=tf.float32)
print(tf.math.reduce_max(tensor))
print(tf.math.reduce_max(tensor, 0))
# calculate the mean
print(tf.math.reduce_mean(tensor, 0))

tf.Tensor(96.0, shape=(), dtype=float32)
tf.Tensor([96. 64. 57.], shape=(3,), dtype=float32)
tf.Tensor([ 45.5 -13.5 -16. ], shape=(3,), dtype=float32)


In [71]:
# calculating the sigmoid function:
# sigmoid(x) = 1 / 1 + exp(-x)
tensor = tf.constant([0,0],dtype=tf.float32)
print(tf.math.sigmoid(tensor))

tf.Tensor([0.5 0.5], shape=(2,), dtype=float32)


In [73]:
# top k returns the top k values along an axis
# ot returns an arrays of values and indices
tensor = tf.constant([[1,2,3],
                      [5,6,7]])
print(tf.math.top_k(tensor, k=2))

TopKV2(values=<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[3, 2],
       [7, 6]], dtype=int32)>, indices=<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[2, 1],
       [2, 1]], dtype=int32)>)


In [77]:
# explores some linear algebra operation
# the first of them is math mul
# this is dufferent then tf.mul which was element wise
mat1 = tf.constant([[1,1,1],
                    [1,1,1]])
mat2 = tf.constant([[2,4,3],
                    [1,1,1]])
print(tf.linalg.matmul(mat1, mat2, transpose_b=True))
# there is also adjoint parameter, to tell linal.matmul if we are multiplying the adjoint

tf.Tensor(
[[9 3]
 [9 3]], shape=(2, 2), dtype=int32)


In [80]:
# another way to compute matrix mul is @
print(mat1@tf.transpose(mat2))

tf.Tensor(
[[9 3]
 [9 3]], shape=(2, 2), dtype=int32)


In [82]:
# how to multiply higher dimension matrices?
tensor1 = tf.constant([[[2,32],
                       [9,2]],
                      [[1,1],
                       [21,1]],
                      [[1,1],
                       [21,1]]])
tensor2 = tf.constant([[[2,32,2],
                       [9,2,20]],
                      [[1,1,1],
                       [21,1,9]],
                      [[1,1,1],
                       [21,1,90]]])
# when multipling higher dimension it goes batch wise
# i.e. the first matrix in the first tensor multiplies the first in the second
# and so forth
print(tf.linalg.matmul(tensor1, tensor2))

tf.Tensor(
[[[292 128 644]
  [ 36 292  58]]

 [[ 22   2  10]
  [ 42  22  30]]

 [[ 22   2  91]
  [ 42  22 111]]], shape=(3, 2, 3), dtype=int32)


In [84]:
# we can also broadcast as well
tensor1 = tf.constant([[[2,1],
                       [9,2]]])
print(tf.linalg.matmul(tensor1, tensor2))

tf.Tensor(
[[[ 13  66  24]
  [ 36 292  58]]

 [[ 23   3  11]
  [ 51  11  27]]

 [[ 23   3  92]
  [ 51  11 189]]], shape=(3, 2, 3), dtype=int32)


In [None]:
# if your matrix is sparse, specify that in the argument
# this will allow for optimization
# you can also calculate the adjoint very easily
# band part, which is a indicator funtion that works elemtn wise
# S.T. if the condition is not meant, the elemnt is zerod
#  condition: (lower < 0 or m-n <= lower) and (upper < 0 or n-m <= upper)
# where m is row and n is column and upper and lower are arguments
# this creates a band matrix
# special cases:
# Diagonal: upper,lower = 0,0
# Upper: upper,lower = 0,-1
# Lower: upper,lower = -1,0

In [5]:
tensor = tf.constant([[ 96, -60,  24],
                      [ 91,  39, -67],
                      [-70, -97, -78]], dtype=tf.float32)
print(tf.linalg.band_part(tensor, 0, 0))
print(tf.linalg.band_part(tensor, 0, -1))
print(tf.linalg.band_part(tensor, -1, 0))

tf.Tensor(
[[ 96.   0.   0.]
 [  0.  39.   0.]
 [  0.   0. -78.]], shape=(3, 3), dtype=float32)
tf.Tensor(
[[ 96. -60.  24.]
 [  0.  39. -67.]
 [  0.   0. -78.]], shape=(3, 3), dtype=float32)
tf.Tensor(
[[ 96.   0.   0.]
 [ 91.  39.   0.]
 [-70. -97. -78.]], shape=(3, 3), dtype=float32)


In [None]:
# there are many important and interesting methods
# inverses, cross product, determinant, decompositions, trace etc..

In [8]:
# let us examine the einsum operator
# first let us look at regular matrix multiplication
import numpy as np
A = np.array([[2,6,5,2],
              [2,-2,2,3],
              [1,5,4,0]])
B = np.array([[2,9,0,3,0],
              [3,6,8,-2,2],
              [1,3,5,0,1],
              [3,0,2,0,5]])
# notice that A is of shape 3,4 and B 4,5
# TF we can multiply them
print(A.shape)
print(B.shape)
print('Matml C:')
print(np.matmul(A,B))

(3, 4)
(4, 5)
Matml C:
[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [21 51 60 -7 14]]


In [9]:
# how can we replace this with einsum?
print('Einsum C')
# here we tell the operator that we are taking a matrix of shape ij (3,4)
# and another of shape jk (4,5), and we want an output of ik (3,5)
print(np.einsum("ij,jk -> ik", A,B))
# the result is the same, however, the einsum operator can be much easier to handle

Einsum C
[[33 69 77 -6 27]
 [ 9 12  0 10 13]
 [21 51 60 -7 14]]


In [13]:
# we can alos use the einsum operator to perform element wise multiplication
# the matrices must be the same size for this
A = np.array([[2,6,5,2],
              [2,-2,2,3],
              [1,5,4,0]])
B = np.array([[2,9,0,3],
              [3,6,8,-2],
              [1,3,5,0]])
# notice that A is of shape 3,4 and B 4,5
# TF we can multiply them
print(A.shape)
print(B.shape)
print('Hardamond C:')
print(np.einsum("ij,ij -> ij", A,B))
print('notice it is the same as element wise multiplication')
print(A*B)

(3, 4)
(3, 4)
Hardamond C:
[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]]
notice it is the same as element wise multiplication|
[[  4  54   0   6]
 [  6 -12  16  -6]
 [  1  15  20   0]]


In [15]:
# transpose with einsum
A = np.array([[2,6,5,2],
              [2,-2,2,3],
              [1,5,4,0]])
print(np.einsum("ij->ji", A))
# the einsum is the same in tf

[[ 2  2  1]
 [ 6 -2  5]
 [ 5  2  4]
 [ 2  3  0]]


In [19]:
# working with 3D arrays
# usually, high dimension multiplication is done 'element wise'
# for instance, if you have a (2,3,4) array, and a (2,4,3) array,
# the first element in the first array will mutliply the first in the second
# and the second element in the second element will multiple the second in the second
# remeber that broadcasting can be done
A = tf.random.uniform([2,3,4], minval=0, maxval=20, dtype=tf.int32)
print(A)
B = tf.random.uniform([2,4,4], minval=0, maxval=20, dtype=tf.int32)
print(B)
print(A.shape, B.shape)

tf.Tensor(
[[[ 8  6  4  6]
  [ 3 14 19  4]
  [ 2 14  9  0]]

 [[10 10  3  8]
  [19  0 17  0]
  [ 7 18  1 18]]], shape=(2, 3, 4), dtype=int32)
tf.Tensor(
[[[ 6 16  5 14]
  [14 11 17 11]
  [18  2 18  5]
  [12 12 19 14]]

 [[ 3  6 17 16]
  [ 5 11 16 15]
  [19 19 10  6]
  [16 11  0 16]]], shape=(2, 4, 4), dtype=int32)
(2, 3, 4) (2, 4, 4)


In [20]:
# regular matrix multiplication and einsum are again the same
print('Batch multiplication')
print(np.matmul(A,B))
print('einsum')
print(np.einsum('bij,bjk -> bik',A,B))

Batch multiplication
[[[276 274 328 282]
  [604 288 671 347]
  [370 204 410 227]]

 [[265 315 360 456]
  [380 437 493 406]
  [418 457 417 676]]]
einsum
[[[276 274 328 282]
  [604 288 671 347]
  [370 204 410 227]]

 [[265 315 360 456]
  [380 437 493 406]
  [418 457 417 676]]]


In [23]:
# using eisum to sum:
A = np.array([[2,6,5,2],
              [2,-2,2,3],
              [1,5,4,0]])
print(np.sum(A))
print(np.einsum("ij->", A))

30
30


In [28]:
# sum along an axis:
# along columns:
print(np.sum(A,axis=0))
print(np.einsum("ik -> k",A))
# along rows
print(np.sum(A,axis=1))
print(np.einsum("ik -> i",A))

[ 5  9 11  5]
[ 5  9 11  5]
[15  5 10]
[15  5 10]


In [30]:
# let us take a real like exampe
# imagine a query and key,
# with query of size batchsize, s_q, and modelsize bqm
# with key of size batchsize, s_k, and modelsize bkm
Q = np.random.randn(32, 64, 512)
K = np.random.randn(32, 128, 512)

In [32]:
np.einsum('bqm,bkm -> bqk', Q,K).shape

(32, 64, 128)