# Tensorflow cheat sheet 

In [2]:
import tensorflow as tf




In [3]:
# Check the installed TensorFlow version; the bare expression is shown as the cell output.
tf.__version__

'2.15.0'

In [None]:
# NOTE: in a notebook only the last bare expression of a cell is displayed, so
# most expressions in this cell produce no visible output when run together.
constant = tf.constant(43)
# Constants are immutable: once initialized, their values cannot be changed.
constant_matrix = tf.constant([[1,2,3],[4,5,6]])  # a dtype can be forced: tf.constant([[1,2,3],[4,5,6]],dtype=tf.float32)
constant_matrix.shape
constant_matrix[0][0]

tf.ones(shape=(2,3))  # -1*tf.ones(shape=(2,3)) multiplies every element by -1
tf.zeros(shape=(2,3))  # can be used as a placeholder, e.g. to hold weights

tf.random.normal(shape=(2,2) , mean=0 , stddev=1.0)
tf.random.uniform(shape=(2,2) , minval=0 , maxval=20)


# Unlike a constant, a tf.Variable is mutable: its values can be updated in
# place with assign, e.g. tf_var[0,0].assign(100)
tf_var = tf.Variable([[2,3,4],[5,6,7]])
tf.Variable(43)
tensor = tf.Variable([[[10, 11, 12], [13, 15, 18]],[[10, 11, 12], [13, 15, 18]]])
tensor 
'''
output :
array([[[10, 11, 12],
        [13, 15, 18]],

       [[10, 11, 12],
        [13, 15, 18]]])>

'''

# Slice: first block (index 0), every row, columns from index 1 onward.
tensor[0,:,1:] 
'''
<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[11, 12],
       [15, 18]])>

'''

################   Reshaping
tensor = tf.Variable([[22,2,3],[4,5,6]])
tensor.shape
tf.reshape(tensor,[3,2])  # NOTE: reshape only re-groups the same 6 values into 3 rows of 2; it is NOT a transpose (tf.transpose does that)
tf.reshape(tensor ,[6,1])

########## Squaring values: tf.square is demonstrated under "Basic Operations" in this cell


########## Rank of a tensor (number of dimensions)
tensor = tf.Variable([[22,2,3],[4,5,6]])
tf.rank(tensor)  # output :  <tf.Tensor: shape=(), dtype=int32, numpy=2>


######## Basic Operations (all applied element by element)
const1 = tf.constant([[1,2,3],[4,5,6]],dtype=tf.float32)
const2 = tf.constant([[2,4,5],[9,8,2]],dtype=tf.float32)
tf.add(const1,const2)
tf.square(const1)
tf.exp(const1)
const1*const2  ## Element wise multiplication


#################################################################################
##########################   Broadcasting #######################################
################################################################################
# A Python scalar is combined with every element of the tensor.
# `tensor` here is the 2x3 integer variable defined in the rank example above.
scalar = 4
scalar*tensor
scalar+tensor
scalar-tensor


################################################################################
##########################  Matrix multiplication  #############################
################################################################################
# mat_u and mat_v are both 1x3 row vectors.
mat_u = tf.constant([[6,7,7]])
mat_v = tf.constant([[3,4,3]])
tf.transpose(mat_u)  # 3x1 column vector
mat_u.numpy()  # to convert to a numpy array
tf.matmul(mat_u , tf.transpose(mat_v))  # (1x3)(3x1) -> 1x1
tf.matmul(tf.transpose(mat_v), mat_u)  # (3x1)(1x3) -> 3x3
tf.transpose(mat_v) @ mat_u ### <<< Matrix mul via the @ operator
mat_u @ tf.transpose(mat_v) ### <<< Matrix mul via the @ operator
mat_u * mat_v # element wise multiplication


varx = [1,2,3,4,5,6]
vary = 2
tf.math.squared_difference(varx,vary)   # element-wise (varx - vary)**2

num = tf.constant([[1,2] , [3,4]])
# tf.reduce_mean keeps the dtype of its input, so on this int32 tensor the mean
# would be truncated to an integer (2 instead of 2.5). Cast to float first so
# the reported means are the true arithmetic means.
num_float = tf.cast(num, tf.float32)
tf.reduce_mean(num_float)            # total mean      -> 2.5
tf.reduce_mean(num_float , axis=1)   # mean per row    -> [1.5, 3.5]
tf.reduce_mean(num_float , axis=0)   # mean per column -> [2.0, 3.0]
# The remaining reductions are exact on integers, so the original tensor is used.
tf.reduce_max(num)
tf.reduce_min(num)
tf.reduce_sum(num)
tf.reduce_prod(num)


############################################################################
################  Ragged tensors (nested arrays with varying lengths ) ####
###########################################################################
# The three rows have lengths 5, 1 and 2; indexing returns one row at a time.
ragged = tf.ragged.constant([[1,2,3,4,5],[1],[135,1]])
ragged[0]
ragged[1]
ragged[2]

#  CheckPointing ( restore matrix values ) 

In [24]:
# 5x5 variable filled with 5.0.
var1 = tf.Variable(5*tf.ones((5,5)))
# Track var1 under the name "var" and save its current values; save() returns
# the path that is passed to restore() in a later cell.
ckpt = tf.train.Checkpoint(var=var1)
savepath = ckpt.save('vars.ckpt')
var1

<tf.Variable 'Variable:0' shape=(5, 5) dtype=float32, numpy=
array([[5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.]], dtype=float32)>

In [25]:
# Overwrite the variable with zeros so that the effect of restoring is visible.
var1.assign(tf.zeros((5,5)))

<tf.Variable 'UnreadVariable' shape=(5, 5) dtype=float32, numpy=
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]], dtype=float32)>

In [26]:
# Load the values saved under `savepath` back into the tracked variable.
ckpt.restore(savepath)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x235919a13d0>

In [27]:
# After the restore, var1 holds the checkpointed values (all 5.0) again.
var1

<tf.Variable 'Variable:0' shape=(5, 5) dtype=float32, numpy=
array([[5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.],
       [5., 5., 5., 5., 5.]], dtype=float32)>

# Tensorflow functions 

$ z = 6x^3 + y^3 $ (the function below returns the mean of $z$ over all elements)

In [31]:
@tf.function  # wrap f2 with TensorFlow's function decorator
def f2(x,y):
    """Return the mean over all elements of z = 6*x**3 + y**3."""
    cubic_x_term = tf.multiply(x**3,6)
    cubic_y_term = y**3
    return tf.reduce_mean(cubic_x_term + cubic_y_term)
# A decorator acts as a wrapper: it adds functionality to the function
# without changing the function's own definition.

# z = [6*27 + 1, 6*(-64) + 64] = [163., -320.], so the mean is -78.5.
x = tf.constant([3.,-4.])
y = tf.constant([1.,4.])
f2(x,y)

<tf.Tensor: shape=(), dtype=float32, numpy=-78.5>

# Slicing 

# Gradients 

In [None]:
# Random 2x2 inputs. No random seed is set in the visible notebook, so the
# values (and the gradients computed from them) differ between runs.
x = tf.random.normal(shape=(2,2))
y = tf.random.normal(shape=(2,2))

$ f(x,y) = \sqrt{x^2 + y^2 } $
<br>
$\nabla f(x,y) = \frac{\partial f}{\partial x}\hat{\imath} + \frac{\partial f}{\partial y}\hat{\jmath}$

In [None]:
# f(x, y) = sqrt(x^2 + y^2). x and y are plain tensors here, so each one must be
# registered with tape.watch before the forward pass for the tape to track it.
# Only the forward computation is recorded inside each `with` block; gradients
# are requested after the block exits, matching how train() and the Jacobian
# example in this notebook use the tape.

#### Partial derivative of f with respect to x
with tf.GradientTape() as tape:
  tape.watch(x) ### <<< I want to calculate grad wrt x
  f = tf.sqrt(tf.square(x) + tf.square(y))

df_dx = tape.gradient(f, x)   #  partial derivative of f with respect to x
print(df_dx)



#### Partial derivative of f with respect to y
with tf.GradientTape() as tape:
  tape.watch(y) ### <<< I want to calculate grad wrt y
  f = tf.sqrt(tf.square(x) + tf.square(y))

df_dy = tape.gradient(f, y)  #  partial derivative of f with respect to y
print(df_dy)



######## Get both partial derivatives at once
with tf.GradientTape() as tape:
  tape.watch(y) ### <<< I want to calculate grad wrt y
  tape.watch(x) ### <<< I want to calculate grad wrt x
  f = tf.sqrt(tf.square(x) + tf.square(y))

# Passing a list of sources returns one gradient per source, in the same order;
# together they form the gradient of f.
df_dx, df_dy = tape.gradient(f, [x, y])

print(df_dx)
print(df_dy)


In [32]:
## tape.watch is only needed when x and y are not variables. Earlier, x and y came
# from tf.random.normal(shape=(2,2)), which returns a plain (constant) tensor.
# When x and y are wrapped in tf.Variable as below, tape.watch() is not needed.
x= tf.Variable(tf.random.normal(shape=(2,2)))
y = tf.Variable(tf.random.normal(shape=(2,2)))

with tf.GradientTape() as tape:
  f = tf.sqrt(tf.square(x) + tf.square(y))

# Request the gradients after the forward pass has been recorded, outside the
# `with` block, consistent with train() elsewhere in this notebook.
df_dx, df_dy = tape.gradient(f, [x, y]) ## partial diff wrt x and y

print(df_dx)
print(df_dy)

tf.Tensor(
[[-0.8190673   0.22711253]
 [-0.9788309  -0.9413712 ]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[ 0.57369745  0.9738686 ]
 [-0.20467056  0.33737263]], shape=(2, 2), dtype=float32)


In [33]:
# Analytic check: df/dx = x / sqrt(x^2 + y^2); this should match df_dx from the tape.
x/tf.sqrt(tf.square(x) + tf.square(y))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-0.8190673 ,  0.22711253],
       [-0.97883093, -0.9413712 ]], dtype=float32)>

In [34]:
# Analytic check: df/dy = y / sqrt(x^2 + y^2); this should match df_dy from the tape.
y/tf.sqrt(tf.square(x) + tf.square(y))

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[ 0.57369745,  0.97386855],
       [-0.20467058,  0.33737263]], dtype=float32)>

## Jacobians

In [38]:
# Five evenly spaced points from -10 to 10.
x = tf.linspace(-10.0, 10.0, 5)
print('x :' , x )
# delta is a tf.Variable, so no explicit tape.watch call is made for it.
delta = tf.Variable(0.0)

with tf.GradientTape() as tape:
  y = tf.nn.sigmoid(x+delta)    # the function under study (named y here; earlier cells used f)
  print('y :' , y)
    
# Despite the name dy_dx, the source passed to jacobian() is delta: the result
# has one entry per element of y, each the derivative with respect to delta.
dy_dx = tape.jacobian(y, delta)
print(dy_dx)

x : tf.Tensor([-10.  -5.   0.   5.  10.], shape=(5,), dtype=float32)
y : tf.Tensor([4.539787e-05 6.692851e-03 5.000000e-01 9.933072e-01 9.999546e-01], shape=(5,), dtype=float32)
tf.Tensor([4.5395806e-05 6.6480567e-03 2.5000000e-01 6.6480329e-03 4.5416677e-05], shape=(5,), dtype=float32)


# Simple linear regression

$ f(x) = W.x + b $

In [None]:
######  First, create a synthetic data set: y = TRUE_W * x + TRUE_B + noise
TRUE_W = 3.0   # weight
TRUE_B = 2.0   # bias
# These are the exact values that training should recover for the weight and bias.

NUM_EXAMPLES = 1000

# Inputs drawn from a standard normal distribution (no seed is set in the visible notebook).
x = tf.random.normal(shape=[NUM_EXAMPLES])

# Additive noise with the same shape as x.
noise = tf.random.normal(shape=[NUM_EXAMPLES])
y = x * TRUE_W + TRUE_B + noise

* In Python, super() is a function that provides a way to access methods and properties from a parent or superclass within a subclass. It's commonly used in object-oriented programming (OOP) to call methods of the parent class in cases where the subclass overrides those methods. 
* Accessing Parent Class Methods: When a method is overridden in a subclass, you might still want to call the parent class's version of the method from within the subclass. super() allows you to do this.

In [None]:
# Illustration only: the super() example is kept inside a string literal, so none
# of the code below is executed and neither Parent nor Child is actually defined.
'''
class Parent:
    def method(self):
        print("Parent method")

class Child(Parent):
    def method(self):
        super().method()  # Calls the method of the parent class
        print("Child method")

child = Child()
child.method()
''' 

In [None]:
class MyModel(tf.Module):
  """Linear model f(x) = w*x + b with a scalar weight and a scalar bias."""

  def __init__(self, **kwargs):
    # Forward any keyword arguments to tf.Module unchanged.
    super().__init__(**kwargs)

    # Starting values for the weight and the bias.
    self.w = tf.Variable(5.0)
    self.b = tf.Variable(0.0)

  def __call__(self, x):
    """Return the model prediction for input ``x``."""
    scaled_input = self.w*x
    return scaled_input + self.b

In [None]:
# Instantiate the model; the training setup cell later creates a fresh instance again.
model = MyModel()

In [None]:
########  Loss function
def MSE_loss(target_y, predicted_y):
  """Return the mean squared error between targets and predictions."""
  # Residuals are squared first, then averaged over every element.
  return tf.reduce_mean(tf.square(target_y - predicted_y))

In [None]:
def train(model, x, y, learning_rate):
  """Apply one gradient-descent update to ``model.w`` and ``model.b``.

  The loss is the MSE between ``y`` and ``model(x)``; each parameter is moved
  against its gradient, scaled by ``learning_rate``.
  """
  # Record the forward pass so the loss can be differentiated.
  with tf.GradientTape() as tape:
    predictions = model(x)
    loss = MSE_loss(y, predictions)

  grad_w, grad_b = tape.gradient(loss, [model.w, model.b])

  # assign_sub updates a variable in place by subtracting the given tensor
  # from its current value; the weight is updated first, then the bias.
  for parameter, gradient in ((model.w, grad_w), (model.b, grad_b)):
    parameter.assign_sub(learning_rate * gradient)

In [None]:
# Fresh model, so training starts from the initial w=5.0 and b=0.0.
model = MyModel()

# Per-epoch history of the parameters, filled by the training loop.
Ws, bs = [], []

epochs = 10*2

learning_rate = 0.1

# Snapshot of the untrained parameters and loss, for reference.
w = model.w.numpy()
b = model.b.numpy()

init_loss = MSE_loss(y, model(x)).numpy()

print(f"Initial W: {w}, initial bias: {b}, initial_loss: {init_loss}")

In [None]:
# Each epoch is one full-batch gradient step on the whole data set.
for epoch in range(epochs):
  train(model, x, y, learning_rate)

  # Record the parameters after the update so they can be plotted later.
  Ws.append(model.w.numpy())
  bs.append(model.b.numpy())

  # Loss re-evaluated with the updated parameters.
  current_loss = MSE_loss(y, model(x))

  print(f"For epoch: {epoch}, W: {Ws[-1]}, b: {bs[-1]}, current_loss: {current_loss}")

In [None]:
# NOTE(review): `plt` is never imported in the visible notebook; this is presumably
# matplotlib.pyplot, and `import matplotlib.pyplot as plt` should be added to the
# import cell at the top, otherwise this cell raises NameError on a fresh kernel.

# Learned W (solid red) and b (solid blue) after each epoch.
plt.plot(range(epochs), Ws, 'r', range(epochs), bs, "b")

# True values as dashed horizontal lines in the matching colours.
plt.plot([TRUE_W] * epochs, "r--", [TRUE_B] * epochs, "b--")

# Legend entries follow the order in which the four lines were drawn.
plt.legend(["W", "b", "True W", "True B"])

plt.show()


 # 3 by 3 Tensors

In [4]:
# Example tensors (both 3x3)
tensor1 = tf.constant([[1, 2, 3],
                       [4, 5, 6],
                       [7, 8, 9]])

tensor2 = tf.constant([[9, 8, 7],
                       [6, 5, 4],
                       [3, 2, 1]])

# With axes=1 on two matrices this is the ordinary matrix product
# (rows of tensor1 times columns of tensor2), e.g. entry [0, 0] = 1*9 + 2*6 + 3*3 = 30.
dot_product = tf.tensordot(tensor1, tensor2, axes=1)
dot_product

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 30,  24,  18],
       [ 84,  69,  54],
       [138, 114,  90]])>

In [5]:
####################################     2D convolution   ##########################################
####################################################################################################
#####################################################################################################
#####################################################################################################


# Example input tensor (4D tensor: batch_size, height, width, channels)
# As written, the literal has shape (1, 3, 3, 1).
input_tensor = tf.constant([
    [
        [[1.0], [2.0], [3.0]],
        [[4.0], [5.0], [6.0]],
        [[7.0], [8.0], [9.0]]
    ]
], dtype=tf.float32)

# Example filter/kernel tensor (4D tensor: height, width, input_channels, output_channels)
# As written, the literal has shape (3, 3, 1, 1); every kernel row is [1, 0, -1].
filter_tensor = tf.constant([
    [
        [[1.0]], 
        [[0.0]], 
        [[-1.0]]
    ],
    [
        [[1.0]], 
        [[0.0]], 
        [[-1.0]]
    ],
    [
        [[1.0]], 
        [[0.0]], 
        [[-1.0]]
    ]
], dtype=tf.float32)

# Perform convolution with stride 1 and no padding ('VALID').
# The result is only stored: the cell has no trailing expression, so nothing is displayed.
convolution_result = tf.nn.conv2d(input=input_tensor, filters=filter_tensor, strides=[1, 1, 1, 1], padding='VALID')

In [6]:
###################################     3D convolution   ##########################################
####################################################################################################
#####################################################################################################
#####################################################################################################


# Example input tensor (5D tensor: batch_size, depth, height, width, channels)
# As written, the literal has shape (1, 1, 3, 3, 1).
input_tensor = tf.constant([[[[[1.0], [2.0], [3.0]],
                              [[4.0], [5.0], [6.0]],
                              [[7.0], [8.0], [9.0]]]]], dtype=tf.float32)

# Example filter/kernel tensor (5D tensor: depth, height, width, input_channels, output_channels)
# As written, the literal has shape (1, 3, 3, 1, 1); every kernel row is [1, 0, -1].
filter_tensor = tf.constant([[[[[1.0]], 
                                [[0.0]], 
                                [[-1.0]]],

                               [[[1.0]], 
                                [[0.0]], 
                                [[-1.0]]],

                               [[[1.0]], 
                                [[0.0]], 
                                [[-1.0]]]]], dtype=tf.float32)

# Perform 3D convolution with stride 1 and no padding ('VALID').
# The kernel covers the whole input, so a single value results:
# (1-3) + (4-6) + (7-9) = -6.
convolution_result = tf.nn.conv3d(input=input_tensor, filters=filter_tensor, strides=[1, 1, 1, 1, 1], padding='VALID')
convolution_result


<tf.Tensor: shape=(1, 1, 1, 1, 1), dtype=float32, numpy=array([[[[[-6.]]]]], dtype=float32)>

In [7]:
####################################     Pooling   ##########################################
####################################################################################################
#####################################################################################################
#####################################################################################################
# Example input tensor; as written, the literal has shape (1, 2, 2, 2).
input_tensor = tf.constant([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]])

# Perform max pooling with a 2x2 window and stride 2.
# The maximum is taken separately for each of the two values in the last axis
# (7 and 8), which is why the output has shape (1, 1, 1, 2).
pooled_output = tf.nn.pool(input=input_tensor, window_shape=[2, 2], pooling_type='MAX', padding='VALID', strides=[2, 2])
pooled_output

<tf.Tensor: shape=(1, 1, 1, 2), dtype=float32, numpy=array([[[[7., 8.]]]], dtype=float32)>

In [None]:
####################################     padding    ##########################################
####################################################################################################
#####################################################################################################
#####################################################################################################
# Example input tensor (same literal as in the pooling example; shape (1, 2, 2, 2)).
input_tensor = tf.constant([[[[1.0, 2.0], [3.0, 4.0]], [[5.0, 6.0], [7.0, 8.0]]]])

# Perform max pooling with padding='SAME' instead of 'VALID'.
# NOTE(review): with a 2x2 input, a 2x2 window and stride 2, 'SAME' presumably gives
# the same single output position as 'VALID'; a larger or odd-sized input would
# be needed to actually show the effect of padding. Confirm by running.
# The result is only stored: the cell has no trailing expression to display it.
pooled_output = tf.nn.pool(input=input_tensor, window_shape=[2, 2], pooling_type='MAX', padding='SAME', strides=[2, 2])

In [None]:
####################################     Dilation   ##########################################
####################################################################################################
#####################################################################################################
#####################################################################################################
# Example input tensor, shape (1, 3, 3, 1) = (batch_size, height, width, channels).
# The channel axis must be last and must match the filter's input_channels;
# the previous literal put the three values of each row on the channel axis instead.
input_tensor = tf.constant([[[[1.0], [2.0], [3.0]],
                             [[4.0], [5.0], [6.0]],
                             [[7.0], [8.0], [9.0]]]])

# Example filter/kernel tensor, shape (2, 2, 1, 1) =
# (height, width, input_channels, output_channels); every kernel row is [1, -1].
filter_tensor = tf.constant([[[[1.0]], [[-1.0]]],
                             [[[1.0]], [[-1.0]]]])

# Perform 2D convolution with dilation 2 along height and width.
# Dilation spreads the 2x2 kernel taps two pixels apart, so it spans a 3x3 area
# and samples only the four corners of the input, which fits under 'VALID':
# (1 - 3) + (7 - 9) = -4.
convolution_result = tf.nn.conv2d(input=input_tensor, filters=filter_tensor, strides=[1, 1, 1, 1], padding='VALID', dilations=[1, 2, 2, 1])
convolution_result  # expected value: [[[[-4.]]]]

# Additional 

### Ragged tensors

In [None]:
# Six ways of building a RaggedTensor from one flat `values` list; they differ
# only in how the row boundaries are described.

# row_splits: boundary indices into `values`, including the leading 0 and the final length.
# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_splits(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_splits=[0, 4, 4, 7, 8, 8]
)

# row_lengths: number of values in each row.
# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_lengths(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_lengths=[4, 0, 3, 1, 0]
)

# value_rowids: the row index of every value; nrows is given so the trailing empty row exists.
# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_value_rowids(
    values=[3, 1, 4, 1, 5, 9, 2, 6], value_rowids=[0, 0, 0, 0, 2, 2, 2, 3], nrows=5
)

# row_starts: index in `values` at which each row begins.
# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_starts(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_starts=[0, 4, 4, 7, 8]
)

# row_limits: index in `values` at which each row ends (exclusive).
# The tensor value is <tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], []]>
tf.RaggedTensor.from_row_limits(
    values=[3, 1, 4, 1, 5, 9, 2, 6], row_limits=[4, 4, 7, 8, 8]
)

# uniform_row_length: every row has the same number of values.
# The tensor value is <tf.RaggedTensor [[3, 1], [4, 1], [5, 9], [2, 6]]>
tf.RaggedTensor.from_uniform_row_length(
    values=[3, 1, 4, 1, 5, 9, 2, 6], uniform_row_length=2
)

### Sparse tensors 

In [None]:
# Defines a sparse tensor representing the following dense tensor:
# [[1, 0, 0, 0]
#  [0, 0, 2, 0]
#  [0, 0, 0, 0]]
# `indices` lists the coordinates of the non-zero entries, `values` their values
# in the same order, and `dense_shape` the shape of the equivalent dense tensor.
# The class is referenced through the `tf` namespace: a bare `SparseTensor`
# is not defined in this notebook and would raise NameError.
tf.sparse.SparseTensor(indices=[[0, 0], [1, 2]], values=[1, 2], dense_shape=[3, 4])

### Variables 

In [None]:
# Create a variable.
v = tf.Variable(1.)

# Assign 2.0 to the variable.
v.assign(2.)

# Add 0.5 to the variable.
v.assign_add(0.5)

# Subtract 0.5 from the variable.
v.assign_sub(0.5)

# Matmul a variable and a constant tensor: (2x1)(1x2) -> 2x2.
# NOTE: this rebinds the notebook-level names `w` and `x` used by earlier cells.
w = tf.Variable([[1.], [2.]])
x = tf.constant([[3., 4.]])
tf.matmul(w, x)

# Variable can only be created once within a tf.function.
class M(tf.Module):
    """Module that creates its variable on first use and then multiplies it by the input."""

    @tf.function
    def __call__(self, x):
        # Guard so the variable is created only when the attribute does not exist yet;
        # every later call reuses self.v.
        if not hasattr(self, "v"):    # Or set self.v to None in __init__
            self.v = tf.Variable(x)
        return self.v * x

###  tf.data 

In [None]:
# Load dataset using range (same start/stop/step semantics as Python's range).
tf.data.Dataset.range(5)                               # [0, 1, 2, 3, 4]
tf.data.Dataset.range(2, 5)                            # [2, 3, 4]
tf.data.Dataset.range(1, 5, 2)                         # [1, 3]
tf.data.Dataset.range(1, 5, -2)                        # []
tf.data.Dataset.range(5, 1)                            # []
tf.data.Dataset.range(5, 1, -2)                        # [5, 3]
tf.data.Dataset.range(2, 5, output_type=tf.int32)      # [2, 3, 4]
tf.data.Dataset.range(1, 5, 2, output_type=tf.float32) # [1.0, 3.0]

# Load tf data from a Python list (one element per list item).
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])

# Load dataset from txt files.
# NOTE: the file names in the next three examples are placeholders, not files shipped with the notebook.
dataset = tf.data.TextLineDataset(["file1.txt", "file2.txt"])

# Load data from tfrecords files.
dataset = tf.data.TFRecordDataset(["file1.tfrecords", "file2.tfrecords"])

# Create a dataset using all files matching a pattern.
dataset = tf.data.Dataset.list_files("/path/*.txt")

# Split dataset into batches.
dataset = tf.data.Dataset.range(8)
dataset = dataset.batch(3)  # The dataset value is [[0, 1, 2], [3, 4, 5], [6, 7]]

# Transform a dataset (applied to the batched dataset above).
dataset = dataset.map(lambda x: x*2)  # [[0, 2, 4], [6, 8, 10], [12, 14]]

# Prefetch a dataset.
dataset = tf.data.Dataset.range(3)
dataset = dataset.prefetch(2)

# Repeat a dataset.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3])
dataset = dataset.repeat(3)  # [1, 2, 3, 1, 2, 3, 1, 2, 3]

# Shuffle a dataset.
dataset = tf.data.Dataset.range(3)
dataset = dataset.shuffle(3, reshuffle_each_iteration=False)
dataset = dataset.repeat(2)  # e.g. [1, 0, 2, 1, 0, 2] (one possible random order)

# Concat a dataset.
a = tf.data.Dataset.range(1, 4) # [1, 2, 3]
b = tf.data.Dataset.range(4, 8) # [4, 5, 6, 7]
ds = a.concatenate(b)           # [1, 2, 3, 4, 5, 6, 7]

# Zip two datasets.
a = tf.data.Dataset.range(1, 4)  # [1, 2, 3]
b = tf.data.Dataset.range(4, 7)  # [4, 5, 6]
ds = tf.data.Dataset.zip((a, b)) # [(1, 4), (2, 5), (3, 6)]

# Iterating data in tf.data.
# NOTE: this iterates `dataset` (the shuffled and repeated range from above),
# not the concatenated/zipped `ds`.
for element in dataset:
    print(element)

### tf.math 

In [None]:
# Get absolute values.
x = tf.constant([-2.25, 3.25])
tf.abs(x) # [2.25, 3.25]

# Add a scalar and a list.
tf.add([1, 2, 3, 4, 5], 1) # [2, 3, 4, 5, 6]

# Add two tensors.
x = tf.convert_to_tensor([1, 2, 3, 4, 5])
y = tf.convert_to_tensor(1)
z = x + y # [2, 3, 4, 5, 6]

# Add a list and a tensor.
x = [1, 2, 3, 4, 5]
y = tf.constant([1, 2, 3, 4, 5])
tf.add(x, y) # [2, 4, 6, 8, 10]

# Add n tensors.
a = tf.constant([[3, 5], [4, 8]])
b = tf.constant([[1, 6], [2, 9]])
tf.math.add_n([a, b, a]) # [[7, 16], [10, 25]]

# Get the cumulative sum.
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x) # [2, 6, 12, 20]

# Get the cumulative sum for certain axis.
y = tf.constant([[2, 4, 6, 8], [1, 3, 5, 7]])
tf.cumsum(y, axis=0) # [[2, 4, 6, 8], [3, 7, 11, 15]]
tf.cumsum(y, axis=1) # [[2, 6, 12, 20], [1, 4, 9, 16]]

# Get the exclusive cumulative sum (each position excludes its own element).
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x, exclusive=True) # [0, 2, 6, 12]

# Get the reverse cumulative sum (accumulated from the last element backwards).
x = tf.constant([2, 4, 6, 8])
tf.cumsum(x, reverse=True) # [20, 18, 14, 8]   ([18, 14, 8, 0] would additionally need exclusive=True)

# Divide tensors.
x = tf.constant([16, 12, 11])
y = tf.constant([4, 6, 2])
tf.divide(x, y) # [4.0, 2.0, 5.5]

# Compare a tensor with a scalar element by element.
x = tf.constant([2, 4])
y = tf.constant(2)
tf.math.equal(x, y) # [True, False]

# Compare two tensors of the same shape element by element.
x = tf.constant([2, 4])
y = tf.constant([2, 4])
tf.math.equal(x, y) # [True, True]

# Multiply tensors.
x = tf.constant(([1, 2, 3, 4]))
tf.math.multiply(x, x) # [1, 4, 9, 16]

# Multiply tensors of different shapes with broadcast: (1, 2) * (2, 1) -> (2, 2).
x = tf.ones([1, 2]);
y = tf.ones([2, 1]);
x * y  # [[1.0, 1.0], [1.0, 1.0]]

# Compute the power of one value to another, element by element.
x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[8, 16], [2, 3]])
tf.pow(x, y)  # [[256, 65536], [9, 27]]

# Compute sigmoid of a tensor.
x = tf.constant([0.0, 1.0, 50.0, 100.0])
tf.math.sigmoid(x) # [0.5, 0.7310586, 1.0, 1.0]

### tf.linalg

In [None]:
# Transpose a matrix: (2x3) -> (3x2).
x = tf.constant([[1, 2, 3], [4, 5, 6]])
tf.linalg.matrix_transpose(x)  # [[1, 4], [2, 5], [3, 6]]

# Matmul two tensors: (2x3)(3x2) -> (2x2).
a = tf.constant([[1, 2, 3], [4, 5, 6]])
b = tf.constant([[7, 8], [9, 10], [11, 12]])
c = tf.matmul(a, b) # [[58, 64], [139, 154]]

### tf.distribute

In [None]:
################################################################################
# Define a mirrored strategy, and create a variable in it.
# The variable will be mirrored on both GPU:0 and GPU:1.
# NOTE: the device list is hard-coded, so this assumes a machine with two GPUs.
################################################################################
strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"])
with strategy.scope():
    x = tf.Variable(1.)


################################################################################
# Variables (e.g., x in this example) created in tf.function is still mirrored.
################################################################################
# `x` is rebound to a plain list that acts as a holder, so the function below
# creates the variable only on its first call and returns the same one afterwards.
x = []
@tf.function  # Wrap the function with tf.function.
def create_variable():
    if not x:
        x.append(tf.Variable(1.))
    return x[0]
strategy = tf.distribute.MirroredStrategy(["GPU:0", "GPU:1"])
with strategy.scope():
    _ = create_variable()


################################################################################
# Dataset can also be mirrored to multiple devices within the MirroredStrategy.
################################################################################
# NOTE(review): this cell is a template rather than runnable code as written:
#   - `result` inside replica_fn is never assigned (the body is only a placeholder comment);
#   - `EPOCHS` is not defined anywhere in the visible notebook;
#   - `dataset` passed to experimental_distribute_dataset is whatever the tf.data
#     cell last bound to that name.
my_strategy = tf.distribute.MirroredStrategy()
with my_strategy.scope():
  @tf.function
  def distribute_train_epoch(dataset):
    def replica_fn(input):
      # process input and return result
      return result

    # Sum the per-replica results of every batch into one running total.
    total_result = 0
    for x in dataset:
      per_replica_result = my_strategy.run(replica_fn, args=(x,))
      total_result += my_strategy.reduce(tf.distribute.ReduceOp.SUM,
                                         per_replica_result, axis=None)
    return total_result

  dist_dataset = my_strategy.experimental_distribute_dataset(dataset)
  for _ in range(EPOCHS):
    train_result = distribute_train_epoch(dist_dataset)


################################################################################
# MultiWorkerMirroredStrategy is used for distributed training.
################################################################################
# NOTE(review): this section cannot run at this point in the notebook as written:
#   - `optimizer` and `iterator` are first assigned in the TPU section further down;
#   - `NUM_STEP` is not defined anywhere in the visible notebook;
#   - `model` still refers to the MyModel instance from the regression example,
#     whose __call__ accepts only `x`, not the `training=True` argument used here.
strategy = tf.distribute.MultiWorkerMirroredStrategy()

@tf.function
def train_step(iterator):
    # One training step: pull the next batch and run step_fn through the strategy.
    def step_fn(inputs):
        features, labels = inputs
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            logits = model(features, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    strategy.run(step_fn, args=(next(iterator),))

for _ in range(NUM_STEP):
    train_step(iterator)


################################################################################
# Use TPUStrategy to train a model on TPUs.
################################################################################
# Resolve the TPU cluster, connect to it and initialise the TPU system before
# the strategy is created.
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
tf.tpu.experimental.initialize_tpu_system(resolver)
strategy = tf.distribute.TPUStrategy(resolver)

# The model and the optimizer are created inside the strategy scope.
# NOTE: this rebinds the notebook-level name `model` to a Keras Sequential model.
with strategy.scope():
    model = tf.keras.Sequential([tf.keras.layers.Dense(2, input_shape=(5,))])
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.1)

def dataset_fn(ctx):
    """Build the input dataset handed to ``distribute_datasets_from_function``.

    Args:
        ctx: Input context supplied by the strategy; not used by this example.

    Returns:
        An endlessly repeating dataset of (features, label) pairs in batches of
        one, built from two random examples with five float32 features each and
        an integer label of 0 or 1.
    """
    # Local import: numpy is used here but is not imported in the notebook's
    # import cell, so relying on a global `np` raises NameError.
    import numpy as np

    x = np.random.random((2, 5)).astype(np.float32)
    y = np.random.randint(2, size=(2, 1))
    dataset = tf.data.Dataset.from_tensor_slices((x, y))
    return dataset.repeat().batch(1, drop_remainder=True)

# Each replica builds its own dataset by calling dataset_fn.
dist_dataset = strategy.distribute_datasets_from_function(dataset_fn)
iterator = iter(dist_dataset)

# NOTE: this redefines train_step; an identical function with the same name is
# already defined in the MultiWorkerMirroredStrategy section of this cell.
@tf.function()
def train_step(iterator):
    # One training step: pull the next batch and run step_fn through the strategy.
    def step_fn(inputs):
        features, labels = inputs
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            logits = model(features, training=True)
            loss = tf.keras.losses.sparse_categorical_crossentropy(labels, logits)

        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

    strategy.run(step_fn, args=(next(iterator),))

train_step(iterator)