<a href="https://colab.research.google.com/github/prikshit-2000/Tensorflow/blob/main/GPU_Multi_Mirrored_Strategy_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import tensorflow as tf
import os
import tensorflow_datasets as tfds
import numpy as np

In [4]:
os.environ['TF_MIN_GPU_MULTIPROCESSOR_COUNT'] = '4'
strategy = tf.distribute.MirroredStrategy(cross_device_ops=tf.distribute.HierarchicalCopyAllReduce())
print("Num of devices : {}".format(strategy.num_replicas_in_sync))

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:GPU:0',)
Num of devices : 1


In [6]:
# Get the data
fashion_mnist = tf.keras.datasets.fashion_mnist
(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

# Adding a dimension to the array -> new shape == (28, 28, 1)
# We are doing this because the first layer in our model is a convolutional
# layer and it requires a 4D input (batch_size, height, width, channels).
# batch_size dimension will be added later on.
train_images = train_images[..., None]
test_images = test_images[..., None]

# Normalize the images to [0, 1] range.
train_images = train_images / np.float32(255)
test_images = test_images / np.float32(255)

# Batch the input data
BUFFER_SIZE = len(train_images)
BATCH_SIZE_PER_REPLICA = 64
GLOBAL_BATCH_SIZE = BATCH_SIZE_PER_REPLICA * strategy.num_replicas_in_sync

# Create Datasets from the batches
train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_labels)).shuffle(BUFFER_SIZE).batch(GLOBAL_BATCH_SIZE)
test_dataset = tf.data.Dataset.from_tensor_slices((test_images, test_labels)).batch(GLOBAL_BATCH_SIZE)

# Create Distributed Datasets from the datasets
train_dist_dataset = strategy.experimental_distribute_dataset(train_dataset)
test_dist_dataset = strategy.experimental_distribute_dataset(test_dataset)

In [7]:
# Create the model architecture
def create_model():
  model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(32, 3, activation='relu'),
      tf.keras.layers.MaxPooling2D(),
      tf.keras.layers.Conv2D(64, 3, activation='relu'),
      tf.keras.layers.MaxPooling2D(),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(64, activation='relu'),
      tf.keras.layers.Dense(10)
    ])
  return model

In [8]:
with strategy.scope():
  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True,reduction = tf.keras.losses.Reduction.NONE)

  def compute_loss(labels,predictions):

    per_example_loss = loss_object(labels,predictions)

    print(per_example_loss)
    return tf.nn.compute_average_loss(per_example_loss , global_batch_size=GLOBAL_BATCH_SIZE)

  test_loss = tf.keras.metrics.Mean(name = 'test_loss')

  train_accuracy = tf.keras.metrics.SparseTopKCategoricalAccuracy()
  test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()
  optimizer = tf.keras.optimizers.Adam()

  model  = create_model()
  

  

INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Reduce to /job:localhost/replica:0/task:0/device:CPU:0 then broadcast to ('/job:localhost/replica:0/task:0/device:CPU:0',).
INFO:tensorflow:Redu

In [21]:
@tf.function
def distributed_train_step(dataset_inputs):
  per_replica_loss = strategy.run(train_step , args = (dataset_inputs , ))
  return strategy.reduce(tf.distribute.ReduceOp.SUM,per_replica_loss,axis = None)


def train_step(inputs):
  images,labels = inputs
  with tf.GradientTape() as tape:
    predictions = model(images , training = True)
    loss = compute_loss(labels,predictions)

  grads = tape.gradient(loss, model.trainable_variables)

  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  train_accuracy.update_state(labels,predictions)
  return loss


@tf.function

def distributed_test_step(dataset_inputs):
  return strategy.run(test_step , args = (dataset_inputs , ))

def test_step(inputs):
  images,labels = inputs
  predictions = model(images, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss.update_state(t_loss)
  test_accuracy.update_state(labels, predictions)

In [22]:
EPOCHS = 10
for epoch in range(EPOCHS):
  # Do Training
  total_loss = 0.0
  num_batches = 0
  for batch in train_dist_dataset:
    total_loss += distributed_train_step(batch)
    num_batches += 1
  train_loss = total_loss / num_batches

  # Do Testing
  for batch in test_dist_dataset:
    distributed_test_step(batch)

  template = ("Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, " "Test Accuracy: {}")

  print (template.format(epoch+1, train_loss, train_accuracy.result()*100, test_loss.result(), test_accuracy.result()*100))

  test_loss.reset_states()
  train_accuracy.reset_states()
  test_accuracy.reset_states()

Tensor("sparse_categorical_crossentropy/weighted_loss/Mul:0", shape=(64,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)
Tensor("sparse_categorical_crossentropy/weighted_loss/Mul:0", shape=(32,), dtype=float32, device=/job:localhost/replica:0/task:0/device:GPU:0)
Epoch 1, Loss: 0.2855414152145386, Accuracy: 99.64778137207031, Test Loss: 0.30135786533355713, Test Accuracy: 89.25
Epoch 2, Loss: 0.2560100257396698, Accuracy: 99.90333557128906, Test Loss: 0.30213630199432373, Test Accuracy: 89.11000061035156
Epoch 3, Loss: 0.23390589654445648, Accuracy: 99.94166564941406, Test Loss: 0.2914637625217438, Test Accuracy: 89.45000457763672
Epoch 4, Loss: 0.21510154008865356, Accuracy: 99.95166778564453, Test Loss: 0.2709631621837616, Test Accuracy: 90.34000396728516
Epoch 5, Loss: 0.19550323486328125, Accuracy: 99.961669921875, Test Loss: 0.2542589604854584, Test Accuracy: 91.16999816894531
Epoch 6, Loss: 0.17982305586338043, Accuracy: 99.97000122070312, Test Loss: 0.25812