# Distributed Training with TensorFlow

In [11]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

Load the dataset

In [3]:
# Download (or load from the local Keras cache) CIFAR-10, then unpack the
# train and test splits into image arrays (X_*) and label arrays (y_*).
train_split, test_split = tf.keras.datasets.cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

In [4]:
# Check the dimensions of the training image array.
X_train.shape

(50000, 32, 32, 3)

In [5]:
# List every device TensorFlow can see. Uses the stable
# tf.config.list_physical_devices API rather than the deprecated
# tf.config.experimental alias.
tf.config.list_physical_devices()

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]

In [6]:
# Class names; the position in this list is the integer label.
classes = [
    'airplane',
    'automobile',
    'bird',
    'cat',
    'deer',
    'dog',
    'frog',
    'horse',
    'ship',
    'truck',
]

In [7]:
# Peek at the first five training labels.
y_train[:5]

array([[6],
       [9],
       [9],
       [4],
       [1]], dtype=uint8)

In [19]:
# Rescale pixel values by 1/255. The arrays are cast to float32 first:
# dividing the integer image arrays directly would produce float64 results,
# doubling memory use only for Keras to cast them back to float32.
X_train_Scaled = X_train.astype('float32') / 255.0
X_test_Scaled = X_test.astype('float32') / 255.0

In [20]:
# One-hot encode the integer labels into 10-wide float32 vectors, using the
# same settings for the train and test splits.
_one_hot_options = dict(num_classes=10, dtype='float32')
y_train_categorical = keras.utils.to_categorical(y_train, **_one_hot_options)
y_test_categorical = keras.utils.to_categorical(y_test, **_one_hot_options)

In [21]:
# Peek at the first five one-hot encoded training labels.
y_train_categorical[:5]

array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.]], dtype=float32)

In [22]:
def get_model():
    """Build and compile a fully connected image classifier.

    The network flattens each 32x32x3 input and feeds it through two ReLU
    hidden layers (3000 and 1000 units) into a 10-unit output layer.

    The output layer uses softmax rather than sigmoid: the model is trained
    with categorical cross-entropy on one-hot labels, which expects the ten
    outputs to form a single probability distribution over mutually
    exclusive classes. Sigmoid scores each class independently and does not
    provide that.

    Returns:
        A compiled ``keras.Sequential`` model (SGD optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    model = keras.Sequential([
        keras.layers.Flatten(input_shape=(32, 32, 3)),
        keras.layers.Dense(3000, activation='relu'),
        keras.layers.Dense(1000, activation='relu'),
        # Softmax pairs with categorical_crossentropy for single-label,
        # multi-class targets.
        keras.layers.Dense(10, activation='softmax'),
    ])

    model.compile(
        optimizer='SGD',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

In [24]:
# Create tf.data datasets from the scaled images and one-hot labels.
train_pairs = (X_train_Scaled, y_train_categorical)
test_pairs = (X_test_Scaled, y_test_categorical)
train_tf_dataset = tf.data.Dataset.from_tensor_slices(train_pairs)
test_tf_dataset = tf.data.Dataset.from_tensor_slices(test_pairs)

In [25]:
# Confirm what kind of object from_tensor_slices returned.
type(train_tf_dataset)

tensorflow.python.data.ops.from_tensor_slices_op.TensorSliceDataset

In [26]:
# Create the distribution strategy; the model is later built inside its scope.
strategy=tf.distribute.MirroredStrategy()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)


In [27]:
# Number of replicas the strategy keeps in sync; used to size the global batch.
strategy.num_replicas_in_sync

1

In [28]:
# Each replica processes 250 samples per step, so the global batch size
# grows with the number of replicas the strategy keeps in sync.
BATCH_SIZE_PER_REPLICA = 250
BATCH_SIZE = strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

In [29]:
# Group the training samples into global batches, then prefetch with an
# auto-tuned buffer so input preparation can overlap with training.
batched_train = train_tf_dataset.batch(BATCH_SIZE)
train_dataset = batched_train.prefetch(tf.data.AUTOTUNE)

In [31]:
# Batch the held-out samples with the same global batch size as training.
test_dataset = test_tf_dataset.batch(batch_size=BATCH_SIZE)

In [32]:
#Run Distributed Training

In [None]:
%%timeit -n1 -r1
with strategy.scope():
    gpu_model=get_model()
    gpu_model.fit(train_dataset,epochs=50 )

In [33]:
# Total run time with GPU and distributed training: 47.6 s

In [None]:
%%timeit -n1 -r1
with tf.device('/CPU:0'):
    cpu_model=get_model()
    cpu_model.fit(train_dataset,epochs=50 )

In [None]:
# Total run time on the CPU: 1 minute 57 s (compared with 47.6 s for the GPU distributed run)