In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers

# Set the GPU device to use (optional)
# Uncomment and modify the following line accordingly:
# tf.config.set_visible_devices(tf.config.list_physical_devices('GPU')[<GPU_ID>], 'GPU')

# --- Data -------------------------------------------------------------------
# Load MNIST, cast the images to float32, add an explicit channel axis so each
# image is 28x28x1 (the input shape declared on the Conv2D layer below), and
# divide by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32').reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.astype('float32').reshape(-1, 28, 28, 1) / 255.0

# --- Model ------------------------------------------------------------------
# One convolution + pooling stage, flattened into a 64-unit hidden layer and a
# 10-way softmax output.
model = tf.keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# Labels are passed to fit() exactly as load_data() returned them (no one-hot
# step in this cell), which is why the sparse cross-entropy loss is used.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# --- Train / evaluate / persist ----------------------------------------------
model.fit(x_train, y_train, batch_size=32, epochs=10)

# evaluate() yields the loss followed by the metrics listed in compile().
evaluation = model.evaluate(x_test, y_test)
test_loss, test_acc = evaluation
print('Test accuracy:', test_acc)

# Write the trained model to an HDF5 file in the working directory.
model.save('my_model.h5')


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test accuracy: 0.98580002784729


**Distributed Computing in TensorFlow:**

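In this example, we first define the distributed strategy using tf.distribute.MultiWorkerMirroredStrategy (also available under the older, deprecated alias tf.distribute.experimental.MultiWorkerMirroredStrategy). This strategy is suitable for training models on multiple machines with multiple GPUs.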

Next, we load and preprocess the MNIST dataset as before. Then, we create a TensorFlow tf.data.Dataset object from the training data and batch it.

Within the strategy.scope(), we define the model architecture and compile the model. The strategy.scope() ensures that the model and optimizer are replicated across the available workers.

We fit the model to the training dataset using the distributed strategy by passing in the train_dataset directly to the model.fit() function.

Finally, we save the trained model for future use.

To run this code with distributed computing, you'll need to set up a TensorFlow distributed cluster with multiple workers. Each worker should have access to GPUs for improved performance. The TensorFlow cluster configuration and setup depend on your specific infrastructure and deployment environment.

Please note that the example below assumes a distributed setup with multiple workers, and the code may need further modifications to match your specific distributed environment.

Remember to install the necessary dependencies and ensure that TensorFlow and the required packages are correctly set up before running the code.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers

# Define the distributed strategy.
# Use the stable `tf.distribute.MultiWorkerMirroredStrategy`; the
# `tf.distribute.experimental` alias used previously is deprecated.
# The cluster layout is read from the TF_CONFIG environment variable, so it
# has to be set on every worker before this line runs. Without TF_CONFIG the
# strategy falls back to a single local worker.
strategy = tf.distribute.MultiWorkerMirroredStrategy()

# Load and preprocess the MNIST dataset: add a channel axis, cast to float32
# and divide by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# Distribute the training data.
# The dataset is batched with the GLOBAL batch size, which tf.distribute then
# splits across replicas. Scaling by the replica count keeps 64 examples per
# replica no matter how many workers/GPUs join (identical to the previous
# behaviour when there is a single replica).
PER_REPLICA_BATCH_SIZE = 64
global_batch_size = PER_REPLICA_BATCH_SIZE * strategy.num_replicas_in_sync

# The data comes from in-memory arrays rather than files, so file-based
# auto-sharding cannot apply; request element-wise (DATA) sharding explicitly.
shard_options = tf.data.Options()
shard_options.experimental_distribute.auto_shard_policy = (
    tf.data.experimental.AutoShardPolicy.DATA
)
train_dataset = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .batch(global_batch_size)
    .with_options(shard_options)
)

# Define the model architecture.
# Model construction and compile() must happen inside the scope so the
# variables (including the optimizer's) are created as mirrored variables.
with strategy.scope():
    model = tf.keras.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(10, activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Train the model with distributed strategy
model.fit(train_dataset, epochs=10)

# Save the trained model.
# Every worker executes this cell, so writing one fixed path from all of them
# would collide on a shared filesystem. Only the chief keeps
# 'distributed_model.h5'; the other workers still call save() (all workers
# must run the same steps) but write to a task-specific path and delete it.
resolver = strategy.cluster_resolver
task_type, task_id = resolver.task_type, resolver.task_id
has_chief_job = 'chief' in resolver.cluster_spec().as_dict()
is_chief = (
    task_type is None  # no TF_CONFIG: single local worker
    or task_type == 'chief'
    or (task_type == 'worker' and task_id == 0 and not has_chief_job)
)

save_path = (
    'distributed_model.h5'
    if is_chief
    else f'{task_type}_{task_id}_distributed_model.h5'
)
model.save(save_path)
if not is_chief:
    tf.io.gfile.remove(save_path)




ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/IPython/core/interactiveshell.py", line 3553, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-5-eaca38ed509b>", line 31, in <cell line: 31>
    model.fit(train_dataset, epochs=10)
  File "/usr/local/lib/python3.10/dist-packages/keras/utils/traceback_utils.py", line 65, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/keras/engine/training.py", line 1685, in fit
    tmp_logs = self.train_function(iterator)
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/util/traceback_utils.py", line 150, in error_handler
    return fn(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_function/polymorphic_function.py", line 894, in __call__
    result = self._call(*args, **kwds)
  File "/usr/local/lib/python3.10/dist-packages/tensorflow/python/eager/polymorphic_functi

TypeError: ignored

Example of tf.distribute

In this example, we first define the distributed strategy using tf.distribute.MirroredStrategy(). This strategy is suitable for training models on multiple GPUs within a single machine.

Next, we load and preprocess the MNIST dataset as before. Then, we create a TensorFlow tf.data.Dataset object from the training data and batch it.

Within the strategy.scope(), we define the model architecture and compile the model. The strategy.scope() ensures that the model and optimizer are replicated across the available GPUs.

We fit the model to the training dataset using the distributed strategy by passing in the train_dataset directly to the model.fit() function.

Finally, we save the trained model for future use.

To run this code with distributed computing on multiple GPUs, make sure you have multiple GPUs available on your machine. TensorFlow will automatically assign each GPU to a separate replica of the model during training.

Please note that the example below assumes a single machine with multiple GPUs, and the code may need further modifications to match your specific distributed environment.

Ensure that you have the necessary dependencies installed, including TensorFlow and the required packages, and that your GPUs are properly configured before running the code.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers

# Strategy used for this cell's training run.
strategy = tf.distribute.MirroredStrategy()

# MNIST images: cast to float32, reshape to 28x28x1 and divide by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32').reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.astype('float32').reshape(-1, 28, 28, 1) / 255.0

# Wrap the training arrays in a tf.data pipeline that yields batches of 64.
train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_examples.batch(64)

# Build and compile inside the strategy scope so the model and optimizer are
# created under the strategy.
with strategy.scope():
    model = tf.keras.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

# A single pass over the batched dataset.
model.fit(train_dataset, epochs=1)

# Persist the trained model as an HDF5 file in the working directory.
model.save('distributed_model.h5')


Epoch 9/10
Epoch 10/10


**Training on a single CPU device with OneDeviceStrategy**


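In this example, we use tf.distribute.OneDeviceStrategy("CPU:0") to define the strategy, specifying that all variables and computation are placed on a single device, the CPU. Nothing is actually distributed here; this strategy is mainly useful for testing code written against the tf.distribute API before switching to a strategy that uses multiple devices or machines.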

Next, we load and preprocess the MNIST dataset as before. Then, we create a TensorFlow tf.data.Dataset object from the training data and batch it.

Within the strategy.scope(), we define the model architecture and compile the model. The strategy.scope() ensures that the model and optimizer are placed on the specified CPU.

We fit the model to the training dataset using the distributed strategy by passing in the train_dataset directly to the model.fit() function.

Finally, we save the trained model for future use.

This code does not require multiple CPUs: OneDeviceStrategy runs everything on the single logical device "CPU:0". TensorFlow exposes the machine's CPU cores as that one logical device and can still use several cores through its internal threading, but there is only one replica of the model.

Please note that the example below assumes a single machine and a single logical CPU device; to train on several devices or machines, switch to a strategy such as MirroredStrategy or MultiWorkerMirroredStrategy and adapt the code to your specific environment.

Ensure that you have the necessary dependencies installed, including TensorFlow and the required packages, and that your CPUs are properly configured before running the code.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers

# Strategy that targets the single device named "CPU:0".
strategy = tf.distribute.OneDeviceStrategy("CPU:0")

# MNIST images: cast to float32, reshape to 28x28x1 and divide by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32').reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.astype('float32').reshape(-1, 28, 28, 1) / 255.0

# Training input pipeline: (image, label) pairs grouped into batches of 64.
train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_examples.batch(64)

# Create and compile the network under the strategy scope.
with strategy.scope():
    model = tf.keras.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

# Run one epoch over the batched dataset.
model.fit(train_dataset, epochs=1)

# Store the trained model as an HDF5 file in the working directory.
model.save('distributed_model.h5')


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



**Distributed training across multiple GPUs**

In this example, we use tf.distribute.MirroredStrategy() to define the distributed strategy, which will distribute the training across multiple GPUs.

Next, we load and preprocess the MNIST dataset as before. Then, we create a TensorFlow tf.data.Dataset object from the training data and batch it.

Within the strategy.scope(), we define the model architecture and compile the model. The strategy.scope() ensures that the model and optimizer are replicated across the available GPUs.

We fit the model to the training dataset using the distributed strategy by passing in the train_dataset directly to the model.fit() function.

Finally, we save the trained model for future use.

To run this code with distributed training across multiple GPUs, make sure you have multiple GPUs available on your machine. TensorFlow will automatically assign each GPU to a separate replica of the model during training.

Please note that the example below assumes a single machine with multiple GPUs, and the code may need further modifications to match your specific distributed environment.

Ensure that you have the necessary dependencies installed, including TensorFlow and the required packages, and that your GPUs are properly configured before running the code.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from tensorflow.keras import layers

# Strategy used for this cell's training run.
strategy = tf.distribute.MirroredStrategy()

# Load MNIST; cast to float32, reshape to 28x28x1 and divide by 255.0.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.astype('float32').reshape(-1, 28, 28, 1) / 255.0
x_test = x_test.astype('float32').reshape(-1, 28, 28, 1) / 255.0

# Slice the training arrays into a tf.data.Dataset, then batch by 64.
train_examples = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_examples.batch(64)

# The model and its optimizer are created inside the strategy scope.
with strategy.scope():
    model = tf.keras.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ])
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

# Train for a single epoch on the batched dataset.
model.fit(train_dataset, epochs=1)

# Save the result to an HDF5 file in the working directory.
model.save('distributed_model.h5')




