In [2]:
import tensorflow_datasets as tfds
import pydoop.hdfs as pydoop
from hops import hdfs
    
# Root directory handed to TFDS: the project's "TourData/" folder.
data_dir = hdfs.project_path()
tf_datasets = pydoop.path.abspath(data_dir + "TourData/")

# Load MNIST as (image, label) pairs together with the dataset metadata.
# TFDS stores the prepared data beneath `tf_datasets` and reuses it on
# subsequent calls.
datasets, info = tfds.load(
    name='mnist',
    data_dir=tf_datasets,
    with_info=True,
    as_supervised=True,
)

[1mDownloading and preparing dataset mnist/3.0.0 (download: 11.06 MiB, generated: Unknown size, total: 11.06 MiB) to hdfs://10.128.0.3:8020/Projects/demo_deep_learning_admin000/TourData/mnist/3.0.0...[0m

[1mDataset mnist downloaded and prepared to hdfs://10.128.0.3:8020/Projects/demo_deep_learning_admin000/TourData/mnist/3.0.0. Subsequent calls will reuse this data.[0m
Dl Completed...: 100%|##########| 4/4 [00:04<00:00,  1.15s/ file]

In [18]:
import tensorflow_datasets as tfds
import tensorflow as tf

from hops import hdfs
import pydoop.hdfs as pydoop

# Point TFDS at the dataset *root* (<project>/TourData/), not at the
# "<name>/<version>" sub-directory. TFDS appends "mnist/3.0.0" to `data_dir`
# on its own, so passing ".../TourData/mnist/3.0.0" would make it look in
# ".../TourData/mnist/3.0.0/mnist/3.0.0" and download the data a second time.
dataset_dir = hdfs.project_path()
data_dir = pydoop.path.abspath(dataset_dir + "TourData/")

# download=True only fetches the data when it is not already prepared
# under `data_dir`.
datasets, info = tfds.load(name='mnist',
                           with_info=True,
                           as_supervised=True,
                           download=True,
                           data_dir=data_dir)

In [19]:
# Sanity check: group the training split into batches of 10 and print the
# shape of the second component (the labels, given as_supervised=True) of
# the first batch.
datasets_train = datasets['train'].batch(10)

it = iter(datasets_train)
first_batch = next(it)
print(first_batch[1].numpy().shape)

# For comparison: the shape of a tensor built from a 3-element Python list.
print(tf.convert_to_tensor([0, 1, 2]).numpy().shape)

(10,)
(3,)


In [27]:
# Collect the TFRecord part files of the training and validation splits.
data_dir = hdfs.project_path()
train_filenames = tf.io.gfile.glob(
    pydoop.path.abspath(data_dir + "TourData/mnist/train/df-mnist_train.tfrecord")
    + "/part-r-*"
)
validation_filenames = tf.io.gfile.glob(
    pydoop.path.abspath(data_dir + "TourData/mnist/validation/df-mnist_test.tfrecord")
    + "/part-r-*"
)

# Input pipeline settings: the shuffle buffer holds four batches' worth of
# examples.
BATCH_SIZE = 32
SHUFFLE_SIZE = 4 * BATCH_SIZE

# Hyperparameters -- presumably consumed by a model defined elsewhere in the
# notebook; TODO confirm.
num_classes = 10
epochs = 3
kernel = 3
pool = 2
dropout = 0.5

# Input image dimensions (single channel).
img_rows = img_cols = 28
input_shape = (img_rows, img_cols, 1)


def input_fn(filenames, BATCH_SIZE):
    """Build a batched tf.data pipeline over MNIST TFRecord files.

    Every serialized example is parsed into a flat float32 image of
    ``img_rows * img_cols`` values and an int64 label. The image is divided
    by 255 and reshaped to ``(img_rows, img_cols, 1)``; the label is cast to
    int32 and left as a class index (no one-hot encoding).

    Relies on the module-level ``img_rows``, ``img_cols`` and
    ``SHUFFLE_SIZE``.

    Args:
        filenames: TFRecord file path(s) passed to ``tf.data.TFRecordDataset``.
        BATCH_SIZE: Number of examples per emitted batch.

    Returns:
        A ``tf.data.Dataset`` of ``(image, label)`` batches that repeats the
        underlying records 500 * 100 times.
    """

    def _parser(serialized_example):
        """Parses a single tf.Example into image and label tensors."""
        features = tf.io.parse_single_example(
            serialized_example,
            features={
                'image_raw': tf.io.FixedLenFeature([img_rows * img_cols], tf.float32),
                'label': tf.io.FixedLenFeature([], tf.int64),
            })
        return features['image_raw'], features['label']

    def _normalize_img(image, label):
        """Scales pixel values by 1/255 and casts the label to int32."""
        image = tf.cast(image, tf.float32) / 255
        label = tf.cast(label, tf.int32)
        return image, label

    def _reshape_img(image, label):
        """Reshapes the flat image vector to (img_rows, img_cols, 1)."""
        # Use the shared dimensions instead of a hard-coded 28x28 so the
        # reshape stays consistent with the parser's feature length.
        image = tf.reshape(image, [img_rows, img_cols, 1])
        return image, label

    # Import MNIST data
    dataset = tf.data.TFRecordDataset(filenames)

    # Parse, normalize and reshape every record.
    dataset = dataset.map(_parser, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(
        _normalize_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    dataset = dataset.map(
        _reshape_img, num_parallel_calls=tf.data.experimental.AUTOTUNE)

    # Cache BEFORE repeating: caching the already-repeated dataset would try
    # to hold all 500 * 100 passes in memory instead of a single pass over
    # the preprocessed records.
    dataset = dataset.cache()
    dataset = dataset.repeat(500 * 100)
    dataset = dataset.shuffle(SHUFFLE_SIZE)
    dataset = dataset.batch(BATCH_SIZE)
    return dataset

In [28]:
# Build the validation pipeline and print the shape of the label tensor of
# its first batch.
val_data = input_fn(validation_filenames, BATCH_SIZE)
it = iter(val_data)
first_val_batch = next(it)
print(first_val_batch[1].numpy().shape)

(32,)


In [9]:
def wrapper():
    """Train a small CNN on MNIST under ``tf.distribute.MirroredStrategy``.

    The function takes no arguments and returns nothing; it is handed to the
    hops experiment launcher (``experiment.mirrored(wrapper)``). All imports
    are local to the function -- presumably so it is self-contained when the
    launcher runs it; TODO confirm.

    TensorBoard summaries and model checkpoints are written to
    ``tensorboard.logdir()``.
    """
    import tensorflow_datasets as tfds
    import tensorflow as tf
    from hops import tensorboard
    from hops import hdfs
    import pydoop.hdfs as pydoop

    #strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
    strategy = tf.distribute.MirroredStrategy()

    BUFFER_SIZE = 10000
    # Per-replica batch size; the global batch size is derived from it below.
    BATCH_SIZE = 64

    # TFDS expects the dataset *root* directory and appends "mnist/3.0.0"
    # itself. Passing ".../TourData/mnist/3.0.0" would make it look in a
    # nested ".../mnist/3.0.0/mnist/3.0.0" directory and download the data
    # again.
    dataset_dir = hdfs.project_path()
    data_dir = pydoop.path.abspath(dataset_dir + "TourData/")

    def make_datasets_unbatched():
        """Return the scaled, cached, shuffled and repeated MNIST train split."""

        # Scaling MNIST data from [0, 255] to [0., 1.]
        def scale(image, label):
            image = tf.cast(image, tf.float32)
            image /= 255
            return image, label

        datasets, _ = tfds.load(name='mnist',
                                with_info=True,
                                as_supervised=True,
                                download=True,
                                data_dir=data_dir)

        # repeat() is required because fit() below is driven by
        # steps_per_epoch: 150 epochs * 50 steps need far more batches than
        # one pass over the training split provides, and Keras stops
        # training early once a finite dataset is exhausted.
        return datasets['train'].map(scale).cache().shuffle(BUFFER_SIZE).repeat()

    def build_and_compile_cnn_model():
        """Build and compile a small CNN that outputs 10 class logits."""
        model = tf.keras.Sequential([
            tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)),
            tf.keras.layers.MaxPooling2D(),
            tf.keras.layers.Flatten(),
            tf.keras.layers.Dense(64, activation='relu'),
            tf.keras.layers.Dense(10)
        ])
        model.compile(
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            optimizer=tf.keras.optimizers.SGD(learning_rate=0.001),
            metrics=['accuracy'])
        return model

    # `tf.data.Dataset.batch` expects the global batch size, so scale the
    # per-replica batch size by the number of replicas the strategy actually
    # runs instead of a hard-coded worker count.
    GLOBAL_BATCH_SIZE = BATCH_SIZE * strategy.num_replicas_in_sync

    # Creation of dataset needs to be after the strategy object is
    # instantiated. The dataset is built once and reused for the debug print
    # and for training.
    train_datasets = make_datasets_unbatched().batch(GLOBAL_BATCH_SIZE)

    print ("#############################")
    print (train_datasets)
    print ("#############################")

    with strategy.scope():
        # Model building/compiling need to be within `strategy.scope()`.
        multi_worker_model = build_and_compile_cnn_model()

    # Keras' `model.fit()` trains the model with specified number of epochs and
    # number of steps per epoch. Note that the numbers here are for demonstration
    # purposes only and may not sufficiently produce a model with good quality.
    tb_callback = tf.keras.callbacks.TensorBoard(log_dir=tensorboard.logdir())
    # NOTE(review): the checkpoint filepath is the TensorBoard log directory
    # itself -- confirm this is the intended checkpoint location.
    model_callback = tf.keras.callbacks.ModelCheckpoint(tensorboard.logdir())

    multi_worker_model.fit(x=train_datasets, epochs=150, steps_per_epoch=50, callbacks=[tb_callback, model_callback])

In [10]:
from hops import experiment
# Alternative launch, currently disabled. What `evaluator=True` changes is not
# visible in this notebook -- TODO confirm against the hops documentation.
#experiment.mirrored(wrapper, evaluator=True)
# Hand the training function defined above to the hops mirrored experiment
# launcher. The cell output below shows an experiment path and a dict with a
# 'log' entry being returned.
experiment.mirrored(wrapper)

Finished Experiment 

('hdfs://10.128.0.3:8020/Projects/demo_deep_learning_admin000/Experiments/application_1586507110993_0012_5', {'log': 'Experiments/application_1586507110993_0012_5/chief_0_output.log'})