In [1]:
from enum import Enum
import numpy as np
import os
import time

import tensorflow as tf
import tensorflow.keras as keras

from sklearn.model_selection import train_test_split

%load_ext tensorboard

2022-07-13 20:47:17.589017: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
# List the physical GPU devices visible to TensorFlow.
tf.config.list_physical_devices("GPU")

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# List the logical GPU devices created by the TensorFlow runtime.
tf.config.list_logical_devices("GPU")

2022-07-13 20:47:18.908961: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[LogicalDevice(name='/device:GPU:0', device_type='GPU')]

2022-07-13 20:47:19.326109: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1532] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22307 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:68:00.0, compute capability: 8.6


In [4]:
# Query the inter-op thread setting; per the TensorFlow docs, 0 means the system picks a value.
tf.config.threading.get_inter_op_parallelism_threads()

0

In [5]:
# Query the intra-op thread setting; per the TensorFlow docs, 0 means the system picks a value.
tf.config.threading.get_intra_op_parallelism_threads()

0

In [6]:
# Load Fashion-MNIST through the Keras dataset helper; it returns (train, test) pairs of
# (features, labels).
fashion_mnist = tf.keras.datasets.fashion_mnist

(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

In [7]:
# Hold out 5000 training examples for validation. The fixed random_state makes the split
# (and therefore anything derived from it) reproducible across kernel restarts.
# NOTE: this cell rebinds X_train / y_train, so re-running it without first re-running the
# cell that loads the data shrinks the training set by another 5000 examples.
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=5000, random_state=42)

In [8]:
# Check the raw array dtypes; preprocess_encoded_example decodes both fields as tf.uint8,
# so these must match.
print(X_train.dtype)
print(y_train.dtype)

uint8
uint8


In [9]:
class SplitType(Enum):
  """Dataset split; the member name is embedded in the TFRecord file names."""
  train = 1
  validation = 2

def base_path():
  """Return the dataset directory path, relative to the current directory."""
  dataset_dir = "fashion_mnist_dataset"
  return os.path.join(os.curdir, dataset_dir)

def filepath(split_type, chunk):
  """Return the path of one TFRecord chunk file for the given split.

  Args:
    split_type: object whose `name` attribute identifies the split.
    chunk: chunk identifier embedded in the file name.
  """
  file_name = f"fashion_mnist_{split_type.name}_{chunk}.tfrecord"
  return os.path.join(base_path(), file_name)

def preprocess_encoded_example(record_bytes):
  """Decode one serialized tf.train.Example into an (image, label) pair.

  Args:
    record_bytes: serialized Example with string features "image" and "label",
      each holding a serialized uint8 tensor.

  Returns:
    Tuple `(x, y)`: `x` is the uint8 image constrained to shape (28, 28); `y` is
    the scalar label cast to float32.
  """
  feature_spec = {
      "image": tf.io.FixedLenFeature([], tf.string, default_value=""),
      "label": tf.io.FixedLenFeature([], tf.string, default_value=""),
  }
  fields = tf.io.parse_single_example(record_bytes, feature_spec)

  # Deserialize each tensor and pin its expected shape.
  image = tf.ensure_shape(tf.io.parse_tensor(fields["image"], tf.uint8), (28, 28))
  label = tf.ensure_shape(tf.io.parse_tensor(fields["label"], tf.uint8), ())

  # The image is returned as uint8; only the label is converted.
  return image, tf.cast(label, dtype=tf.float32)

def to_example(x, y):
  """Pack one image/label pair into a tf.train.Example.

  Each value is serialized with tf.io.serialize_tensor and stored as a
  single-entry bytes feature under the keys "image" and "label".
  """
  def _serialized_bytes_feature(value):
    # .numpy() extracts the serialized bytes from the eager string tensor.
    payload = tf.io.serialize_tensor(tf.constant(value)).numpy()
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[payload]))

  feature_map = {
      "image": _serialized_bytes_feature(x),
      "label": _serialized_bytes_feature(y),
  }
  return tf.train.Example(features=tf.train.Features(feature=feature_map))

def write_np_array(X, y, split_type, split_size=10000):
  """Write features and labels to chunked TFRecord files under `base_path()`.

  Examples are written in order, at most `split_size` per file, to the paths
  `filepath(split_type, chunk)` for chunk = 0, 1, ...; existing files at those
  paths are overwritten. Higher-numbered chunk files left behind by an earlier
  call that produced more chunks are deleted, so the directory never mixes data
  from two different writes of the same split.

  Args:
    X: per-example features, sliceable and with a length (e.g. a NumPy array).
    y: labels, same length as `X`.
    split_type: `SplitType` member naming the split.
    split_size: maximum number of examples per file; must be positive.

  Raises:
    ValueError: if `X` and `y` differ in length or `split_size` is not positive.
  """
  n = len(X)
  # Explicit checks instead of `assert`, which is stripped under `python -O`.
  if n != len(y):
    raise ValueError(f"X and y must have the same length, got {n} and {len(y)}")
  if split_size <= 0:
    raise ValueError(f"split_size must be positive, got {split_size}")

  os.makedirs(base_path(), exist_ok=True)
  n_chunks = 0
  for chunk, begin in enumerate(range(0, n, split_size)):
    end = min(begin + split_size, n)
    with tf.io.TFRecordWriter(filepath(split_type, chunk)) as writer:
      for features, label in zip(X[begin:end], y[begin:end]):
        writer.write(to_example(features, label).SerializeToString())
    n_chunks = chunk + 1

  # Chunks are always numbered contiguously from 0, so any leftovers from a previous,
  # larger write start exactly at index n_chunks.
  stale_chunk = n_chunks
  while os.path.exists(filepath(split_type, stale_chunk)):
    os.remove(filepath(split_type, stale_chunk))
    stale_chunk += 1

def read_tf_dataset(split_type, repeat=1, n_reader_threads=16, read_buffer_bytes=20*(2**20), shuffle_buffer_size=20*(2**20), n_parse_threads=16, batch_size=32):
  """Build a shuffled, batched tf.data pipeline over one split's TFRecord files.

  Files are taken from `base_path()` and must be named
  `fashion_mnist_<split name>_<chunk>.tfrecord`.

  Args:
    split_type: `SplitType` member selecting which files to read.
    repeat: number of passes over the data, forwarded to `Dataset.repeat`.
    n_reader_threads: number of files read in parallel (`num_parallel_reads`).
    read_buffer_bytes: read buffer size in bytes for the TFRecord reader.
    shuffle_buffer_size: shuffle buffer size, forwarded to `Dataset.shuffle`.
      Note this counts elements (records), not bytes.
    n_parse_threads: parallelism of the record-parsing `map`.
    batch_size: number of examples per batch.

  Returns:
    A `tf.data.Dataset` yielding `(images, labels)` batches as produced by
    `preprocess_encoded_example`.

  Raises:
    FileNotFoundError: if the directory is missing or holds no file for the split.
  """
  directory = base_path()
  prefix = f"fashion_mnist_{split_type.name}_"
  # Match the exact naming pattern rather than substrings, and sort because
  # os.listdir returns entries in arbitrary order.
  paths = sorted(
      os.path.join(directory, name)
      for name in os.listdir(directory)
      if name.startswith(prefix) and name.endswith(".tfrecord")
  )
  if not paths:
    raise FileNotFoundError(
        f"no TFRecord files for split '{split_type.name}' found in {directory}"
    )

  records = tf.data.TFRecordDataset(
      paths, num_parallel_reads=n_reader_threads, buffer_size=read_buffer_bytes
  )
  return (
      records
      .shuffle(shuffle_buffer_size, reshuffle_each_iteration=True)
      .repeat(repeat)
      .map(preprocess_encoded_example, num_parallel_calls=n_parse_threads)
      .batch(batch_size)
      .prefetch(tf.data.AUTOTUNE)
  )

In [10]:
# Serialize both splits to TFRecord chunk files, using write_np_array's default chunk size.
write_np_array(X_train, y_train, SplitType.train)
write_np_array(X_val, y_val, SplitType.validation)

In [11]:
# Show the TFRecord files present in the dataset directory, sorted by name.
print("\n".join(sorted(p for p in os.listdir(base_path()) if ".tfrecord" in p)))

fashion_mnist_train_0.tfrecord
fashion_mnist_train_1.tfrecord
fashion_mnist_train_2.tfrecord
fashion_mnist_train_3.tfrecord
fashion_mnist_train_4.tfrecord
fashion_mnist_train_5.tfrecord
fashion_mnist_validation_0.tfrecord


In [12]:
train_dataset = read_tf_dataset(SplitType.train)
val_dataset = read_tf_dataset(SplitType.validation)

# Sanity check: pull one batch from each split (training first, then validation) and
# print its shapes and pixel statistics.
for dataset in (train_dataset, val_dataset):
  for (x, y) in dataset.take(1):
    print("x", x.shape)
    print("y", y.shape)
    print(np.min(x), np.max(x), np.mean(x), np.std(x))

x (32, 28, 28)
y (32,)
0 255 77.888671875 94.53973029944581
x (32, 28, 28)
y (32,)
0 255 72.70244738520408 92.46514006533015


In [13]:
def base_log_dir():
  """Return the root log directory path, relative to the current directory."""
  log_dir_name = ".tflogs"
  return os.path.join(os.curdir, log_dir_name)

def get_tensorboard_cb(profile_batch=0):
  """Create a TensorBoard callback that logs to a timestamped run directory.

  The run directory is `<base_log_dir()>/run_<timestamp>`. A tf.summary file writer
  for that directory is also created and installed as the default writer.

  Args:
    profile_batch: forwarded unchanged to `keras.callbacks.TensorBoard`.

  Returns:
    The configured `keras.callbacks.TensorBoard` instance.
  """
  run_dir = os.path.join(base_log_dir(), time.strftime("run_%Y_%m_%d_%H_%M_%S"))
  tf.summary.create_file_writer(run_dir).set_as_default()
  return keras.callbacks.TensorBoard(run_dir, profile_batch=profile_batch)

In [14]:
train_dataset = read_tf_dataset(SplitType.train)
val_dataset = read_tf_dataset(SplitType.validation)

# The input shape is declared on the first layer, because Sequential only uses
# `input_shape` from its first layer.
model = keras.models.Sequential(
    [
     keras.layers.Normalization(input_shape=X_train.shape[1:], name="normalization", axis=None),
     keras.layers.Flatten(name="flatten"),
     keras.layers.Dense(300, activation="relu", name="dense1"),
     keras.layers.Dense(100, activation="relu", name="dense2"),
     keras.layers.Dense(10, activation="softmax", name="dense3"),
    ]
)

normalization_layer = model.get_layer("normalization")

# Fit the normalization statistics on 100 batches of training features (labels dropped).
normalization_layer.adapt(train_dataset.take(100).map(lambda X, _: X))

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Nadam(),
    metrics=["accuracy"],
)

# Keep the History object so the recorded metrics can be inspected afterwards,
# instead of leaving only its repr as the cell output.
history = model.fit(
    x=train_dataset,
    callbacks=[get_tensorboard_cb(profile_batch="100, 110")],
    validation_data=val_dataset,
    epochs=1,
)

2022-07-13 20:47:38.775522: I tensorflow/core/profiler/lib/profiler_session.cc:99] Profiler session initializing.
2022-07-13 20:47:38.775541: I tensorflow/core/profiler/lib/profiler_session.cc:114] Profiler session started.
2022-07-13 20:47:38.775563: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1665] Profiler found 1 GPUs
2022-07-13 20:47:38.934822: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session tear down.
2022-07-13 20:47:38.937266: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1799] CUPTI activity buffer flushed


     31/Unknown - 2s 5ms/step - loss: 1.0185 - accuracy: 0.6613 

2022-07-13 20:47:40.946696: I tensorflow/stream_executor/cuda/cuda_blas.cc:1786] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


     98/Unknown - 2s 6ms/step - loss: 0.7712 - accuracy: 0.7347

2022-07-13 20:47:41.563567: I tensorflow/core/profiler/lib/profiler_session.cc:99] Profiler session initializing.
2022-07-13 20:47:41.563621: I tensorflow/core/profiler/lib/profiler_session.cc:114] Profiler session started.


    108/Unknown - 3s 9ms/step - loss: 0.7552 - accuracy: 0.7396

2022-07-13 20:47:41.943045: I tensorflow/core/profiler/lib/profiler_session.cc:66] Profiler session collecting data.
2022-07-13 20:47:41.946428: I tensorflow/core/profiler/internal/gpu/cupti_tracer.cc:1799] CUPTI activity buffer flushed
2022-07-13 20:47:42.019737: I tensorflow/core/profiler/internal/gpu/cupti_collector.cc:521]  GpuTracer has collected 2208 callback api events and 2177 activity events. 
2022-07-13 20:47:42.054892: I tensorflow/core/profiler/lib/profiler_session.cc:126] Profiler session tear down.
2022-07-13 20:47:42.087294: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42

2022-07-13 20:47:42.115833: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for trace.json.gz to ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42/desktop.trace.json.gz


    140/Unknown - 3s 10ms/step - loss: 0.7247 - accuracy: 0.7440

2022-07-13 20:47:42.152245: I tensorflow/core/profiler/rpc/client/save_profile.cc:136] Creating directory: ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42

2022-07-13 20:47:42.156985: I tensorflow/core/profiler/rpc/client/save_profile.cc:142] Dumped gzipped tool data for memory_profile.json.gz to ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42/desktop.memory_profile.json.gz
2022-07-13 20:47:42.157716: I tensorflow/core/profiler/rpc/client/capture_profile.cc:251] Creating directory: ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42
Dumped tool data for xplane.pb to ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42/desktop.xplane.pb
Dumped tool data for overview_page.pb to ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42/desktop.overview_page.pb
Dumped tool data for input_pipeline.pb to ./.tflogs/run_2022_07_13_20_47_38/plugins/profile/2022_07_13_20_47_42/desktop.input_pipeline.pb
Dum



<keras.callbacks.History at 0x7fe12e5144c0>