In [1]:
# !pip install tensorboard

# https://www.tensorflow.org/install/pip
# !pip install tensorflow-cpu

https://www.tensorflow.org/tensorboard/get_started

In [2]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

In [3]:
import tensorflow as tf
import datetime

In [12]:
import os 
log_dir = os.path.join('logs','gradient_tape', datetime.datetime.now().strftime('%Y%m%d-%H%M%S'))
print(log_dir)


!powershell rm -Force -R logs


os.makedirs(log_dir, exist_ok=True)
!powershell dir $log_dir

logs\gradient_tape\20200323-143745


In [13]:
mnist = tf.keras.datasets.mnist

(x_train,y_train),(x_test,y_test) = mnist.load_data()
x_train, x_test = x_train/255.0, x_test/255.0

def create_model():
    return tf.keras.Sequential([
        tf.keras.layers.Flatten(input_shape = (28,28)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10,activation='softmax')
    ])

train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
test_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))

train_dataset = train_dataset.shuffle(60000).batch(64)
test_dataset = test_dataset.batch(64)

In [14]:
model = create_model()

loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Define our metrics
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('train_accuracy')
test_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy('test_accuracy')

def train_step(model, optimizer, x_train, y_train):
  with tf.GradientTape() as tape:
    predictions = model(x_train, training=True)
    loss = loss_object(y_train, predictions)
  grads = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(grads, model.trainable_variables))

  train_loss(loss)
  train_accuracy(y_train, predictions)
    
def test_step(model, x_test, y_test):
  predictions = model(x_test)
  loss = loss_object(y_test, predictions)

  test_loss(loss)
  test_accuracy(y_test, predictions)

In [17]:
train_log_dir = os.path.join(log_dir,'train')
test_log_dir = os.path.join(log_dir,'test')
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)


model = create_model() # reset our model

EPOCHS = 5

for epoch in range(EPOCHS):
  for (x_train, y_train) in train_dataset:
    train_step(model, optimizer, x_train, y_train)
  with train_summary_writer.as_default():
    tf.summary.scalar('loss', train_loss.result(), step=epoch)
    tf.summary.scalar('accuracy', train_accuracy.result(), step=epoch)

  for (x_test, y_test) in test_dataset:
    test_step(model, x_test, y_test)
  with test_summary_writer.as_default():
    tf.summary.scalar('loss', test_loss.result(), step=epoch)
    tf.summary.scalar('accuracy', test_accuracy.result(), step=epoch)
  
  template = 'Epoch {}, Loss: {}, Accuracy: {}, Test Loss: {}, Test Accuracy: {}'
  print (template.format(epoch+1,
                         train_loss.result(), 
                         train_accuracy.result()*100,
                         test_loss.result(), 
                         test_accuracy.result()*100))

  # Reset metrics every epoch
  train_loss.reset_states()
  test_loss.reset_states()
  train_accuracy.reset_states()
  test_accuracy.reset_states()



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



Epoch 1, Loss: 0.24486394226551056, Accuracy: 92.85166931152344, Test Loss: 0.12080260366201401, Test Accuracy: 96.3800048828125
Epoch 2, Loss: 0.10387572646141052, Accuracy: 96.89166259765625, Test Loss: 0.08313678950071335, Test Accuracy: 97.3699951171875
Epoch 3, Loss: 0.0713011771440506, Accuracy: 97.8516616821289, Test Loss: 0.06803753226995468, Test Accuracy: 97.83999633789062
Epoch 4, Loss: 0.0551285520195961, Accuracy: 98.30833435058594, Test Loss: 0.06910602003335953, Test Accuracy: 97.93999481201172
Epoch 5, Loss: 0.04369373992085457, Accuracy: 98.60333251953125, Test Loss: 0.060588810592889786, Test Accuracy: 98.13999938964844


# Start tensorboard

## Issues and resolution
Observed that once tensorflow goes into a bad state, it throws problem everytime after because 

1. It does not kill previous processes automatically
2. It uses previous states while starting the dashboard

Steps to mitigate the bad state:

1. kill all running tensorboard processes.
2. Clear previous tensorboard state.



If it times out in jupyter, then go to http://localhost:6006/#scalars in the browser and check

In [21]:
! powershell "echo 'checking for existing tensorboard processes'"
! powershell "ps | Where-Object {$_.ProcessName -eq 'tensorboard'}"
! powershell "echo 'killing existing tensorboard processes'"
! powershell "ps | Where-Object {$_.ProcessName -eq 'tensorboard'}| %{kill $_}"

! powershell "echo 'cleaning tensorboard temp dir'"
! powershell "rm $env:TEMP\.tensorboard-info\*"

! powershell "ps | Where-Object {$_.ProcessName -eq 'tensorboard'}"


%tensorboard --logdir="logs" --host localhost #quotes are important in windows
# If it times out in jupyter, then go to http://localhost:6006/#scalars in the browser and check

checking for existing tensorboard processes

Handles  NPM(K)    PM(K)      WS(K)     CPU(s)     Id  SI ProcessName          
-------  ------    -----      -----     ------     --  -- -----------          
     87       6      940       4056       0.02   1760   1 tensorboard          


killing existing tensorboard processes
cleaning tensorboard temp dir


ERROR: Timed out waiting for TensorBoard to start. It may still be running as pid 20828.

## References:

1. No dashboard active error:

https://stackoverflow.com/questions/47113472/tensorboard-error-no-dashboards-are-active-for-current-data-set


2. [Windows] tensorboard - needs to be started from same drive as logdir 

https://github.com/tensorflow/tensorflow/issues/7856

3. localhost refused to connect.

https://github.com/tensorflow/tensorboard/issues/2481
