# Example of how to use TensorBoard in JupyterLab

https://youtu.be/KsyPZcoL_jY

This example is also integrated with the class video for week 3.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tqdm.notebook import tqdm
# Print floats in fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

# Download (or load from cache) the MNIST train/test split
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# One-hot encode the training labels
y_input = tf.keras.utils.to_categorical(y_train)
# Flatten every training image into a 784-long row, divide by 255 to rescale
# the pixel values (presumably 0-255 -> 0-1), and store the result as float32
x_input = (x_train.reshape(len(x_train), 784) / 255.0).astype(np.float32)

In [2]:
# Trainable parameters of the softmax-regression model, initialised to zero:
# a 784x10 weight matrix and a length-10 bias vector
W = tf.Variable(tf.zeros([784, 10]), name='W')
b = tf.Variable(tf.zeros([10]), name='b')


def y_pred(x):
    """Return the predicted probability of each class for every row of `x`.

    `x` is matrix-multiplied by `W` (so it needs 784 columns), the bias `b`
    is added, and the result is softmax-normalised.
    """
    return tf.nn.softmax(tf.matmul(x, W) + b)


@tf.function
def loss(x, y):
    """Return the mean cross-entropy between targets `y` and the prediction for `x`.

    `y` holds one target distribution per row (one-hot labels in this
    notebook). The loss is computed from the logits with
    `tf.nn.softmax_cross_entropy_with_logits` instead of
    `-sum(y * log(softmax(logits)))`: taking the log of an already-normalised
    probability gives `log(0) = -inf`, and then NaN through `0 * -inf`, as
    soon as any class probability underflows to zero.
    """
    logits = tf.matmul(x, W) + b
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example)

2023-05-30 17:03:02.567311: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-30 17:03:03.091720: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1525] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 14660 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0001:00:00.0, compute capability: 7.5


In [3]:
# %load_ext tensorboard                   # this is needed for Colab only

## Static graph

In [4]:
# Log directory for TensorBoard on JupyterHub, and a writer that targets it
logdir = '/home/jovyan/logs'
writer = tf.summary.create_file_writer(logdir=logdir)

# Tracing is bracketed by trace_on() ... trace_export()
tf.summary.trace_on()
# Invoke the tf.function exactly once while tracing is active;
# this call is what compiles the graph
z = loss(x_input, y_input)
# Export the recorded trace through the writer
with writer.as_default():
    tf.summary.trace_export(name='graph', step=0)  # step = training iteration

Check the graph in TensorBoard.

## Now let's see how the loss is changing over time

In [5]:
# Training configuration for the plain gradient-descent run

# number of optimisation steps
train_steps = 500

# step size (learning rate)
lr = 1e-1

# stochastic gradient descent optimizer using that learning rate
optimizer = tf.optimizers.SGD(learning_rate=lr)

In [6]:
# Summaries emitted inside this context manager are sent to `writer`
with writer.as_default():
    for step in tqdm(range(train_steps)):
        # Record the forward pass so the loss can be differentiated
        with tf.GradientTape() as grad_tape:
            current_loss = loss(x_input, y_input)
        gradients = grad_tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(zip(gradients, [W, b]))
        # Every 20th step, write the scalar loss to the TensorBoard summary
        if step % 20 == 0:
            tf.summary.scalar('loss', current_loss, step=step)

  0%|          | 0/500 [00:00<?, ?it/s]

### Same with an Adam optimizer

In [11]:
# IPython help syntax: show the signature and docstring of the Adam optimizer
?tf.optimizers.Adam

Init signature:
tf.optimizers.Adam(
    learning_rate=0.001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-07,
    amsgrad=False,
    name='Adam',
    **kwargs,
)
Docstring:
Optimizer that implements the Adam algorithm.

Adam optimization is a stochastic gradient descent method that is based on
adaptive estimation of first-order and second-order moments.

According to
[Kingma et al., 2014](http://arxiv.org/abs/1412.6980)

In [8]:
# Training configuration for the Adam run

# number of optimisation steps
train_steps = 500

# step size (learning rate)
lr = 1e-3

# Adam optimizer using that learning rate
optimizer = tf.optimizers.Adam(learning_rate=lr)

# Fresh zero-initialised parameters so this run starts from scratch.
# `y_pred` and `loss` are declared again alongside them so that the
# tf.function is traced against these new variables.
W = tf.Variable(tf.zeros([784, 10]), name='W')
b = tf.Variable(tf.zeros([10]), name='b')


def y_pred(x):
    """Return the predicted probability of each class for every row of `x`.

    `x` is matrix-multiplied by `W` (so it needs 784 columns), the bias `b`
    is added, and the result is softmax-normalised.
    """
    return tf.nn.softmax(tf.matmul(x, W) + b)


@tf.function
def loss(x, y):
    """Return the mean cross-entropy between targets `y` and the prediction for `x`.

    `y` holds one target distribution per row (one-hot labels in this
    notebook). The loss is computed from the logits with
    `tf.nn.softmax_cross_entropy_with_logits` instead of
    `-sum(y * log(softmax(logits)))`: taking the log of an already-normalised
    probability gives `log(0) = -inf`, and then NaN through `0 * -inf`, as
    soon as any class probability underflows to zero.
    """
    logits = tf.matmul(x, W) + b
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example)

In [9]:
# Keep a handle on the progress bar so its description can show the live loss
pbar = tqdm(range(train_steps))
# Summaries emitted inside this context manager are sent to `writer`
with writer.as_default():
    for step in pbar:
        # Record the forward pass so the loss can be differentiated
        with tf.GradientTape() as grad_tape:
            current_loss = loss(x_input, y_input)
        gradients = grad_tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(zip(gradients, [W, b]))
        # Log the scalar loss at every step under its own tag
        tf.summary.scalar('lossadam', current_loss, step=step)
        pbar.set_description_str(str(current_loss.numpy()))
        

  0%|          | 0/500 [00:00<?, ?it/s]

## Output a slice of the weight values, which we can visualise in two dimensions

In [12]:
# Training configuration for the run that logs weight images

# number of optimisation steps
train_steps = 500

# step size (learning rate)
lr = 1e-1

# stochastic gradient descent optimizer using that learning rate
optimizer = tf.optimizers.SGD(learning_rate=lr)

# Fresh zero-initialised parameters so this run starts from scratch.
# `y_pred` and `loss` are declared again alongside them so that the
# tf.function is traced against these new variables.
W = tf.Variable(tf.zeros([784, 10]), name='W')
b = tf.Variable(tf.zeros([10]), name='b')


def y_pred(x):
    """Return the predicted probability of each class for every row of `x`.

    `x` is matrix-multiplied by `W` (so it needs 784 columns), the bias `b`
    is added, and the result is softmax-normalised.
    """
    return tf.nn.softmax(tf.matmul(x, W) + b)


@tf.function
def loss(x, y):
    """Return the mean cross-entropy between targets `y` and the prediction for `x`.

    `y` holds one target distribution per row (one-hot labels in this
    notebook). The loss is computed from the logits with
    `tf.nn.softmax_cross_entropy_with_logits` instead of
    `-sum(y * log(softmax(logits)))`: taking the log of an already-normalised
    probability gives `log(0) = -inf`, and then NaN through `0 * -inf`, as
    soon as any class probability underflows to zero.
    """
    logits = tf.matmul(x, W) + b
    per_example = tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits)
    return tf.reduce_mean(per_example)

In [9]:
# Summaries emitted inside this context manager are sent to `writer`
with writer.as_default():
    for i in range(train_steps):
        # Record the forward pass so the loss can be differentiated
        with tf.GradientTape() as tape:
            current_loss = loss(x_input, y_input)
        gradients = tape.gradient(current_loss, [W, b])
        optimizer.apply_gradients(zip(gradients, [W, b]))
        # Every 20th step, log the first column of W as a 28x28 image
        if i % 20 == 0:
            # tf.summary.image expects [batch, height, width, channels];
            # the two extra 1s are the batch and channel dimensions
            W_slice = tf.reshape(W[:, 0], [1, 28, 28, 1])
            # tf.summary.image clips float data to [0, 1]. The weights are
            # signed and small, so without rescaling the negative half is
            # lost and the rest is nearly black. Min-max rescale to [0, 1];
            # divide_no_nan yields 0 if every weight is identical.
            w_min = tf.reduce_min(W_slice)
            w_max = tf.reduce_max(W_slice)
            W_image = tf.math.divide_no_nan(W_slice - w_min, w_max - w_min)
            tf.summary.image('image', W_image, step=i)


In [None]:
# We can also use TensorBoard to visualise an image while the model is training