# Experiment Workbench

In [1]:
import os, sys, math, datetime
import pathlib
import numpy as np
import random
from matplotlib import pyplot as plt
import PIL
import PIL.Image

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D, BatchNormalization, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from dotenv import load_dotenv
import boto3
import wandb
from wandb.keras import WandbCallback
import mlflow


In [2]:

print(f"Tensorflow version {tf.__version__}")
AUTOTUNE = tf.data.AUTOTUNE

# Stop right away when TensorFlow reports no GPU device name
device_name = tf.test.gpu_device_name()
if device_name:
    print(f"Found GPU at: {device_name}")
else:
    raise SystemError('GPU device not found')

Tensorflow version 2.10.0
Found GPU at: /device:GPU:0


In [3]:
# Load the TensorBoard notebook extension (this enables the %tensorboard magic)
%load_ext tensorboard

In [4]:
# Name under which W&B records this notebook
os.environ['WANDB_NOTEBOOK_NAME'] = 'Experiment Workbench'

# Training hyperparameters
BATCH_SIZE = 128
EPOCHS = 5
LOGGING_STEPS = 10
LR = 0.001

# Bookkeeping labels for the run
PROJECT = "Tiny CNN"
MODELNAME = "Simple_Net"
EXPERIMENT = "CIFAR10"  # the data cell loads CIFAR-10; this label previously said "MNIST"
RUN_NAME = "Run_8"

# Timestamped, per-model log directory: logs/<MODELNAME>/<YYYYmmdd-HHMMSS>
logdir = os.path.join("logs", MODELNAME, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))

# Root directory passed to wandb.tensorboard.patch() inside train_model
root_logdir = os.getcwd()

# Preparing the data

In [5]:
# Fetch the CIFAR-10 train/test split bundled with Keras
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Sanity-check the image and label array shapes of both splits
assert (x_train.shape, y_train.shape) == ((50000, 32, 32, 3), (50000, 1))
assert (x_test.shape, y_test.shape) == ((10000, 32, 32, 3), (10000, 1))

# Scale pixel values by 1/255 as float32; labels are left as loaded (no one-hot encoding)
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

# Human-readable class names, one per class
labels = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
classes = len(labels)
input_shape = x_train.shape[1:]

In [6]:
def data_preprocessing(image, label):
    """Return the (image, label) pair unchanged.

    Placeholder hook mapped over the tf.data pipelines; extend it here if
    pre-processing or augmentation is ever needed.
    """
    sample = (image, label)
    return sample

In [7]:

# Build the tf.data input pipelines for training and validation.
# Keras does not shuffle a tf.data.Dataset passed to model.fit, so the training
# examples are shuffled here; the default reshuffle_each_iteration gives every
# epoch a different batch order. The buffer is smaller than the dataset to
# limit the extra memory held by the shuffle.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = (
    train_dataset
    .shuffle(buffer_size=10_000)
    .batch(BATCH_SIZE)
    .map(data_preprocessing)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data keeps its original order so evaluation is repeatable
val_dataset = tf.data.Dataset.from_tensor_slices((x_test, y_test))
val_dataset = val_dataset.batch(BATCH_SIZE).map(data_preprocessing).prefetch(tf.data.AUTOTUNE)


# Define the model

In [8]:
def create_model(input_shape, classes, hidden_units=512, dropout_rate=0.2):
    """Build the (uncompiled) fully connected classifier.

    Architecture: Flatten -> Dense(hidden_units, relu) -> Dropout -> Dense(classes, softmax).

    Args:
        input_shape: shape of one input sample, without the batch dimension.
        classes: number of output classes (width of the softmax layer).
        hidden_units: width of the hidden dense layer; defaults to the original 512.
        dropout_rate: dropout rate after the hidden layer; defaults to the original 0.2.

    Returns:
        A ``tf.keras.models.Sequential`` model.
    """
    return tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=input_shape),
        tf.keras.layers.Dense(hidden_units, activation='relu'),
        tf.keras.layers.Dropout(dropout_rate),
        tf.keras.layers.Dense(classes, activation='softmax'),
    ])

In [9]:
def train_model():
    """Train the model once, logging the run to Weights & Biases and TensorBoard.

    Reads the notebook-level names ``PROJECT``, ``LR``, ``EPOCHS``, ``classes``,
    ``input_shape``, ``labels``, ``root_logdir``, ``train_dataset`` and
    ``val_dataset``.

    Returns:
        tuple: ``(history, model)`` - the value returned by ``model.fit`` and
        the trained Keras model.

    Raises:
        Any exception raised while building or fitting the model is re-raised
        after the W&B run has been closed with a non-zero exit code.
    """
    # solve issue from: https://github.com/wandb/wandb/issues/3536
    # (drop a TensorBoard patch left over from an earlier call before patching again)
    if len(wandb.patched["tensorboard"]) > 0:
        wandb.tensorboard.unpatch()

    # Configure Tensorboard root log directory to read the debugging information
    wandb.tensorboard.patch(root_logdir=root_logdir)

    wandb.init(
        # Set the project where this run will be logged
        project=PROJECT,
        sync_tensorboard=True,
    )

    try:
        # Track hyperparameters and run metadata
        config = wandb.config
        # Recorded for the run only; the rate actually applied is set in create_model
        config.dropout = 0.2
        config.learn_rate = LR
        config.epochs = EPOCHS
        config.classes = classes

        model = create_model(input_shape, classes)
        model.compile(
            # Build the optimizer explicitly so the logged learning rate is the
            # one that is really used (the string 'adam' ignores it).
            optimizer=tf.keras.optimizers.Adam(learning_rate=config.learn_rate),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        # TensorBoard events go into the W&B run directory
        tensorboard_callback = tf.keras.callbacks.TensorBoard(
            log_dir=wandb.run.dir, histogram_freq=1, profile_batch="10, 20")
        wandb_callback = WandbCallback(input_type="image", labels=labels)
        early_stopping = EarlyStopping(monitor="val_accuracy", patience=3)
        checkpoint = ModelCheckpoint("my_tiny_model", save_weights_only=True)

        history = model.fit(
            train_dataset,
            epochs=config.epochs,
            validation_data=val_dataset,
            callbacks=[tensorboard_callback, wandb_callback, checkpoint, early_stopping],
        )
    except BaseException:
        # Close the run even when training fails or is interrupted, and mark it as failed
        wandb.finish(exit_code=1)
        raise

    wandb.finish()
    return history, model


In [10]:
# Run one training session; keep the fit history and the trained model for later cells
history, model = train_model()

[34m[1mwandb[0m: Currently logged in as: [33msusbrock[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1/5
INFO:tensorflow:Assets written to: i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best\assets


[34m[1mwandb[0m: Adding directory to artifact (i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best)... Done. 0.1s


Epoch 2/5
INFO:tensorflow:Assets written to: i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best\assets


[34m[1mwandb[0m: Adding directory to artifact (i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best)... Done. 0.1s


Epoch 3/5
INFO:tensorflow:Assets written to: i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best\assets


[34m[1mwandb[0m: Adding directory to artifact (i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best)... Done. 0.1s


Epoch 4/5
INFO:tensorflow:Assets written to: i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best\assets


[34m[1mwandb[0m: Adding directory to artifact (i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best)... Done. 0.1s


Epoch 5/5
INFO:tensorflow:Assets written to: i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best\assets


[34m[1mwandb[0m: Adding directory to artifact (i:\tinyml\tiny_cnn\wandb\run-20221023_192354-2eukd0t9\files\model-best)... Done. 0.1s




0,1
accuracy,▁▅▆▇█
epoch,▁▃▅▆█
loss,█▄▃▂▁
val_accuracy,▁▄▇█▇
val_loss,█▅▃▁▁

0,1
accuracy,0.40038
best_epoch,4.0
best_val_loss,1.61559
epoch,4.0
loss,1.67666
val_accuracy,0.4194
val_loss,1.61559


In [None]:
def run_training_run(epochs, lr):
    """Train one freshly built model and log it as its own W&B run.

    Args:
        epochs: maximum number of epochs to train for.
        lr: learning rate given to the Adam optimizer.

    Returns:
        tuple: ``(history, model)`` - the value returned by ``model.fit`` and
        the trained Keras model.
    """
    wandb.init(
        project=PROJECT,
        config={"epochs": epochs, "learn_rate": lr, "classes": classes},
    )
    try:
        model = create_model(input_shape, classes)
        model.compile(
            optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )
        history = model.fit(
            train_dataset,
            epochs=epochs,
            validation_data=val_dataset,
            callbacks=[
                WandbCallback(input_type="image", labels=labels),
                EarlyStopping(monitor="val_accuracy", patience=3),
            ],
        )
    finally:
        # Always close the run so the next combination starts a new one
        wandb.finish()
    return history, model


def run_multiple_training_runs(epochs, lrs):
    """Run one training run for every (epoch count, learning rate) combination.

    Args:
        epochs: iterable of epoch counts to try.
        lrs: iterable of learning rates to try.
    """
    for n_epochs in epochs:
        for lr in lrs:
            run_training_run(n_epochs, lr)

# Try different values for the number of epochs and the learning rate
epochs = [100, 120, 140]
lrs = [0.1, 0.01, 0.001, 0.0001]
run_multiple_training_runs(epochs, lrs)